
0. Environment Preparation and Planning

Topology diagram

Planning tables

ID  hostname  IP Address   disk               designation
1   c01       10.10.1.173  sda:20GB;sdb:20GB  quorum-manager
2   c02       10.10.1.174  sda:20GB;sdb:20GB  quorum-manager
3   c03       10.10.1.175  sda:20GB;sdb:20GB  quorum

name             value
Cluster name     3NodeCluster
Primary node     c01
Remote commands  ssh, scp
NSD config file  /etc/gpfs/3node_nsd_input

Kernel

[root@c01 ~]# uname -r
2.6.32-431.el6.x86_64
[root@c01 ~]# cat /etc/redhat-release
CentOS release 6.5 (Final)

1. Installation and Deployment

Perform the following installation steps on all three nodes: c01, c02 and c03.

Disable SELinux

[root@c01 ~]# sed -i "s#SELINUX=enforcing#SELINUX=disabled#g" /etc/selinux/config
[root@c01 ~]# cat /etc/selinux/config |grep SELINUX=
# SELINUX= can take one of these three values:
SELINUX=disabled
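
# Note (not part of the original run): the change in /etc/selinux/config only takes effect
# after a reboot. To relax SELinux immediately in the current session, which will then
# report Permissive rather than Disabled, you can additionally run:
setenforce 0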

Update the hosts file

[root@c01 ~]# echo "10.10.1.173 c01" >> /etc/hosts
[root@c01 ~]# echo "10.10.1.174 c02" >> /etc/hosts
[root@c01 ~]# echo "10.10.1.175 c03" >> /etc/hosts
[root@c01 ~]# cat /etc/hosts
127.0.0.1   localhost localhost.localdomain localhost4 localhost4.localdomain4
::1         localhost localhost.localdomain localhost6 localhost6.localdomain6
10.10.1.173 c01
10.10.1.174 c02
10.10.1.175 c03
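
# Optional sketch (not part of the original run): if root password login over SSH is allowed,
# the finished hosts file can simply be pushed to the other two nodes instead of repeating
# the echo commands there:
scp /etc/hosts c02:/etc/hosts
scp /etc/hosts c03:/etc/hosts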

Prepare the installation package

[root@c01 ~]# yum -y install unzip
[root@c01 ~]# unzip gpfs_install-3.5.zip

# Run the self-extracting installer; --silent accepts the license without prompting
[root@c01 ~]# ./gpfs_install-3.5.0-0_x86_64 --silent

Extracting License Acceptance Process Tool to /usr/lpp/mmfs/3.5 ...
tail -n +429 ./gpfs_install-3.5.0-0_x86_64 | /bin/tar -C /usr/lpp/mmfs/3.5 -xvz --exclude=*rpm --exclude=*tgz 2> /dev/null 1> /dev/null

Installing JRE ...
tail -n +429 ./gpfs_install-3.5.0-0_x86_64 | /bin/tar -C /usr/lpp/mmfs/3.5 --wildcards -xvz  ./ibm-java*tgz 2> /dev/null 1> /dev/null

Invoking License Acceptance Process Tool ...
/usr/lpp/mmfs/3.5/ibm-java-x86_64-60/jre/bin/java -cp /usr/lpp/mmfs/3.5/LAP_HOME/LAPApp.jar com.ibm.lex.lapapp.LAP -l /usr/lpp/mmfs/3.5/LA_HOME -m /usr/lpp/mmfs/3.5 -s /usr/lpp/mmfs/3.5 -t 5

License Agreement Terms accepted.

Extracting Product RPMs to /usr/lpp/mmfs/3.5 ...
tail -n +429 ./gpfs_install-3.5.0-0_x86_64 | /bin/tar -C /usr/lpp/mmfs/3.5 --wildcards -xvz  ./gpfs.base-3.5.0-3.x86_64.rpm ./gpfs.docs-3.5.0-3.noarch.rpm ./gpfs.gpl-3.5.0-3.noarch.rpm ./gpfs.msg.en_US-3.5.0-3.noarch.rpm 2> /dev/null 1> /dev/null

   - gpfs.base-3.5.0-3.x86_64.rpm
   - gpfs.docs-3.5.0-3.noarch.rpm
   - gpfs.gpl-3.5.0-3.noarch.rpm
   - gpfs.msg.en_US-3.5.0-3.noarch.rpm

Removing License Acceptance Process Tool from /usr/lpp/mmfs/3.5 ...

rm -rf  /usr/lpp/mmfs/3.5/LAP_HOME /usr/lpp/mmfs/3.5/LA_HOME

Removing JRE from /usr/lpp/mmfs/3.5 ...

rm -rf /usr/lpp/mmfs/3.5/ibm-java*tgz
[root@c01 ~]# cd /usr/lpp/mmfs/3.5/
[root@c01 3.5]# ls -l
total 12952
-rw-r--r-- 1 root root 12405953 Jan 19  2013 gpfs.base-3.5.0-3.x86_64.rpm
-rw-r--r-- 1 root root   230114 Jan 19  2013 gpfs.docs-3.5.0-3.noarch.rpm
-rw-r--r-- 1 root root   509477 Jan 19  2013 gpfs.gpl-3.5.0-3.noarch.rpm
-rw-r--r-- 1 root root    99638 Jan 19  2013 gpfs.msg.en_US-3.5.0-3.noarch.rpm
drwxr-xr-x 2 root root     4096 Sep 27 06:31 license
-rw-r--r-- 1 root root       39 Sep 27 06:31 status.dat
[root@c01 ~]# yum -y install ksh
...
[root@c01 3.5]# rpm -ivh gpfs.base-3.5.0-3.x86_64.rpm
Preparing...                ########################################### [100%]
   1:gpfs.base              ########################################### [100%]
[root@c01 3.5]# rpm -ivh gpfs.docs-3.5.0-3.noarch.rpm
Preparing...                ########################################### [100%]
   1:gpfs.docs              ########################################### [100%]
[root@c01 3.5]# rpm -ivh gpfs.gpl-3.5.0-3.noarch.rpm
Preparing...                ########################################### [100%]
   1:gpfs.gpl               ########################################### [100%]
[root@c01 3.5]# rpm -ivh gpfs.msg.en_US-3.5.0-3.noarch.rpm
Preparing...                ########################################### [100%]
   1:gpfs.msg.en_US         ########################################### [100%]

Build and install the GPL layer binaries

[root@c01 ~]# cd /usr/lpp/mmfs/src/
[root@c01 src]# yum -y install cpp gcc gcc-c++
...

[root@c01 src]# make LINUX_DISTRIBUTION=REDHAT_AS_LINUX Autoconfig World InstallImages \
2>/tmp/make_err 1>/tmp/make_out

# Verify
[root@c01 src]# ls -al /lib/modules/2.6.32-696.10.2.el6.x86_64/extra/
total 6016
drwxr-xr-x 2 root root    4096 Sep 27 07:08 .
drwxr-xr-x 7 root root    4096 Sep 27 07:08 ..
-rw-r--r-- 1 root root 2894349 Sep 27 07:08 mmfs26.ko
-rw-r--r-- 1 root root 2547296 Sep 27 07:08 mmfslinux.ko
-rw-r--r-- 1 root root  708128 Sep 27 07:08 tracedev.ko

Configure passwordless SSH

This has to be done on every node, which is rather tedious.


# Generate a key pair
[root@c01 ~]# ssh-keygen
Generating public/private rsa key pair.
Enter file in which to save the key (/root/.ssh/id_rsa):
Enter passphrase (empty for no passphrase):
Enter same passphrase again:
Your identification has been saved in /root/.ssh/id_rsa.
Your public key has been saved in /root/.ssh/id_rsa.pub.
The key fingerprint is:
6b:e2:e6:ac:9f:9e:86:5b:74:20:c3:69:76:0d:8d:9a root@c01
The key's randomart image is:
+--[ RSA 2048]----+
|      .o         |
|   . ..o.        |
|    Boo .        |
|   oE+ .         |
|      . S        |
|     . . .       |
|     .o o        |
|    .+o=         |
|    oOO          |
+-----------------+

# A key pair has been generated
[root@c01 ~]# ls .ssh/
id_rsa      id_rsa.pub

# Copy the public key into authorized_keys on each node
[root@c01 .ssh]# ssh-copy-id c01
The authenticity of host 'c01 (10.10.1.173)' can't be established.
RSA key fingerprint is 73:57:3c:94:e3:c8:62:2b:9c:0a:bc:87:a5:07:92:14.
Are you sure you want to continue connecting (yes/no)? yes
Warning: Permanently added 'c01,10.10.1.173' (RSA) to the list of known hosts.
root@c01's password:
Now try logging into the machine, with "ssh 'c01'", and check in:

  .ssh/authorized_keys

to make sure we haven't added extra keys that you weren't expecting.

# The first connection to each host prints a prompt like this:
The authenticity of host 'c01 (10.10.1.173)' can't be established.
RSA key fingerprint is 73:57:3c:94:e3:c8:62:2b:9c:0a:bc:87:a5:07:92:14.
Are you sure you want to continue connecting (yes/no)? 
To skip this prompt, set StrictHostKeyChecking to no in the SSH client configuration:
[root@c01 .ssh]# cat /etc/ssh/ssh_config  |grep HostKey
StrictHostKeyChecking no
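
# Alternatively (a sketch, assuming stock OpenSSH), the option can be limited to these hosts
# in root's per-user client config instead of the global file:
cat >> /root/.ssh/config <<'EOF'
Host c0*
    StrictHostKeyChecking no
EOF
chmod 600 /root/.ssh/config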

[root@c01 ~]# ssh-copy-id c02
Warning: Permanently added 'c02,10.10.1.174' (RSA) to the list of known hosts.
root@c02's password:
Now try logging into the machine, with "ssh 'c02'", and check in:

  .ssh/authorized_keys

to make sure we haven't added extra keys that you weren't expecting.

[root@c01 ~]# ssh-copy-id c03
Warning: Permanently added 'c03,10.10.1.175' (RSA) to the list of known hosts.
root@c03's password:
Now try logging into the machine, with "ssh 'c03'", and check in:

  .ssh/authorized_keys

to make sure we haven't added extra keys that you weren't expecting.

# Quick test
[root@c01 ~]# for i in 1 2 3;do ssh c0$i date;done
Fri Sep 29 05:19:35 CST 2017
Fri Sep 29 05:19:36 CST 2017
Fri Sep 29 05:19:36 CST 2017

Doing it this way is quite tedious, especially with more nodes. A better approach is to build the authorized_keys file once by hand (or keep it on an NFS share) and copy it into the .ssh directory of every node, so that ssh-copy-id does not have to be run for every node pair. See also the SSH mutual-trust configuration article.
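
A rough sketch of that approach, assuming ssh-keygen has already been run on every node and root password login is still allowed for the initial copies (not part of the original run):

# collect every node's public key into one file
for i in 1 2 3; do ssh c0$i "cat /root/.ssh/id_rsa.pub"; done > /root/authorized_keys.all

# distribute the combined file to every node
for i in 1 2 3; do
    scp /root/authorized_keys.all c0$i:/root/.ssh/authorized_keys
    ssh c0$i "chmod 600 /root/.ssh/authorized_keys"
done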

2. Creating the Cluster and Testing

The following steps only need to be executed on one node; here that is c01.

Create the 3-node GPFS cluster

[root@c01 mmfs]# ./bin/mmcrcluster -N "c01:manager-quorum,c02:manager-quorum,c03:quorum" \
 -p c01 -r /usr/bin/ssh -R /usr/bin/scp -C 3NodeCluster
Fri Sep 29 05:42:52 CST 2017: mmcrcluster: Processing node c01
Fri Sep 29 05:42:52 CST 2017: mmcrcluster: Processing node c02
Fri Sep 29 05:42:55 CST 2017: mmcrcluster: Processing node c03
mmcrcluster: Command successfully completed
mmcrcluster: Warning: Not all nodes have proper GPFS license designations.
    Use the mmchlicense command to designate licenses as needed.
mmcrcluster: Propagating the cluster configuration data to all
  affected nodes.  This is an asynchronous process.
[root@c01 mmfs]# ./bin/mmlscluster

===============================================================================
| Warning:                                                                    |
|   This cluster contains nodes that do not have a proper GPFS license        |
|   designation.  This violates the terms of the GPFS licensing agreement.    |
|   Use the mmchlicense command and assign the appropriate GPFS licenses      |
|   to each of the nodes in the cluster.  For more information about GPFS     |
|   license designation, see the Concepts, Planning, and Installation Guide.  |
===============================================================================


GPFS cluster information
========================
  GPFS cluster name:         3NodeCluster.c01
  GPFS cluster id:           5187696956017568988
  GPFS UID domain:           3NodeCluster.c01
  Remote shell command:      /usr/bin/ssh
  Remote file copy command:  /usr/bin/scp

GPFS cluster configuration servers:
-----------------------------------
  Primary server:    c01
  Secondary server:  (none)

 Node  Daemon node name  IP address   Admin node name  Designation
-------------------------------------------------------------------
   1   c01               10.10.1.173  c01              quorum-manager
   2   c02               10.10.1.174  c02              quorum-manager
   3   c03               10.10.1.175  c03              quorum

# Accept the licenses

[root@c01 mmfs]# ./bin/mmchlicense server --accept -N c01,c02,c03

The following nodes will be designated as possessing GPFS server licenses:
        c01
        c02
        c03
mmchlicense: Command successfully completed
mmchlicense: Propagating the cluster configuration data to all
  affected nodes.  This is an asynchronous process.

[root@c01 mmfs]# ./bin/mmlslicense -L
 Node name                    Required license   Designated license
-------------------------------------------------------------------
c01                                server            server
c02                                server            server
c03                                server            server

 Summary information
---------------------
Number of nodes defined in the cluster:                          3
Number of nodes with server license designation:                 3
Number of nodes with client license designation:                 0
Number of nodes still requiring server license designation:      0
Number of nodes still requiring client license designation:      0


Create the NSDs (Network Shared Disks)

[root@c01 mmfs]# mkdir -p /etc/gpfs
[root@c01 mmfs]# echo "/dev/sdb:c01::dataAndMetadata:3001:c01nsd01" >>/etc/gpfs/3node_nsd_input
[root@c01 mmfs]# echo "/dev/sdb:c02::dataAndMetadata:3002:c02nsd01" >>/etc/gpfs/3node_nsd_input
[root@c01 mmfs]# echo "/dev/sdb:c03::dataAndMetadata:3003:c03nsd01" >>/etc/gpfs/3node_nsd_input
[root@c01 mmfs]# cat /etc/gpfs/3node_nsd_input
/dev/sdb:c01::dataAndMetadata:3001:c01nsd01
/dev/sdb:c02::dataAndMetadata:3002:c02nsd01
/dev/sdb:c03::dataAndMetadata:3003:c03nsd01
[root@c01 mmfs]# ./bin/mmcrnsd -F /etc/gpfs/3node_nsd_input -v yes
mmcrnsd: Processing disk sdb
mmcrnsd: Processing disk sdb
mmcrnsd: Processing disk sdb
mmcrnsd: Propagating the cluster configuration data to all
  affected nodes.  This is an asynchronous process.
[root@c01 ~]# cd
[root@c01 ~]# cat /etc/gpfs/3node_nsd_input
# /dev/sdb:c01::dataAndMetadata:3001:c01nsd01
c01nsd01:::dataAndMetadata:3001::system
# /dev/sdb:c02::dataAndMetadata:3002:c02nsd01
c02nsd01:::dataAndMetadata:3002::system
# /dev/sdb:c03::dataAndMetadata:3003:c03nsd01
c03nsd01:::dataAndMetadata:3003::system

[root@c01 ~]# mmlsnsd

 File system   Disk name    NSD servers
---------------------------------------------------------------------------
 (free disk)   c01nsd01     c01
 (free disk)   c02nsd01     c02
 (free disk)   c03nsd01     c03
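
# For reference: each line of the input file is an NSD descriptor. As far as I can tell from
# the GPFS 3.5 syntax, the colon-separated fields are
#   DiskName:PrimaryServer:BackupServer:DiskUsage:FailureGroup:DesiredName:StoragePool
# so /dev/sdb:c01::dataAndMetadata:3001:c01nsd01 means: device /dev/sdb, served by c01 with
# no backup server, usable for both data and metadata, failure group 3001, NSD name c01nsd01,
# storage pool defaulting to system. mmcrnsd comments out the original lines and rewrites the
# file as NSDName:::DiskUsage:FailureGroup::StoragePool, ready to be passed to mmcrfs -F.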


Create the GPFS file system

[root@c01 ~]# mmcrfs /gpfs1 /dev/gpfs1 -F /etc/gpfs/3node_nsd_input  -B 256k \
 -n 80 -v no -R 2 -M 2 -r 2 -m 2 -A yes -Q no
mmcommon: tsctl command cannot be executed.  Either none of the
  nodes in the cluster are reachable, or GPFS is down on all of the nodes.
mmcrfs: Command failed.  Examine previous error messages to determine cause.

# Error: the cluster has not been started

[root@c01 ~]# mmgetstate -aLs

 Node number  Node name       Quorum  Nodes up  Total nodes  GPFS state  Remarks   
------------------------------------------------------------------------------------
       1      c01                0        0          3       down        quorum node
       2      c02                0        0          3       unknown     quorum node
       3      c03                0        0          3       unknown     quorum node

 Summary information
---------------------
mmgetstate: Information cannot be displayed.  Either none of the
  nodes in the cluster are reachable, or GPFS is down on all of the nodes.

# Check SELinux and stop iptables on every node
[root@c01 ~]# getenforce
Disabled
[root@c01 ~]# service iptables stop
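# Optional (not run here): also keep iptables from starting again at boot on every node,
# assuming the stock CentOS 6 SysV service:
chkconfig iptables off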

# Start the cluster
[root@c01 ~]# mmstartup -a
Fri Sep 29 06:11:32 CST 2017: mmstartup: Starting GPFS ...
[root@c01 ~]#
[root@c01 ~]# mmgetstate -aLs

# Create the file system again
[root@c01 ~]# mmcrfs /gpfs1 /dev/gpfs1 -F /etc/gpfs/3node_nsd_input \
 -B 256k -n 80 -v no -R 2 -M 2 -r 2 -m 2 -A yes -Q no

The following disks of gpfs1 will be formatted on node c01:
    c01nsd01: size 20971520 KB
    c02nsd01: size 20971520 KB
    c03nsd01: size 20971520 KB
Formatting file system ...
Disks up to size 257 GB can be added to storage pool system.
Creating Inode File
Creating Allocation Maps
Creating Log Files
Clearing Inode Allocation Map
Clearing Block Allocation Map
Formatting Allocation Map for storage pool system
Completed creation of file system /dev/gpfs1.
mmcrfs: Propagating the cluster configuration data to all
  affected nodes.  This is an asynchronous process.

# Mount the file system on all nodes
[root@c01 ~]# mmmount gpfs1 -a
Fri Sep 29 06:15:33 CST 2017: mmmount: Mounting file systems ...
[root@c01 ~]# df -h
Filesystem                  Size  Used Avail Use% Mounted on
/dev/mapper/vg_c01-lv_root   18G  1.9G   15G  12% /
tmpfs                       491M     0  491M   0% /dev/shm
/dev/sda1                   477M   49M  404M  11% /boot
/dev/gpfs1                   60G  754M   60G   2% /gpfs1
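
For reference, the mmcrfs options used above mean roughly the following; the values were chosen for this small test cluster and are not tuning recommendations:

# -F /etc/gpfs/3node_nsd_input  NSD file as rewritten by mmcrnsd
# -B 256k                       file system block size
# -n 80                         estimated number of nodes that will mount the file system
# -R 2 / -M 2                   maximum number of data / metadata replicas
# -r 2 / -m 2                   default number of data / metadata replicas
# -A yes                        mount the file system automatically when GPFS starts
# -Q no                         do not activate quotas
# -v no                         do not verify that the disks are unused by another file system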

Test

# Write a file on c03
[root@c03 ~]# df -h
Filesystem                  Size  Used Avail Use% Mounted on
/dev/mapper/vg_c01-lv_root   18G  2.0G   15G  12% /
tmpfs                       491M     0  491M   0% /dev/shm
/dev/sda1                   477M   49M  404M  11% /boot
/dev/gpfs1                   60G   16G   45G  26% /gpfs1
[root@c03 ~]# cd /gpfs1/
[root@c03 gpfs1]# date >> test1.txt

# Check it on c01
[root@c01 ~]# cat /gpfs1/test1.txt
Fri Sep 29 06:16:36 CST 2017

3. Upgrading GPFS

This upgrade test goes from 3.5.0.3 to 3.5.0.34. GPFS should be stopped during the upgrade and started again on all nodes once it is finished. The steps are as follows:

# check gpfs version

[root@c01 ~]# mmlsconfig
Configuration data for cluster 3NodeCluster.c01:
------------------------------------------------
myNodeConfigNumber 1
clusterName 3NodeCluster.c01
clusterId 5187696956017568988
autoload no
dmapiFileHandleSize 32
minReleaseLevel 3.5.0.3
adminMode central

File systems in cluster 3NodeCluster.c01:
-----------------------------------------
/dev/gpfs1


# download update software from
# https://www.ibm.com/support/home/product/U962793A86377G52/General_Parallel_File_System
# an IBM ID is required
# Upload and extract the update package

[root@c01 ~]# mkdir gpfs_update
[root@c01 ~]# tar -zxvf GPFS-3.5.0.34-x86_64-Linux.tar.gz -C gpfs_update
changelog
gpfs.base_3.5.0-34_amd64_update.deb
gpfs.base-3.5.0-34.x86_64.update.rpm
gpfs.docs_3.5.0-34_all.deb
gpfs.docs-3.5.0-34.noarch.rpm
gpfs.gpl_3.5.0-34_all.deb
gpfs.gpl-3.5.0-34.noarch.rpm
gpfs.msg.en-us_3.5.0-34_all.deb
gpfs.msg.en_US-3.5.0-34.noarch.rpm
README
[root@c01 ~]# cd gpfs_update/
[root@c01 gpfs_update]# ls -l
total 29424
-rw-r--r-- 1 root  root      283 Mar 23  2017 changelog
-rw-r--r-- 1 30007 bin  14003236 Mar 15  2017 gpfs.base_3.5.0-34_amd64_update.deb
-rw-r--r-- 1 30007 bin  14259235 Mar 15  2017 gpfs.base-3.5.0-34.x86_64.update.rpm
-rw-r--r-- 1 30007 bin    236234 Mar 15  2017 gpfs.docs_3.5.0-34_all.deb
-rw-r--r-- 1 30007 bin    254716 Mar 15  2017 gpfs.docs-3.5.0-34.noarch.rpm
-rw-r--r-- 1 30007 bin    555762 Mar 15  2017 gpfs.gpl_3.5.0-34_all.deb
-rw-r--r-- 1 30007 bin    579652 Mar 15  2017 gpfs.gpl-3.5.0-34.noarch.rpm
-rw-r--r-- 1 30007 bin    105508 Mar 15  2017 gpfs.msg.en-us_3.5.0-34_all.deb
-rw-r--r-- 1 30007 bin    107720 Mar 15  2017 gpfs.msg.en_US-3.5.0-34.noarch.rpm
-rw-r--r-- 1 root  root     5674 Feb  8  2017 README

# List the installed gpfs packages
[root@c01 gpfs_update]# rpm -qa |grep gpfs
gpfs.base-3.5.0-3.x86_64
gpfs.docs-3.5.0-3.noarch
gpfs.msg.en_US-3.5.0-3.noarch
gpfs.gpl-3.5.0-3.noarch

# Stop GPFS on this node, then install the updates
[root@c01 gpfs_update]# mmshutdown -N c01
Sun Oct  1 10:12:57 CST 2017: mmshutdown: Starting force unmount of GPFS file systems
Sun Oct  1 10:13:02 CST 2017: mmshutdown: Shutting down GPFS daemons
c01:  Shutting down!
c01:  Unloading modules from /lib/modules/2.6.32-696.10.2.el6.x86_64/extra
Sun Oct  1 10:13:10 CST 2017: mmshutdown: Finished
[root@c01 gpfs_update]# rpm -ivhU gpfs.base-3.5.0-34.x86_64.update.rpm
[root@c01 gpfs_update]# rpm -ivhU gpfs.docs-3.5.0-34.noarch.rpm
[root@c01 gpfs_update]# rpm -ivhU gpfs.gpl-3.5.0-34.noarch.rpm
[root@c01 gpfs_update]# rpm -ivhU gpfs.msg.en_US-3.5.0-34.noarch.rpm
[root@c01 gpfs_update]# rpm -qa | grep gpfs
gpfs.docs-3.5.0-34.noarch
gpfs.gpl-3.5.0-34.noarch
gpfs.base-3.5.0-34.x86_64
gpfs.msg.en_US-3.5.0-34.noarch

# Repeat the upgrade on the remaining nodes one at a time.
# After all nodes have been upgraded,
# the GPL layer has to be rebuilt.

[root@c01 src]# make LINUX_DISTRIBUTION=REDHAT_AS_LINUX Autoconfig World InstallImages \
2>/tmp/make_err 1>/tmp/make_out

# Start GPFS
[root@c01 ~]# mmstartup -a
Sun Oct  1 10:31:45 CST 2017: mmstartup: Starting GPFS ...
[root@c01 ~]# mmgetstate -a

 Node number  Node name        GPFS state
------------------------------------------
       1      c01              active
       2      c02              active
       3      c03              active
[root@c01 ~]# df
Filesystem                 1K-blocks    Used Available Use% Mounted on
/dev/mapper/vg_c01-lv_root  18003272 2067416  15014668  13% /
tmpfs                         502056       0    502056   0% /dev/shm
/dev/sda1                     487652   49322    412730  11% /boot
/dev/gpfs1                  62914560  772096  62142464   2% /gpfs1
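
Once every node is running 3.5.0.34 and the daemons are active, the package level can be checked from one node. Optionally the cluster can then be committed to the new level with mmchconfig release=LATEST; as far as I know this raises minReleaseLevel permanently, so only do it once no node needs to be downgraded. A sketch, not part of the original run:

# verify the installed gpfs.base level on every node
for i in 1 2 3; do ssh c0$i "rpm -q gpfs.base"; done

# optional: commit the cluster to the new release level (irreversible)
mmchconfig release=LATEST
mmlsconfig | grep minReleaseLevel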

4. Migrating Disks

The mmadddisk and mmdeldisk commands can be used to swap a new NSD into the GPFS cluster. When an NSD is removed with mmdeldisk, its data is migrated off automatically, but there must be enough free space left on the remaining disks. The main steps are as follows:

# Add a new disk to c01
[root@c01 ~]# fdisk -l | grep /dev
Disk /dev/sda: 21.5 GB, 21474836480 bytes
/dev/sda1   *           1          64      512000   83  Linux
/dev/sda2              64        2611    20458496   8e  Linux LVM
Disk /dev/sdb: 21.5 GB, 21474836480 bytes
Disk /dev/sdc: 21.5 GB, 21474836480 bytes

# Create the new NSD

[root@c01 ~]# echo "/dev/sdc:c01::dataAndMetadata:3001:c01nsd02" >/etc/gpfs/migrate_nsd_input
[root@c01 ~]# mmcrnsd -F /etc/gpfs/migrate_nsd_input
mmcrnsd: Processing disk sdc
mmcrnsd: Propagating the cluster configuration data to all
  affected nodes.  This is an asynchronous process.
[root@c01 ~]# mmlsnsd

 File system   Disk name    NSD servers
---------------------------------------------------------------------------
 gpfs1         c01nsd01     c01
 gpfs1         c02nsd01     c02
 gpfs1         c03nsd01     c03
 (free disk)   c01nsd02     c01
 
# Add the NSD to gpfs1
[root@c01 ~]# mmadddisk gpfs1 -F /etc/gpfs/migrate_nsd_input
Unable to open disk 'c01nsd02' on node c02.
No such device
Error processing disks.
runRemoteCommand: c02: tsadddisk /dev/gpfs1 -F /var/mmfs/tmp/tsddFile.mmadddisk.7352  failed.
mmadddisk: tsadddisk failed.
Verifying file system configuration information ...
mmadddisk: Propagating the cluster configuration data to all
  affected nodes.  This is an asynchronous process.
mmadddisk: Command failed.  Examine previous error messages to determine cause.

[root@c01 gpfs1]# mmadddisk gpfs1 c01nsd02

The following disks of gpfs1 will be formatted on node c01:
    c01nsd02: size 20971520 KB
Extending Allocation Map
Checking Allocation Map for storage pool system
Completed adding disks to file system gpfs1.
mmadddisk: Propagating the cluster configuration data to all
  affected nodes.  This is an asynchronous process.
[root@c01 gpfs1]#
[root@c01 gpfs1]#
[root@c01 gpfs1]# mmlsnsd

 File system   Disk name    NSD servers
---------------------------------------------------------------------------
 gpfs1         c01nsd01     c01
 gpfs1         c02nsd01     c02
 gpfs1         c03nsd01     c03
 gpfs1         c01nsd02     c01

[root@c01 gpfs1]# mmdf gpfs1
disk                disk size  failure holds    holds              free KB             free KB
name                    in KB    group metadata data        in full blocks        in fragments
--------------- ------------- -------- -------- ----- -------------------- -------------------
Disks in storage pool: system (Maximum disk size allowed is 239 GB)
c01nsd02             20971520       -1 Yes      Yes        20969216 (100%)           248 ( 0%)
c01nsd01             20971520     3001 Yes      Yes        20713472 ( 99%)           488 ( 0%)
c02nsd01             20971520     3002 Yes      Yes        20714496 ( 99%)           440 ( 0%)
c03nsd01             20971520     3003 Yes      Yes        20714496 ( 99%)           456 ( 0%)
                -------------                         -------------------- -------------------
(pool total)         83886080                              83111680 ( 99%)          1632 ( 0%)

                =============                         ==================== ===================
(total)              83886080                              83111680 ( 99%)          1632 ( 0%)

Inode Information
-----------------
Number of used inodes:            4087
Number of free inodes:           81929
Number of allocated inodes:      86016
Maximum number of inodes:        86016

# Remove one NSD from gpfs1
[root@c01 gpfs1]# mmdeldisk
mmdeldisk: Missing arguments.
Usage:
  mmdeldisk Device {"DiskName[;DiskName...]" | -F DiskFile} [-a] [-c]
            [-m | -r | -b] [-N {Node[,Node...] | NodeFile | NodeClass}]
[root@c01 gpfs1]# mmdeldisk gpfs1 c01nsd01
Deleting disks ...
Scanning file system metadata, phase 1 ...
Scan completed successfully.
Scanning file system metadata, phase 2 ...
Scan completed successfully.
Scanning file system metadata, phase 3 ...
Scan completed successfully.
Scanning file system metadata, phase 4 ...
Scan completed successfully.
Scanning user file metadata ...
 100.00 % complete on Tue Oct  3 14:42:18 2017
Scan completed successfully.
Could not invalidate disk(s).
Checking Allocation Map for storage pool system
tsdeldisk completed.
mmdeldisk: Propagating the cluster configuration data to all
  affected nodes.  This is an asynchronous process.
[root@c01 gpfs1]# mmlsnsd

 File system   Disk name    NSD servers
---------------------------------------------------------------------------
 gpfs1         c02nsd01     c02
 gpfs1         c03nsd01     c03
 gpfs1         c01nsd02     c01
 (free disk)   c01nsd01     c01
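
The disk removed from gpfs1 is still defined as an NSD. If it is no longer needed, the NSD definition can be deleted as well, and the file system can be rebalanced across the remaining disks; a sketch, not executed in the original run:

# delete the NSD definition of the freed disk
mmdelnsd c01nsd01

# optional: rebalance existing data across all disks of gpfs1
mmrestripefs gpfs1 -b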

5. Adding a Node

Add another node (c04) to the cluster; the main steps are as follows:

# Install the same Linux OS version as the other nodes

# Allow remote SSH login as root
[root@c04 ~]# cat /etc/ssh/sshd_config |grep Root
PermitRootLogin yes

[root@c04 ~]# service sshd restart
Stopping sshd:                                             [  OK  ]
Starting sshd:                                             [  OK  ]

# Install the prerequisite packages
[root@c04 ~]# yum -y install ksh perl

[root@c04 ~]# yum -y install kernel kernel-headers kernel-devel

[root@c04 ~]# yum -y install cpp gcc gcc-c++

# Disable SELinux, then reboot
[root@c04 ~]# sed -i "s#SELINUX=enforcing#SELINUX=disabled#g" /etc/selinux/config

# Prepare the installation media and install
[root@c04 gpfs]# rpm -ivh *.rpm
Preparing...                ########################################### [100%]
   1:gpfs.base              ########################################### [ 25%]
   2:gpfs.gpl               ########################################### [ 50%]
   3:gpfs.msg.en_US         ########################################### [ 75%]
   4:gpfs.docs              ########################################### [100%]

# Upgrade to 3.5.0.34
[root@c04 gpfs]# cd ../gpfs_update/
[root@c04 gpfs_update]# rpm -ivhU *.rpm
Preparing...                ########################################### [100%]
   1:gpfs.base              ########################################### [ 25%]
   2:gpfs.gpl               ########################################### [ 50%]
   3:gpfs.msg.en_US         ########################################### [ 75%]
   4:gpfs.docs              ########################################### [100%]

[root@c04 gpfs_update]# rpm -qa |grep gpfs
gpfs.base-3.5.0-34.x86_64
gpfs.msg.en_US-3.5.0-34.noarch
gpfs.gpl-3.5.0-34.noarch
gpfs.docs-3.5.0-34.noarch


# Build and verify
[root@c04 src]# make LINUX_DISTRIBUTION=REDHAT_AS_LINUX Autoconfig World InstallImages
## requires the kernel, kernel-headers, kernel-devel, cpp, gcc and gcc-c++ packages

[root@c04 src]# ls -l /lib/modules/2.6.32-696.10.3.el6.x86_64/extra/
total 6332
-rw-r--r-- 1 root root 2760629 Oct  1 23:09 mmfs26.ko
-rw-r--r-- 1 root root 2798374 Oct  1 23:09 mmfslinux.ko
-rw-r--r-- 1 root root  918694 Oct  1 23:09 tracedev.ko

# Update the hosts file; it should look like this on every node
[root@c04 src]# cat /etc/hosts |grep c0
10.10.1.173 c01
10.10.1.174 c02
10.10.1.175 c03
10.10.1.176 c04

# Establish SSH mutual trust
[root@c04 src]# ssh-keygen
[root@c04 src]# ssh-copy-id c01
[root@c04 src]# ssh-copy-id c02
[root@c04 src]# ssh-copy-id c03
[root@c04 src]# ssh-copy-id c04

[root@c01 src]# ssh-copy-id c04
[root@c02 src]# ssh-copy-id c04
[root@c03 src]# ssh-copy-id c04


# Add c04 to the cluster. Note: if the new node is a quorum node, the whole cluster must be shut down first;
[root@c01 ~]# mmshutdown -a
...
# mmaddnode -N {NodeDesc[,NodeDesc...] | NodeFile}
[root@c01 ~]# mmaddnode -N c04:quorum
Sun Oct  1 12:35:00 CST 2017: mmaddnode: Processing node c04
Verifying GPFS is stopped on all nodes ...
mmaddnode: Command successfully completed
mmaddnode: Warning: Not all nodes have proper GPFS license designations.
    Use the mmchlicense command to designate licenses as needed.
mmaddnode: Propagating the cluster configuration data to all
  affected nodes.  This is an asynchronous process.

[root@c01 ~]# mmchlicense server  --accept -N c04

The following nodes will be designated as possessing GPFS server licenses:
        c04
mmchlicense: Command successfully completed
mmchlicense: Propagating the cluster configuration data to all
  affected nodes.  This is an asynchronous process.
  

[root@c01 ~]# mmstartup -a
Sun Oct  1 12:37:21 CST 2017: mmstartup: Starting GPFS ...

Wait about 20 seconds, then:
[root@c01 ~]# mmgetstate -a

 Node number  Node name        GPFS state
------------------------------------------
       1      c01              active
       2      c02              active
       3      c03              active
       4      c04              active
       
The file system is now visible on c04:
[root@c04 src]# df
Filesystem                 1K-blocks    Used Available Use% Mounted on
/dev/mapper/vg_c04-lv_root  18003272 1241608  15840476   8% /
tmpfs                         502056       0    502056   0% /dev/shm
/dev/sda1                     487652   51792    410260  12% /boot
/dev/gpfs1                  62914560  772096  62142464   2% /gpfs1
[root@c04 src]# cd /gpfs1/
[root@c04 gpfs1]# ls -l
total 0
-rw-r--r-- 1 root root 111 Oct  1 05:44 test1.txt

[root@c04 ~]# cd /usr/lpp/mmfs/bin/
[root@c04 bin]# ./mmgetstate -aL

 Node number  Node name       Quorum  Nodes up  Total nodes  GPFS state  Remarks
------------------------------------------------------------------------------------
       1      c01                3        4          4       active      quorum node
       2      c02                3        4          4       active      quorum node
       3      c03                3        4          4       active      quorum node
       4      c04                3        4          4       active      quorum node


# Change c04 into an ordinary (non-quorum) node, so the cluster keeps an odd number of quorum nodes

[root@c01 gpfs1]# mmchnode
mmchnode: Missing arguments.
Usage:
   mmchnode change-options -N {Node[,Node...] | NodeFile | NodeClass}
      or
   mmchnode {-S Filename | --spec-file=Filename}

[root@c01 gpfs1]# mmchnode -N c04 --nonquorum
Tue Oct  3 14:55:49 CST 2017: mmchnode: Processing node c04
mmchnode: Propagating the cluster configuration data to all
  affected nodes.  This is an asynchronous process.

[root@c01 gpfs1]# mmgetstate -aL

 Node number  Node name       Quorum  Nodes up  Total nodes  GPFS state  Remarks
------------------------------------------------------------------------------------
       1      c01                2        3          4       active      quorum node
       2      c02                2        3          4       active      quorum node
       3      c03                2        3          4       active      quorum node
       4      c04                2        3          4       active

6. Deleting the File System

# Unmount the GPFS file system on all nodes
[root@c01 ~]# mmumount /dev/gpfs1
Tue Oct  3 16:04:51 CST 2017: mmumount: Unmounting file systems ...
# Delete the file system; this destroys all data on its NSDs
[root@c01 ~]# mmdelfs /dev/gpfs1
All data on the following disks of gpfs1 will be destroyed:
    c02nsd01
    c03nsd01
    c01nsd02
Completed deletion of file system /dev/gpfs1.
mmdelfs: Propagating the cluster configuration data to all
  affected nodes.  This is an asynchronous process.

[root@c01 ~]# mmgetstate -aL

 Node number  Node name       Quorum  Nodes up  Total nodes  GPFS state  Remarks
------------------------------------------------------------------------------------
       1      c01                2        3          3       active      quorum node
       2      c02                2        3          3       active      quorum node
       3      c03                2        3          3       active      quorum node
[root@c01 ~]# mmlsnsd

 File system   Disk name    NSD servers
---------------------------------------------------------------------------
 (free disk)   c01nsd01     c01
 (free disk)   c01nsd02     c01
 (free disk)   c02nsd01     c02
 (free disk)   c03nsd01     c03

# All NSDs are now in the free-disk state.
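
If the goal is a complete teardown rather than reusing the disks, the free NSD definitions and the cluster itself can also be removed; a sketch, not executed here:

# delete the now-unused NSD definitions
mmdelnsd "c01nsd01;c01nsd02;c02nsd01;c03nsd01"

# stop GPFS everywhere and delete all nodes, which removes the cluster definition
mmshutdown -a
mmdelnode -a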