
Deployment environment

Install packages

yum -y install ceph-mon ceph-mgr ceph-osd
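
A quick sanity check that the packages landed and report a consistent version (standard rpm/ceph commands):

rpm -q ceph-mon ceph-mgr ceph-osd
ceph-mon --version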

Configure the hostname

hostnamectl set-hostname host173
echo 10.128.11.173 host173 >> /etc/hosts

Configure ceph.conf

mkdir -p /etc/ceph/
echo "[global]" > /etc/ceph/ceph.conf
echo "fsid = $(uuidgen)" >> /etc/ceph/ceph.conf
echo "mon initial members = $(hostname)" >> /etc/ceph/ceph.conf
echo "mon host = 10.128.11.173" >> /etc/ceph/ceph.conf

The resulting ceph.conf looks like this:

# cat /etc/ceph/ceph.conf
[global]
fsid = f43b8ed6-26b4-4fc2-b95e-f2d4c8381fa0
mon initial members = host173
mon host = 10.128.11.173
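
The same file can be written in one step with a heredoc; this is just an equivalent sketch of the echo commands above (substitute your own fsid, hostname, and address):

cat > /etc/ceph/ceph.conf <<EOF
[global]
fsid = $(uuidgen)
mon initial members = $(hostname)
mon host = 10.128.11.173
EOF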

Configure ceph-mon

# Create the keyrings with ceph-authtool; the default cluster name is ceph
ceph-authtool -C /etc/ceph/ceph.mon.keyring -g -n mon. --cap mon "allow *"
ceph-authtool -C /etc/ceph/ceph.client.admin.keyring -g -n client.admin --cap mon "allow *" --cap osd "allow *"
ceph-authtool /etc/ceph/ceph.mon.keyring --import-keyring /etc/ceph/ceph.client.admin.keyring
# Create the monmap
monmaptool --create --add host173 10.128.11.173 --fsid=f43b8ed6-26b4-4fc2-b95e-f2d4c8381fa0 /etc/ceph/monmap
monmaptool --print  /etc/ceph/monmap
# Initialize the mon data directory
mkdir -p /var/lib/ceph/mon/ceph-host173
chown -R ceph:ceph /var/lib/ceph /etc/ceph/ /var/log/ceph/
sudo -u ceph ceph-mon --mkfs -i host173 --monmap /etc/ceph/monmap --keyring /etc/ceph/ceph.mon.keyring
# Start the mon service with the stock systemd unit file
systemctl start ceph-mon@host173
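
Once the unit is running, the monitor should answer cluster commands; enabling the unit keeps it across reboots (standard systemctl and ceph commands):

systemctl enable ceph-mon@host173
ceph mon stat
ceph -s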

Configure ceph-osd

for i in 0 1 2;
do
ceph osd create
ceph-authtool -C /etc/ceph/ceph.osd.$i.keyring -g -n osd.$i --cap mon "allow profile osd" --cap osd "allow *"
ceph auth import -i /etc/ceph/ceph.osd.$i.keyring
mkdir -p /var/lib/ceph/osd/ceph-$i
ceph auth get-or-create osd.$i -o /var/lib/ceph/osd/ceph-$i/keyring
chown -R ceph:ceph /var/lib/ceph /etc/ceph/ /var/log/ceph/
sleep 1;sudo -u ceph ceph-osd -i $i --mkfs
ceph osd crush add osd.$i 0.1 root=default host=host173
done

systemctl start ceph-osd@0
systemctl start ceph-osd@1
systemctl start ceph-osd@2

Check the OSD status; all three OSDs are up and in:

[root@localhost ~]# ceph -s |grep osd
    osd: 3 osds: 3 up (since 8s), 3 in (since 8s)
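
To confirm the CRUSH placement created by ceph osd crush add, and to keep the OSDs across reboots, the usual checks are:

ceph osd tree
systemctl enable ceph-osd@0 ceph-osd@1 ceph-osd@2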

Configure ceph-mgr

ceph-mgr needs the following two Python libraries; without them it logs warnings:

pip3 install werkzeug pecan

Initialize and start ceph-mgr

mkdir -p /var/lib/ceph/mgr/ceph-host173
chown -R ceph:ceph /var/lib/ceph/mgr/
ceph-authtool -C /etc/ceph/ceph.mgr.host173.keyring -g -n mgr.host173 --cap mon "allow profile mgr" --cap osd "allow *"
ceph auth import -i /etc/ceph/ceph.mgr.host173.keyring
ceph auth get-or-create mgr.host173 -o /var/lib/ceph/mgr/ceph-host173/keyring
systemctl start ceph-mgr@host173
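
A quick check that the mgr has registered with the monitor (the active mgr shows up in ceph -s); enabling the unit is optional:

systemctl enable ceph-mgr@host173
ceph -s | grep mgr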

Create a pool and an RBD image

ceph osd pool create rbd 128 128
ceph osd pool application enable rbd rbd
rbd create -s 10G test
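
Before mapping, the pool and image can be verified with standard rbd commands:

ceph osd pool ls
rbd ls
rbd info test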

Map the RBD image locally, create a filesystem, and use it:

[root@host173 ceph]# rbd map test
/dev/rbd0
[root@host173 ceph]# ceph -s
  cluster:
    id:     f43b8ed6-26b4-4fc2-b95e-f2d4c8381fa0
    health: HEALTH_OK

  services:
    mon: 1 daemons, quorum host173 (age 13m)
    mgr: host173(active, since 12m)
    osd: 3 osds: 3 up (since 21m), 3 in (since 78m)

  data:
    pools:   2 pools, 89 pgs
    objects: 4 objects, 36 B
    usage:   3.1 GiB used, 297 GiB / 300 GiB avail
    pgs:     1.124% pgs not active
             88 active+clean
             1  peering

  progress:
    PG autoscaler decreasing pool 5 PGs from 128 to 32 (3m)
      [=========...................] (remaining: 5m)

[root@host173 ceph]# ls /dev/rbd0
/dev/rbd0
[root@host173 ceph]# mkfs.xfs /dev/rbd0
[root@host173 ceph]# mount /dev/rbd0 /mnt/
[root@host173 ceph]# ls /mnt/
[root@host173 ceph]# echo aaaa > /mnt/1.txt
[root@host173 ceph]# cat /mnt/1.txt
aaaa
[root@host173 ceph]# umount /mnt
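
After unmounting, the kernel mapping can be released with rbd unmap (either the device path or the image name works):

rbd unmap /dev/rbd0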

Troubleshooting

1. error reading config file

# Symptom:
[root@localhost ~]# sudo -u ceph ceph-mon --mkfs -i host173 --monmap /etc/ceph/monmap --keyring /etc/ceph/ceph.mon.keyring
global_init: error reading config file.
# Fix:
The error means ceph-mon cannot find the [global] section in /etc/ceph/ceph.conf; make sure the file starts with a [global] header.
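
A minimal sketch of the fix, assuming the file exists but simply lacks the section header (GNU sed):

grep -q '^\[global\]' /etc/ceph/ceph.conf || sed -i '1i [global]' /etc/ceph/ceph.conf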

2. 1 monitors have not enabled msgr2

Symptom:
[root@localhost ~]# ceph health
HEALTH_WARN mon is allowing insecure global_id reclaim; 1 monitors have not enabled msgr2
Fix:
[root@localhost ~]# ceph mon enable-msgr2
[root@localhost ~]# ceph config set mon auth_allow_insecure_global_id_reclaim false
[root@localhost ~]# systemctl restart ceph-mon.target
[root@localhost ~]# ceph health
HEALTH_OK
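
The monitor address list should now include a v2 (port 3300) endpoint, which can be checked with:

ceph mon dump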

3. failed to open block

Symptom:
2024-10-11T09:46:34.236+0800 7fff811a0000 -1 bluestore(/var/lib/ceph/osd/ceph-2/block) _read_bdev_label failed to open /var/lib/ceph/osd/ceph-2/block: (2) No such file or directory
2024-10-11T09:46:34.236+0800 7fff811a0000 -1 bluestore(/var/lib/ceph/osd/ceph-2/block) _read_bdev_label failed to open /var/lib/ceph/osd/ceph-2/block: (2) No such file or directory
2024-10-11T09:46:34.236+0800 7fff811a0000 -1 bluestore(/var/lib/ceph/osd/ceph-2/block) _read_bdev_label failed to open /var/lib/ceph/osd/ceph-2/block: (2) No such file or directory
2024-10-11T09:46:34.236+0800 7fff811a0000 -1 bluestore(/var/lib/ceph/osd/ceph-2) _read_fsid unparsable uuid
2024-10-11T09:46:34.246+0800 7fff811a0000 -1 freelist read_size_meta_from_db missing size meta in DB
Fix:
Re-run: sudo -u ceph ceph-osd -i 2 --mkfs
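
If re-running mkfs alone does not help, one option (assuming this OSD holds no data yet) is to clear the partially initialized data directory and repeat the initialization:

systemctl stop ceph-osd@2
rm -rf /var/lib/ceph/osd/ceph-2/*
ceph auth get-or-create osd.2 -o /var/lib/ceph/osd/ceph-2/keyring
chown -R ceph:ceph /var/lib/ceph/osd/ceph-2
sudo -u ceph ceph-osd -i 2 --mkfs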

4. 100.000% pgs not active

Symptom: after the OSDs and mgr are created, the PGs never become active:
[root@localhost ~]# ceph -s

  cluster:
    id:     f43b8ed6-26b4-4fc2-b95e-f2d4c8381fa0
    health: HEALTH_WARN
            Module 'restful' has failed dependency: No module named 'pecan'

  services:
    mon: 1 daemons, quorum host173 (age 4m)
    mgr: host173(active, since 11s)
    osd: 3 osds: 3 up (since 7m), 3 in (since 14m)

  data:
    pools:   1 pools, 1 pgs
    objects: 0 objects, 0 B
    usage:   3.0 GiB used, 297 GiB / 300 GiB avail
    pgs:     100.000% pgs not active
             1 undersized+peered
Fix:
[root@host173 ~]# ceph osd  getcrushmap -o /etc/ceph/crushmap
12
[root@host173 ~]# crushtool -d /etc/ceph/crushmap -o /etc/ceph/crushmap.txt
[root@host173 ~]# vim /etc/ceph/crushmap.txt
[root@host173 ~]# grep "step choose" /etc/ceph/crushmap.txt
        step chooseleaf firstn 0 type osd   
# The default CRUSH rule uses host as the failure domain. With three replicas but only one host, any pool created stays HEALTH_WARN and its PGs can never finish peering, so edit the rule's chooseleaf type from host to osd (as shown by the grep above).
[root@host173 ~]# crushtool -c /etc/ceph/crushmap.txt -o /etc/ceph/crushmap.new
[root@host173 ~]# ceph osd setcrushmap -i /etc/ceph/crushmap.new
13
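
An alternative that avoids hand-editing the crushmap is to create a replicated rule with osd as the failure domain and point the pool at it; a sketch with standard ceph commands (the rule name replicated_osd is arbitrary):

ceph osd crush rule create-replicated replicated_osd default osd
ceph osd pool set rbd crush_rule replicated_osd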

5. rbd: sysfs write failed

Symptom:
[root@host173 ceph]# rbd map test
rbd: sysfs write failed
RBD image feature set mismatch. You can disable features unsupported by the kernel with "rbd feature disable test object-map fast-diff deep-flatten".
In some cases useful info is found in syslog - try "dmesg | tail".
rbd: map failed: (6) No such device or address
Fix:
[root@host173 ceph]# rbd feature disable test object-map fast-diff deep-flatten
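
To avoid hitting this for every new image, images can be created with only the features the kernel client supports, layering being the baseline; for example (test2 is a hypothetical image name):

rbd create -s 10G --image-feature layering test2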