CEPH - OSD 추가 및 제거 + CRUSHMAP 시나리오 (Ver. 10.2.11_jewel, OS. CentOS 7)
* [ 시나리오 ]
I. 현재 SATA 스토리지만 제공하라는 방침으로 인해, 적용했던 SSD 스토리지는 제거.
II. 언제 SSD 스토리지를 제공하라고 할지 모르기 때문에... SSD rule 설정은 그대로 남겨 둠(추가)...;;
III. SSD 또는 SATA 스토리지를 증설할 수 있기 때문에 깔끔하게 OSD 추가 진행.
[ 현재 상태 ]
[root@MGMT03:20:16:~]# ceph -s
cluster 427f2e6a-5722-4365-a475-8fcdc218a418
health HEALTH_OK
monmap e2: 4 mons at {MON-0=192.168.1.13:6789/0,MON-1=192.168.1.14:6789/0,MON-2=192.168.1.15:6789/0,MON-3=192.168.1.16:6789/0}
election epoch 6, quorum 0,1,2,3 MON-0,MON-1,MON-2,MON-3
osdmap e195: 6 osds: 6 up, 6 in
flags sortbitwise,require_jewel_osds
pgmap v1181: 256 pgs, 2 pools, 32 bytes data, 5 objects
674 MB used, 60483 GB / 60484 GB avail
256 active+clean
[root@MGMT03:20:16:~]# ceph osd dump
epoch 196
fsid 427f2e6a-5722-4365-a475-8fcdc218a418
created 2018-11-02 17:51:47.398137
modified 2018-11-05 15:20:17.528584
flags sortbitwise,require_jewel_osds
pool 0 'hdd_pool' replicated size 2 min_size 1 crush_ruleset 0 object_hash rjenkins pg_num 128 pgp_num 128 last_change 127 flags hashpspool stripe_width 0
removed_snaps [1~3]
pool 2 'ssd_pool' replicated size 2 min_size 1 crush_ruleset 1 object_hash rjenkins pg_num 128 pgp_num 128 last_change 195 flags hashpspool stripe_width 0
max_osd 6
osd.0 up in weight 1 up_from 116 up_thru 195 down_at 113 last_clean_interval [4,112) 192.168.1.17:6800/78512 192.168.1.17:6802/78512 192.168.1.17:6803/78512 192.168.1.17:6804/78512 exists,up 68757359-7180-4092-9687-4b5787cb89cd
osd.1 up in weight 1 up_from 110 up_thru 195 down_at 107 last_clean_interval [8,106) 192.168.1.18:6800/78034 192.168.1.18:6802/78034 192.168.1.18:6803/78034 192.168.1.18:6804/78034 exists,up c1587381-1a3b-4271-846c-c572d4556fe8
osd.2 up in weight 1 up_from 106 up_thru 195 down_at 101 last_clean_interval [13,100) 192.168.1.19:6800/92762 192.168.1.19:6801/92762 192.168.1.19:6802/92762 192.168.1.19:6803/92762 exists,up 82df91cc-cd25-4ff9-8c6a-53e30df4c499
osd.3 up in weight 1 up_from 98 up_thru 195 down_at 95 last_clean_interval [18,94) 192.168.1.20:6800/97427 192.168.1.20:6801/97427 192.168.1.20:6802/97427 192.168.1.20:6803/97427 exists,up 57e998fb-77dd-48a5-8d17-5e6aa5509574
osd.4 up in weight 1 up_from 187 up_thru 193 down_at 0 last_clean_interval [0,0) 192.168.1.37:6800/50611 192.168.1.37:6801/50611 192.168.1.37:6802/50611 192.168.1.37:6803/50611 exists,up 5ced4724-ebcf-430c-be9a-332bf4bdbe17
osd.5 up in weight 1 up_from 189 up_thru 193 down_at 0 last_clean_interval [0,0) 192.168.1.38:6800/94910 192.168.1.38:6801/94910 192.168.1.38:6802/94910 192.168.1.38:6803/94910 exists,up 04c4dfe8-b9a8-4f45-b030-225ce078b5e0
[root@MGMT03:20:19:~]# ceph osd tree
ID WEIGHT TYPE NAME UP/DOWN REWEIGHT PRIMARY-AFFINITY
-2 0.90799 root ssd
-20 0.45399 ssd_osd OSD-20
4 0.45399 osd.4 up 1.00000 1.00000
-21 0.45399 ssd_osd OSD-21
5 0.45399 osd.5 up 1.00000 1.00000
-1 58.15997 root hdd
-10 14.53999 hdd_osd OSD-0
0 14.53999 osd.0 up 1.00000 1.00000
-11 14.53999 hdd_osd OSD-1
1 14.53999 osd.1 up 1.00000 1.00000
-12 14.53999 hdd_osd OSD-2
2 14.53999 osd.2 up 1.00000 1.00000
-13 14.53999 hdd_osd OSD-3
3 14.53999 osd.3 up 1.00000 1.00000
[ SSD_OSD 제거 ]
* 반드시 선행 작업으로 SSD용 pool(ssd_pool)을 hdd 룰셋으로 이동시킨 후, recovery가 끝나면(ceph -s 결과가 HEALTH_OK 이고 모든 PG가 active+clean 상태) 작업 시작.
해당 선행 작업은 굉장히 중요한 작업이기 때문에 무조건 해야 함!!!
pool이 사용 중인 상태에서 그냥 osd를 제거하면 되돌릴 수 없는 강을 건널 수 있기 때문에, ssd_pool 역시 정상적으로 작동하게끔 만든 후 해당 작업을 진행하길 바람.
i. ssd_pool 을 hdd 룰셋인 0번으로 편입
[root@MGMT03:20:19:~]# ceph osd pool set ssd_pool crush_ruleset 0
ii. osd 제거
[root@MGMT03:20:19:~]# ceph osd out osd.4
[root@MGMT03:20:19:~]# ceph osd out osd.5
[root@MGMT03:21:29:~]# ceph osd tree
ID WEIGHT TYPE NAME UP/DOWN REWEIGHT PRIMARY-AFFINITY
-2 0.90799 root ssd
-20 0.45399 ssd_osd OSD-20
4 0.45399 osd.4 up 0 1.00000
-21 0.45399 ssd_osd OSD-21
5 0.45399 osd.5 up 0 1.00000
-1 58.15997 root hdd
-10 14.53999 hdd_osd OSD-0
0 14.53999 osd.0 up 1.00000 1.00000
-11 14.53999 hdd_osd OSD-1
1 14.53999 osd.1 up 1.00000 1.00000
-12 14.53999 hdd_osd OSD-2
2 14.53999 osd.2 up 1.00000 1.00000
-13 14.53999 hdd_osd OSD-3
3 14.53999 osd.3 up 1.00000 1.00000
[root@MGMT03:26:38:~]# ceph osd down osd.4
[root@MGMT03:26:38:~]# ceph osd down osd.5
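* ceph osd down 은 osdmap 상에서만 down 으로 표시하는 명령이라, osd 데몬이 살아 있으면 곧바로 다시 up 으로 올라올 수 있음. mgmt 서버에서 osd down 이 먹히지 않으면 각 osd 서버에서 데몬을 직접 중지하면 됨 (예: service ceph-osd@4 stop 또는 systemctl stop ceph-osd@4).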
[root@MGMT03:26:38:~]# ceph osd tree
ID WEIGHT TYPE NAME UP/DOWN REWEIGHT PRIMARY-AFFINITY
-2 0.90799 root ssd
-20 0.45399 ssd_osd OSD-20
4 0.45399 osd.4 down 0 1.00000
-21 0.45399 ssd_osd OSD-21
5 0.45399 osd.5 down 0 1.00000
-1 58.15997 root hdd
-10 14.53999 hdd_osd OSD-0
0 14.53999 osd.0 up 1.00000 1.00000
-11 14.53999 hdd_osd OSD-1
1 14.53999 osd.1 up 1.00000 1.00000
-12 14.53999 hdd_osd OSD-2
2 14.53999 osd.2 up 1.00000 1.00000
-13 14.53999 hdd_osd OSD-3
3 14.53999 osd.3 up 1.00000 1.00000
[root@MGMT03:28:46:~]# ceph osd rm osd.4
removed osd.4
[root@MGMT03:29:59:~]# ceph osd rm osd.5
removed osd.5
[root@MGMT03:30:01:~]# ceph osd tree
ID WEIGHT TYPE NAME UP/DOWN REWEIGHT PRIMARY-AFFINITY
-2 0.90799 root ssd
-20 0.45399 ssd_osd OSD-20
4 0.45399 osd.4 DNE 0
-21 0.45399 ssd_osd OSD-21
5 0.45399 osd.5 DNE 0
-1 58.15997 root hdd
-10 14.53999 hdd_osd OSD-0
0 14.53999 osd.0 up 1.00000 1.00000
-11 14.53999 hdd_osd OSD-1
1 14.53999 osd.1 up 1.00000 1.00000
-12 14.53999 hdd_osd OSD-2
2 14.53999 osd.2 up 1.00000 1.00000
-13 14.53999 hdd_osd OSD-3
3 14.53999 osd.3 up 1.00000 1.00000
[root@MGMT03:31:45:~]# ceph osd crush remove osd.4
removed item id 4 name 'osd.4' from crush map
[root@MGMT03:31:45:~]# ceph osd crush remove osd.5
removed item id 5 name 'osd.5' from crush map
[root@MGMT03:31:57:~]# ceph osd tree
ID WEIGHT TYPE NAME UP/DOWN REWEIGHT PRIMARY-AFFINITY
-2 0 root ssd
-20 0 ssd_osd OSD-20
-21 0 ssd_osd OSD-21
-1 58.15997 root hdd
-10 14.53999 hdd_osd OSD-0
0 14.53999 osd.0 up 1.00000 1.00000
-11 14.53999 hdd_osd OSD-1
1 14.53999 osd.1 up 1.00000 1.00000
-12 14.53999 hdd_osd OSD-2
2 14.53999 osd.2 up 1.00000 1.00000
-13 14.53999 hdd_osd OSD-3
3 14.53999 osd.3 up 1.00000 1.00000
[root@MGMT03:32:00:~]# ceph osd crush remove OSD-20
removed item id -20 name 'OSD-20' from crush map
[root@MGMT03:32:35:~]# ceph osd crush remove OSD-21
removed item id -21 name 'OSD-21' from crush map
[root@MGMT03:32:37:~]# ceph osd tree
ID WEIGHT TYPE NAME UP/DOWN REWEIGHT PRIMARY-AFFINITY
-2 0 root ssd
-1 58.15997 root hdd
-10 14.53999 hdd_osd OSD-0
0 14.53999 osd.0 up 1.00000 1.00000
-11 14.53999 hdd_osd OSD-1
1 14.53999 osd.1 up 1.00000 1.00000
-12 14.53999 hdd_osd OSD-2
2 14.53999 osd.2 up 1.00000 1.00000
-13 14.53999 hdd_osd OSD-3
3 14.53999 osd.3 up 1.00000 1.00000
[root@MGMT03:32:39:~]# ceph -s
cluster 427f2e6a-5722-4365-a475-8fcdc218a418
health HEALTH_OK
monmap e2: 4 mons at {MON-0=192.168.1.13:6789/0,MON-1=192.168.1.14:6789/0,MON-2=192.168.1.15:6789/0,MON-3=192.168.1.16:6789/0}
election epoch 6, quorum 0,1,2,3 MON-0,MON-1,MON-2,MON-3
osdmap e217: 4 osds: 4 up, 4 in
flags sortbitwise,require_jewel_osds
pgmap v1487: 256 pgs, 2 pools, 32 bytes data, 5 objects
452 MB used, 59556 GB / 59556 GB avail
256 active+clean
* osd는 깔끔하게 제거됨. hdd 룰셋으로 편입된 ssd_pool은 hdd_osd에 데이터들이 배열되며, 추후에 ssd_osd 를 추가 시키고 해당 ssd 룰셋으로 변경 하게 되면 ssd_osd에 데이터들이 재배열됨.
* 사전 룰셋적용은 URL=ceph_rule 링크 에서 확인
iii. ssd_osd keyring 제거
* ceph auth list 에 등록된 키값과 권한 제거. 제거하지 않은 채로 추후에 osd를 추가하게 되면 osd.4, osd.5 를 건너뛰고 osd.6 부터 생성되기 때문에 관리가 지저분해짐.
[root@MGMT03:33:15:~]# ceph auth del osd.4
updated
[root@MGMT03:33:22:~]# ceph auth del osd.5
updated
iv. crushmap으로 현재값 확인
[root@MGMT03:52:56:~]# ceph osd getcrushmap -o /tmp/crushmap
got crush map from osdmap epoch 240
[root@MGMT03:53:12:~]# crushtool -d /tmp/crushmap -o /tmp/crushmap.txt
[root@MGMT03:53:16:~]# cat /tmp/crushmap.txt
# begin crush map
tunable choose_local_tries 0
tunable choose_local_fallback_tries 0
tunable choose_total_tries 50
tunable chooseleaf_descend_once 1
tunable chooseleaf_vary_r 1
tunable straw_calc_version 1
tunable allowed_bucket_algs 54
# devices
device 0 osd.0
device 1 osd.1
device 2 osd.2
device 3 osd.3
# types
type 0 osd
type 1 ssd_osd
type 2 hdd_osd
type 3 root
# buckets
hdd_osd OSD-0 {
id -10 # do not change unnecessarily
# weight 14.540
alg straw
hash 0 # rjenkins1
item osd.0 weight 14.540
}
hdd_osd OSD-1 {
id -11 # do not change unnecessarily
# weight 14.540
alg straw
hash 0 # rjenkins1
item osd.1 weight 14.540
}
hdd_osd OSD-2 {
id -12 # do not change unnecessarily
# weight 14.540
alg straw
hash 0 # rjenkins1
item osd.2 weight 14.540
}
hdd_osd OSD-3 {
id -13 # do not change unnecessarily
# weight 14.540
alg straw
hash 0 # rjenkins1
item osd.3 weight 14.540
}
root hdd {
id -1 # do not change unnecessarily
# weight 58.160
alg straw
hash 0 # rjenkins1
item OSD-0 weight 14.540
item OSD-1 weight 14.540
item OSD-2 weight 14.540
item OSD-3 weight 14.540
}
root ssd {
id -2 # do not change unnecessarily
# weight 0.000
alg straw
hash 0 # rjenkins1
}
# rules
rule hdd {
ruleset 0
type replicated
min_size 1
max_size 10
step take hdd
step chooseleaf firstn 0 type hdd_osd
step emit
}
rule ssd {
ruleset 1
type replicated
min_size 1
max_size 10
step take ssd
step chooseleaf firstn 0 type ssd_osd
step emit
}
# end crush map
* crushmap 을 통해 확인한 결과 룰과 버킷은 남아 있고, 클라이언트에서 명령어를 통해 체크해 보면 정상적으로 체크됨. 풀을 못 찾고 딜레이되는 현상은 발생하지 않음.
[ OSD 추가 (SATA,SSD) ]
* 자세한 osd 설치 내용은 URL=OSD_ceph설치 에서 확인
* 아래 install 명령어에 " --repo-url 'https://download.ceph.com/rpm-jewel/el7' --gpg-url 'https://download.ceph.com/keys/release.asc' " 옵션을 주는 이유: 옵션 없이 설치하면 가끔 repo에서 최신 버전을 설치해 버림;;; 그래서 강제적으로 10 버전(jewel) 저장소를 지정하고 진행.
i. OSD ceph 설치 및 구동
[root@MGMT03:41:43:~]# ceph-deploy install --repo-url 'https://download.ceph.com/rpm-jewel/el7' --gpg-url 'https://download.ceph.com/keys/release.asc' OSD-20 OSD-21
[root@MGMT03:41:43:~]# ceph-deploy disk zap OSD-20:sda OSD-21:sda
[root@MGMT03:41:43:~]# ceph-deploy osd prepare OSD-20:sda OSD-21:sda
[root@MGMT03:41:43:~]# ceph-deploy osd activate OSD-20:sda1 OSD-21:sda1
[root@MGMT03:42:52:~]# ceph osd tree
ID WEIGHT TYPE NAME UP/DOWN REWEIGHT PRIMARY-AFFINITY
-3 0.90579 root default
4 0.45380 osd.4 up 1.00000 1.00000
5 0.45200 osd.5 up 1.00000 1.00000
-2 0 root ssd
-1 58.15997 root hdd
-10 14.53999 hdd_osd OSD-0
0 14.53999 osd.0 up 1.00000 1.00000
-11 14.53999 hdd_osd OSD-1
1 14.53999 osd.1 up 1.00000 1.00000
-12 14.53999 hdd_osd OSD-2
2 14.53999 osd.2 up 1.00000 1.00000
-13 14.53999 hdd_osd OSD-3
3 14.53999 osd.3 up 1.00000 1.00000
ii. crushtool 을 이용하여 crushmap 적용
* 신규 osd 추가시 default 버킷이 같이 생겨버림. 명령어를 통해서 신규 osd를 ssd,hdd 버킷으로 나누어도 되지만 깔끔하게 crushtool 을 이용하여 진행.
* 수량, 타입에 따라 다르니 해당 내용은 가변적으로 적용하시길...
[root@MGMT03:42:58:~]# vi /tmp/crushmap.txt
# begin crush map
tunable choose_local_tries 0
tunable choose_local_fallback_tries 0
tunable choose_total_tries 50
tunable chooseleaf_descend_once 1
tunable chooseleaf_vary_r 1
tunable straw_calc_version 1
tunable allowed_bucket_algs 54
# devices
device 0 osd.0
device 1 osd.1
device 2 osd.2
device 3 osd.3
device 4 osd.4
device 5 osd.5
# types
type 0 osd
type 1 ssd_osd
type 2 hdd_osd
type 3 root
# buckets
hdd_osd OSD-0 {
id -10 # do not change unnecessarily
# weight 14.540
alg straw
hash 0 # rjenkins1
item osd.0 weight 14.540
}
hdd_osd OSD-1 {
id -11 # do not change unnecessarily
# weight 14.540
alg straw
hash 0 # rjenkins1
item osd.1 weight 14.540
}
hdd_osd OSD-2 {
id -12 # do not change unnecessarily
# weight 14.540
alg straw
hash 0 # rjenkins1
item osd.2 weight 14.540
}
hdd_osd OSD-3 {
id -13 # do not change unnecessarily
# weight 14.540
alg straw
hash 0 # rjenkins1
item osd.3 weight 14.540
}
root hdd {
id -1 # do not change unnecessarily
# weight 58.160
alg straw
hash 0 # rjenkins1
item OSD-0 weight 14.540
item OSD-1 weight 14.540
item OSD-2 weight 14.540
item OSD-3 weight 14.540
}
ssd_osd OSD-20 {
id -20 # do not change unnecessarily
# weight 0.454
alg straw
hash 0 # rjenkins1
item osd.4 weight 0.454
}
ssd_osd OSD-21 {
id -21 # do not change unnecessarily
# weight 0.454
alg straw
hash 0 # rjenkins1
item osd.5 weight 0.454
}
root ssd {
id -2 # do not change unnecessarily
# weight 0.908
alg straw
hash 0 # rjenkins1
item OSD-20 weight 0.454
item OSD-21 weight 0.454
}
# rules
rule hdd {
ruleset 0
type replicated
min_size 1
max_size 10
step take hdd
step chooseleaf firstn 0 type hdd_osd
step emit
}
rule ssd {
ruleset 1
type replicated
min_size 1
max_size 10
step take ssd
step chooseleaf firstn 0 type ssd_osd
step emit
}
# end crush map
[root@MGMT03:44:39:~]# crushtool -c /tmp/crushmap.txt -o /tmp/crushmap-new.bin
[root@MGMT03:45:35:~]# crushtool -c /tmp/crushmap.txt -o /tmp/crushmap.coloc
[root@MGMT03:45:39:~]# ceph osd setcrushmap -i /tmp/crushmap.coloc
set crush map
[root@MGMT03:45:45:~]# ceph osd tree
ID WEIGHT TYPE NAME UP/DOWN REWEIGHT PRIMARY-AFFINITY
-2 0.90799 root ssd
-20 0.45399 ssd_osd OSD-20
4 0.45399 osd.4 up 1.00000 1.00000
-21 0.45399 ssd_osd OSD-21
5 0.45399 osd.5 up 1.00000 1.00000
-1 58.15997 root hdd
-10 14.53999 hdd_osd OSD-0
0 14.53999 osd.0 up 1.00000 1.00000
-11 14.53999 hdd_osd OSD-1
1 14.53999 osd.1 up 1.00000 1.00000
-12 14.53999 hdd_osd OSD-2
2 14.53999 osd.2 up 1.00000 1.00000
-13 14.53999 hdd_osd OSD-3
3 14.53999 osd.3 up 1.00000 1.00000
iii. ssd_osd 추가시 ssd_pool ssd ruleset 변경
[root@MGMT03:46:19:~]# ceph osd pool set ssd_pool crush_ruleset 1
[root@MGMT03:46:25:~]# ceph osd dump | grep crush_ruleset
pool 0 'hdd_pool' replicated size 2 min_size 1 crush_ruleset 0 object_hash rjenkins pg_num 128 pgp_num 128 last_change 127 flags hashpspool stripe_width 0
pool 2 'ssd_pool' replicated size 2 min_size 1 crush_ruleset 1 object_hash rjenkins pg_num 128 pgp_num 128 last_change 225 flags hashpspool stripe_width 0
[root@MGMT03:47:47:~]# ceph -s
cluster 427f2e6a-5722-4365-a475-8fcdc218a418
health HEALTH_OK
monmap e2: 4 mons at {MON-0=192.168.1.13:6789/0,MON-1=192.168.1.14:6789/0,MON-2=192.168.1.15:6789/0,MON-3=192.168.1.16:6789/0}
election epoch 6, quorum 0,1,2,3 MON-0,MON-1,MON-2,MON-3
osdmap e224: 6 osds: 6 up, 6 in
flags sortbitwise,require_jewel_osds
pgmap v1673: 256 pgs, 2 pools, 32 bytes data, 5 objects
675 MB used, 60483 GB / 60484 GB avail
256 active+clean
* 시나리오 구성 완료