[root@stor01 ~]# rbd create kube/foo --size 1G
[root@stor01 ~]# rbd info kube/foo
rbd image 'foo':
size 1 GiB in 256 objects
order 22 (4 MiB objects)
snapshot_count: 0
id: 6067311e5e2c
block_name_prefix: rbd_data.6067311e5e2c
format: 2
features: layering, exclusive-lock, object-map, fast-diff, deep-flatten
op_features:
flags:
create_timestamp: Mon Oct 11 13:57:23 2021
access_timestamp: Mon Oct 11 13:57:23 2021
modify_timestamp: Mon Oct 11 13:57:23 2021
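A quick sanity check of the header above (plain shell arithmetic, not part of the captured session): order 22 means each object is 2^22 bytes = 4 MiB, so:
echo $((1024 * 1024 * 1024 / (1 << 22)))   # 1 GiB / 4 MiB = 256, matching "size 1 GiB in 256 objects"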
[root@stor01 ~]# rados ls -p kube
rbd_object_map.6067311e5e2c # the part after the dot is the image id, which is also part of the prefix used in the data object names
rbd_header.6067311e5e2c
rbd_id.foo # foo is the image name; this object's contents hold the image id, which we can inspect with hexdump -vC rbd_id.foo after fetching it
rbd_directory
rbd_info
[root@stor01 test]# rados get rbd_id.foo rbd_id.foo -p kube
[root@stor01 test]# hexdump -vC rbd_id.foo
00000000 0c 00 00 00 36 30 36 37 33 31 31 65 35 65 32 63 |....6067311e5e2c|
00000010
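# The leading '0c 00 00 00' is a little-endian 32-bit length (0x0c = 12) followed by the 12-byte ASCII image id -- a length-prefixed string.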
[root@stor01 test]# rbd feature disable kube/foo object-map fast-diff deep-flatten
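# Note: the kernel rbd client does not support object-map, fast-diff or deep-flatten (at least on older kernels), so these features must be disabled before the image can be mapped.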
[root@stor01 test]# rbd map kube/foo
/dev/rbd0
[root@stor01 test]# mkfs.xfs /dev/rbd0
meta-data=/dev/rbd0 isize=512 agcount=8, agsize=32768 blks
= sectsz=512 attr=2, projid32bit=1
= crc=1 finobt=0, sparse=0
data = bsize=4096 blocks=262144, imaxpct=25
= sunit=1024 swidth=1024 blks
naming =version 2 bsize=4096 ascii-ci=0 ftype=1
log =internal log bsize=4096 blocks=2560, version=2
= sectsz=512 sunit=8 blks, lazy-count=1
realtime =none extsz=4096 blocks=0, rtextents=0
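The mkfs geometry is consistent with the 1 GiB image (again plain shell arithmetic, not from the session):
echo $((8 * 32768 * 4096))   # agcount * agsize(blks) * bsize = 1073741824 bytes = 1 GiB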
[root@stor01 test]# mkdir /foo
[root@stor01 test]# mount /dev/rbd0 /foo
[root@stor01 test]# df -h
Filesystem Size Used Avail Use% Mounted on
devtmpfs 3.9G 0 3.9G 0% /dev
......
/dev/rbd0 1014M 33M 982M 4% /foo
[root@stor01 test]# rados -p kube ls |sort
rbd_children
rbd_data.6067311e5e2c.0000000000000000 # after formatting as xfs only 12 data objects exist, not 256 -- one confirmation of RBD's thin provisioning (see the note after this list on why these particular indexes appear)
rbd_data.6067311e5e2c.0000000000000001
rbd_data.6067311e5e2c.0000000000000020
rbd_data.6067311e5e2c.0000000000000040
rbd_data.6067311e5e2c.0000000000000060
rbd_data.6067311e5e2c.0000000000000080
rbd_data.6067311e5e2c.0000000000000081
rbd_data.6067311e5e2c.0000000000000082
rbd_data.6067311e5e2c.00000000000000a0
rbd_data.6067311e5e2c.00000000000000c0
rbd_data.6067311e5e2c.00000000000000e0
rbd_data.6067311e5e2c.00000000000000ff
rbd_directory
rbd_header.6067311e5e2c
rbd_id.foo
rbd_info
rbd_trash
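Notice the pattern of the object indexes: each xfs allocation group is 32768 blocks x 4 KB = 128 MiB, which spans 32 (0x20) of the 4 MiB objects, so the AG headers land on objects 0x00, 0x20, 0x40, ... 0xe0 -- exactly the indexes listed above. This is my reading of the output, worth verifying yourself:
echo $((32768 * 4096 / (1 << 22)))   # blocks-per-AG * block size / object size = 32 = 0x20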
[root@stor01 test]# rados get rbd_data.6067311e5e2c.0000000000000000 rbd_data.6067311e5e2c.0000000000000000 -p kube
[root@stor01 test]# rados get rbd_data.6067311e5e2c.0000000000000001 rbd_data.6067311e5e2c.0000000000000001 -p kube
[root@stor01 test]# hexdump -vC rbd_data.6067311e5e2c.0000000000000000 |head
00000000 58 46 53 42 00 00 10 00 00 00 00 00 00 04 00 00 |XFSB............| # 'XFSB' is the XFS superblock magic, confirming the filesystem is xfs
00000010 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 |................|
00000020 d7 ee 15 24 2b ce 4e ac 84 63 03 37 cf a1 07 96 |...$+.N..c.7....|
00000030 00 00 00 00 00 02 00 08 00 00 00 00 00 00 20 00 |.............. .|
00000040 00 00 00 00 00 00 20 01 00 00 00 00 00 00 20 02 |...... ....... .|
00000050 00 00 00 01 00 00 80 00 00 00 00 08 00 00 00 00 |................|
00000060 00 00 0a 00 b5 a5 02 00 02 00 00 08 00 00 00 00 |................|
00000070 00 00 00 00 00 00 00 00 0c 09 09 03 0f 00 00 19 |................|
00000080 00 00 00 00 00 00 00 40 00 00 00 00 00 00 00 3d |.......@.......=|
00000090 00 00 00 00 00 03 f5 d8 00 00 00 00 00 00 00 00 |................|
# Guess: xfs keeps its core filesystem metadata in the very first 128 KB of the block device.
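# If you want to check that guess, xfs_db can print the superblock straight from the device (hypothetical follow-up, not run in this session; -r for read-only since the fs is mounted):
# xfs_db -r -c "sb 0" -c print /dev/rbd0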
[root@stor01 test]# hexdump -vC rbd_data.6067311e5e2c.0000000000000001 |head
00000000 49 4e 41 ed 03 01 00 00 00 00 00 00 00 00 00 00 |INA.............| # guess: 'IN' marks an xfs inode
00000010 00 00 00 02 00 00 00 00 00 00 00 00 00 00 00 00 |................|
00000020 00 00 00 00 00 00 00 00 61 63 de 9a 18 8d a1 18 |........ac......|
00000030 61 63 de 9a 18 8d a1 18 00 00 00 00 00 00 00 06 |ac..............|
00000040 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 |................|
00000050 00 00 00 02 00 00 00 00 00 00 00 00 00 00 00 00 |................|
00000060 ff ff ff ff a3 b4 4a 1f 00 00 00 00 00 00 00 02 |......J.........|
00000070 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 |................|
00000080 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 |................|
00000090 61 63 de 9a 18 8d a1 18 00 00 00 00 00 00 20 00 |ac............ .|
[root@stor01 test]# hexdump -vC rbd_data.6067311e5e2c.0000000000000001 |grep IN|wc -l
64
[root@stor01 test]# hexdump -vC rbd_data.6067311e5e2c.0000000000000002| grep IN |wc -l
64 # objects 0x01 and 0x02 each contain 64 'IN' inode markers, all within the byte range [0x00000000, 0x00008000); 0x8000 bytes is 32 KB, so within each of these 4 MiB objects the inodes occupy the first 32 KB.
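# That count is self-consistent: with isize=512 from mkfs, 64 inodes x 512 bytes = 32768 bytes = 0x8000, i.e. exactly the first 32 KB.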
[root@stor01 test]# du -sh *
128K rbd_data.6067311e5e2c.0000000000000000 # not 4 MB yet -- don't worry, the objects will grow once files are written
32K rbd_data.6067311e5e2c.0000000000000001
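Thin provisioning can also be confirmed from the rbd side (hypothetical check, output will vary, and exact accounting needs the fast-diff feature we disabled):
rbd du kube/foo   # shows 1 GiB provisioned vs. the much smaller amount actually used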
Now let's fill foo up:
[root@stor01 test]# dd if=/dev/zero of=/foo/full-me
dd: writing to '/foo/full-me': No space left on device
2010265+0 records in
2010264+0 records out
1029255168 bytes (1.0 GB) copied, 6.52614 s, 158 MB/s
[root@stor01 mnt]# rados ls -p kube |head
rbd_data.6067311e5e2c.00000000000000a8
rbd_data.6067311e5e2c.000000000000009f
rbd_data.6067311e5e2c.00000000000000ad
rbd_data.6067311e5e2c.00000000000000ec
rbd_data.6067311e5e2c.0000000000000003
rbd_data.6067311e5e2c.000000000000002b
rbd_data.6067311e5e2c.00000000000000d1
rbd_data.6067311e5e2c.000000000000001d
rbd_data.6067311e5e2c.0000000000000017
rbd_data.6067311e5e2c.00000000000000fd
[root@stor01 mnt]# rados ls -p kube |grep rbd_data.6067311e5e2c |wc -l
256 # all 256 data objects now exist
[root@stor01 mnt]# rados get rbd_data.6067311e5e2c.00000000000000a8 rbd_data.6067311e5e2c.00000000000000a8 -p kube
[root@stor01 mnt]# du -sh rbd_data.6067311e5e2c.00000000000000a8
4.0M rbd_data.6067311e5e2c.00000000000000a8 # the object has now grown to its full 4 MB
# Delete the file (which raises a question: after a file is deleted, is the data inside the objects removed too?)
[root@stor01 foo]# rm -rf /foo/full-me
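# A hedged answer to the question above: rm alone only updates filesystem metadata, so the object data normally stays in RADOS; if the kernel client supports discard, something like the following would tell Ceph to reclaim the unused extents (hypothetical here, not run in this session):
# fstrim -v /foo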
[root@ceph ~]# echo '111111111111111111111111111111111111111111' > /foo/file1.txt
[root@ceph ~]# echo '222222222222222222222222222222222222222222' > /foo/file2.txt
[root@ceph ~]# echo '333333333333333333333333333333333333333333' > /foo/file3.txt
[root@stor01 mnt]# rados get rbd_data.6067311e5e2c.0000000000000001 rbd_data.6067311e5e2c.0000000000000001 -p kube
# I exported only object 0x01 here because I had previously exported all of the objects; comparing before and after writing the files showed that only this object grew in size, and a diff quickly located the written content. (File data is in fact written starting in object 0x01, right after the inode region -- try it yourself.)
[root@stor01 mnt]# hexdump -vC rbd_data.6067311e5e2c.0000000000000001 |grep -E '1111111|222222|333333'
00008000 31 31 31 31 31 31 31 31 31 31 31 31 31 31 31 31 |1111111111111111|
00008010 31 31 31 31 31 31 31 31 31 31 31 31 31 31 31 31 |1111111111111111|
00008020 31 31 31 31 31 31 31 31 31 31 0a 00 00 00 00 00 |1111111111......|
00009000 32 32 32 32 32 32 32 32 32 32 32 32 32 32 32 32 |2222222222222222|
00009010 32 32 32 32 32 32 32 32 32 32 32 32 32 32 32 32 |2222222222222222|
00009020 32 32 32 32 32 32 32 32 32 32 0a 00 00 00 00 00 |2222222222......|
0000a000 33 33 33 33 33 33 33 33 33 33 33 33 33 33 33 33 |3333333333333333|
0000a010 33 33 33 33 33 33 33 33 33 33 33 33 33 33 33 33 |3333333333333333|
0000a020 33 33 33 33 33 33 33 33 33 33 0a 00 00 00 00 00 |3333333333......|
# The spacing from 0x00008000 to 0x00009000 is 0x1000 = 4096 bytes = 4 KB, matching the block size (bsize=4096) from mkfs.xfs above.
# Now let's modify data on /dev/rbd0 directly with dd and see whether the file changes accordingly:
# Target offset 0x00009000 inside the 2nd object; first compute that position's offset within /dev/rbd0: 4 MB + 0x9000 = 4*1024*1024 + 36864 = 4231168
# dd if=Ceph.txt of=/dev/rbd0 seek=4231168 oflag=seek_bytes # if 'oflag=seek_bytes' is specified, the seek count is interpreted in bytes
# The command above writes the contents of Ceph.txt into /dev/rbd0: it first seeks 4231168 bytes, then writes; dd exits once the file contents are written.
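# Verifying the offset arithmetic (plain shell, not from the session): echo $((4 * 1024 * 1024 + 0x9000)) prints 4231168.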
[root@stor01 ~]# echo Ceph>Ceph.txt
[root@stor01 ~]# dd if=Ceph.txt of=/dev/rbd0 seek=4231168 oflag=seek_bytes
0+1 records in
0+1 records out
5 bytes (5 B) copied, 0.00166521 s, 3.0 kB/s # 5 bytes written ('Ceph' plus a newline)
[root@stor01 ~]# hexdump -Cv /dev/rbd0 -n 100 -s 0x409000
00409000 43 65 70 68 0a 32 32 32 32 32 32 32 32 32 32 32 |Ceph.22222222222|
00409010 32 32 32 32 32 32 32 32 32 32 32 32 32 32 32 32 |2222222222222222|
00409020 32 32 32 32 32 32 32 32 32 32 0a 00 00 00 00 00 |2222222222......|
00409030 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 |................|
00409040 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 |................|
00409050 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 |................|
00409060 00 00 00 00 |....|
00409064
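# 0x409000 is exactly 4231168 in decimal, so the 'Ceph' string landed at the computed offset, overwriting the start of file2.txt's data block.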
[root@stor01 foo]# sync && echo 1 > /proc/sys/vm/drop_caches # the page cache must be dropped before the change becomes visible through the filesystem
[root@stor01 foo]# cat file2.txt
Ceph
2222222222222222222222222222222222222