常用HBase Shell命令

创建表

1
2
/* 创建表,cf1,cf2为列族名 */
create 'table_name','cf1','cf2',...

写入数据

1
2
/* 插入或更新数据,cf为列族名,cq为列名,value为要写入的数据 */
put 'table_name', 'row_key', 'cf:cq', 'value'[, timestamp]

读取数据

1
2
3
4
5
6
7
8
/* 读取单行数据 */
get 'table_name', 'row_key'

/* 读取列族数据 */
get 'table_name', 'row_key', 'cf'

/* 读取单元格数据 */
get 'table_name', 'row_key', 'cf:cq'

删除数据

1
2
3
4
5
/* 删除列 */
delete 'table_name', 'row_key', 'cf:cq'[, 'timestamp]

/* 删除行 */
deleteall 'table_name', 'row_key'

全表扫描

1
2
/* 查看表的全部数据 */
scan 'table_name'

计算表的行数

1
count 'table_name'

其他常用命令

1
2
3
4
5
6
7
8
9
10
11
12
13
14
/* 使表有效 */
enable 'table_name'

/* 使表无效 */
disable 'table_name'

/* 删除表 */
drop 'table_name'

/* 查看所有表名 */
list

/* 查看所有可用命令 */
help

HBase Shell基本操作

  1. 查看当前namespace,创建一个新的namespace “test”,并设置最大建表数为10。(提示:hbase.namespace.quota.maxtables)

    HBase中没有库的概念,namespace就相当于库的概念,一个namespace中包含了很多table

    1. 查看当前namespace

      1. 使用list_namespace查看所有命名空间
      1
      2
      3
      4
      5
      6
      hbase:006:0> list_namespace
      NAMESPACE
      default
      hbase
      2 row(s)
      Took 0.0864 seconds

      可以发现当前有两个命名空间default和hbase,default命名空间为当前默认的命名空间,hbase命名空间为存放系统表的命名空间。

      1. 使用list_namespace_tables查看指定命名空间下的所有table
      1
      2
      3
      4
      5
      6
      7
      hbase:007:0> list_namespace_tables 'hbase'
      TABLE
      meta
      namespace
      2 row(s)
      Took 0.0709 seconds
      => ["meta", "namespace"]

      可以发现hbase下有两个表meta表与namespace表,namespace表存放了与命名空间相关的元数据的表 。meta存放了表和相关region元数据的表。

    2. 创建test的namespace

      1. 使用create_namespace创建命名空间

        1
        2
        3
        4
        5
        6
        7
        8
        9
        10
        hbase:002:0> create_namespace 'test'
        Took 1.5677 seconds

        hbase:003:0> list_namespace
        NAMESPACE
        default
        hbase
        test
        3 row(s)
        Took 0.0722 seconds

        可以看到创建好的test namespace

      2. 通过describe_namespace查看命名空间配置

        1
        2
        3
        4
        5
        hbase:004:0> describe_namespace 'test'
        DESCRIPTION
        {NAME => 'test'}
        Quota is disabled
        Took 0.3294 seconds

        可以看到test namespace下的所有配置信息

      3. 通过alter_namespace修改命名空间配置

        设置最大建表数为10

        1
        2
        3
        4
        5
        6
        7
        8
        hbase:009:0> alter_namespace 'test', {METHOD => 'set', 'hbase.namespace.quota.maxtables' => '10'}
        Took 0.2673 second

        hbase:010:0> describe_namespace 'test'
        DESCRIPTION
        {NAME => 'test', hbase.namespace.quota.maxtables => '10'}
        Quota is disabled
        Took 0.0595 seconds

        可以看到配置已经变更

  2. 创建一个表test01,有两个列族f1,f2,设置f1的最大版本数为5,设置表的预分区为3个;

    create 'test:test01', {NAME=>'f1', VERSIONS=>5}, 'f2', SPLITS => ['10','20','30']

    1
    2
    3
    4
    hbase:011:0> create 'test:test01', {NAME=>'f1', VERSIONS=>5}, 'f2', SPLITS => ['10','20','30']
    Created table test:test01
    Took 4.3689 seconds
    => Hbase::Table - test:test01

    通过desc查看表结构

    desc 'test:test01'

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    hbase:012:0> desc 'test:test01'
    Table test:test01 is ENABLED
    test:test01
    COLUMN FAMILIES DESCRIPTION
    {NAME => 'f1', BLOOMFILTER => 'ROW', IN_MEMORY => 'false', VERSIONS => '5', KEEP
    _DELETED_CELLS => 'FALSE', DATA_BLOCK_ENCODING => 'NONE', COMPRESSION => 'NONE',
    TTL => 'FOREVER', MIN_VERSIONS => '0', BLOCKCACHE => 'true', BLOCKSIZE => '6553
    6', REPLICATION_SCOPE => '0'}

    {NAME => 'f2', BLOOMFILTER => 'ROW', IN_MEMORY => 'false', VERSIONS => '1', KEEP
    _DELETED_CELLS => 'FALSE', DATA_BLOCK_ENCODING => 'NONE', COMPRESSION => 'NONE',
    TTL => 'FOREVER', MIN_VERSIONS => '0', BLOCKCACHE => 'true', BLOCKSIZE => '6553
    6', REPLICATION_SCOPE => '0'}

    2 row(s)
    Quota is disabled
    Took 0.2004 seconds
  3. 创建另一个表test02,有两个列族g1,g2;

    create 'test:test02', 'g1', 'g2'

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    hbase:013:0> create 'test:test02', 'g1', 'g2'
    Created table test:test02
    Took 1.1753 seconds
    => Hbase::Table - test:test02

    hbase:017:0> desc 'test:test02'
    Table test:test02 is ENABLED
    test:test02
    COLUMN FAMILIES DESCRIPTION
    {NAME => 'g1', BLOOMFILTER => 'ROW', IN_MEMORY => 'false', VERSIONS => '1', KEEP
    _DELETED_CELLS => 'FALSE', DATA_BLOCK_ENCODING => 'NONE', COMPRESSION => 'NONE',
    TTL => 'FOREVER', MIN_VERSIONS => '0', BLOCKCACHE => 'true', BLOCKSIZE => '6553
    6', REPLICATION_SCOPE => '0'}

    {NAME => 'g2', BLOOMFILTER => 'ROW', IN_MEMORY => 'false', VERSIONS => '1', KEEP
    _DELETED_CELLS => 'FALSE', DATA_BLOCK_ENCODING => 'NONE', COMPRESSION => 'NONE',
    TTL => 'FOREVER', MIN_VERSIONS => '0', BLOCKCACHE => 'true', BLOCKSIZE => '6553
    6', REPLICATION_SCOPE => '0'}

    2 row(s)
    Quota is disabled
    Took 0.0637 seconds
  4. 查看namespace test 中的表;

    使用list查看

    1
    2
    3
    4
    5
    6
    7
    hbase:018:0> list
    TABLE
    test:test01
    test:test02
    2 row(s)
    Took 0.0113 seconds
    => ["test:test01", "test:test02"]
  5. 删除表test02中的g2列;

    alter 'test:test02', NAME=>'g2', METHOD=>'delete'

    或者alter 'test:test02, 'delete'=>'g2'

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    hbase:019:0> alter 'test:test02', NAME=>'g2', METHOD=>'delete'
    Updating all regions with the new schema...
    1/1 regions updated.
    Done.
    Took 2.4014 seconds

    hbase:020:0> desc 'test:test02'
    Table test:test02 is ENABLED
    test:test02
    COLUMN FAMILIES DESCRIPTION
    {NAME => 'g1', BLOOMFILTER => 'ROW', IN_MEMORY => 'false', VERSIONS => '1', KEEP
    _DELETED_CELLS => 'FALSE', DATA_BLOCK_ENCODING => 'NONE', COMPRESSION => 'NONE',
    TTL => 'FOREVER', MIN_VERSIONS => '0', BLOCKCACHE => 'true', BLOCKSIZE => '6553
    6', REPLICATION_SCOPE => '0'}

    1 row(s)
    Quota is disabled
    Took 0.0640 seconds
  6. 修改表test02的的属性MAX_FILESIZE为256128256;

    alter 'test:test02', MAX_FILESIZE='256128256'

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    hbase:022:0> alter 'test:test02', MAX_FILESIZE='256128256'
    (hbase):22: warning: already initialized constant MAX_FILESIZE
    Updating all regions with the new schema...
    1/1 regions updated.
    Done.
    Took 1.9905 seconds

    hbase:023:0> desc 'test:test02'
    Table test:test02 is ENABLED
    test:test02
    COLUMN FAMILIES DESCRIPTION
    {NAME => '256128256', BLOOMFILTER => 'ROW', IN_MEMORY => 'false', VERSIONS => '1
    ', KEEP_DELETED_CELLS => 'FALSE', DATA_BLOCK_ENCODING => 'NONE', COMPRESSION =>
    'NONE', TTL => 'FOREVER', MIN_VERSIONS => '0', BLOCKCACHE => 'true', BLOCKSIZE =
    > '65536', REPLICATION_SCOPE => '0'}

    {NAME => 'g1', BLOOMFILTER => 'ROW', IN_MEMORY => 'false', VERSIONS => '1', KEEP
    _DELETED_CELLS => 'FALSE', DATA_BLOCK_ENCODING => 'NONE', COMPRESSION => 'NONE',
    TTL => 'FOREVER', MIN_VERSIONS => '0', BLOCKCACHE => 'true', BLOCKSIZE => '6553
    6', REPLICATION_SCOPE => '0'}

    2 row(s)
    Quota is disabled
    Took 0.0680 seconds
  7. 为表test02增加一列s1,设置s1的最大版本数为5;

    alter 'test:test02', {NAME=>'s1', VERSIONS=>'5'}

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    27
    28
    hbase:001:0> alter 'test:test02', {NAME=>'s1', VERSIONS=>'5'}
    Updating all regions with the new schema...
    1/1 regions updated.
    Done.
    Took 3.4812 seconds

    hbase:002:0> desc 'test:test02'
    Table test:test02 is ENABLED
    test:test02
    COLUMN FAMILIES DESCRIPTION
    {NAME => '256128256', BLOOMFILTER => 'ROW', IN_MEMORY => 'false', VERSIONS => '1
    ', KEEP_DELETED_CELLS => 'FALSE', DATA_BLOCK_ENCODING => 'NONE', COMPRESSION =>
    'NONE', TTL => 'FOREVER', MIN_VERSIONS => '0', BLOCKCACHE => 'true', BLOCKSIZE =
    > '65536', REPLICATION_SCOPE => '0'}

    {NAME => 'g1', BLOOMFILTER => 'ROW', IN_MEMORY => 'false', VERSIONS => '1', KEEP
    _DELETED_CELLS => 'FALSE', DATA_BLOCK_ENCODING => 'NONE', COMPRESSION => 'NONE',
    TTL => 'FOREVER', MIN_VERSIONS => '0', BLOCKCACHE => 'true', BLOCKSIZE => '6553
    6', REPLICATION_SCOPE => '0'}

    {NAME => 's1', BLOOMFILTER => 'ROW', IN_MEMORY => 'false', VERSIONS => '5', KEEP
    _DELETED_CELLS => 'FALSE', DATA_BLOCK_ENCODING => 'NONE', COMPRESSION => 'NONE',
    TTL => 'FOREVER', MIN_VERSIONS => '0', BLOCKCACHE => 'true', BLOCKSIZE => '6553
    6', REPLICATION_SCOPE => '0'}

    3 row(s)
    Quota is disabled
    Took 0.0701 seconds
  8. 删除表test02;

    disable 'test:test02'

    drop 'test:test02'

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    hbase:004:0> disable 'test:test02'
    Took 0.7600 seconds

    hbase:005:0> drop 'test:test02'
    Took 0.4076 seconds

    hbase:006:0> list
    TABLE
    test:test01
    1 row(s)
    Took 0.0251 seconds
    => ["test:test01"]
    hbase:007:0>
  9. 为表test01添加两行数据:

    r01 f1:name zhang
    r01 f1:num 10
    r01 f2:addr shanghai
    r02 f1:name wang
    r02 f2:addr hangzhou

    1
    2
    3
    4
    5
    put 'test:test01', 'r01', 'f1:name', 'zhang'
    put 'test:test01', 'r01', 'f1:num', '10'
    put 'test:test01', 'r01', 'f1:addr', 'shanghai'
    put 'test:test01', 'r02', 'f1:name', 'wang'
    put 'test:test01', 'r02', 'f2:addr', 'hangzhou'
  10. 全表扫描数据;

    scan 'test:test01'

    1
    2
    3
    4
    5
    6
    7
    8
    9
    hbase:013:0> scan 'test:test01'
    ROW COLUMN+CELL
    r01 column=f1:addr, timestamp=2022-04-24T11:01:28.736, value=shanghai
    r01 column=f1:name, timestamp=2022-04-24T11:00:11.237, value=zhang
    r01 column=f1:num, timestamp=2022-04-24T11:01:28.584, value=10
    r02 column=f1:name, timestamp=2022-04-24T11:01:28.795, value=wang
    r02 column=f2:addr, timestamp=2022-04-24T11:01:30.250, value=hangzhou
    2 row(s)
    Took 0.0399 seconds
  11. 查看r02行的name列数据;

    get 'test:test01', 'r02', 'f1:name'

    1
    2
    3
    4
    5
    hbase:014:0> get 'test:test01', 'r02', 'f1:name'
    COLUMN CELL
    f1:name timestamp=2022-04-24T11:01:28.795, value=wang
    1 row(s)
    Took 0.0387 seconds
  12. 删除r02行f2列的数据;

    delete 'test:test01', 'r02', 'f2'

    1
    2
    hbase:015:0> delete 'test:test01', 'r02', 'f2'
    Took 0.0252 seconds
  13. 清空表test01的数据。

    注意第二个仅清除数据,第一个会清除表分区。

    truncate 'test:test01'

    1
    2
    3
    4
    5
    hbase:016:0> truncate_preserve 'test:test01'
    Truncating 'test:test01' table (it may take a while):
    Disabling table...
    Truncating table...
    Took 3.4415 seconds

HBase简单应用

  1. 现有以下关系型数据库中的表和数据,要求将其转换为适合于HBase存储的表,绘出表格,然后插入数据,并查看数据。建议用列族的方式来创建。

表1 学生表(Student)

学号(S_No) 姓名(S_Name) 年龄(S_Age)
2018001 Lily 21
2018002 Jacky 22
2018003 Mouse 21

表2 课程表(Course)

课程号(C_No) 课程名(C_Name)
123001 English
123002 Computer

表3 选课表(SC)

学号(SC_Sno) 课程号(SC_Cno) 成绩(SC_Score)
2018001 123001 89
2018001 123002 78
2018002 123001 90
2018002 123002 69
2018003 123001 78
2018003 1230023 65

提示:

构造的HBase表格可以为(仅供参考):

(1) 主键的列名是随机分配的,因此无需创建主键列。

(2) 创建表:create 表名,字段名1 / 列族1,字段名2 / 列族2,……

create ‘scs’, ‘basic_info’, ‘score’

(3) 插入数据:
        put 表名,rowkey,字段名1,字段值1
        put 表名,rowkey,字段名2,字段值2
        …

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
put 'scs', 's001', 'basic_info:sno', '2018001'
put 'scs', 's002', 'basic_info:sno', '2018002'
put 'scs', 's003', 'basic_info:sno', '2018003'

put 'scs', 's001', 'basic_info:sname', 'Lily'
put 'scs', 's002', 'basic_info:sname', 'Jacky'
put 'scs', 's003', 'basic_info:sname', 'Mouse'

put 'scs', 's001', 'basic_info:sage', '21'
put 'scs', 's002', 'basic_info:sage', '22'
put 'scs', 's003', 'basic_info:sage', '21'

put 'scs', 's001', 'score:English', '89'
put 'scs', 's001', 'score:Computer', '78'

put 'scs', 's002', 'score:English', '90'
put 'scs', 's002', 'score:Computer', '69'

put 'scs', 's003', 'score:English', '78'
put 'scs', 's003', 'score:Computer', '65'

(4) 查看数据:scan 表名

scan 'scs'

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
hbase:045:0> scan 'scs'
ROW COLUMN+CELL
s001 column=basic_info:sage, timestamp=2022-04-24T11:46:08.761, value=21
s001 column=basic_info:sname, timestamp=2022-04-24T11:46:08.627, value=Lily
s001 column=basic_info:sno, timestamp=2022-04-24T11:46:08.466, value=2018001
s001 column=score:Computer, timestamp=2022-04-24T11:46:08.951, value=78
s001 column=score:English, timestamp=2022-04-24T11:46:08.921, value=89
s002 column=basic_info:sage, timestamp=2022-04-24T11:46:08.800, value=22
s002 column=basic_info:sname, timestamp=2022-04-24T11:46:08.667, value=Jacky
s002 column=basic_info:sno, timestamp=2022-04-24T11:46:08.520, value=2018002
s002 column=score:Computer, timestamp=2022-04-24T11:46:09.035, value=69
s002 column=score:English, timestamp=2022-04-24T11:46:09.003, value=90
s003 column=basic_info:sage, timestamp=2022-04-24T11:46:08.858, value=21
s003 column=basic_info:sname, timestamp=2022-04-24T11:46:08.706, value=Mouse
s003 column=basic_info:sno, timestamp=2022-04-24T11:46:08.570, value=2018003
s003 column=score:Computer, timestamp=2022-04-24T11:46:10.323, value=65
s003 column=score:English, timestamp=2022-04-24T11:46:09.094, value=78
3 row(s)
Took 0.0259 seconds
  1. 如果在学生表中为学生增加一项“联系电话(S_Tel)”,如何完成?插入数据后,查看数据。(提示:使用列族,添加列限定符)

    put 'scs', 's001', 'basic_info:tel', '12345'

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    hbase:046:0> put 'scs', 's001', 'basic_info:tel', '12345'
    Took 0.0117 seconds

    hbase:048:0> get 'scs', 's001'
    COLUMN CELL
    basic_info:sage timestamp=2022-04-24T11:46:08.761, value=21
    basic_info:sname timestamp=2022-04-24T11:46:08.627, value=Lily
    basic_info:sno timestamp=2022-04-24T11:46:08.466, value=2018001
    basic_info:tel timestamp=2022-04-24T11:48:32.278, value=12345
    score:Computer timestamp=2022-04-24T11:46:08.951, value=78
    score:English timestamp=2022-04-24T11:46:08.921, value=89
    1 row(s)
    Took 0.0154 seconds
  2. 若查看rowkey为“s001”(值是示例,根据你自己设置的rowkey值进行查询)的所有课程成绩(SC表),如何完成?(提示:get 表名 key值)

    get 'scs', 's001', 'score'

    1
    2
    3
    4
    5
    6
    hbase:050:0> get 'scs', 's001', 'score'
    COLUMN CELL
    score:Computer timestamp=2022-04-24T11:46:08.951, value=78
    score:English timestamp=2022-04-24T11:46:08.921, value=89
    1 row(s)
    Took 0.0104 seconds
  3. 思考如何对成绩表按照成绩进行降序排序呢?(仅写出解决思路即可)

<不会>

HBase与传统关系型数据库的对比