
How to maintain HBase after disabling automatic splits

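The walkthrough below assumes automatic region splitting has already been turned off; the original post does not show that step. As a hedged sketch, one common way is to set the split policy to DisabledRegionSplitPolicy, either cluster-wide via the hbase.regionserver.region.split.policy property in hbase-site.xml, or per table at creation time (shell syntax may differ slightly between HBase versions):

-- sketch only: disable automatic splits for one table at create time ('mytable'/'cf' are example names)
hbase> create 'mytable', {METADATA => {'SPLIT_POLICY' => 'org.apache.hadoop.hbase.regionserver.DisabledRegionSplitPolicy'}}, {NAME => 'cf'}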

1. Create an HBase table

--create the table
hbase(main):003:0> create 'test','cf'

--check the HDFS directory; at this point the cf directory contains no data
[root@node1 ~]# hadoop fs -ls /hbase/data/default/test
Found 3 items
drwxr-xr-x   - root supergroup          0 2020-01-17 11:23 /hbase/data/default/test/.tabledesc
drwxr-xr-x   - root supergroup          0 2020-01-17 11:23 /hbase/data/default/test/.tmp
drwxr-xr-x   - root supergroup          0 2020-01-17 11:23 /hbase/data/default/test/59c4ce9f21361f871b64ce86a9a66508

[root@node1 ~]# hdfs dfs -ls -R /hbase/data/default/test/59c4ce9f21361f871b64ce86a9a66508/cf


2. Insert data

--put one row
hbase(main):005:0> put 'test','user1','cf:name','zhang'

--check the cf directory again: still no file, because the new data only lives in the MemStore (and the WAL) at this point
[root@node1 ~]# hdfs dfs -ls -R /hbase/data/default/test/59c4ce9f21361f871b64ce86a9a66508/cf


3. flush

--flush the table
hbase(main):006:0> flush 'test'

--now there is a file under the cf directory; this is one storefile (HFile)
[root@node1 ~]# hdfs dfs -ls -R /hbase/data/default/test/59c4ce9f21361f871b64ce86a9a66508/cf
-rw-r--r--   3 root supergroup       4916 2020-01-17 11:28 /hbase/data/default/test/59c4ce9f21361f871b64ce86a9a66508/cf/6a15799391e84e2689969b3eb461330d


Now insert some more data and flush again.

--put two more values
hbase(main):007:0> put 'test','user1','cf:name','zhang1'
hbase(main):010:0> put 'test','user1','cf:name','zhang123'

hbase(main):008:0> flush 'test'

--check again: there are now three storefiles
[root@node1 ~]# hdfs dfs -ls -R /hbase/data/default/test/59c4ce9f21361f871b64ce86a9a66508/cf
-rw-r--r--   3 root supergroup       4916 2020-01-17 11:28 /hbase/data/default/test/59c4ce9f21361f871b64ce86a9a66508/cf/6a15799391e84e2689969b3eb461330d
-rw-r--r--   3 root supergroup       4917 2020-01-17 13:45 /hbase/data/default/test/59c4ce9f21361f871b64ce86a9a66508/cf/9cd91088cea440beafec10548e5a0bae
-rw-r--r--   3 root supergroup       4919 2020-01-17 13:47 /hbase/data/default/test/59c4ce9f21361f871b64ce86a9a66508/cf/ff5c2ca27140490e8f9d10c0e3eae491

--scan the table
hbase(main):012:0> scan 'test'
ROW                                          COLUMN+CELL                                                                                                                    
 user1                                       column=cf:name, timestamp=1579240055064, value=zhang123 


--Although the cf directory now holds three storefiles, the scan returns only one row. Each flush writes a new,
--immutable file rather than updating data in place, which is why HBase writes are fast (sequential I/O), while
--reads are somewhat slower (though not dramatically so), since they must merge the MemStore and several storefiles.
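The older values are still physically present in the earlier storefiles until a major compaction purges excess versions; whether a scan can return them depends on the column family's VERSIONS setting. A hedged example, not from the original post, that asks for up to three versions per cell:

-- request up to 3 versions of each cell; how many come back is capped by the CF's VERSIONS setting
hbase> scan 'test', {VERSIONS => 3}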


4. compact

--run a compaction on table test
hbase(main):013:0> compact 'test'


--the files under the cf directory have been merged back into a single storefile
[root@node1 ~]# hdfs dfs -ls -R /hbase/data/default/test/59c4ce9f21361f871b64ce86a9a66508/cf
-rw-r--r--   3 root supergroup       4919 2020-01-17 13:57 /hbase/data/default/test/59c4ce9f21361f871b64ce86a9a66508/cf/00a68679a32e4d359f8daeb0be4321c2
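Here the three small files happened to be merged into one. Note that compact only requests a compaction (normally a minor one); to force every storefile of each store to be rewritten into a single file and to purge deleted cells and excess versions, a major compaction can be requested explicitly:

-- force a major compaction of the whole table
hbase> major_compact 'test'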


5. split

--check HDFS; 59c4ce9f21361f871b64ce86a9a66508 is the directory of a single region
[root@node1 ~]# hdfs dfs -ls /hbase/data/default/test
Found 3 items
drwxr-xr-x   - root supergroup          0 2020-01-17 11:23 /hbase/data/default/test/.tabledesc
drwxr-xr-x   - root supergroup          0 2020-01-17 11:23 /hbase/data/default/test/.tmp
drwxr-xr-x   - root supergroup          0 2020-01-17 11:28 /hbase/data/default/test/59c4ce9f21361f871b64ce86a9a66508


--insert some more rows
hbase(main):018:0> put 'test','user2','cf:name','zhang1'

hbase(main):019:0> put 'test','user3','cf:name','zhang1'

hbase(main):020:0> put 'test','user4','cf:name','zhang1'

hbase(main):021:0> put 'test','user5','cf:name','zhang1'

hbase(main):022:0> put 'test','user6','cf:name','zhang1'


--split the region, using rowkey user4 as the split point; the first daughter region covers the rows before user4
--and the second starts at user4 (region ranges are left-closed, right-open)
hbase(main):024:0> split 'test','user4'


--check HDFS again: the original region has been split into two daughter regions; the old region directory will be cleaned up automatically after a while
[root@node1 ~]# hdfs dfs -ls /hbase/data/default/test
Found 5 items
drwxr-xr-x   - root supergroup          0 2020-01-17 11:23 /hbase/data/default/test/.tabledesc
drwxr-xr-x   - root supergroup          0 2020-01-17 11:23 /hbase/data/default/test/.tmp
drwxr-xr-x   - root supergroup          0 2020-01-17 14:10 /hbase/data/default/test/59c4ce9f21361f871b64ce86a9a66508
drwxr-xr-x   - root supergroup          0 2020-01-17 14:10 /hbase/data/default/test/c4d08147e30b00058ced15b8547260a4
drwxr-xr-x   - root supergroup          0 2020-01-17 14:10 /hbase/data/default/test/c6391f28bc8acdcb12bf185dc45596b9
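Besides inspecting the HDFS directories, the resulting regions and their boundaries can be checked from the HBase shell. As a hedged example, on reasonably recent HBase versions list_regions shows the start and end key of every region of the table; on older versions the same information can be read from hbase:meta:

-- show the regions of 'test' with their start and end keys (available in newer HBase shells)
hbase> list_regions 'test'

-- or look at the region rows in the meta table
hbase> scan 'hbase:meta', {FILTER => "PrefixFilter('test,')"}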


Compactions and splits can also be triggered from the HBase web UI.


6. Scripts for manual HBase compact and split

The scripts below are taken from: https://yq.aliyun.com/articles/59591

Shell script

#!/bin/bash

die () {
    echo >&2 "$@"
    echo "usage:"
    echo "       $0 check|split table_name [split_size]"
    exit 1
}

[[ "$#" -lt 2 ]] && die "at least 2 arguments required, $# provided"

COMMAND=$1
TABLE=$2
SIZE="${3:-1073741824}"

split() {
    # look up the hbase:meta row key (the full region name) for the given encoded region name
    region_key=`python /home/hduser/hbase/hbase-scan.py -t hbase:meta -f "RowFilter (=, 'substring:$1')"`
    echo "split '$region_key'" | hbase shell
}

if [ "$COMMAND" != "check" ] ; then
    for region in `hadoop fs -ls /hbase/data/default/$TABLE | awk '{print $8}'`
    do
        [[ ${region##*/} =~ ^\. ]] && continue
        [[ `hadoop fs -du -s $region | awk '{print $1}'` -gt $SIZE ]] && split ${region##*/}
    done

    # check after split
    sleep 60
fi

for region in `hadoop fs -ls /hbase/data/default/$TABLE | awk '{print $8}'`
do
    [[ ${region##*/} =~ ^\. ]] && continue
    [[ `hadoop fs -du -s $region | awk '{print $1}'` -gt $SIZE ]] && echo "${region##*/} (`hadoop fs -du -s -h $region | awk '{print $1 $2}'`) is a huge region" || echo "${region##*/} (`hadoop fs -du -s -h $region | awk '{print $1 $2}'`) is a small region"
done
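Assuming the script is saved as, say, region-maintain.sh (the file name is mine; the original does not name it), it either reports oversized regions or splits them, with an optional size threshold in bytes (default 1073741824, i.e. 1 GB):

# report which regions of table 'test' exceed the default 1 GB threshold
./region-maintain.sh check test

# split every region of 'test' larger than 512 MB, then re-check 60 seconds later
./region-maintain.sh split test 536870912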


Python script

import subprocess
import datetime
import argparse
import csv
import gzip
import happybase
import logging

def connect_to_hbase():
    # Thrift server host; adjust to your environment
    return happybase.Connection('itr-hbasetest01')

def main():
    logging.basicConfig(format='%(asctime)s %(name)s %(levelname)s: %(message)s', level=logging.INFO)

    argp = argparse.ArgumentParser(description='EventLog Reader')
    argp.add_argument('-t', '--table', dest='table', default='eventlog')
    argp.add_argument('-p', '--prefix', dest='prefix')
    argp.add_argument('-f', '--filter', dest='filter')
    argp.add_argument('-l', '--limit', dest='limit', default=10)

    args = argp.parse_args()

    hbase_conn = connect_to_hbase()

    table = hbase_conn.table(args.table)
    logging.info("scan start")
    scanner = table.scan(row_prefix=args.prefix, batch_size=1000, limit=int(args.limit), filter=args.filter)
    logging.info("scan done")
    i = 0
    for key, data in scanner:
        logging.info(key)
        # row keys go to stdout so that callers (e.g. the shell script above) can capture them
        print(key)
        i += 1

    logging.info("%s rows read in total", i)

if __name__ == '__main__':
    main()
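The shell script above expects this scanner at /home/hduser/hbase/hbase-scan.py and uses it to turn an encoded region name into the corresponding hbase:meta row key (the full region name). Row keys are printed to stdout while log messages go to stderr, so the shell's command substitution captures only the key. A hedged example invocation, mirroring how the shell script calls it (the encoded region name is the one from this walkthrough):

# look up the meta row key of the region whose encoded name contains this substring
python hbase-scan.py -t hbase:meta -f "RowFilter (=, 'substring:59c4ce9f21361f871b64ce86a9a66508')"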


Original article: https://www.cnblogs.com/weiyiming007/p/12205719.html
