hbase 多线程大数据量入库

最新推荐文章于 2024-06-30 21:03:25 发布

Rudolf__

最新推荐文章于 2024-06-30 21:03:25 发布

阅读量629

点赞数 1

CC 4.0 BY-SA版权

分类专栏： hbase 文章标签： hbase 多线程

本文链接：https://blog.csdn.net/Rudolf__/article/details/94886241

hbase 专栏收录该内容

1 篇文章

订阅专栏

本文介绍了一种利用线程池进行HBase数据库批量插入的方法，通过实例展示了如何在HBase中高效地插入大量数据。同时，文章还详细介绍了HDFS和HBase的高可用配置，包括HDFS的复制因子、名称节点配置、HBase的数据存储路径及ZooKeeper配置。

摘要生成于 C知道，由 DeepSeek-R1 满血版支持，前往体验 >

1.插入数据库

/**
 * Inserts rows 0..999 into the HBase table {@code ns1:t2} using four {@link PoolInsert}
 * tasks that share one connection and run on the pool returned by
 * {@code ThreadPool.getPool()}.
 *
 * <p>The method blocks until every submitted task has finished, then closes the
 * connection it created. Note that {@code PoolInsert} logs and swallows its own
 * insert errors, so the final message only indicates that all tasks completed.
 */
@Test
public void poolinserts(){
    final String tableName = "ns1:t2";
    final Connection conn = HbaseUtils.conn();
    if (conn == null) {
        // HbaseUtils.conn() returns null when the connection cannot be created;
        // fail here instead of letting every task hit a NullPointerException.
        throw new IllegalStateException("HBase connection could not be created");
    }
    ThreadPoolExecutor pool = ThreadPool.getPool();
    try {
        List<PoolInsert> poolInserts = new ArrayList<PoolInsert>();
        poolInserts.add(new PoolInsert(0,300,tableName,conn));
        poolInserts.add(new PoolInsert(300,600,tableName,conn));
        poolInserts.add(new PoolInsert(600,900,tableName,conn));
        poolInserts.add(new PoolInsert(900,1000,tableName,conn));
        for (PoolInsert p:poolInserts){
            pool.execute(p);
        }
        // Request shutdown once, then block until the queued tasks have drained.
        // awaitTermination parks the thread instead of spinning on isTerminated().
        pool.shutdown();
        while (!pool.awaitTermination(1, java.util.concurrent.TimeUnit.SECONDS)) {
            // still running; keep waiting
        }
        System.out.println("成功");
    } catch (InterruptedException e) {
        // Stop outstanding work and preserve the interrupt status for the caller.
        pool.shutdownNow();
        Thread.currentThread().interrupt();
    } finally {
        // This method created the connection, so it is responsible for closing it.
        try {
            conn.close();
        } catch (java.io.IOException e) {
            throw new java.io.UncheckedIOException(e);
        }
    }
}

2.hbase连接工具

/**
 * Creates a new HBase {@link Connection} from the configuration produced by
 * {@code HBaseConfiguration.create()}.
 *
 * <p>Every call creates a new connection; the caller is responsible for closing it.
 *
 * @return the new connection, or {@code null} if creation failed with an
 *         {@link IOException} (the failure is logged)
 */
public static Connection conn(){
    Configuration conf = HBaseConfiguration.create();
    try {
        return ConnectionFactory.createConnection(conf);
    }catch (IOException e){
        // Pass the message for the "{}" placeholder and the exception itself as the
        // trailing argument so SLF4J logs both the message and the stack trace.
        log.error("连接异常:{}", e.getMessage(), e);
    }
    return null;
}

3.1 hdfs-site.xml配置

 <configuration>
    <property>
        <name>dfs.replication</name>
        <value>3</value>
    </property>
    <property>
        <name>dfs.nameservices</name>
        <value>mycluster</value>
    </property>
    <property>
        <name>dfs.ha.namenodes.mycluster</name>
        <value>nn1,nn2</value>
    </property>
    <property>
        <name>dfs.namenode.rpc-address.mycluster.nn1</name>
        <value>s226:8020</value>
    </property>
    <property>
        <name>dfs.namenode.rpc-address.mycluster.nn2</name>
        <value>s229:8020</value>
    </property>
    <property>
        <name>dfs.namenode.http-address.mycluster.nn1</name>
        <value>s226:50070</value>
    </property>
    <property>
        <name>dfs.namenode.http-address.mycluster.nn2</name>
        <value>s229:50070</value>
    </property>
    <property>
        <name>dfs.namenode.shared.edits.dir</name>
        <value>qjournal://s227:8485;s228:8485;s229:8485/mycluster</value>
    </property>
    <property>
        <name>dfs.client.failover.proxy.provider.mycluster</name>
        <value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
    </property>
    <property>
        <name>dfs.ha.fencing.methods</name>
        <value>
            sshfence
            shell(/bin/true)
        </value>
    </property>
    <property>
        <name>dfs.ha.fencing.ssh.private-key-files</name>
        <value>/home/centos/.ssh/id_rsa</value>
    </property>
    <property>
        <name>dfs.journalnode.edits.dir</name>
        <value>/home/centos/hadoop/journal</value>
    </property>
    <property>
        <name>dfs.ha.automatic-failover.enabled</name>
        <value>true</value>
        <description>
            Whether automatic failover is enabled. See the HDFS High
            Availability documentation for details on automatic HA
            configuration.
        </description>
    </property>
</configuration>

3.2 hbase-site.xml配置文件

<configuration>
    <!-- 使用完全分布式 -->
    <property>
        <name>hbase.cluster.distributed</name>
        <value>true</value>
    </property>

    <!-- 指定hbase数据在hdfs上的存放路径 -->
    <property>
        <name>hbase.rootdir</name>
        <value>hdfs://mycluster/hbase</value>
    </property>
    <!-- 配置zk地址 -->
    <property>
        <name>hbase.zookeeper.quorum</name>
        <value>s227:2181,s228:2181,s229:2181</value>
    </property>
    <!-- zk的本地目录 -->
    <property>
        <name>hbase.zookeeper.property.dataDir</name>
        <value>/home/centos/zookeeper</value>
    </property>
</configuration>

4.需要插入的对象封装

@Slf4j
@Data
public class PoolInsert implements Runnable{
	
	    private int start;
	    private int end;
	    private String tableName;
	    private Connection conn;
	
	    public PoolInsert(int start, int end, String tableName, Connection conn) {
	        this.start = start;
	        this.end = end;
	        this.tableName = tableName;
	        this.conn = conn;
	    }
	
	    @Override
	    public void run() {
	        System.out.println(1111);
	        //inset(start,end,tableName,conn);
	        try {
	            TableName tname = TableName.valueOf(tableName);
	            HTable table = (HTable) conn.getTable(tname);
	
	            DecimalFormat df = new DecimalFormat("0000");
	            table.setAutoFlush(false);
	            for (int j = start; j < end; j++) {
	                byte[] rowkey = Bytes.toBytes("row" + df.format(j));
	
	                Put put = new Put(rowkey);
	                put.addColumn(Bytes.toBytes("f1"), Bytes.toBytes("id"), Bytes.toBytes(end - 1));
	                put.addColumn(Bytes.toBytes("f1"), Bytes.toBytes("name"), Bytes.toBytes("name" + j));
	                table.put(put);
	                if (j % 2000 == 0) {
	                    table.flushCommits();
	                }
	            }
	            table.flushCommits();
	        }catch(Exception e){
	            log.error("插入异常：{}",e);
	        }
	        System.out.println(2222);
	    }
	    //百万插入
	    private String inset(int start, int end, String tableName, Connection conn ) {
	        try {
	            TableName tname = TableName.valueOf(tableName);
	            HTable table = (HTable) conn.getTable(tname);
	
	            DecimalFormat df = new DecimalFormat("0000");
	            table.setAutoFlush(false);
	            for (int j = start; j < end; j++) {
	                byte[] rowkey = Bytes.toBytes("row" + df.format(j));
	
	                Put put = new Put(rowkey);
	                put.addColumn(Bytes.toBytes("f1"), Bytes.toBytes("id"), Bytes.toBytes(end - 1));
	                put.addColumn(Bytes.toBytes("f1"), Bytes.toBytes("name"), Bytes.toBytes("name" + j));
	                table.put(put);
	                if (j % 2000 == 0) {
	                    table.flushCommits();
	                }
	            }
	            table.flushCommits();
	        }catch(Exception e){
	            log.error("插入异常：{}",e);
	        }
	        return "完成："+(end-start);
	    }
	
	}