hive表中导入数据多种方法详细说明

最新推荐文章于 2025-06-25 11:41:52 发布

nuhao_

最新推荐文章于 2025-06-25 11:41:52 发布

阅读量1.9k

点赞数 9

CC 4.0 BY-SA版权

文章标签： hive hadoop 数据仓库

本文链接：https://blog.csdn.net/nuhao/article/details/136280040

本文详细介绍了如何在 Hive 中通过 load data、insert into、创建表时指定 location 以及 export/import 操作来加载和管理数据，包括使用 partitioned by 和 row format 等特性。

摘要生成于 C知道，由 DeepSeek-R1 满血版支持，前往体验 >

本文目录：向 Hive 表中导入数据的几种方法

方式一：通过load方式加载数据

方式二：直接向分区表中插入数据

方式三：查询语句中创建表并加载数据（as select）

方式四：创建表时通过location指定加载数据路径

1. 创建表，并指定在hdfs上的位置

2. 上传数据到hdfs上

3. 查询数据

方式五：export导出与 import导入 hive表数据（内部非分区表操作）

准备数据

-- Create the working database db_myhive_5 and make it the current one.
create database db_myhive_5;
use db_myhive_5;

-- Create the score table: partitioned by month, stored as text with
-- tab-separated fields.
create table tb_score (
    s_id  string,
    c_id  string,
    score int
)
partitioned by (month string)
row format delimited
    fields terminated by '\t';

方式一：通过load方式加载数据

-- Load a file from the local filesystem into partition month='202006';
-- overwrite replaces whatever that partition already contains.
load data local inpath '/export/data/hive_data/score.txt'
overwrite into table tb_score
partition (month = '202006');

方式二：直接向分区表中插入数据

通过insert into方式加载数据

-- Create score3 with the same definition as tb_score (no data is copied).
create table score3 like tb_score;

-- Insert one literal row into partition month='202007'.
-- score is declared int, so it is given as a numeric literal rather than a
-- quoted string, which avoids relying on an implicit string-to-int cast.
insert into table score3 partition (month = '202007')
values ('001', '002', 100);

通过查询方式加载数据

-- Create score4 with the same definition as tb_score (the table created in
-- the data-preparation step; no table named "score" is created in this
-- walkthrough).
create table score4 like tb_score;

-- Fill partition month='202006' of score4 from a query; overwrite replaces
-- the partition's previous contents. The source column in tb_score is named
-- score (not s_score).
insert overwrite table score4 partition (month = '202006')
select
    s_id,
    c_id,
    score
from tb_score;

例子

需求1: 创建新库 myhive6, 切换库 myhive6

需求2: 创建t_score_1(s_id, c_id, score) 按月指定分区 month, 指定字段分隔符为 '\t'

需求3: 通过 load data 方式加载文件中数据

需求4: 创建表 t_score_2 依据表 t_score_1的结构

需求5: 通过insert into 添加一行数据

需求6: 创建表 t_score_3 依据表 t_score_1的结构

需求7: 通过 select 添加n条记录

实现

-- Requirement 1: create the new database myhive6 and switch to it
create database myhive6;
use myhive6;

-- Requirement 2: create t_score_1(s_id, c_id, score), partitioned by month,
-- with '\t' as the field delimiter
create table t_score_1 (
    s_id  string,
    c_id  string,
    score int
)
partitioned by (month string)
row format delimited
    fields terminated by '\t';

-- Requirement 3: load data from a file using load data
-- NOTE(review): the file has a .csv extension while t_score_1 is declared with
-- '\t'-separated fields — confirm the file is actually tab-delimited.
load data local inpath '/export/data/hive_data/f_score.csv'
overwrite into table t_score_1
partition (month = '202101');

-- Requirement 4: create t_score_2 with the same structure as t_score_1
create table t_score_2 like t_score_1;

-- Requirement 5: add a single row using insert into
insert into table t_score_2 partition (month = '202002')
values ('01', '02', 66);

-- Check the inserted row.
select *
from t_score_2;

-- Requirement 6: create t_score_3 with the same structure as t_score_1
create table t_score_3 like t_score_1;

-- Requirement 7: add n rows using a select; overwrite replaces the previous
-- contents of partition month='202003'
insert overwrite table t_score_3 partition (month = '202003')
select
    s_id,
    c_id,
    score
from t_score_1;

-- Check the copied rows.
select *
from t_score_3;

方式三：查询语句中创建表并加载数据（as select）

将查询的结果保存到一张表当中去

-- Create-table-as-select: save a query result as the new table score5.
-- The source is tb_score in db_myhive_5 (no table named "score" is created in
-- this walkthrough). Both names are database-qualified so the statement does
-- not depend on which database is current.
-- No partitioned by clause is given, so score5 is an unpartitioned table and
-- the month partition column of tb_score arrives as an ordinary column.
create table db_myhive_5.score5 as
select *
from db_myhive_5.tb_score;

方式四：创建表时通过location指定加载数据路径

1. 创建表，并指定在hdfs上的位置

-- External table whose data directory is the HDFS path /myscore6;
-- files there are read as text with tab-separated fields.
create external table score6 (
    s_id    string,
    c_id    string,
    s_score int
)
row format delimited
    fields terminated by '\t'
location '/myscore6';

2. 上传数据到hdfs上

# Ensure the table directory exists on HDFS, then copy the local file into it.
hdfs dfs -mkdir -p /myscore6
hdfs dfs -put score.txt /myscore6

3. 查询数据

-- Read back the rows stored under the table's location.
select *
from score6;

例子

1 创建表 t_score_6(s_id, c_id, score), 指定分隔符为'\t', 指定保存位置为 '/hivedatas/t_score_6';

2 将分数信息文件上传到 hdfs的目录下 '/hivedatas/t_score_6'

3 查看表中的数据

实现

-- 1. Create table t_score_6(s_id, c_id, score) with '\t' as the field
--    delimiter, stored at '/hivedatas/t_score_6'
create table t_score_6 (
    s_id  string,
    c_id  string,
    score int
)
row format delimited
    fields terminated by '\t'
location '/hivedatas/t_score_6';

-- 2. Upload the score file to the HDFS directory '/hivedatas/t_score_6'
--    (shell command, shown here as a comment):
--    hdfs dfs -put <file> /hivedatas/t_score_6

-- 3. Inspect the data in the table
select *
from t_score_6;