Hive

Reference Book: Programming Hive

$hive -f /path/to/file/xxx.hql
$cat /path/to/file/xxx.hql
select * from src x;
$hive
hive> source /path/to/file/xxx.hql;
row format delimited
fields terminated by '\001'
collection items terminated by '\002'
map keys terminated by '\003'
lines terminated by '\n'
stored as textfile
create table if not exists databasename.tablename(
name string comment 'XXX',
salary float comment 'XXX',
subordinates array comment 'XXX',
deductions map comment 'XXX',
address struct comment 'XXX'
)
partitioned by XXX
comment 'XXX Table'
location 'XXX';
drop table if exits tablename;
alter table tablename add if not exists
partition(year = 2011, month = 1, day = 1) location 'XXX'
partition(year = 2011, month = 1, day = 2) location 'XXX'
partition(year = 2011, month = 1, day = 3) location 'XXX';
alter table tablename partition(year = 2011, month = 1, day = 1)
set location 'XXX';
alter table tablename drop if exists partition(year = 2011, month = 1, day = 1);
--对某个字段重命名,修改位置、类型、注释
alter table tablename
change column old_columnname new_columnname new_type
comment 'new_XXX'
after other_column; --将被修改字段放在other_column之后
或first; --将被修改字段放在第一个字段位置
alter table tablename add columns(
columnname type comment 'XXX',
columnname type comment 'XXX');
alter table tablename replace columns(
newcolumn type commnent 'XXX');
alter table table_name set tblproperties();
load data local inpath 'XXX'
overwrite into table tablename
partition (column = 'XXX');
insert overwrite table tablename1
partition (column = 'XXX') --为新插入的数据创建分区
select * from tablename2 tb2
where tb2.column1 = 'xxx';
from tablename2 tb2
insert overwrite table tablename1
  
   
insert overwrite table tablename1
  
  
insert overwrite table employee
partition (country, state)
select ..., se.cnty, se.st
from staged_employees se;
insert overwrite table employee
partition(country = 'US', state)
select ..., se.cnty, se.st
from staged_employees se
where se.cnty = 'US';
create tale ca_employees
as select name, salary, address
from employees se
where se.state = 'CA'
hadoop fs -cp source_path target_path
insert overwrite local directory 'target_path'
select name, salary, address
from employees
where se.state = 'CA';
select name, subordinates[0] from employees;
select name, deduction["State Taxes"] from employees;
select name, address.state from employees;
--如果subordinates字段内容为空,不产生新记录;不为空,则数组中的每个元素产生一行新纪录
select explode(subordinates) as sub from employees; --as创建列别名
select /*streamtable(s)*/s.columnname --s为较大表的别名
select /*+ mapjoin(s) */ b.column1, b.column2 s.column3
from bigtable b join smalltable s on b.column1 = s.column2
--SQL写法(Hive不支持):用exists in
select l.column1, l.column2 from lefttable l
where l.column1 in
(select r.column1 from righttable r)
--Hive写法:用left semi-join
select l.column1, l.column2
from lefttable l left semi join righttable r
on l.column1 = r.column1
select * from tablename t
distribute by t.column1 --将column1具有相同值的记录分到同一个reducer中
sort by t.column1, t.column2
select name, salary from employees
where cast(salary as float) < 10000.0;
--如果salary字段不是合法的浮点数,会返回null
--将浮点数转换为整数推荐使用round()/floor(),而不是cast()
select (2.0*cast(cast(b as string) as double)) from src; --b为binary类型
select * from numbers tablesample(bucket 3 out of 10 on rand()) s;
select * from numbers tablesample(bucket 3 out of 10 on number) s;
select * from numbers tablesample(0.1 percent) s;