cd /export/server/apache-hive-3.1.2-bin/ bin/schematool -initSchema -dbType mysql -verbos #初始化成功会在mysql中创建74张表
2.5 解决注释信息中文乱码
MySQL中执行以下语句
1 2 3 4 5 6
use hive3; alter table hive3.COLUMNS_V2 modify column COMMENT varchar(256) character set utf8; alter table hive3.TABLE_PARAMS modify column PARAM_VALUE varchar(4000) character set utf8; alter table hive3.PARTITION_PARAMS modify column PARAM_VALUE varchar(4000) character set utf8; alter table hive3.PARTITION_KEYS modify column PKEY_COMMENT varchar(4000) character set utf8; alter table hive3.INDEX_PARAMS modify column PARAM_VALUE varchar(4000) character set utf8;
-- 返回所有匹配的行 去除重复的结果 selectdistinct state from t_usa_covid19;
-- 多个字段distinct 整体去重 selectdistinct county,state from t_usa_covid19;
5.3.1 where条件
比较运算符 =><<=>=!=<>(不等于)
1 2 3 4 5
-- 找出来自于California州的疫情数据 select*from t_usa_covid19 where state ='California';
-- where条件中使用函数 找出州名字母长度超过10位的有哪些 select*from t_usa_covid19 where length(state) >10;
逻辑运算 andor
1
select*from t_usa_covid19 where length(state)>10and length(state)<20;
空值判断 is null
1
select*from t_usa_covid19 where fips isnull;
between...and
1
select*from t_usa_covid19 where length(state) between10and20;
in
1
select*from t_usa_covid19 where length(state) in (10,11,13);
5.3.2 聚合操作
countsummaxminavg
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
-- 统计美国总共有多少个县county -- 学会使用as 给查询返回的结果起个别名 selectcount(county) as county_cnts from t_usa_covid19;
-- 去重distinct selectcount(distinct county) as county_cnts from t_usa_covid19;
-- 统计美国加州有多少个县 selectcount(county) from t_usa_covid19 where state = "California";
-- 统计德州总死亡病例数 selectsum(deaths) from t_usa_covid19 where state = "Texas";
-- 统计出美国最高确诊病例数是哪个县 selectmax(cases) from t_usa_covid19;
5.3.3 GROUP BY
GROUP BY语句用于结合聚合函数,根据一个或多个列对结果集进行分组
1 2 3 4 5 6 7 8
-- 根据state州进行分组 统计每个州有多少个县county selectcount(county) from t_usa_covid19 where count_date = "2021-01-28" groupby state;
-- 想看一下统计的结果是属于哪一个州的 select state,count(county) as county_nums from t_usa_covid19 where count_date = "2021-01-28" groupby state;
-- 被聚合函数应用 select state,count(county),sum(deaths) from t_usa_covid19 where count_date = "2021-01-28" groupby state;
5.3.4 HAVING
由于SQL执行顺序决定where在分组前执行,所以where中不能使用聚合函数,比如下边的错误示范
1 2
-- 错误示范-统计2021-01-28死亡病例数大于10000的州 select state,sum(deaths) from t_usa_covid19 where count_date = "2021-01-28" andsum(deaths) >10000groupby state;
例
1 2 3 4 5
-- 先where分组前过滤,再进行group by分组, 分组后每个分组结果集确定 再使用having过滤 select state,sum(deaths) from t_usa_covid19 where count_date = "2021-01-28" groupby state havingsum(deaths) >10000;
-- 这样写更好 即在group by的时候聚合函数已经作用得出结果 having直接引用结果过滤 不需要再单独计算一次了 select state,sum(deaths) as cnts from t_usa_covid19 where count_date = "2021-01-28" groupby state having cnts>10000;
-- if条件判断: if(boolean testCondition, T valueTrue, T valueFalseOrNull) select if(1=2,100,200); select if(sex ='男','M','W') from student limit 3;
-- 条件转换函数: CASE a WHEN b THEN c [WHEN d THEN e]* [ELSE f] END selectcase100when50then'tom'when100then'mary'else'tim'end; selectcase sex when'男'then'male'else'female'endfrom student limit 3;
-- 空值转换函数: nvl(T value, T default_value) select nvl("allen","itcast"); select nvl(null,"itcast");