第8章的代码-林子雨编著《大数据基础编程、实验和案例教程(第3版)》教材

大数据学习路线图

林子雨编著《大数据基础编程、实验和案例教程(第3版)》(教材官网)教材中的命令行和代码,在纸质教材中的印刷效果不是很好,可能会影响读者对命令行和代码的理解,为了方便读者正确理解命令行和代码或者直接拷贝命令行和代码用于上机实验,这里提供全书配套的所有命令行和代码。
查看教材所有章节的代码

第8章 数据仓库Hive的安装和使用

(温馨提示:代码框上方的复制代码按钮,也就是“两张A4纸图标”,用鼠标点击复制代码按钮,就可以把代码框中的代码复制到粘贴板,粘贴到其他地方。但是,有的浏览器可能不支持该功能)

教材第140页

sudo tar -zxvf ./apache-hive-3.1.3-bin.tar.gz -C /usr/local   # 解压到/usr/local中
cd /usr/local/
sudo mv apache-hive-3.1.3-bin hive       # 将文件夹名改为hive
sudo chown -R hadoop:hadoop hive          # 修改文件权限

教材第121页

vim ~/.bashrc
export HIVE_HOME=/usr/local/hive
export PATH=$PATH:$HIVE_HOME/bin
export HADOOP_HOME=/usr/local/hadoop
export HIVE_CONF_DIR=/usr/local/hive/conf
source ~/.bashrc
cd /usr/local/hive/conf
sudo mv hive-default.xml.template hive-default.xml
cd /usr/local/hive/conf
vim hive-site.xml
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
  <property>
    <name>javax.jdo.option.ConnectionURL</name>
    <value>jdbc:mysql://localhost:3306/hive?createDatabaseIfNotExist=true&useSSL=false</value>
    <description>JDBC connect string for a JDBC metastore</description>
  </property>
  <property>
    <name>javax.jdo.option.ConnectionDriverName</name>
    <value>com.mysql.jdbc.Driver</value>
    <description>Driver class name for a JDBC metastore</description>
  </property>
  <property>
    <name>javax.jdo.option.ConnectionUserName</name>
    <value>hive</value>
    <description>username to use against metastore database</description>
  </property>
  <property>
    <name>javax.jdo.option.ConnectionPassword</name>
    <value>hive</value>
    <description>password to use against metastore database</description>
  </property>
</configuration>

教材第122页

cd /usr/local/hive/conf
sudo mv hive-env.sh.template hive-env.sh
cd /usr/local/hive/conf
vim hive-env.sh
export HADOOP_HEAPSIZE=4096

教材第123页

cd ~/Downloads
tar -zxvf mysql-connector-java-5.1.40.tar.gz   #解压
#下面将mysql-connector-java-5.1.40-bin.jar拷贝到/usr/local/hive/lib目录下
cp mysql-connector-java-5.1.40/mysql-connector-java-5.1.40-bin.jar  /usr/local/hive/lib
service mysql start  #启动MySQL服务
mysql -u root -p   #登录MySQL数据库
mysql> create database hive;
mysql> grant all on *.* to hive@localhost identified by 'hive';
mysql> flush privileges;
cd /usr/local/hive
./bin/schematool -initSchema -dbType mysql

教材第124页

cd /usr/local/hadoop
./sbin/start-dfs.sh #如果Hadoop是伪分布式,只需要执行本条命令启动HDFS
./sbin/start-yarn.sh #如果Hadoop是分布式,需要执行本条命令启动YARN
cd /usr/local/hive
./bin/hive
hive

教材第125页

hive> create database hive;
hive> create database if not exists hive;
hive> use hive;
hive>create table if not exists usr(id bigint,name string,age int);
hive>create table if not exists hive.usr(id bigint,name string,age int)
    >location '/usr/local/hive/warehouse/hive/usr';

教材第126页

hive>create external table if not exists hive.usr(id bigint,name string,age int)
>row format delimited fields terminated by ','
>location ‘/usr/local/data’;
hive>create table hive.usr(id bigint,name string,age int) partitioned by(sex boolean);
hive> use hive;
hive> create table if not exists usr1 like usr;
hive>create view little_usr as select id,age from usr;
hive> drop database hive;
hive>drop database if exists hive;
hive> drop database if exists hive cascade;

教材第127页

hive> drop table if exists usr;
hive> drop view if exists little_usr;
hive> alter database hive set dbproperties(‘edited-by’=’lily’);
hive> alter table usr rename to user;
hive> alter table usr add if not exists partition(sex=true);
hive> alter table usr add if not exists partition(sex=false);
hive> alter table usr drop if exists partition(sex=true);
hive>alter table usr change name username string after age;
hive>alter table usr add columns(sex boolean);
hive>alter table usr replace columns(newid bigint,newname string,newage int);

教材第128页

hive> alter table usr set tblproperties(‘notes’=’the columns in usr may be null except id’);
hive> alter view little_usr set tblproperties(‘create_at’=’refer to timestamp’);
hive> show databases;
hive>show databases like ‘h.*’;
hive> use hive;
hive> show tables;
hive> show tables in hive like ‘u.*’;
hive> describe database hive;
hive>describe database extended hive;

教材第129页

hive> describe hive.usr;
hive> describe hive.little_usr;
hive> describe extended hive.usr;
hive> describe extended hive.little_usr;
hive> describe extended hive.usr.id;
hive> load data local inpath ‘/usr/local/data’ overwrite into table usr;
hive> load data local inpath ‘/usr/local/data’ into table usr;
hive> load data inpath ‘hdfs://master_server/usr/local/data’ overwrite into table usr;
hive> insert overwrite table usr1
   > select * from usr where age=10;

教材第130页

hive> insert into table usr1
   > select * from usr where age=10;
cd /usr/local/hadoop
mkdir input
cd /usr/local/hadoop/input
echo "hello world" > file1.txt
echo "hello hadoop" > file2.txt
cd /usr/local/hadoop
./bin/hdfs dfs -mkdir input #假设HDFS中已经存在/user/hadoop目录
./bin/hdfs dfs -put ./input/file1.txt /user/hadoop/input
./bin/hdfs dfs -put ./input/file2.txt /user/hadoop/input
$ hive
hive> create table docs(line string);
hive> load data inpath 'input' overwrite into table docs;
hive>create table word_count as
>select word, count(*) as count from
>(select explode(split(line,' ')) as word from docs) w
>group by word
    >order by word;