代码-第7章 ETL工具Kettle-林子雨编著《数据采集与预处理》

大数据学习路线图

林子雨编著《数据采集与预处理》教材配套代码(教材官网
查看所有章节代码

第7章 ETL工具Kettle

https://dev.mysql.com/downloads/connector/j/?os=26
CREATE DATABASE kettle;
USE kettle;
#------------创建表book
DROP TABLE IF EXISTS book;
CREATE TABLE book(
no int,
author VARCHAR(10),
price int,
amount int
);
mysql> USE kettle;
mysql> SELECT * FROM book;
CREATE DATABASE kettle;
USE kettle;
#------------创建表student_info
DROP TABLE IF EXISTS student_info;
CREATE TABLE student_info(
sno int,
sname VARCHAR(10),
ssex VARCHAR(2),
sage int
);
CREATE DATABASE kettle;
USE kettle;
#------------创建表user
DROP TABLE IF EXISTS user;
CREATE TABLE user (
  userid int(10) DEFAULT NULL COMMENT '用户ID',
  username varchar(10) DEFAULT NULL COMMENT '用户姓名',
  usersex varchar(1) DEFAULT NULL COMMENT '性别',
  userposition varchar(20) DEFAULT NULL COMMENT '职业',
  userage int(3) DEFAULT NULL COMMENT '年龄'
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
#------------插入数据
INSERT INTO user VALUES ('1', '陈四', '女', '学生', '20');
INSERT INTO user VALUES ('2', '王五', '男', '工程师', '30');
INSERT INTO user VALUES ('3', '李六', '女', '医生', '40');
#------------创建表product
DROP TABLE IF EXISTS product;
CREATE TABLE product (
  productid int(10) DEFAULT NULL COMMENT '产品ID',
  productname varchar(20) DEFAULT NULL COMMENT '产品名称'
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
#------------插入数据
INSERT INTO product VALUES ('1', '手机');
INSERT INTO product VALUES ('2', '电脑');
INSERT INTO product VALUES ('3', '水杯');
#------------创建表orders
DROP TABLE IF EXISTS orders;
CREATE TABLE orders (
  orderid int(10) DEFAULT NULL COMMENT '订单ID',
  userid int(10) DEFAULT NULL COMMENT '用户ID',
  productid int(10) DEFAULT NULL COMMENT '产品ID',
  buytime datetime DEFAULT NULL COMMENT '购买时间'
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
#------------插入数据
INSERT INTO orders VALUES ('1', '1', '1', '2021-06-01 15:02:02');
INSERT INTO orders VALUES ('2', '1', '2', '2021-06-02 15:02:22');
INSERT INTO orders VALUES ('3', '1', '3', '2021-06-02 15:02:36');
INSERT INTO orders VALUES ('4', '2', '1', '2021-06-06 15:02:52');
INSERT INTO orders VALUES ('5', '3', '2', '2021-06-09 16:55:24');
INSERT INTO orders VALUES ('6', '2', '2', '2021-07-14 14:01:36');
> cd c:\hadoop-3.1.3\bin
> hadoop fs -mkdir hdfs://localhost:9000/test
> cd c:\hadoop-3.1.3\bin
> hadoop fs -mkdir -p hdfs://localhost:9000/user/Lenovo
> cd c:\hadoop-3.1.3\bin
> hadoop fs -ls hdfs://localhost:9000/input_kettle
> hadoop fs -cat hdfs://localhost:9000/input_kettle/word.txt
> cd c:\hadoop-3.1.3\bin
> hadoop fs -put D:\book.txt hdfs://localhost:9000/
> hadoop fs -cat hdfs://localhost:9000/student.txt
CREATE DATABASE kettle;
USE kettle;
#------------创建表student_table
DROP TABLE IF EXISTS student_table;
CREATE TABLE student_table (
no int,
name VARCHAR(10),
sex VARCHAR(2),
age int
);
mysql> USE kettle;
mysql> SELECT * FROM student_table;