林子雨编著《大数据基础编程、实验和案例教程(第3版)》(教材官网)教材中的命令行和代码,在纸质教材中的印刷效果不是很好,可能会影响读者对命令行和代码的理解,为了方便读者正确理解命令行和代码或者直接拷贝命令行和代码用于上机实验,这里提供全书配套的所有命令行和代码。
查看教材所有章节的代码
第4章 HDFS操作方法和基础编程
教材第52页
(温馨提示:代码框上方的复制代码按钮,也就是“两张A4纸图标”,用鼠标点击复制代码按钮,就可以把代码框中的代码复制到粘贴板,粘贴到其他地方。但是,有的浏览器可能不支持该功能)
cd /usr/local/hadoop
./sbin/start-dfs.sh
cd /usr/local/hadoop
./bin/hdfs dfs
教材第53页
./bin/hdfs dfs -help put
教材第54页
cd /usr/local/hadoop
./bin/hdfs dfs -mkdir -p /user/hadoop
./bin/hdfs dfs -ls .
./bin/hdfs dfs -ls /user/hadoop
./bin/hdfs dfs -ls
./bin/hdfs dfs -mkdir input
./bin/hdfs dfs -mkdir /input
./bin/hdfs dfs -rm -r /input
教材第55页
Hadoop
Spark
XMU DBLAB
./bin/hdfs dfs -put /home/hadoop/myLocalFile.txt input
./bin/hdfs dfs -ls input
./bin/hdfs dfs -cat input/myLocalFile.txt
./bin/hdfs dfs -get input/myLocalFile.txt /home/hadoop/Downloads
教材第75页
cd ~
cd Downloads
ls
cat myLocalFile.txt
./bin/hdfs dfs -cp input/myLocalFile.txt /input
教材第61页
import java.io.IOException;
import java.io.PrintStream;
import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.*;
/**
* 过滤掉文件名满足特定条件的文件
*/
class MyPathFilter implements PathFilter {
String reg = null;
MyPathFilter(String reg) {
this.reg = reg;
}
public boolean accept(Path path) {
if (!(path.toString().matches(reg)))
return true;
return false;
}
}
/***
* 利用FSDataOutputStream和FSDataInputStream合并HDFS中的文件
*/
public class MergeFile {
Path inputPath = null; //待合并的文件所在的目录的路径
Path outputPath = null; //输出文件的路径
public MergeFile(String input, String output) {
this.inputPath = new Path(input);
this.outputPath = new Path(output);
}
public void doMerge() throws IOException {
Configuration conf = new Configuration();
conf.set("fs.defaultFS","hdfs://localhost:9000");
conf.set("fs.hdfs.impl","org.apache.hadoop.hdfs.DistributedFileSystem");
FileSystem fsSource = FileSystem.get(URI.create(inputPath.toString()), conf);
FileSystem fsDst = FileSystem.get(URI.create(outputPath.toString()), conf);
//下面过滤掉输入目录中后缀为.abc的文件
FileStatus[] sourceStatus = fsSource.listStatus(inputPath,
new MyPathFilter(".*\\.abc"));
FSDataOutputStream fsdos = fsDst.create(outputPath);
PrintStream ps = new PrintStream(System.out);
//下面分别读取过滤之后的每个文件的内容,并输出到同一个文件中
for (FileStatus sta : sourceStatus) {
//下面打印后缀不为.abc的文件的路径、文件大小
System.out.print("路径:" + sta.getPath() + " 文件大小:" + sta.getLen()
+ " 权限:" + sta.getPermission() + " 内容:");
FSDataInputStream fsdis = fsSource.open(sta.getPath());
byte[] data = new byte[1024];
int read = -1;
while ((read = fsdis.read(data)) > 0) {
ps.write(data, 0, read);
fsdos.write(data, 0, read);
}
fsdis.close();
}
ps.close();
fsdos.close();
}
public static void main(String[] args) throws IOException {
MergeFile merge = new MergeFile(
"hdfs://localhost:9000/user/hadoop/",
"hdfs://localhost:9000/user/hadoop/merge.txt");
merge.doMerge();
}
}
教材第63页
cd /usr/local/hadoop
./sbin/start-dfs.sh
教材第64页
cd /usr/local/hadoop
./bin/hdfs dfs -ls /user/hadoop
./bin/hdfs dfs -cat /user/hadoop/merge.txt
cd /usr/local/hadoop
mkdir myapp
教材第66页
cd /usr/local/hadoop/myapp
ls
cd /usr/local/hadoop
./bin/hdfs dfs -rm /user/hadoop/merge.txt
cd /usr/local/hadoop
./bin/hadoop jar ./myapp/HDFSExample.jar
cd /usr/local/hadoop
./bin/hdfs dfs -ls /user/hadoop
./bin/hdfs dfs -cat /user/hadoop/merge.txt