第4章的代码-林子雨编著《大数据基础编程、实验和案例教程（第3版）》教材

林子雨编著《大数据基础编程、实验和案例教程（第3版）》(教材官网)教材中的命令行和代码，在纸质教材中的印刷效果不是很好，可能会影响读者对命令行和代码的理解，为了方便读者正确理解命令行和代码或者直接拷贝命令行和代码用于上机实验，这里提供全书配套的所有命令行和代码。
查看教材所有章节的代码

第4章 HDFS操作方法和基础编程

教材第52页

cd /usr/local/hadoop
./sbin/start-dfs.sh

cd /usr/local/hadoop
./bin/hdfs dfs

教材第53页

./bin/hdfs dfs -help put

教材第54页

cd /usr/local/hadoop
./bin/hdfs dfs -mkdir -p /user/hadoop

./bin/hdfs dfs -ls .

./bin/hdfs dfs -ls /user/hadoop

./bin/hdfs dfs -ls

./bin/hdfs dfs -mkdir input

./bin/hdfs dfs -mkdir /input

./bin/hdfs dfs -rm -r /input

教材第55页

Hadoop
Spark
XMU DBLAB

./bin/hdfs dfs -put /home/hadoop/myLocalFile.txt  input

./bin/hdfs dfs -ls input

./bin/hdfs dfs -cat input/myLocalFile.txt

./bin/hdfs dfs -get input/myLocalFile.txt  /home/hadoop/Downloads

教材第57页

cd ~
cd Downloads
ls
cat myLocalFile.txt

./bin/hdfs dfs -cp input/myLocalFile.txt  /input

教材第61页

import java.io.IOException;
import java.io.PrintStream;
import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.*;

/**
 * Path filter that rejects every path whose full string form matches a
 * regular expression and accepts all other paths.
 */
class MyPathFilter implements PathFilter {
    /** Regular expression describing the paths to exclude. */
    String reg = null;

    /**
     * @param reg regular expression; a path whose {@code toString()} matches
     *            it in full is filtered out
     */
    MyPathFilter(String reg) {
        this.reg = reg;
    }

    /**
     * @param path the candidate path
     * @return {@code true} when the path does NOT match the expression (the
     *         path is kept), {@code false} when it matches (the path is dropped)
     */
    public boolean accept(Path path) {
        String candidate = path.toString();
        boolean excluded = candidate.matches(reg);
        return !excluded;
    }
}
/**
 * Merges the files found directly under an HDFS directory into one HDFS file,
 * using FSDataInputStream for reading and FSDataOutputStream for writing.
 * Files whose path matches {@code .*\.abc} are excluded from the merge.
 */
public class MergeFile {
    Path inputPath = null;  // directory that holds the files to be merged
    Path outputPath = null; // path of the merged output file

    /**
     * @param input  URI/path of the directory whose files are merged
     * @param output URI/path of the file that receives the merged content
     */
    public MergeFile(String input, String output) {
        this.inputPath = new Path(input);
        this.outputPath = new Path(output);
    }

    /**
     * Concatenates every regular file in {@link #inputPath} (except those
     * ending in ".abc") into {@link #outputPath}, echoing each file's path,
     * length, permission and content to standard output.
     *
     * @throws IOException if listing, opening, reading or writing fails
     */
    public void doMerge() throws IOException {
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS","hdfs://localhost:9000");
        conf.set("fs.hdfs.impl","org.apache.hadoop.hdfs.DistributedFileSystem");
        FileSystem fsSource = FileSystem.get(URI.create(inputPath.toString()), conf);
        FileSystem fsDst = FileSystem.get(URI.create(outputPath.toString()), conf);

        // Filter out the files in the input directory whose suffix is .abc.
        FileStatus[] sourceStatus = fsSource.listStatus(inputPath,
                new MyPathFilter(".*\\.abc"));

        // The output file may live inside the input directory (as in main()).
        // A copy left over from an earlier run would be listed above and then
        // truncated by create() below, so it must not be treated as an input.
        Path qualifiedOutput = fsDst.makeQualified(outputPath);

        // Use System.out directly instead of wrapping it: closing a wrapper
        // would close the JVM-wide standard output stream as well.
        PrintStream console = System.out;

        try (FSDataOutputStream fsdos = fsDst.create(outputPath)) {
            byte[] data = new byte[1024];
            // Read each remaining file in turn and append it to the output file.
            for (FileStatus sta : sourceStatus) {
                // open() fails on directories, so only regular files are merged.
                if (!sta.isFile() || sta.getPath().equals(qualifiedOutput)) {
                    continue;
                }
                // Print the path, size and permission of the file being merged.
                console.print("路径：" + sta.getPath() + "    文件大小：" + sta.getLen()
                        + "   权限：" + sta.getPermission() + "   内容：");
                try (FSDataInputStream fsdis = fsSource.open(sta.getPath())) {
                    int read;
                    while ((read = fsdis.read(data)) != -1) {
                        console.write(data, 0, read);
                        fsdos.write(data, 0, read);
                    }
                }
            }
        } finally {
            // Make the echoed content visible without closing System.out.
            console.flush();
        }
    }

    /**
     * Merges the files under /user/hadoop/ of the local HDFS instance into
     * /user/hadoop/merge.txt.
     */
    public static void main(String[] args) throws IOException {
        MergeFile merge = new MergeFile(
                "hdfs://localhost:9000/user/hadoop/",
                "hdfs://localhost:9000/user/hadoop/merge.txt");
        merge.doMerge();
    }
}

教材第63页

cd /usr/local/hadoop
./sbin/start-dfs.sh

教材第64页

cd /usr/local/hadoop
./bin/hdfs dfs -ls /user/hadoop
./bin/hdfs dfs -cat /user/hadoop/merge.txt

cd /usr/local/hadoop
mkdir myapp

教材第66页

cd /usr/local/hadoop/myapp
ls

cd /usr/local/hadoop
./bin/hdfs dfs -rm /user/hadoop/merge.txt

cd /usr/local/hadoop
./bin/hadoop jar ./myapp/HDFSExample.jar

cd /usr/local/hadoop
./bin/hdfs dfs -ls /user/hadoop
./bin/hdfs dfs -cat /user/hadoop/merge.txt

厦大数据库实验室博客

第4章 HDFS操作方法和基础编程

教材第52页

教材第53页

教材第54页

教材第55页

教材第57页

教材第61页

教材第63页

教材第64页

教材第66页