林子雨编著《大数据基础编程、实验和案例教程(第2版)》教材第7章的代码

大数据学习路线图

林子雨编著《大数据基础编程、实验和案例教程(第2版)》(教材官网)教材中的命令行和代码,在纸质教材中的印刷效果不是很好,可能会影响读者对命令行和代码的理解,为了方便读者正确理解命令行和代码或者直接拷贝命令行和代码用于上机实验,这里提供全书配套的所有命令行和代码。
查看教材所有章节的代码

第7章 MapReduce基础编程

教材第124页

(温馨提示:代码框上方的复制代码按钮,也就是“两张A4纸图标”,用鼠标点击复制代码按钮,就可以把代码框中的代码复制到粘贴板,粘贴到其他地方。但是,有的浏览器可能不支持该功能)

  1. public static class TokenizerMapper extends Mapper<Object, Text, Text, IntWritable> {
  2. private static final IntWritable one = new IntWritable(1);
  3. private Text word = new Text();
  4. public TokenizerMapper() {
  5. }
  6. public void map(Object key, Text value, Mapper<Object, Text, Text, IntWritable>.Context context) throws IOException, InterruptedException {
  7. StringTokenizer itr = new StringTokenizer(value.toString());
  8. while(itr.hasMoreTokens()) {
  9. this.word.set(itr.nextToken());
  10. context.write(this.word, one);
  11. }
  12. }
  13. }
Java

教材第125页

  1. public static class IntSumReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
  2. private IntWritable result = new IntWritable();
  3. public IntSumReducer() {
  4. }
  5. public void reduce(Text key, Iterable<IntWritable> values, Reducer<Text, IntWritable, Text, IntWritable>.Context context) throws IOException, InterruptedException {
  6. int sum = 0;
  7. IntWritable val;
  8. for(Iterator i$ = values.iterator(); i$.hasNext(); sum += val.get()) {
  9. val = (IntWritable)i$.next();
  10. }
  11. this.result.set(sum);
  12. context.write(key, this.result);
  13. }
  14. }
Java

教材第125页

  1. public static void main(String[] args) throws Exception {
  2. Configuration conf = new Configuration();
  3. String[] otherArgs = (new GenericOptionsParser(conf, args)).getRemainingArgs();
  4. if(otherArgs.length < 2) {
  5. System.err.println("Usage: wordcount <in> [<in>...] <out>");
  6. System.exit(2);
  7. }
  8. Job job = Job.getInstance(conf, "word count"); //设置环境参数
  9. job.setJarByClass(WordCount.class); //设置整个程序的类名
  10. job.setMapperClass(WordCount.TokenizerMapper.class); //添加Mapper类
  11. job.setReducerClass(WordCount.IntSumReducer.class); //添加Reducer类
  12. job.setOutputKeyClass(Text.class); //设置输出类型
  13. job.setOutputValueClass(IntWritable.class); //设置输出类型
  14. for(int i = 0; i < otherArgs.length - 1; ++i) {
  15. FileInputFormat.addInputPath(job, new Path(otherArgs[i])); //设置输入文件
  16. }
  17. FileOutputFormat.setOutputPath(job, new Path(otherArgs[otherArgs.length - 1]));//设置输出文件
  18. System.exit(job.waitForCompletion(true)?0:1);
  19. }
Java

教材第126页

  1. import java.io.IOException;
  2. import java.util.Iterator;
  3. import java.util.StringTokenizer;
  4. import org.apache.hadoop.conf.Configuration;
  5. import org.apache.hadoop.fs.Path;
  6. import org.apache.hadoop.io.IntWritable;
  7. import org.apache.hadoop.io.Text;
  8. import org.apache.hadoop.mapreduce.Job;
  9. import org.apache.hadoop.mapreduce.Mapper;
  10. import org.apache.hadoop.mapreduce.Reducer;
  11. import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
  12. import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
  13. import org.apache.hadoop.util.GenericOptionsParser;
  14. public class WordCount {
  15. public WordCount() {
  16. }
  17. public static void main(String[] args) throws Exception {
  18. Configuration conf = new Configuration();
  19. String[] otherArgs = (new GenericOptionsParser(conf, args)).getRemainingArgs();
  20. if(otherArgs.length < 2) {
  21. System.err.println("Usage: wordcount <in> [<in>...] <out>");
  22. System.exit(2);
  23. }
  24. Job job = Job.getInstance(conf, "word count");
  25. job.setJarByClass(WordCount.class);
  26. job.setMapperClass(WordCount.TokenizerMapper.class);
  27. job.setCombinerClass(WordCount.IntSumReducer.class);
  28. job.setReducerClass(WordCount.IntSumReducer.class);
  29. job.setOutputKeyClass(Text.class);
  30. job.setOutputValueClass(IntWritable.class);
  31. for(int i = 0; i < otherArgs.length - 1; ++i) {
  32. FileInputFormat.addInputPath(job, new Path(otherArgs[i]));
  33. }
  34. FileOutputFormat.setOutputPath(job, new Path(otherArgs[otherArgs.length - 1]));
  35. System.exit(job.waitForCompletion(true)?0:1);
  36. }
  37. public static class TokenizerMapper extends Mapper<Object, Text, Text, IntWritable> {
  38. private static final IntWritable one = new IntWritable(1);
  39. private Text word = new Text();
  40. public TokenizerMapper() {
  41. }
  42. public void map(Object key, Text value, Mapper<Object, Text, Text, IntWritable>.Context context) throws IOException, InterruptedException {
  43. StringTokenizer itr = new StringTokenizer(value.toString());
  44. while(itr.hasMoreTokens()) {
  45. this.word.set(itr.nextToken());
  46. context.write(this.word, one);
  47. }
  48. }
  49. }
  50. public static class IntSumReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
  51. private IntWritable result = new IntWritable();
  52. public IntSumReducer() {
  53. }
  54. public void reduce(Text key, Iterable<IntWritable> values, Reducer<Text, IntWritable, Text, IntWritable>.Context context) throws IOException, InterruptedException {
  55. int sum = 0;
  56. IntWritable val;
  57. for(Iterator i$ = values.iterator(); i$.hasNext(); sum += val.get()) {
  58. val = (IntWritable)i$.next();
  59. }
  60. this.result.set(sum);
  61. context.write(key, this.result);
  62. }
  63. }
  64. }
Java

教材第128页

  1. cd /usr/local/hadoop
Shell 命令
  1. export CLASSPATH="/usr/local/hadoop/share/hadoop/common/hadoop-common-3.1.3.jar:/usr/local/hadoop/share/hadoop/mapreduce/hadoop-mapreduce-client-core-3.1.3.jar:/usr/local/hadoop/share/hadoop/common/lib/commons-cli-1.2.jar:$CLASSPATH"
Shell 命令
  1. javac WordCount.java
Shell 命令
  1. jar -cvf WordCount.jar *.class
Shell 命令
  1. ./bin/hadoop jar WordCount.jar WordCount input output
Shell 命令
  1. ./bin/hadoop fs -cat output/*
Shell 命令

教材第135页

  1. cd /usr/local/hadoop/myapp
  2. ls
Shell 命令

教材第136页

  1. cd /usr/local/hadoop
  2. ./sbin/start-dfs.sh
Shell 命令

教材第137页

  1. cd /usr/local/hadoop
  2. ./bin/hdfs dfs -rm -r input
  3. ./bin/hdfs dfs -rm -r output
Shell 命令
  1. cd /usr/local/hadoop
  2. ./bin/hdfs dfs -mkdir input
Shell 命令
  1. cd /usr/local/hadoop
  2. ./bin/hdfs dfs -put ./wordfile1.txt input
  3. ./bin/hdfs dfs -put ./wordfile2.txt input
Shell 命令
  1. cd /usr/local/hadoop
  2. ./bin/hdfs dfs -rm -r /user/hadoop/output
Shell 命令
  1. cd /usr/local/hadoop
  2. ./bin/hadoop jar ./myapp/WordCount.jar input output
Shell 命令

教材第138页

  1. cd /usr/local/hadoop
  2. ./bin/hdfs dfs -cat output/*
Shell 命令