admin管理员组文章数量:1130349
win10 IDEA 链接远程hadoop 2.6 集群
win10 准备:
1。下载 hadoop2.6-CDH5.7.5 解压 //重要说明:CDH 和 普通版本不一样
2。下载 winutils.exe ( hadoop2.6-CDH5.7.5版本 )
3 .将winutils.exe 放入 hadoop2.6/bin 中 ,同时添加环境变量,
4 将登录用户设置成英文名字,进入计算机管理界面,本地用户,用户,改成英文的例如:hadoop(如果是中文的会报错)
IDEA 准备:
maven 配置:
<repositories>
<repository>
<id>nexus-aliyun</id>
<name>Nexus aliyun</name>
<url>http://maven.aliyun.com/nexus/content/groups/public/</url>
</repository>
<repository>
<id>cloudera</id>
<url>https://repository.cloudera.com/artifactory/cloudera-repos/</url>
</repository>
</repositories>
<dependencies>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client</artifactId>
<version>2.6.0-cdh5.7.5</version>
</dependency>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.12</version>
<scope>test</scope>
</dependency>
</dependencies>
项目中的 resources 存放 XML文件:
core-site.xml
<configuration>
<!-- Default filesystem URI. "Machenmaster" is a logical name, not a host:port pair. -->
<property>
<name>fs.defaultFS</name>
<value>hdfs://Machenmaster</value>
</property>
<!-- Proxy-user rule for user "hadoop": no restriction on originating hosts ("*"). -->
<!-- NOTE(review): presumably copied from the cluster's own core-site.xml; confirm whether the client needs it. -->
<property>
<name>hadoop.proxyuser.hadoop.hosts</name>
<value>*</value>
</property>
<!-- Proxy-user rule for user "hadoop": no restriction on groups ("*"). -->
<property>
<name>hadoop.proxyuser.hadoop.groups</name>
<value>*</value>
</property>
</configuration>
hdfs-site.xml:
<!-- NOTE(review): no <configuration> root element is shown around these properties; confirm the real hdfs-site.xml has one. -->
<!-- Name of the HDFS nameservice. -->
<property>
<name>dfs.nameservices</name>
<value>Machenmaster</value>
</property>
<!-- The Machenmaster nameservice has two NameNodes, with IDs m1 and m2 -->
<property>
<name>dfs.ha.namenodes.Machenmaster</name>
<value>m1,m2</value>
</property>
<!-- RPC address of NameNode m1 -->
<property>
<name>dfs.namenode.rpc-address.Machenmaster.m1</name>
<value>172.16.11.221:9000</value>
</property>
<!-- HTTP address of NameNode m1 -->
<property>
<name>dfs.namenode.http-address.Machenmaster.m1</name>
<value>172.16.11.221:50070</value>
</property>
<!-- RPC address of NameNode m2 -->
<property>
<name>dfs.namenode.rpc-address.Machenmaster.m2</name>
<value>172.16.11.222:9000</value>
</property>
<!-- HTTP address of NameNode m2 -->
<property>
<name>dfs.namenode.http-address.Machenmaster.m2</name>
<value>172.16.11.222:50070</value>
</property>
<!-- Where the NameNode metadata (shared edits) is stored on the JournalNodes -->
<property>
<name>dfs.namenode.shared.edits.dir</name>
<value>qjournal://172.16.11.223:8485;172.16.11.224:8485;172.16.11.225:8485;172.16.11.221:8485;172.16.11.222:8485/Machenmaster</value>
</property>
<!-- Enable automatic failover when a NameNode fails -->
<property>
<name>dfs.ha.automatic-failover.enabled</name>
<value>true</value>
</property>
<!-- Implementation class used for automatic failover -->
<property>
<name>dfs.client.failover.proxy.provider.Machenmaster</name>
<value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
</property>
其他俩个配置文件:
mapred-site.xml 和 yarn-site.xml 和 linux集群中一样即可
代码: (网上摆的),其中路径自己指定
package mapreducetest;
import java.io.IOException;
import java.net.URI;
import java.util.StringTokenizer;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
public class WordCount {public static class TokenizerMapperextends Mapper<Object, Text, Text, IntWritable> {private final static IntWritable one = new IntWritable(1);
private Text word = new Text();
public void map(Object key, Text value, Context context) throws IOException, InterruptedException {StringTokenizer itr = new StringTokenizer(value.toString());
while (itr.hasMoreTokens()) {word.set(itr.nextToken());
context.write(word, one);
}}}public static class IntSumReducerextends Reducer<Text, IntWritable, Text, IntWritable> {private IntWritable result = new IntWritable();
public void reduce(Text key, Iterable<IntWritable> values,
Context context) throws IOException, InterruptedException {int sum = 0;
for (IntWritable val : values) {sum += val.get();
}result.set(sum);
context.write(key, result);
}}public static void main(String[] args) throws Exception {Configuration conf = new Configuration();
conf.set("mapred.jar","D:\\project\\HadoopAndHbase\\Hadooptest\\target\\Hadooptest-1.0-SNAPSHOT.jar");
// Path input = new Path("hdfs://192.168.0.26:9000/people");
Path input = new Path(URI.create("hdfs://Machenmaster/test/inputTeacherData.txt"));
Path output = new Path(URI.create("hdfs://Machenmaster/win10_MR_out/out"));
Job job = Job.getInstance(conf, "word count");
job.setJarByClass(WordCount.class);
job.setMapperClass(TokenizerMapper.class);
job.setCombinerClass(IntSumReducer.class);
job.setReducerClass(IntSumReducer.class);
System.setProperty("HADOOP_USER_NAME", "hadoop");
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(IntWritable.class);
FileInputFormat.addInputPath(job, input);
FileOutputFormat.setOutputPath(job, output);
job.waitForCompletion(true);
}
编译准备:
IDEA 对此项目 main class 打包 ;
注意,这是后来我加的代码 ,又编译打包了1次
conf.set("mapred.jar","D:\\project\\HadoopAndHbase\\Hadooptest\\target\\Hadooptest-1.0-SNAPSHOT.jar");
《问题 :为何必须打包??????????,不打包不能直接链接么,有知晓的朋友请留言探讨》
《问题:为何必须指定绝对打包路径????》
百度的解释:
"
经过验证,发现问题原因及解决办法如下:
因为使用的是0.20以上的Hadoop版本,在调用jar中的自定义mapper时,需要设置setJarByClass方法,设置方法如下:
job.setJarByClass(MyJob.class);
其实,在输出日志中也有提示信息:
11/12/11 22:53:03 WARN mapred.JobClient: No job jar file set. User classes may not be found. See JobConf(Class) or JobConf#setJar(String).
job.setJarByClass(MyJob.class);
运行代码无误
win10 IDEA 链接远程hadoop 2.6 集群
win10 准备:
1。下载 hadoop2.6-CDH5.7.5 解压 //重要说明:CDH 和 普通版本不一样
2。下载 winutils.exe ( hadoop2.6-CDH5.7.5版本 )
3 .将winutils.exe 放入 hadoop2.6/bin 中 ,同时添加环境变量,
4 将登录用户设置成英文名字,进入计算机管理界面,本地用户,用户,改成英文的例如:hadoop(如果是中文的会报错)
IDEA 准备:
maven 配置:
<repositories>
<repository>
<id>nexus-aliyun</id>
<name>Nexus aliyun</name>
<url>http://maven.aliyun.com/nexus/content/groups/public/</url>
</repository>
<repository>
<id>cloudera</id>
<url>https://repository.cloudera.com/artifactory/cloudera-repos/</url>
</repository>
</repositories>
<dependencies>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client</artifactId>
<version>2.6.0-cdh5.7.5</version>
</dependency>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.12</version>
<scope>test</scope>
</dependency>
</dependencies>
项目中的 resources 存放 XML文件:
core-site.xml
<configuration>
<!-- Default filesystem URI. "Machenmaster" is a logical name, not a host:port pair. -->
<property>
<name>fs.defaultFS</name>
<value>hdfs://Machenmaster</value>
</property>
<!-- Proxy-user rule for user "hadoop": no restriction on originating hosts ("*"). -->
<!-- NOTE(review): presumably copied from the cluster's own core-site.xml; confirm whether the client needs it. -->
<property>
<name>hadoop.proxyuser.hadoop.hosts</name>
<value>*</value>
</property>
<!-- Proxy-user rule for user "hadoop": no restriction on groups ("*"). -->
<property>
<name>hadoop.proxyuser.hadoop.groups</name>
<value>*</value>
</property>
</configuration>
hdfs-site.xml:
<!-- NOTE(review): no <configuration> root element is shown around these properties; confirm the real hdfs-site.xml has one. -->
<!-- Name of the HDFS nameservice. -->
<property>
<name>dfs.nameservices</name>
<value>Machenmaster</value>
</property>
<!-- The Machenmaster nameservice has two NameNodes, with IDs m1 and m2 -->
<property>
<name>dfs.ha.namenodes.Machenmaster</name>
<value>m1,m2</value>
</property>
<!-- RPC address of NameNode m1 -->
<property>
<name>dfs.namenode.rpc-address.Machenmaster.m1</name>
<value>172.16.11.221:9000</value>
</property>
<!-- HTTP address of NameNode m1 -->
<property>
<name>dfs.namenode.http-address.Machenmaster.m1</name>
<value>172.16.11.221:50070</value>
</property>
<!-- RPC address of NameNode m2 -->
<property>
<name>dfs.namenode.rpc-address.Machenmaster.m2</name>
<value>172.16.11.222:9000</value>
</property>
<!-- HTTP address of NameNode m2 -->
<property>
<name>dfs.namenode.http-address.Machenmaster.m2</name>
<value>172.16.11.222:50070</value>
</property>
<!-- Where the NameNode metadata (shared edits) is stored on the JournalNodes -->
<property>
<name>dfs.namenode.shared.edits.dir</name>
<value>qjournal://172.16.11.223:8485;172.16.11.224:8485;172.16.11.225:8485;172.16.11.221:8485;172.16.11.222:8485/Machenmaster</value>
</property>
<!-- Enable automatic failover when a NameNode fails -->
<property>
<name>dfs.ha.automatic-failover.enabled</name>
<value>true</value>
</property>
<!-- Implementation class used for automatic failover -->
<property>
<name>dfs.client.failover.proxy.provider.Machenmaster</name>
<value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
</property>
其他俩个配置文件:
mapred-site.xml 和 yarn-site.xml 和 linux集群中一样即可
代码: (网上摆的),其中路径自己指定
package mapreducetest;
import java.io.IOException;
import java.net.URI;
import java.util.StringTokenizer;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
public class WordCount {public static class TokenizerMapperextends Mapper<Object, Text, Text, IntWritable> {private final static IntWritable one = new IntWritable(1);
private Text word = new Text();
public void map(Object key, Text value, Context context) throws IOException, InterruptedException {StringTokenizer itr = new StringTokenizer(value.toString());
while (itr.hasMoreTokens()) {word.set(itr.nextToken());
context.write(word, one);
}}}public static class IntSumReducerextends Reducer<Text, IntWritable, Text, IntWritable> {private IntWritable result = new IntWritable();
public void reduce(Text key, Iterable<IntWritable> values,
Context context) throws IOException, InterruptedException {int sum = 0;
for (IntWritable val : values) {sum += val.get();
}result.set(sum);
context.write(key, result);
}}public static void main(String[] args) throws Exception {Configuration conf = new Configuration();
conf.set("mapred.jar","D:\\project\\HadoopAndHbase\\Hadooptest\\target\\Hadooptest-1.0-SNAPSHOT.jar");
// Path input = new Path("hdfs://192.168.0.26:9000/people");
Path input = new Path(URI.create("hdfs://Machenmaster/test/inputTeacherData.txt"));
Path output = new Path(URI.create("hdfs://Machenmaster/win10_MR_out/out"));
Job job = Job.getInstance(conf, "word count");
job.setJarByClass(WordCount.class);
job.setMapperClass(TokenizerMapper.class);
job.setCombinerClass(IntSumReducer.class);
job.setReducerClass(IntSumReducer.class);
System.setProperty("HADOOP_USER_NAME", "hadoop");
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(IntWritable.class);
FileInputFormat.addInputPath(job, input);
FileOutputFormat.setOutputPath(job, output);
job.waitForCompletion(true);
}
编译准备:
IDEA 对此项目 main class 打包 ;
注意,这是后来我加的代码 ,又编译打包了1次
conf.set("mapred.jar","D:\\project\\HadoopAndHbase\\Hadooptest\\target\\Hadooptest-1.0-SNAPSHOT.jar");
《问题 :为何必须打包??????????,不打包不能直接链接么,有知晓的朋友请留言探讨》
《问题:为何必须指定绝对打包路径????》
百度的解释:
"
经过验证,发现问题原因及解决办法如下:
因为使用的是0.20以上的Hadoop版本,在调用jar中的自定义mapper时,需要设置setJarByClass方法,设置方法如下:
job.setJarByClass(MyJob.class);
其实,在输出日志中也有提示信息:
11/12/11 22:53:03 WARN mapred.JobClient: No job jar file set. User classes may not be found. See JobConf(Class) or JobConf#setJar(String).
job.setJarByClass(MyJob.class);
运行代码无误
本文标签: win10 IDEA 链接远程hadoop 26 集群
版权声明:本文标题:win10 IDEA 链接远程hadoop 2.6 集群 内容由热心网友自发贡献,该文观点仅代表作者本人, 转载请联系作者并注明出处:https://it.en369.cn/IT/1694657999a254628.html, 本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌抄袭侵权/违法违规的内容,一经查实,本站将立刻删除。


发表评论