
Building a Hadoop Project with JDK 1.7 + Eclipse + Maven 3.5 + Hadoop 2.7.3

Previously I ran Hadoop through an Eclipse Map/Reduce project, launched directly with Run on Hadoop; see: http://blog.csdn.net/cafebar123/article/details/73611341

This time, the WordCount program is run as a plain Java Application instead.

(1) Prerequisites:

1. The Maven plugin is installed in Eclipse;

2. The Hadoop plugin is installed in Eclipse;

3. Eclipse can run a Map/Reduce project and produce valid results.

(2) Build the Hadoop project skeleton with Maven

Open a Windows command prompt and enter:

e:
cd E:\eclipse demo\myHadoop
mvn archetype:generate -DarchetypeGroupId=org.apache.maven.archetypes -DgroupId=org.myhadoop.wordcount -DartifactId=myHadoop -DpackageName=org.myhadoop.wordcount -Dversion=1.0-SNAPSHOT -DinteractiveMode=false
           

Maven downloads the archetype and its dependencies; the progress appears in the console, as shown:

[Screenshot: Maven archetype download output]

This generates a plain Java project.
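For reference, the quickstart archetype produces a standard Maven layout, roughly (file names are the archetype defaults):

myHadoop
├── pom.xml
└── src
    ├── main/java/org/myhadoop/wordcount/App.java
    └── test/java/org/myhadoop/wordcount/AppTest.java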

(3) Install the required JARs.

cd E:\eclipse demo\myHadoop\myHadoop
mvn clean install
           

(4) Import the Maven-built project into Eclipse (File > Import... > Maven > Existing Maven Projects).

(5) Add the dependencies to pom.xml:

<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
	xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
	<modelVersion>4.0.0</modelVersion>
	<groupId>org.myhadoop.wordcount</groupId>
	<artifactId>myHadoop</artifactId>
	<packaging>jar</packaging>
	<version>1.0-SNAPSHOT</version>
	<properties>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
        <hadoop.version>2.7.3</hadoop.version>
    </properties>
	<name>myHadoop</name>
	<url>http://maven.apache.org</url>
	<dependencies>		
		<dependency>
			<groupId>junit</groupId>
			<artifactId>junit</artifactId>
			<version>4.4</version>
			<scope>test</scope>
		</dependency>
		<dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-client</artifactId>
            <version>${hadoop.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-common</artifactId>
            <version>${hadoop.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-hdfs</artifactId>
            <version>${hadoop.version}</version>
        </dependency>
	</dependencies>
</project>
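Optionally, you can check that the Hadoop artifacts resolve before building; mvn dependency:tree is a standard Maven goal (output varies with your local repository):

cd E:\eclipse demo\myHadoop\myHadoop
mvn dependency:tree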
           

(6) Run Maven again to download the newly added dependencies:

cd E:\eclipse demo\myHadoop\myHadoop
mvn clean install
           

The build should complete successfully, as shown:

[Screenshot: mvn clean install build output]

(7) Copy the Hadoop configuration files from the Linux server:

core-site.xml

hdfs-site.xml

mapred-site.xml

Create a /src/main/resources directory in the project, create a hadoop folder under it, and put the three configuration files there.
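Note that new Configuration() only auto-loads core-site.xml (and friends) from the classpath root, so files placed in a hadoop/ subfolder have to be added explicitly. A minimal sketch (the class name LoadClusterConf is illustrative, not from the original project):

import org.apache.hadoop.conf.Configuration;

public class LoadClusterConf {
    /** Builds a Configuration backed by the copied cluster files. */
    public static Configuration load() {
        Configuration conf = new Configuration();
        // addResource(String) resolves names against the classpath, so the
        // files under src/main/resources/hadoop are found at runtime.
        conf.addResource("hadoop/core-site.xml");
        conf.addResource("hadoop/hdfs-site.xml");
        conf.addResource("hadoop/mapred-site.xml");
        return conf;
    }
}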

(8) Edit the local hosts file.

Location: C:\Windows\System32\drivers\etc

Add the cluster's master host IP:

119.29.174.43 master

Restart the computer so the change takes effect (flushing the DNS cache with ipconfig /flushdns is usually enough).
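To check the mapping, ping the new host name from a command prompt:

ping master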

(9) Write the WordCount program:

package org.myhadoop.wordcount;

import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;

/**
 * WordCount2: the classic MapReduce word-count example.
 *
 * @author [email protected]
 * @date June 23, 2017
 */
public class WordCount2 {

	public static class TokenizerMapper extends Mapper<Object, Text, Text, IntWritable> {

		private final static IntWritable one = new IntWritable(1);
		private Text word = new Text();

		@Override
		public void map(Object key, Text value, Context context) throws IOException, InterruptedException {
			StringTokenizer itr = new StringTokenizer(value.toString());
			while (itr.hasMoreTokens()) {
				word.set(itr.nextToken());
				context.write(word, one);
			}
		}
	}

	public static class IntSumReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
		private IntWritable result = new IntWritable();

		@Override
		public void reduce(Text key, Iterable<IntWritable> values, Context context)
				throws IOException, InterruptedException {
			int sum = 0;
			for (IntWritable val : values) {
				sum += val.get();
			}
			result.set(sum);
			context.write(key, result);
		}
	}

	public static void main(String[] args) throws Exception {		
		Configuration conf = new Configuration();

		String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
		if (otherArgs.length != 2) {
			System.err.println(otherArgs.length);
			System.err.println("Usage: wordcount <in> <out>");
			System.exit(2);
		}
		Job job = Job.getInstance(conf, "word count");
		job.setJarByClass(WordCount2.class);
		job.setMapperClass(TokenizerMapper.class);
		job.setCombinerClass(IntSumReducer.class);
		job.setReducerClass(IntSumReducer.class);
		job.setOutputKeyClass(Text.class);
		job.setOutputValueClass(IntWritable.class);
		FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
		FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
		System.exit(job.waitForCompletion(true) ? 0 : 1);
	}
}
           

(10) Configure the Run Configuration arguments

See: http://blog.csdn.net/cafebar123/article/details/73611341
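For reference, the two program arguments are the input and output paths on HDFS, separated by a space. A hypothetical example (host name and port depend on your cluster's fs.defaultFS; the output directory must not exist yet, or the job will fail):

hdfs://master:9000/user/hadoop/input hdfs://master:9000/user/hadoop/output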

(11) Run as a Java Application

Result:

[Screenshot: WordCount job console output]

(12) Troubleshooting.

1) AccessControlException when submitting the job:

log4j:WARN No appenders could be found for logger (org.apache.hadoop.metrics2.lib.MutableMetricsFactory).
log4j:WARN Please initialize the log4j system properly.
log4j:WARN See http://logging.apache.org/log4j/1.2/faq.html#noconfig for more info.
Exception in thread "main" org.apache.hadoop.security.AccessControlException: Permission denied: user=zy, access=EXECUTE, inode="/tmp/hadoop-yarn/staging/zy/.staging":root:supergroup:drwx------
	at org.apache.hadoop.hdfs.server.namenode.FSPermissionChecker.check(FSPermissionChecker.java:319)
	at org.apache.hadoop.hdfs.server.namenode.FSPermissionChecker.checkTraverse(FSPermissionChecker.java:259)
	at org.apache.hadoop.hdfs.server.namenode.FSPermissionChecker.checkPermission(FSPermissionChecker.java:205)
	at org.apache.hadoop.hdfs.server.namenode.FSPermissionChecker.checkPermission(FSPermissionChecker.java:190)
	at org.apache.hadoop.hdfs.server.namenode.FSDirectory.checkPermission(FSDirectory.java:1728)
	at org.apache.hadoop.hdfs.server.namenode.FSDirStatAndListingOp.getFileInfo(FSDirStatAndListingOp.java:108)
	at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.getFileInfo(FSNamesystem.java:3857)
	at org.apache.hadoop.hdfs.server.namenode.NameNodeRpcServer.getFileInfo(NameNodeRpcServer.java:1012)
	at org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolServerSideTranslatorPB.getFileInfo(ClientNamenodeProtocolServerSideTranslatorPB.java:843)
	at org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos$ClientNamenodeProtocol$2.callBlockingMethod(ClientNamenodeProtocolProtos.java)
	at org.apache.hadoop.ipc.ProtobufRpcEngine$Server$ProtoBufRpcInvoker.call(ProtobufRpcEngine.java:616)
	at org.apache.hadoop.ipc.RPC$Server.call(RPC.java:982)
	at org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:2049)
	at org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:2045)
	at java.security.AccessController.doPrivileged(Native Method)
	at javax.security.auth.Subject.doAs(Subject.java:415)
	at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1698)
	at org.apache.hadoop.ipc.Server$Handler.run(Server.java:2043)

	at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
	at sun.reflect.NativeConstructorAccessorImpl.newInstance(Unknown Source)
	at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(Unknown Source)
	at java.lang.reflect.Constructor.newInstance(Unknown Source)
	at org.apache.hadoop.ipc.RemoteException.instantiateException(RemoteException.java:106)
	at org.apache.hadoop.ipc.RemoteException.unwrapRemoteException(RemoteException.java:73)
	at org.apache.hadoop.hdfs.DFSClient.getFileInfo(DFSClient.java:2110)
	at org.apache.hadoop.hdfs.DistributedFileSystem$22.doCall(DistributedFileSystem.java:1305)
	at org.apache.hadoop.hdfs.DistributedFileSystem$22.doCall(DistributedFileSystem.java:1301)
	at org.apache.hadoop.fs.FileSystemLinkResolver.resolve(FileSystemLinkResolver.java:81)
	at org.apache.hadoop.hdfs.DistributedFileSystem.getFileStatus(DistributedFileSystem.java:1317)
	at org.apache.hadoop.fs.FileSystem.exists(FileSystem.java:1426)
	at org.apache.hadoop.mapreduce.JobSubmissionFiles.getStagingDir(JobSubmissionFiles.java:116)
	at org.apache.hadoop.mapreduce.JobSubmitter.submitJobInternal(JobSubmitter.java:144)
	at org.apache.hadoop.mapreduce.Job$10.run(Job.java:1290)
	at org.apache.hadoop.mapreduce.Job$10.run(Job.java:1287)
	at java.security.AccessController.doPrivileged(Native Method)
	at javax.security.auth.Subject.doAs(Unknown Source)
	at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1698)
	at org.apache.hadoop.mapreduce.Job.submit(Job.java:1287)
	at org.apache.hadoop.mapred.JobClient$1.run(JobClient.java:575)
	at org.apache.hadoop.mapred.JobClient$1.run(JobClient.java:570)
	at java.security.AccessController.doPrivileged(Native Method)
	at javax.security.auth.Subject.doAs(Unknown Source)
	at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1698)
	at org.apache.hadoop.mapred.JobClient.submitJobInternal(JobClient.java:570)
	at org.apache.hadoop.mapred.JobClient.submitJob(JobClient.java:561)
	at org.apache.hadoop.mapred.JobClient.runJob(JobClient.java:870)
	at org.conan.myhadoop.mr.WordCount.main(WordCount.java:74)
Caused by: org.apache.hadoop.ipc.RemoteException(org.apache.hadoop.security.AccessControlException): Permission denied: user=zy, access=EXECUTE, inode="/tmp/hadoop-yarn/staging/zy/.staging":root:supergroup:drwx------
           

This is an HDFS file-permission problem. Fixes:

1. Set the HADOOP_USER_NAME environment variable so the client connects as a user with access (on Windows: set HADOOP_USER_NAME=zy, or use the Environment tab of the Run Configuration):

export HADOOP_USER_NAME=zy
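The same effect is available from inside the program: Hadoop's UserGroupInformation also reads HADOOP_USER_NAME as a JVM system property, as long as it is set before the first Configuration/FileSystem call. A one-line sketch for WordCount2.main:

// First line of main(), before any Hadoop object is created:
System.setProperty("HADOOP_USER_NAME", "zy");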
           

2. Open up permissions on the target directory (777 grants access to everyone, so this is only appropriate on a test cluster):

./bin/hdfs dfs -chmod -R 777 /user/hadoop
           

3. Disable HDFS permission checking in hdfs-site.xml on the cluster and restart the NameNode. In Hadoop 2.x the property is named dfs.permissions.enabled (dfs.permissions is its deprecated alias); again, test clusters only:

<property>
  <name>dfs.permissions.enabled</name>
  <value>false</value>
</property>
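Incidentally, the log4j warnings at the top of that stack trace are unrelated to the failure; they only mean no log4j configuration was found on the classpath. A minimal log4j.properties in src/main/resources silences them (a standard log4j 1.2 console setup):

log4j.rootLogger=INFO, stdout
log4j.appender.stdout=org.apache.log4j.ConsoleAppender
log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
log4j.appender.stdout.layout.ConversionPattern=%d{ISO8601} %-5p %c{1} - %m%n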
           

2) chmod fails because the user is not the owner:

chmod: changing permissions of '/user/hadoop/miqiLog10000Input': Permission denied. user=zy is not the owner of inode=miqiLog10000Input
           

This directory was created by root earlier, so user=zy has no permission to modify it.

Ownership can be transferred with chown (this usually has to be run as the HDFS superuser, i.e. the user that started the NameNode):

./bin/hdfs dfs -chown zy /user/hadoop
           
