(1)raini@biyuzhe:~$ gedit .bashrc
#java
export JAVA_HOME=/home/raini/app/jdk1.7.0_79
export JRE_HOME=${JAVA_HOME}/jre
export CLASSPATH=.:${JAVA_HOME}/lib:${JRE_HOME}/lib:$CLASSPATH
export PATH=${JAVA_HOME}/bin:$JRE_HOME/bin:$PATH
#scala
export SCALA_HOME=/home/raini/app/scala-2.10.6
export PATH=${SCALA_HOME}/bin:$PATH
#spark
export SPARK_HOME=/home/raini/spark1
export PATH=$PATH:$SPARK_HOME/bin
# hadoop2.6
export HADOOP_PREFIX=/home/raini/hadoop2
export CLASSPATH=".:$JAVA_HOME/lib:$CLASSPATH"
export PATH="$JAVA_HOME/:$HADOOP_PREFIX/bin:$PATH"
export HADOOP_PREFIX PATH CLASSPATH
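A quick sanity check after editing .bashrc (expected versions assume the installs above; yours will differ for other releases):
raini@biyuzhe:~$ source .bashrc
raini@biyuzhe:~$ java -version    # should report 1.7.0_79
raini@biyuzhe:~$ scala -version   # should report 2.10.6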
(2)raini@biyuzhe:~$ sudo apt-get install rsync
(3)raini@biyuzhe:~$ sudo apt-get install openssh-server
cd ~/.ssh/ # if this directory does not exist, run `ssh localhost` once first
ssh-keygen -t rsa # just press Enter at every prompt
cat id_rsa.pub >> authorized_keys # authorize the key
Try `ssh localhost` to confirm you can now log in without a password.
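If `ssh localhost` still asks for a password, the usual cause is that sshd rejects the key because the file permissions are too open; tightening them is the standard fix:
raini@biyuzhe:~$ chmod 700 ~/.ssh
raini@biyuzhe:~$ chmod 600 ~/.ssh/authorized_keys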
(4)raini@biyuzhe:~$ sudo gedit /etc/hosts
127.0.0.1 localhost
127.0.1.1 biyuzhe
#10.155.243.206 biyuzhe
# some guides insist this line must be changed, otherwise connection-refused errors may appear later
# The following lines are desirable for IPv6 capable hosts
::1 ip6-localhost ip6-loopback
fe00::0 ip6-localnet
ff00::0 ip6-mcastprefix
ff02::1 ip6-allnodes
ff02::2 ip6-allrouters
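A quick check (hypothetical, not part of the original steps) that the hostname resolves locally as intended:
raini@biyuzhe:~$ getent hosts biyuzhe   # should print 127.0.1.1 biyuzhe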
(5) Edit the configuration file etc/hadoop/hadoop-env.sh
export JAVA_HOME=/home/raini/app/jdk
export HADOOP_COMMON_HOME=/home/raini/hadoop
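With JAVA_HOME set explicitly in hadoop-env.sh, the Hadoop launcher scripts should now find the JDK; a quick check:
raini@biyuzhe:~/hadoop$ bin/hadoop version   # should print Hadoop 2.7.2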
(6)raini@biyuzhe:~$ gedit .bashrc
Add the Hadoop bin and sbin directories to PATH, e.g.:
export PATH="/home/raini/hadoop/bin:/home/raini/hadoop/sbin:$JAVA_HOME/bin:$HADOOP_PREFIX/bin:$PATH"
(7) Edit the file etc/hadoop/core-site.xml
<configuration>
    <property>
        <name>hadoop.tmp.dir</name>
        <value>file:/home/raini/hadoop/tmp</value>
        <description>A base for other temporary directories.</description>
    </property>
    <property>
        <name>fs.defaultFS</name>
        <value>hdfs://localhost:9000</value>
    </property>
    <property>
        <name>io.file.buffer.size</name>
        <value>131072</value>
    </property>
    <property>
        <name>hadoop.proxyuser.master.hosts</name>
        <value>*</value>
    </property>
    <property>
        <name>hadoop.proxyuser.master.groups</name>
        <value>*</value>
    </property>
</configuration>
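Once core-site.xml is saved, the effective value can be read back with the getconf tool shipped with Hadoop 2.x:
raini@biyuzhe:~/hadoop$ bin/hdfs getconf -confKey fs.defaultFS   # hdfs://localhost:9000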
(8) Edit etc/hadoop/hdfs-site.xml:
<configuration>
    <!-- Secondary NameNode HTTP host and port (optional, left commented out here)
    <property>
        <name>dfs.namenode.secondary.http-address</name>
        <value>localhost:9001</value>
    </property> -->
    <!-- Only fs.defaultFS and dfs.replication are strictly required to run (as in the
         official tutorial). However, if hadoop.tmp.dir is not set, the default temporary
         directory is /tmp/hadoop-hadoop, which the system may clean on reboot; the NameNode
         process then fails to start and HDFS must be re-formatted. Hence the settings below. -->
    <property>
        <name>dfs.namenode.name.dir</name>
        <value>file:/home/raini/hadoop/tmp/dfs/namenode</value>
    </property>
    <property>
        <name>dfs.datanode.data.dir</name>
        <value>file:/home/raini/hadoop/tmp/dfs/datanode</value>
    </property>
    <!-- replication factor -->
    <property>
        <name>dfs.replication</name>
        <value>1</value>
    </property>
    <!-- Unless this is true, WebHDFS operations that list files and file status, such as
         LISTSTATUS and GETFILESTATUS, cannot be used; that information is kept by the
         NameNode. -->
    <property>
        <name>dfs.webhdfs.enabled</name>
        <value>true</value>
    </property>
</configuration>
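Creating the name and data directories up front is not strictly necessary (format and startup will create them), but doing so makes any permission problem obvious early (paths match the values configured above):
raini@biyuzhe:~$ mkdir -p ~/hadoop/tmp/dfs/namenode ~/hadoop/tmp/dfs/datanode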
(9) Edit the configuration file mapred-site.xml
<configuration>
    <property>
        <name>mapreduce.framework.name</name>
        <value>yarn</value>
    </property>
    <!-- mapred.job.tracker is the old MRv1 property, superseded by YARN:
    <property>
        <name>mapred.job.tracker</name>
        <value>localhost:9001</value>
    </property>
    -->
    <property>
        <name>mapreduce.jobhistory.address</name>
        <value>localhost:10020</value>
    </property>
    <property>
        <name>mapreduce.jobhistory.webapp.address</name>
        <value>localhost:19888</value>
    </property>
</configuration>
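Note that Hadoop 2.x ships this file only as a template; if etc/hadoop/mapred-site.xml does not exist yet, copy it first:
raini@biyuzhe:~/hadoop$ cp etc/hadoop/mapred-site.xml.template etc/hadoop/mapred-site.xml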
(10) Edit the configuration file yarn-site.xml
<configuration>
    <!-- Site specific YARN configuration properties -->
    <property>
        <name>yarn.nodemanager.aux-services</name>
        <value>mapreduce_shuffle</value>
    </property>
    <property>
        <name>yarn.nodemanager.aux-services.mapreduce_shuffle.class</name>
        <value>org.apache.hadoop.mapred.ShuffleHandler</value>
    </property>
    <!-- ResourceManager address -->
    <property>
        <name>yarn.resourcemanager.address</name>
        <value>localhost:8032</value>
    </property>
    <!-- ResourceManager scheduler port -->
    <property>
        <name>yarn.resourcemanager.scheduler.address</name>
        <value>localhost:8030</value>
    </property>
    <property>
        <name>yarn.resourcemanager.resource-tracker.address</name>
        <value>localhost:8031</value>
    </property>
    <!-- ResourceManager admin port -->
    <property>
        <name>yarn.resourcemanager.admin.address</name>
        <value>localhost:8033</value>
    </property>
    <!-- ResourceManager web UI port, used to monitor job resource scheduling -->
    <property>
        <name>yarn.resourcemanager.webapp.address</name>
        <value>localhost:8088</value>
    </property>
</configuration>
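A malformed XML file is a common cause of daemons silently failing to start; if xmllint happens to be installed (it is not part of Hadoop), the four edited files can be checked in one pass:
raini@biyuzhe:~/hadoop$ xmllint --noout etc/hadoop/{core,hdfs,mapred,yarn}-site.xml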
(11) raini@biyuzhe:~$ source .bashrc
raini@biyuzhe:~/hadoop$ sbin/start-dfs.sh
Starting namenodes on [localhost]
localhost: starting namenode, logging to /home/raini/app/hadoop-2.7.2/logs/hadoop-raini-namenode-biyuzhe.out
biyuzhe: starting datanode, logging to /home/raini/app/hadoop-2.7.2/logs/hadoop-raini-datanode-biyuzhe.out
Starting secondary namenodes [0.0.0.0]
The authenticity of host '0.0.0.0 (0.0.0.0)' can't be established.
ECDSA key fingerprint is SHA256:7Th7Qu6av5WOqmmVLemv3YN+52LAcHw4BuFBNwBt5DU.
Are you sure you want to continue connecting (yes/no)? yes
0.0.0.0: Warning: Permanently added '0.0.0.0' (ECDSA) to the list of known hosts.
0.0.0.0: starting secondarynamenode, logging to /home/raini/app/hadoop-2.7.2/logs/hadoop-raini-secondarynamenode-biyuzhe.out
raini@biyuzhe:~/hadoop$ jps
14242 Jps
14106 SecondaryNameNode
13922 DataNode------------------(no NameNode process)
(12) The NameNode is missing because HDFS was never formatted. Format it, then restart the daemons:
raini@biyuzhe:~/hadoop$ hdfs namenode -format
raini@biyuzhe:~/hadoop$ sbin/stop-dfs.sh
Stopping namenodes on [localhost]
localhost: no namenode to stop
biyuzhe: stopping datanode
Stopping secondary namenodes [0.0.0.0]
0.0.0.0: stopping secondarynamenode
raini@biyuzhe:~/hadoop$ sbin/start-dfs.sh
Starting namenodes on [localhost]
localhost: starting namenode, logging to /home/raini/app/hadoop-2.7.2/logs/hadoop-raini-namenode-biyuzhe.out
biyuzhe: starting datanode, logging to /home/raini/app/hadoop-2.7.2/logs/hadoop-raini-datanode-biyuzhe.out
Starting secondary namenodes [0.0.0.0]
0.0.0.0: starting secondarynamenode, logging to /home/raini/app/hadoop-2.7.2/logs/hadoop-raini-secondarynamenode-biyuzhe.out
raini@biyuzhe:~/hadoop$ jps
14919 NameNode-----------------------(NameNode is now running)
15407 Jps
15271 SecondaryNameNode
15073 DataNode
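With all HDFS daemons up, a short read/write test confirms the filesystem actually works (the /user/raini path is just an example home directory):
raini@biyuzhe:~/hadoop$ bin/hdfs dfs -mkdir -p /user/raini
raini@biyuzhe:~/hadoop$ bin/hdfs dfs -ls /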
(13)raini@biyuzhe:~/hadoop$ sbin/start-yarn.sh
starting yarn daemons
starting resourcemanager, logging to /home/raini/hadoop/logs/yarn-raini-resourcemanager-biyuzhe.out
biyuzhe: starting nodemanager, logging to /home/raini/app/hadoop-2.7.2/logs/yarn-raini-nodemanager-biyuzhe.out
raini@biyuzhe:~/hadoop$ jps
15625 NodeManager
14919 NameNode
15271 SecondaryNameNode
15073 DataNode
15937 Jps
15501 ResourceManager
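At this point all daemons are up; the bundled example job makes a convenient end-to-end smoke test (the jar path assumes the standard Hadoop 2.7.2 layout used above):
raini@biyuzhe:~/hadoop$ bin/hadoop jar share/hadoop/mapreduce/hadoop-mapreduce-examples-2.7.2.jar pi 2 10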
(14) Verify in the browser:
YARN web UI: http://localhost:8088/
HDFS web UI: http://localhost:50070
Overview 'localhost:9000' (active)
Started:        Sat Apr 23 14:04:17 CST 2016
Version:        2.7.2, rb165c4fe8a74265c792ce23f546c64604acf0e41
Compiled:       2016-01-26T00:08Z by jenkins from (detached from b165c4f)
Cluster ID:     CID-b0ad8d51-6ea3-4bfc-a1d8-ee0cbc9a8ff6
Block Pool ID:  BP-890697487-127.0.1.1-1461391390144
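To shut the cluster down cleanly afterwards, stop the daemons in reverse order:
raini@biyuzhe:~/hadoop$ sbin/stop-yarn.sh
raini@biyuzhe:~/hadoop$ sbin/stop-dfs.sh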