Reading guide:
1. What work does a Hadoop upgrade involve? 2. How is the upgrade performed? 3. Can HA be set up after the upgrade?
References:
Baidu Wenku
"Guide to upgrading a Hadoop 1.x cluster to Hadoop 2.x and issues to watch out for"
---------------------------------------------------------------------------------------------------------------------
-- Before the upgrade (Hadoop 1.0.3):
192.168.114.53 -- namenode
192.168.117.148 -- datanode, secondarynamenode
192.168.117.32 -- datanode
192.168.117.62 -- datanode
-- After the upgrade (Hadoop 2.4.0):
192.168.114.52 -- namenode
192.168.114.53 -- namenode
192.168.117.148 -- datanode
192.168.117.32 -- datanode
192.168.117.62 -- datanode
192.168.117.149 -- other services (HBase, Hive, ZooKeeper)
-- Create the following directories as the hadoop user on each datanode:
mkdir -p /hadoop-disk{1..12}/mapred/local
mkdir -p /hadoop-disk{1..12}/hdfs
-- Create the following directories as the hadoop user on the namenode (53):
mkdir -p /hadoop-disk1/hdfs/name
mkdir -p /hadoop-disk2/hdfs/name
mkdir -p /hadoop-disk3/hdfs/name
-- Create the following directories as the hadoop user on the secondarynamenode (148):
mkdir -p /hadoop-disk1/hdfs/tmp_namesecondary
mkdir -p /hadoop-disk2/hdfs/tmp_namesecondary
mkdir -p /hadoop-disk3/hdfs/tmp_namesecondary
-- Create the following directories on all nodes:
mkdir -p /home/hadoop/mapred/system -- note: this appears to be an HDFS path (mapred.system.dir lives in HDFS, not on the local disk)
mkdir -p /hadoop-disk6/hadoop/logs
mkdir -p /opt/hadoopgpl/native/Linux-amd64-64
export HADOOP_LOG_DIR=/hadoop-disk6/hadoop/logs
-- hadoop namenode -format
-- 1. Directory-related parameters:
-- 1.1 core-site.xml
hadoop.tmp.dir /home/hadoop/tmp -- temporary directory; parent of the other temporary directories
fs.trash.root /home/hadoop/tmp/Trash
-- 1.2 hdfs-site.xml
dfs.name.dir /hadoop-disk[1-3]/hdfs/name
dfs.data.dir /hadoop-disk[1-12]/hdfs/data
fs.checkpoint.dir /hadoop-disk[1-3]/hdfs/tmp_namesecondary
-- 1.3 mapred-site.xml
mapred.local.dir /hadoop-disk[1-12]/mapred/local
mapred.system.dir /home/hadoop/mapred/system
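-- The [1-3]/[1-12] ranges above are shorthand; in the actual XML each of these properties takes a comma-separated list of paths.
-- A small bash sketch (illustration only, assuming the disk layout above) to build such a list:
DATA_DIRS=$(echo /hadoop-disk{1..12}/hdfs/data | tr ' ' ',')
echo "$DATA_DIRS"   # /hadoop-disk1/hdfs/data,/hadoop-disk2/hdfs/data,...,/hadoop-disk12/hdfs/data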
-- Clean out any old contents of these directories before formatting:
rm -rf /home/hadoop/tmp/*
rm -rf /hadoop-disk{1..12}/hdfs/*
rm -rf /hadoop-disk{1..12}/mapred/*
rm -rf /home/hadoop/mapred/system/*
------------------------------------------------------------------------------------
-- Run the following command on the namenode to format HDFS:
hadoop namenode -format
cd $HADOOP_HOME/bin
nohup sh /home/hadoop/datateam/ghh/lab/log_catch_hour_lzo.sh 'http://192.168.116.61:8081/website/pv/2' '/dw/logs/web/origin/pv/2' 20140627 20140627 0 10 &
------------------------------------------------------------------------------------
-- Start the upgrade:
------------------------------------------------------------------------------------
-- Step 1. Stop all production jobs. With the existing version, enter safe mode and check the metadata and blocks:
hadoop dfsadmin -safemode enter
hadoop fsck / -files -blocks -locations > /home/hadoop/dfs-v-old-fsck-1.log
-- Save the log generated by the step above locally.
tail -n 400 /home/hadoop/dfs-v-old-fsck-1.log
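-- Instead of eyeballing the tail, you can grep the summary directly (a small sketch, using the log path above):
grep -E 'Status|Corrupt blocks|Missing replicas' /home/hadoop/dfs-v-old-fsck-1.log
# Corrupt blocks and Missing replicas should be 0, and Status should be HEALTHY, before proceeding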
-- The end of dfs-v-old-fsck-1.log should look roughly like the following (the cluster is healthy only if Corrupt blocks is 0):
/home <dir>
/home/hadoop <dir>
/home/hadoop/mapred <dir>
/home/hadoop/mapred/system <dir>
/home/hadoop/mapred/system/jobtracker.info 4 bytes, 1 block(s): OK
0. blk_1448192668627741947_1001 len=4 repl=2 [192.168.117.62:50011, 192.168.117.148:50011]
Status: HEALTHY
Total size: 3143709511 B
Total dirs: 25
Total files: 45
Total blocks (validated): 47 (avg. block size 66887436 B)
Minimally replicated blocks: 47 (100.0 %)
Over-replicated blocks: 0 (0.0 %)
Under-replicated blocks: 0 (0.0 %)
Mis-replicated blocks: 0 (0.0 %)
Default replication factor: 2
Average block replication: 2.0
Corrupt blocks: 0
Missing replicas: 0 (0.0 %)
Number of data-nodes: 3
Number of racks: 1
FSCK ended at Mon Jun 30 18:32:12 CST 2014 in 42 milliseconds
The filesystem under path '/' is HEALTHY
-- (For comparison, the same fsck run on the upgraded 2.x cluster ends like this; note the BP-... block pool prefix and the 50010 datanode port:)
/home <dir>
/home/hadoop <dir>
/home/hadoop/mapred <dir>
/home/hadoop/mapred/system <dir>
/home/hadoop/mapred/system/jobtracker.info 4 bytes, 1 block(s): OK
0. BP-377498577-192.168.114.53-1404124653047:blk_1448192668627741947_1001 len=4 repl=2 [192.168.117.148:50010, 192.168.117.62:50010]
Status: HEALTHY
Total size: 3143709511 B
Total dirs: 25
Total files: 45
Total symlinks: 0
Total blocks (validated): 47 (avg. block size 66887436 B)
Minimally replicated blocks: 47 (100.0 %)
Over-replicated blocks: 0 (0.0 %)
Under-replicated blocks: 0 (0.0 %)
Mis-replicated blocks: 0 (0.0 %)
Default replication factor: 2
Average block replication: 2.0
Corrupt blocks: 0
Missing replicas: 0 (0.0 %)
Number of data-nodes: 3
Number of racks: 1
FSCK ended at Mon Jun 30 18:45:10 CST 2014 in 65 milliseconds
The filesystem under path '/' is HEALTHY
------------------------------------------------------------------------------------
-- Step 2. Stop the existing cluster, update the environment variables (new JAVA_HOME, HADOOP_HOME, etc., including re-pointing the relevant symlinks), and create the directories required by the new version's configuration
$HADOOP_HOME/bin/stop-all.sh
-- Create the YARN log and job-related directories:
mkdir -p /data/hadoop/logs/yarn_local
mkdir -p /data/hadoop/logs/yarn_log
mkdir -p /data/hadoop/logs/yarn_remotelog
mkdir -p /data/hadoop/logs/yarn_userstag
mkdir -p /data/hadoop/logs/yarn_intermediatedone
mkdir -p /data/hadoop/logs/dfs/yarn_done
mkdir -p /data/hadoop/tmp
mkdir -p /usr/local/hadoop/logs
chown -R hadoop.hadoop /data/hadoop
chown -R hadoop.hadoop /usr/local/hadoop/logs
ls -l /data/hadoop/logs/
ls -l /data/hadoop/
-- Switch the software versions (re-point the symlinks):
-- 1. hadoop
rm -rf /usr/local/hadoop
ln -s /usr/local/hadoop-2.4.0 /usr/local/hadoop
ls -l /usr/local
-- 2. java
rm -rf /usr/java/latest
ln -s /usr/java/jdk1.7.0_51 /usr/java/latest
ls -l /usr/java/
-- Update the environment variables (back up the current file first, so the undo command below works):
cp ~/.bash_profile ~/.bash_profile.old
vi ~/.bash_profile
rm -rf ~/.bash_profile
cp ~/.bash_profile.new ~/.bash_profile
-- To undo: cp ~/.bash_profile.old ~/.bash_profile
-- Sync the environment file to the other nodes (be careful on nodes that also run other applications, e.g. HBase)
scp -P5044 ~/.bash_profile hadoop@funshion-hadoop148:~/
-- Contents of the new ~/.bash_profile:
export JAVA_HOME=/usr/java/latest
export PATH=$PATH:$HOME/bin:$JAVA_HOME/bin
export HADOOP_INSTALL=/usr/local/hadoop
export HADOOP_HOME=/usr/local/hadoop
export HADOOP_DEV_HOME=/usr/local/hadoop
export HADOOP_PREFIX=/usr/local/hadoop
export LD_LIBRARY_PATH=$HADOOP_HOME/lib/native
# export HIVE_HOME=/usr/local/hive
# export HBASE_HOME=/usr/local/hbase
# export ZK_HOME=/usr/local/zookeeper
export PATH=$PATH:$HADOOP_DEV_HOME/bin
export PATH=$PATH:$HADOOP_DEV_HOME/sbin
# export PATH=$PATH:$HIVE_HOME/bin
# export PATH=$PATH:$HBASE_HOME/bin
# export PATH=$PATH:$ZK_HOME/bin
export HADOOP_MAPARED_HOME=${HADOOP_DEV_HOME}
export HADOOP_COMMON_HOME=${HADOOP_DEV_HOME}
export HADOOP_HDFS_HOME=${HADOOP_DEV_HOME}
export YARN_HOME=${HADOOP_DEV_HOME}
export HADOOP_YARN_HOME=${HADOOP_DEV_HOME}
export HADOOP_CLIENT_CONF_DIR=${HADOOP_DEV_HOME}/etc/hadoop
export HADOOP_CONF_DIR=${HADOOP_DEV_HOME}/etc/hadoop
export HDFS_CONF_DIR=${HADOOP_DEV_HOME}/etc/hadoop
export YARN_CONF_DIR=${HADOOP_DEV_HOME}/etc/hadoop
export CLASSPATH=".:$JAVA_HOME/lib:$CLASSPATH"
export PATH="$JAVA_HOME/:$HADOOP_PREFIX/bin:$PATH"
# Native Path
export HADOOP_COMMON_LIB_NATIVE_DIR=${HADOOP_PREFIX}/lib/native
export HADOOP_OPTS="-Djava.library.path=$HADOOP_PREFIX/lib/native"
# SET HADOOP_CLASSPATH
for file in `ls $HADOOP_HOME/share/hadoop/common/lib/*jar`
do
HADOOP_CLASSPATH=$HADOOP_CLASSPATH:$file
done
export HADOOP_CLASSPATH=$HADOOP_CLASSPATH:$HADOOP_HOME/lib/native/hadoop-lzo-0.4.20-SNAPSHOT.jar
# SET JAVA_LIBRARY_PATH
for file in `ls $JAVA_HOME/lib/*jar`
do
JAVA_LIBRARY_PATH=$JAVA_LIBRARY_PATH:$file
done
export JAVA_LIBRARY_PATH
-- Check that $HADOOP_HOME/etc/hadoop/slaves correctly lists all of the datanodes (if you change it, remember to sync it to all other nodes),
-- and verify the Java version currently in use on every node.
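-- A quick per-node check (a sketch only; hostnames and ssh port 5044 are taken from the scp commands in this post):
for h in funshion-hadoop52 funshion-hadoop148 funshion-hadoop32 funshion-hadoop62; do
  ssh -p 5044 hadoop@$h 'hostname; java -version 2>&1 | head -1; readlink /usr/local/hadoop; readlink /usr/java/latest'
done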
-- Once everything is ready, start the upgrade (reload the hadoop user's environment first: source ~/.bash_profile):
cd $HADOOP_HOME/
sbin/start-dfs.sh -upgrade
-- Upgrade commands (alternatively, start HDFS and run the namenode upgrade separately):
./start-dfs.sh
$HADOOP_HOME/sbin/hadoop-daemon.sh start namenode -upgrade
-- Rollback commands (fall back to the old version if the upgrade fails):
$HADOOP_HOME/sbin/hadoop-daemon.sh start namenode -rollback
rm -rf /usr/local/hadoop
ln -s /usr/local/hadoop-1.0.3 /usr/local/hadoop
ls -l /usr/local
-- Finalize the upgrade (only after the upgraded cluster has been verified; once finalized, a rollback is no longer possible):
hdfs dfsadmin -finalizeUpgrade
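-- Before finalizing, it is worth re-running the checks from Step 1 on the upgraded cluster and comparing against the old fsck log
-- (a sketch; the new log file name here is just an example):
hdfs dfsadmin -report | head -20
hdfs fsck / -files -blocks -locations > /home/hadoop/dfs-v-new-fsck-1.log
grep -E 'Status|Corrupt blocks|Missing replicas|Total files|Total blocks' /home/hadoop/dfs-v-new-fsck-1.log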
-- Sync the environment file and the Hadoop configuration directory to the other nodes:
scp -P5044 ~/.bash_profile hadoop@funshion-hadoop52:~/
scp -P5044 ~/.bash_profile hadoop@funshion-hadoop32:~/
scp -P5044 ~/.bash_profile hadoop@funshion-hadoop62:~/
scp -P5044 ~/.bash_profile hadoop@funshion-hadoop148:~/
scp -P5044 -r $HADOOP_HOME/etc/hadoop/* hadoop@funshion-hadoop148:$HADOOP_HOME/etc/hadoop/
scp -P5044 -r $HADOOP_HOME/etc/hadoop/* hadoop@funshion-hadoop32:$HADOOP_HOME/etc/hadoop/
scp -P5044 -r $HADOOP_HOME/etc/hadoop/* hadoop@funshion-hadoop62:$HADOOP_HOME/etc/hadoop/
-- If the namenode fails to start with the following error, a previous upgrade was never finalized or rolled back; finalize or roll back first:
org.apache.hadoop.hdfs.server.common.InconsistentFSStateException:
Directory /hadoop-disk1/hdfs/name is in an inconsistent state: previous fs state should not exist during upgrade. Finalize or rollback first.
-- Another possible startup failure is an UnsatisfiedLinkError from the native library (typically the libraries under lib/native do not match the new Hadoop version), for example:
STARTUP_MSG: build = Unknown -r Unknown; compiled by 'root' on 2014-06-20T09:54Z
STARTUP_MSG: java = 1.7.0_51
************************************************************/
2014-06-30 11:06:21,843 INFO org.apache.hadoop.hdfs.server.namenode.NameNode: registered UNIX signal handlers for [TERM, HUP, INT]
2014-06-30 11:06:21,848 INFO org.apache.hadoop.hdfs.server.namenode.NameNode: createNameNode []
2014-06-30 11:06:22,296 INFO org.apache.hadoop.metrics2.impl.MetricsConfig: loaded properties from hadoop-metrics2.properties
2014-06-30 11:06:22,431 INFO org.apache.hadoop.metrics2.impl.MetricsSystemImpl: Scheduled snapshot period at 10 second(s).
2014-06-30 11:06:22,432 INFO org.apache.hadoop.metrics2.impl.MetricsSystemImpl: NameNode metrics system started
2014-06-30 11:06:22,591 FATAL org.apache.hadoop.hdfs.server.namenode.NameNode: Exception in namenode join
java.lang.RuntimeException: java.lang.reflect.InvocationTargetException
at org.apache.hadoop.util.ReflectionUtils.newInstance(ReflectionUtils.java:131)
at org.apache.hadoop.security.Groups.<init>(Groups.java:64)
at org.apache.hadoop.security.Groups.getUserToGroupsMappingService(Groups.java:240)
at org.apache.hadoop.security.UserGroupInformation.initialize(UserGroupInformation.java:255)
at org.apache.hadoop.security.UserGroupInformation.setConfiguration(UserGroupInformation.java:283)
at org.apache.hadoop.hdfs.server.namenode.NameNode.initialize(NameNode.java:502)
at org.apache.hadoop.hdfs.server.namenode.NameNode.<init>(NameNode.java:670)
at org.apache.hadoop.hdfs.server.namenode.NameNode.<init>(NameNode.java:655)
at org.apache.hadoop.hdfs.server.namenode.NameNode.createNameNode(NameNode.java:1304)
at org.apache.hadoop.hdfs.server.namenode.NameNode.main(NameNode.java:1370)
Caused by: java.lang.reflect.InvocationTargetException
at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:57)
at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
at java.lang.reflect.Constructor.newInstance(Constructor.java:526)
at org.apache.hadoop.util.ReflectionUtils.newInstance(ReflectionUtils.java:129)
... 9 more
Caused by: java.lang.UnsatisfiedLinkError: org.apache.hadoop.security.JniBasedUnixGroupsMapping.anchorNative()V
at org.apache.hadoop.security.JniBasedUnixGroupsMapping.anchorNative(Native Method)
at org.apache.hadoop.security.JniBasedUnixGroupsMapping.<clinit>(JniBasedUnixGroupsMapping.java:49)
at org.apache.hadoop.security.JniBasedUnixGroupsMappingWithFallback.<init>(JniBasedUnixGroupsMappingWithFallback.java:38)
... 14 more
2014-06-30 11:06:22,596 INFO org.apache.hadoop.util.ExitUtil: Exiting with status 1
2014-06-30 11:06:22,597 INFO org.apache.hadoop.hdfs.server.namenode.NameNode: SHUTDOWN_MSG:
/************************************************************
SHUTDOWN_MSG: Shutting down NameNode at funshion-hadoop53/192.168.114.53
************************************************************/
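-- One way to diagnose the native-library problem (a sketch; assumes your Hadoop 2.x build includes the checknative command):
ls -l $HADOOP_HOME/lib/native/
hadoop checknative -a   # reports whether libhadoop, zlib, snappy, lz4 and bzip2 can be loaded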
-- ############################################################################## --
------------------------------------------------------------------------------------
-- After the upgrade, set up the HA environment:
-- There are 5 servers in total, as follows:
--------------------------------------------------------------------------------------------------------------------
| IP address      | Hostname           | NameNode | JournalNode | DataNode | Zookeeper | Hbase        | Hive        |
--------------------------------------------------------------------------------------------------------------------
| 192.168.114.52  | funshion-hadoop52  | Yes      | No          | No       | No        | Yes          | No          |
--------------------------------------------------------------------------------------------------------------------
| 192.168.114.53  | funshion-hadoop53  | Yes      | No          | No       | No        | Yes          | No          |
--------------------------------------------------------------------------------------------------------------------
| 192.168.117.148 | funshion-hadoop148 | No       | Yes         | Yes      | Yes       | Yes (Master) | Yes (MySQL) |
--------------------------------------------------------------------------------------------------------------------
| 192.168.117.32  | funshion-hadoop32  | No       | Yes         | Yes      | Yes       | Yes          | No          |
--------------------------------------------------------------------------------------------------------------------
| 192.168.117.62  | funshion-hadoop62  | No       | Yes         | Yes      | Yes       | Yes          | No          |
--------------------------------------------------------------------------------------------------------------------
------------------------------------------------------------------------------------
-- Step 1. Install ZooKeeper (run the following commands, in order, as root on each node)
-- Note: before running the commands below, place the zookeeper-3.4.6.tar.gz package under /data/software on each node
cd /data/software/
tar -xvf zookeeper-3.4.6.tar.gz
mv zookeeper-3.4.6 /usr/local/
chown -R hadoop.hadoop /usr/local/zookeeper-3.4.6
cd /usr/local
ln -s zookeeper-3.4.6 zookeeper
ls -l |grep zoo
------------------------------------------------------------------------------------
-- Step 2. On machine 53, configure the zoo.cfg file and add the following (remember to comment out the previous dataDir setting):
cd /usr/local/zookeeper/conf
cp zoo_sample.cfg zoo.cfg
vi zoo.cfg
---------------
dataDir=/usr/local/zookeeper/var/data
dataLogDir=/usr/local/zookeeper/var/datalog
server.1=funshion-hadoop148:2888:3888
server.2=funshion-hadoop32:2888:3888
server.3=funshion-hadoop62:2888:3888
------------------------------------------------------------------------------------
-- Step 3. Sync zoo.cfg to the other nodes and create the required directories on each node
scp -P5044 /usr/local/zookeeper/conf/zoo.cfg hadoop@funshion-hadoop32:/usr/local/zookeeper/conf/
scp -P5044 /usr/local/zookeeper/conf/zoo.cfg hadoop@funshion-hadoop62:/usr/local/zookeeper/conf/
-- As the hadoop user, create the following two directories on all 5 nodes:
mkdir -p /usr/local/zookeeper/var/data
mkdir -p /usr/local/zookeeper/var/datalog
-- On each ZooKeeper node, create a myid file under /usr/local/zookeeper/var/data containing the id that matches its server.N entry in zoo.cfg.
vi /usr/local/zookeeper/var/data/myid
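-- For example (a sketch following the zoo.cfg above; run the matching line on each host):
echo 1 > /usr/local/zookeeper/var/data/myid   # on funshion-hadoop148 (server.1)
echo 2 > /usr/local/zookeeper/var/data/myid   # on funshion-hadoop32  (server.2)
echo 3 > /usr/local/zookeeper/var/data/myid   # on funshion-hadoop62  (server.3)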
------------------------------------------------------------------------------------
-- Step 4. Start ZooKeeper (run on each ZooKeeper node):
/usr/local/zookeeper/bin/zkServer.sh start
-- Check whether startup succeeded (this only needs to be run on one node):
[hadoop@funshion-hadoop52 conf]$ /usr/local/zookeeper/bin/zkServer.sh status
JMX enabled by default
Using config: /usr/local/zookeeper/bin/../conf/zoo.cfg
Mode: follower
-- Seeing "Mode: follower" or "Mode: leader" means ZooKeeper started successfully.
------------------------------------------------------------------------------------
-- Step 5.
-- 5.1 Copy the existing $HADOOP_HOME/etc/hadoop directory to $HADOOP_HOME/etc/hadoop_no_ha.ok (the non-HA configuration)
-- 5.2 Copy the existing $HADOOP_HOME/etc/hadoop directory to $HADOOP_HOME/etc/hadoop_is_ha.ok (the HA configuration),
--     then edit the configuration files under $HADOOP_HOME/etc/hadoop_is_ha.ok to set the HA-related parameters, as sketched below.
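-- A minimal sketch of the copies (directory names as above). The HA parameters to edit afterwards include dfs.nameservices,
-- dfs.ha.namenodes.<nameservice>, dfs.namenode.rpc-address.*, dfs.namenode.shared.edits.dir, dfs.journalnode.edits.dir,
-- dfs.client.failover.proxy.provider.*, dfs.ha.fencing.methods and, in core-site.xml, fs.defaultFS and ha.zookeeper.quorum:
cp -r $HADOOP_HOME/etc/hadoop $HADOOP_HOME/etc/hadoop_no_ha.ok
cp -r $HADOOP_HOME/etc/hadoop $HADOOP_HOME/etc/hadoop_is_ha.ok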
-- As the hadoop user, create the following directory on all 5 nodes (the path given by dfs.journalnode.edits.dir in hdfs-site.xml):
mkdir -p /data/hadoop/hdfs/journal
-- Do not switch to the HA configuration yet.
------------------------------------------------------------------------------------
-- Step 6. Set up passwordless SSH trust to and from the newly added namenode (funshion-hadoop52)
-- (this step is best done before the upgrade, to save time once the upgrade starts)
------------------------------------------------------------------------------------
-- Step 7.
-- Sync the native libraries and the lzo/lzop-related files
-- Run on funshion-hadoop53:
scp -P5044 -r /usr/local/hadoop/lzo* hadoop@funshion-hadoop52:/usr/local/hadoop
scp -P5044 -r /usr/local/hadoop/lzo* hadoop@funshion-hadoop32:/usr/local/hadoop
scp -P5044 -r /usr/local/hadoop/lzo* hadoop@funshion-hadoop62:/usr/local/hadoop
scp -P5044 -r /usr/local/hadoop/lzo* hadoop@funshion-hadoop148:/usr/local/hadoop
scp -P5044 -r /usr/local/hadoop/lib/native/* hadoop@funshion-hadoop52:/usr/local/hadoop/lib/native
scp -P5044 -r /usr/local/hadoop/lib/native/* hadoop@funshion-hadoop32:/usr/local/hadoop/lib/native
scp -P5044 -r /usr/local/hadoop/lib/native/* hadoop@funshion-hadoop62:/usr/local/hadoop/lib/native
scp -P5044 -r /usr/local/hadoop/lib/native/* hadoop@funshion-hadoop148:/usr/local/hadoop/lib/native
------------------------------------------------------------------------------------
-- Step 8. Shut down the Hadoop cluster using the original (non-HA) configuration:
scp -P5044 -r /usr/local/hadoop/etc/hadoop_no_ha.ok/* hadoop@funshion-hadoop53:/usr/local/hadoop/etc/hadoop/
scp -P5044 -r /usr/local/hadoop/etc/hadoop_no_ha.ok/* hadoop@funshion-hadoop148:/usr/local/hadoop/etc/hadoop/
scp -P5044 -r /usr/local/hadoop/etc/hadoop_no_ha.ok/* hadoop@funshion-hadoop32:/usr/local/hadoop/etc/hadoop/
scp -P5044 -r /usr/local/hadoop/etc/hadoop_no_ha.ok/* hadoop@funshion-hadoop62:/usr/local/hadoop/etc/hadoop/
[hadoop@funshion-hadoop53 sbin]$ ./stop-all.sh
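-- A quick way to confirm every node shut down cleanly (a sketch; hostnames and ssh port from this post):
for h in funshion-hadoop52 funshion-hadoop53 funshion-hadoop148 funshion-hadoop32 funshion-hadoop62; do
  ssh -p 5044 hadoop@$h 'echo "== $(hostname)"; jps | grep -vw Jps'
done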
-- After the cluster is down, distribute the new HA configuration and get ready to start the cluster (first check the processes on each node, e.g. with the sketch above, to confirm everything shut down cleanly):
scp -P5044 -r /usr/local/hadoop/etc/hadoop_is_ha.ok/* hadoop@funshion-hadoop52:/usr/local/hadoop/etc/hadoop/
scp -P5044 -r /usr/local/hadoop/etc/hadoop_is_ha.ok/* hadoop@funshion-hadoop53:/usr/local/hadoop/etc/hadoop/
scp -P5044 -r /usr/local/hadoop/etc/hadoop_is_ha.ok/* hadoop@funshion-hadoop148:/usr/local/hadoop/etc/hadoop/
scp -P5044 -r /usr/local/hadoop/etc/hadoop_is_ha.ok/* hadoop@funshion-hadoop32:/usr/local/hadoop/etc/hadoop/
scp -P5044 -r /usr/local/hadoop/etc/hadoop_is_ha.ok/* hadoop@funshion-hadoop62:/usr/local/hadoop/etc/hadoop/
-- After syncing the configuration to the other nodes, remember to change the yarn.resourcemanager.ha.id value in yarn-site.xml on the other namenode (funshion-hadoop52).
-- 8.1 On the original namenode (funshion-hadoop53), run the following command to initialize the HA state (namespace) in ZooKeeper
[hadoop@funshion-hadoop53 bin]$ cd $HADOOP_HOME
[hadoop@funshion-hadoop53 hadoop]$ ./bin/hdfs zkfc -formatZK
-- 8.2 Start the JournalNode process (on each JournalNode host: 148, 32 and 62, per the table above)
$HADOOP_HOME/sbin/hadoop-daemon.sh start journalnode
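-- Or start them all from 53 in one go (a sketch; hosts as in the table above):
for h in funshion-hadoop148 funshion-hadoop32 funshion-hadoop62; do
  ssh -p 5044 hadoop@$h "/usr/local/hadoop/sbin/hadoop-daemon.sh start journalnode"
done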
-- 8.3 Then run the following command (on funshion-hadoop53) to initialize the shared edits storage
bin/hdfs namenode -initializeSharedEdits
-- The output of the command above looks roughly like this:
STARTUP_MSG: build = Unknown -r Unknown; compiled by 'root' on 2014-06-20T09:54Z
STARTUP_MSG: java = 1.7.0_51
************************************************************/
14/07/01 15:06:16 INFO namenode.NameNode: registered UNIX signal handlers for [TERM, HUP, INT]
14/07/01 15:06:16 INFO namenode.NameNode: createNameNode [-initializeSharedEdits]
14/07/01 15:06:16 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
14/07/01 15:06:17 INFO namenode.FSNamesystem: fsLock is fair:true
14/07/01 15:06:17 INFO namenode.HostFileManager: read includes:
HostSet(
)
14/07/01 15:06:17 INFO namenode.HostFileManager: read excludes:
HostSet(
)
14/07/01 15:06:17 INFO blockmanagement.DatanodeManager: dfs.block.invalidate.limit=1000
14/07/01 15:06:17 INFO blockmanagement.DatanodeManager: dfs.namenode.datanode.registration.ip-hostname-check=true
14/07/01 15:06:17 INFO util.GSet: Computing capacity for map BlocksMap
14/07/01 15:06:17 INFO util.GSet: VM type = 64-bit
14/07/01 15:06:17 INFO util.GSet: 2.0% max memory 3.6 GB = 72.8 MB
14/07/01 15:06:17 INFO util.GSet: capacity = 2^23 = 8388608 entries
14/07/01 15:06:17 INFO blockmanagement.BlockManager: dfs.block.access.token.enable=true
14/07/01 15:06:17 INFO blockmanagement.BlockManager: dfs.block.access.key.update.interval=600 min(s), dfs.block.access.token.lifetime=600 min(s), dfs.encrypt.data.transfer.algorithm=null
14/07/01 15:06:17 INFO blockmanagement.BlockManager: defaultReplication = 2
14/07/01 15:06:17 INFO blockmanagement.BlockManager: maxReplication = 512
14/07/01 15:06:17 INFO blockmanagement.BlockManager: minReplication = 1
14/07/01 15:06:17 INFO blockmanagement.BlockManager: maxReplicationStreams = 2
14/07/01 15:06:17 INFO blockmanagement.BlockManager: shouldCheckForEnoughRacks = false
14/07/01 15:06:17 INFO blockmanagement.BlockManager: replicationRecheckInterval = 3000
14/07/01 15:06:17 INFO blockmanagement.BlockManager: encryptDataTransfer = false
14/07/01 15:06:17 INFO blockmanagement.BlockManager: maxNumBlocksToLog = 1000
14/07/01 15:06:17 INFO namenode.FSNamesystem: fsOwner = hadoop (auth:SIMPLE)
14/07/01 15:06:17 INFO namenode.FSNamesystem: supergroup = supergroup
14/07/01 15:06:17 INFO namenode.FSNamesystem: isPermissionEnabled = true
14/07/01 15:06:17 INFO namenode.FSNamesystem: Determined nameservice ID: mycluster
14/07/01 15:06:17 INFO namenode.FSNamesystem: HA Enabled: true
14/07/01 15:06:17 INFO namenode.FSNamesystem: Append Enabled: true
14/07/01 15:06:17 INFO util.GSet: Computing capacity for map INodeMap
14/07/01 15:06:17 INFO util.GSet: VM type = 64-bit
14/07/01 15:06:17 INFO util.GSet: 1.0% max memory 3.6 GB = 36.4 MB
14/07/01 15:06:17 INFO util.GSet: capacity = 2^22 = 4194304 entries
14/07/01 15:06:17 INFO namenode.NameNode: Caching file names occuring more than 10 times
14/07/01 15:06:17 INFO util.GSet: Computing capacity for map cachedBlocks
14/07/01 15:06:17 INFO util.GSet: VM type = 64-bit
14/07/01 15:06:17 INFO util.GSet: 0.25% max memory 3.6 GB = 9.1 MB
14/07/01 15:06:17 INFO util.GSet: capacity = 2^20 = 1048576 entries
14/07/01 15:06:17 INFO namenode.FSNamesystem: dfs.namenode.safemode.threshold-pct = 0.9990000128746033
14/07/01 15:06:17 INFO namenode.FSNamesystem: dfs.namenode.safemode.min.datanodes = 0
14/07/01 15:06:17 INFO namenode.FSNamesystem: dfs.namenode.safemode.extension = 30000
14/07/01 15:06:17 INFO namenode.FSNamesystem: Retry cache on namenode is enabled
14/07/01 15:06:17 INFO namenode.FSNamesystem: Retry cache will use 0.03 of total heap and retry cache entry expiry time is 600000 millis
14/07/01 15:06:17 INFO util.GSet: Computing capacity for map NameNodeRetryCache
14/07/01 15:06:17 INFO util.GSet: VM type = 64-bit
14/07/01 15:06:17 INFO util.GSet: 0.029999999329447746% max memory 3.6 GB = 1.1 MB
14/07/01 15:06:17 INFO util.GSet: capacity = 2^17 = 131072 entries
14/07/01 15:06:17 INFO namenode.AclConfigFlag: ACLs enabled? false
14/07/01 15:06:17 INFO common.Storage: Lock on /hadoop-disk1/hdfs/name/in_use.lock acquired by nodename 14555@funshion-hadoop53
14/07/01 15:06:17 INFO common.Storage: Lock on /hadoop-disk2/hdfs/name/in_use.lock acquired by nodename 14555@funshion-hadoop53
14/07/01 15:06:17 INFO common.Storage: Lock on /hadoop-disk3/hdfs/name/in_use.lock acquired by nodename 14555@funshion-hadoop53
14/07/01 15:06:17 INFO namenode.FSImage: No edit log streams selected.
14/07/01 15:06:17 INFO namenode.FSImageFormatPBINode: Loading 70 INodes.
14/07/01 15:06:17 INFO namenode.FSImageFormatProtobuf: Loaded FSImage in 0 seconds.
14/07/01 15:06:17 INFO namenode.FSImage: Loaded image for txid 44 from /hadoop-disk1/hdfs/name/current/fsimage_0000000000000000044
14/07/01 15:06:17 INFO namenode.FSNamesystem: Need to save fs image? false (staleImage=true, haEnabled=true, isRollingUpgrade=false)
14/07/01 15:06:17 INFO namenode.NameCache: initialized with 0 entries 0 lookups
14/07/01 15:06:17 INFO namenode.FSNamesystem: Finished loading FSImage in 285 msecs
14/07/01 15:06:19 INFO namenode.FileJournalManager: Recovering unfinalized segments in /hadoop-disk1/hdfs/name/current
14/07/01 15:06:19 INFO namenode.FileJournalManager: Finalizing edits file /hadoop-disk1/hdfs/name/current/edits_inprogress_0000000000000000045 -> /hadoop-disk1/hdfs/name/current/edits_0000000000000000045-0000000000000000045
14/07/01 15:06:19 INFO namenode.FileJournalManager: Recovering unfinalized segments in /hadoop-disk2/hdfs/name/current
14/07/01 15:06:19 INFO namenode.FileJournalManager: Finalizing edits file /hadoop-disk2/hdfs/name/current/edits_inprogress_0000000000000000045 -> /hadoop-disk2/hdfs/name/current/edits_0000000000000000045-0000000000000000045
14/07/01 15:06:19 INFO namenode.FileJournalManager: Recovering unfinalized segments in /hadoop-disk3/hdfs/name/current
14/07/01 15:06:19 INFO namenode.FileJournalManager: Finalizing edits file /hadoop-disk3/hdfs/name/current/edits_inprogress_0000000000000000045 -> /hadoop-disk3/hdfs/name/current/edits_0000000000000000045-0000000000000000045
14/07/01 15:06:19 INFO client.QuorumJournalManager: Starting recovery process for unclosed journal segments...
14/07/01 15:06:19 INFO client.QuorumJournalManager: Successfully started new epoch 1
14/07/01 15:06:19 INFO namenode.EditLogInputStream: Fast-forwarding stream '/hadoop-disk1/hdfs/name/current/edits_0000000000000000045-0000000000000000045' to transaction ID 45
14/07/01 15:06:19 INFO namenode.FSEditLog: Starting log segment at 45
14/07/01 15:06:19 INFO namenode.FSEditLog: Ending log segment 45
14/07/01 15:06:19 INFO namenode.FSEditLog: Number of transactions: 1 Total time for transactions(ms): 1 Number of transactions batched in Syncs: 0 Number of syncs: 1 SyncTimes(ms): 25
14/07/01 15:06:19 INFO util.ExitUtil: Exiting with status 0
14/07/01 15:06:19 INFO namenode.NameNode: SHUTDOWN_MSG:
/************************************************************
SHUTDOWN_MSG: Shutting down NameNode at funshion-hadoop53/192.168.114.53
************************************************************/
-- 8.4 Start the namenode process on the original namenode (funshion-hadoop53):
[hadoop@funshion-hadoop53 sbin]$ $HADOOP_HOME/sbin/hadoop-daemon.sh start namenode
starting namenode, logging to /usr/local/hadoop/logs/hadoop-hadoop-namenode-funshion-hadoop53.out
-- At this point, jps should show processes like the following:
[hadoop@funshion-hadoop53 sbin]$ jps
16385 NameNode
16285 JournalNode
16447 Jps
10142 QuorumPeerMain
-- Then, on the other namenode (funshion-hadoop52), run the following (bootstrap the standby from the active namenode, then start it):
[hadoop@funshion-hadoop52 bin]$ $HADOOP_HOME/bin/hdfs namenode -bootstrapStandby
[hadoop@funshion-hadoop52 bin]$ $HADOOP_HOME/sbin/hadoop-daemon.sh start namenode
------------------------------------------------------------------------------------
$HADOOP_HOME/bin/hdfs haadmin -getServiceState nn1
$HADOOP_HOME/bin/hdfs haadmin -getServiceState nn2
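-- If both namenodes report "standby", either start the ZKFC daemons on the two namenodes (when dfs.ha.automatic-failover.enabled
-- is true) or promote one manually; a sketch (nn1/nn2 as used above):
$HADOOP_HOME/sbin/hadoop-daemon.sh start zkfc           # run on funshion-hadoop52 and funshion-hadoop53
$HADOOP_HOME/bin/hdfs haadmin -transitionToActive nn1   # manual alternative; with automatic failover enabled this requires --forcemanual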
------------------------------------------------------------------------------------
-- If the new namenode (funshion-hadoop52) fails to start with the following error, its local name directories (/hadoop-disk[1-3]/hdfs/name)
-- were never created or populated; create them and run "hdfs namenode -bootstrapStandby" before starting it:
org.apache.hadoop.hdfs.server.common.InconsistentFSStateException: Directory /hadoop-disk1/hdfs/name is in an inconsistent state: storage directory does not exist or is not accessible.
at org.apache.hadoop.hdfs.server.namenode.FSImage.recoverStorageDirs(FSImage.java:297)
at org.apache.hadoop.hdfs.server.namenode.FSImage.recoverTransitionRead(FSImage.java:202)
at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.loadFSImage(FSNamesystem.java:879)
at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.loadFromDisk(FSNamesystem.java:638)
at org.apache.hadoop.hdfs.server.namenode.NameNode.loadNamesystem(NameNode.java:455)
at org.apache.hadoop.hdfs.server.namenode.NameNode.initialize(NameNode.java:511)
at org.apache.hadoop.hdfs.server.namenode.NameNode.<init>(NameNode.java:670)
at org.apache.hadoop.hdfs.server.namenode.NameNode.<init>(NameNode.java:655)
at org.apache.hadoop.hdfs.server.namenode.NameNode.createNameNode(NameNode.java:1304)
at org.apache.hadoop.hdfs.server.namenode.NameNode.main(NameNode.java:1370)
2014-07-01 15:15:13,755 INFO org.mortbay.log: Stopped SelectChannelConnector@funshion-hadoop52:50070
2014-07-01 15:15:13,857 INFO org.apache.hadoop.metrics2.impl.MetricsSystemImpl: Stopping NameNode metrics system...
2014-07-01 15:15:13,858 INFO org.apache.hadoop.metrics2.impl.MetricsSystemImpl: NameNode metrics system stopped.
2014-07-01 15:15:13,858 INFO org.apache.hadoop.metrics2.impl.MetricsSystemImpl: NameNode metrics system shutdown complete.
2014-07-01 15:15:13,858 FATAL org.apache.hadoop.hdfs.server.namenode.NameNode: Exception in namenode join
org.apache.hadoop.hdfs.server.common.InconsistentFSStateException: Directory /hadoop-disk1/hdfs/name is in an inconsistent state: storage directory does not exist or is not accessible.
at org.apache.hadoop.hdfs.server.namenode.FSImage.recoverStorageDirs(FSImage.java:297)
at org.apache.hadoop.hdfs.server.namenode.FSImage.recoverTransitionRead(FSImage.java:202)
at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.loadFSImage(FSNamesystem.java:879)
at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.loadFromDisk(FSNamesystem.java:638)
at org.apache.hadoop.hdfs.server.namenode.NameNode.loadNamesystem(NameNode.java:455)
at org.apache.hadoop.hdfs.server.namenode.NameNode.initialize(NameNode.java:511)
at org.apache.hadoop.hdfs.server.namenode.NameNode.<init>(NameNode.java:670)
at org.apache.hadoop.hdfs.server.namenode.NameNode.<init>(NameNode.java:655)
at org.apache.hadoop.hdfs.server.namenode.NameNode.createNameNode(NameNode.java:1304)
at org.apache.hadoop.hdfs.server.namenode.NameNode.main(NameNode.java:1370)
2014-07-01 15:15:13,860 INFO org.apache.hadoop.util.ExitUtil: Exiting with status 1
2014-07-01 15:15:13,862 INFO org.apache.hadoop.hdfs.server.namenode.NameNode: SHUTDOWN_MSG:
/************************************************************
SHUTDOWN_MSG: Shutting down NameNode at funshion-hadoop52/192.168.114.52
************************************************************/
This article was contributed by 月光蚁族 (304466490), a member of the About Cloud QQ group (371358502).
Original link: http://www.aboutyun.com/thread-8359-1-1.html