我的yarn-site.xml
<?xml version="1.0"?>
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->
<configuration>
<!-- Site specific YARN configuration properties -->
<!-- Auxiliary service hosted by the NodeManager. The name listed here is
     the one embedded in the per-service ".class" key below. -->
<property>
  <name>yarn.nodemanager.aux-services</name>
  <value>mapreduce_shuffle</value>
</property>
<!-- Implementation class of the "mapreduce_shuffle" service. The key has to
     embed the service name exactly as listed above (underscore, not a dot);
     the former key "yarn.nodemanager.aux-services.mapreduce.shuffle.class"
     matched no configured service. -->
<property>
  <name>yarn.nodemanager.aux-services.mapreduce_shuffle.class</name>
  <value>org.apache.hadoop.mapred.ShuffleHandler</value>
</property>
<!-- Memory, in MB, this NodeManager makes available. -->
<property>
  <name>yarn.nodemanager.resource.memory-mb</name>
  <value>1024</value>
</property>
<!-- Minimum allocation, in MB, configured for the scheduler. -->
<property>
  <name>yarn.scheduler.minimum-allocation-mb</name>
  <value>256</value>
</property>
<!-- Localizer IPC endpoint: port 18089 on all interfaces. -->
<property>
  <name>yarn.nodemanager.localizer.address</name>
  <value>0.0.0.0:18089</value>
  <description>Address where the localizer IPC is.</description>
</property>
<!-- Turn on log aggregation. -->
<property>
  <name>yarn.log-aggregation-enable</name>
  <value>true</value>
</property>
<!-- Keep aggregated logs on HDFS for 3 days (259200 seconds). -->
<property>
  <name>yarn.log-aggregation.retain-seconds</name>
  <value>259200</value>
</property>
<!-- Retry interval, in ms, for connecting to the ResourceManager. -->
<property>
  <name>yarn.resourcemanager.connect.retry-interval.ms</name>
  <value>2000</value>
</property>
<!-- Enable ResourceManager high availability. -->
<property>
  <name>yarn.resourcemanager.ha.enabled</name>
  <value>true</value>
</property>
<!-- Enable automatic failover between the ResourceManagers. -->
<property>
  <name>yarn.resourcemanager.ha.automatic-failover.enabled</name>
  <value>true</value>
</property>
<!-- Cluster id of the ResourceManagers. -->
<property>
  <name>yarn.resourcemanager.cluster-id</name>
  <value>cluster-yarn</value>
</property>
<!-- Logical ids of the ResourceManagers; the per-RM keys further down in
     this file use these ids as their suffix. -->
<property>
  <name>yarn.resourcemanager.ha.rm-ids</name>
  <value>hadoop1,hadoop3</value>
</property>
<!-- Id of the ResourceManager on THIS host; the value differs between
     hadoop1 and hadoop3.
     NOTE(review): if this file is copied unchanged to hadoop3, the value
     there presumably has to be hadoop3; confirm on that node. -->
<property>
  <name>yarn.resourcemanager.ha.id</name>
  <value>hadoop1</value>
</property>
<!-- RM state store implementation: the ZooKeeper-backed ZKRMStateStore. -->
<property>
  <name>yarn.resourcemanager.store.class</name>
  <value>org.apache.hadoop.yarn.server.resourcemanager.recovery.ZKRMStateStore</value>
</property>
<!-- Scheduler implementation: FairScheduler. -->
<property>
  <name>yarn.resourcemanager.scheduler.class</name>
  <value>org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairScheduler</value>
</property>
<!-- Enable ResourceManager recovery. -->
<property>
  <name>yarn.resourcemanager.recovery.enabled</name>
  <value>true</value>
</property>
<!-- Scheduler connection wait interval, in ms.
     NOTE(review): the "yarn.app.mapreduce.am." prefix suggests a MapReduce
     ApplicationMaster setting; confirm it is picked up from yarn-site.xml. -->
<property>
  <name>yarn.app.mapreduce.am.scheduler.connection.wait.interval-ms</name>
  <value>5000</value>
</property>
<!-- Host of each ResourceManager, one key per rm-id. -->
<property>
  <name>yarn.resourcemanager.hostname.hadoop1</name>
  <value>hadoop1</value>
</property>
<property>
  <name>yarn.resourcemanager.hostname.hadoop3</name>
  <value>hadoop3</value>
</property>
<!-- Use the embedded option for automatic failover. -->
<property>
  <name>yarn.resourcemanager.ha.automatic-failover.embedded</name>
  <value>true</value>
</property>
<!-- Address of the ZooKeeper cluster (three servers, port 2181). -->
<property>
  <name>yarn.resourcemanager.zk-address</name>
  <value>hadoop2:2181,hadoop4:2181,hadoop5:2181</value>
</property>
<!-- Same server list under a second key.
     NOTE(review): this looks like an older spelling of the ZooKeeper address
     key and is presumably redundant with zk-address on Hadoop 2.7; confirm
     before removing it. -->
<property>
  <name>yarn.resourcemanager.zk.state-store.address</name>
  <value>hadoop2:2181,hadoop4:2181,hadoop5:2181</value>
</property>
<!-- RPC address clients use to reach each RM (applications manager
     interface), one key per rm-id. -->
<property>
  <name>yarn.resourcemanager.address.hadoop1</name>
  <value>hadoop1:18032</value>
</property>
<property>
  <name>yarn.resourcemanager.address.hadoop3</name>
  <value>hadoop3:18032</value>
</property>
<!-- Scheduler interface address of each RM, one key per rm-id.
     Fixed: the hadoop1 entry used to read "shadoop1:18030", a host name
     that appears nowhere else in this configuration. An unusable scheduler
     address presumably stops the hadoop1 RM from starting when it has to
     take over as active. -->
<property>
  <name>yarn.resourcemanager.scheduler.address.hadoop1</name>
  <value>hadoop1:18030</value>
</property>
<property>
  <name>yarn.resourcemanager.scheduler.address.hadoop3</name>
  <value>hadoop3:18030</value>
</property>
<!-- RM admin interface address of each RM, one key per rm-id. -->
<property>
  <name>yarn.resourcemanager.admin.address.hadoop1</name>
  <value>hadoop1:18033</value>
</property>
<property>
  <name>yarn.resourcemanager.admin.address.hadoop3</name>
  <value>hadoop3:18033</value>
</property>
<!-- RPC address NodeManagers use to reach each RM, one key per rm-id. -->
<property>
  <name>yarn.resourcemanager.resource-tracker.address.hadoop1</name>
  <value>hadoop1:18031</value>
</property>
<property>
  <name>yarn.resourcemanager.resource-tracker.address.hadoop3</name>
  <value>hadoop3:18031</value>
</property>
<!-- Web application (HTTP) address of each RM, one key per rm-id. -->
<property>
  <name>yarn.resourcemanager.webapp.address.hadoop1</name>
  <value>hadoop1:18088</value>
</property>
<property>
  <name>yarn.resourcemanager.webapp.address.hadoop3</name>
  <value>hadoop3:18088</value>
</property>
<!-- Web application (HTTPS) address of each RM, one key per rm-id. -->
<property>
  <name>yarn.resourcemanager.webapp.https.address.hadoop1</name>
  <value>hadoop1:10443</value>
</property>
<property>
  <name>yarn.resourcemanager.webapp.https.address.hadoop3</name>
  <value>hadoop3:10443</value>
</property>
<!-- yarn.nodemanager.resource.memory-mb (1024) and
     yarn.scheduler.minimum-allocation-mb (256) are declared once near the
     top of this file. The identical second declarations that used to sit
     here were removed so the two copies cannot drift apart. -->
<!-- Failover proxy provider class used by clients. -->
<property>
  <name>yarn.client.failover-proxy-provider</name>
  <value>org.apache.hadoop.yarn.client.ConfiguredRMFailoverProxyProvider</value>
</property>
<!-- ZooKeeper base path used for automatic failover. -->
<property>
  <name>yarn.resourcemanager.ha.automatic-failover.zk-base-path</name>
  <value>/yarn-leader-election</value>
</property>
<!-- Location of the Fair Scheduler allocation file. -->
<property>
  <name>yarn.scheduler.fair.allocation.file</name>
  <value>/home/hadoop/hadoop-2.7.2/etc/hadoop/fairscheduler.xml</value>
  <description>fair-scheduler conf location</description>
</property>
</configuration>
测试步骤:
active 状态的 RM 在 hadoop3 上。我将 hadoop3 上的 ResourceManager 进程 kill 掉之后,hadoop1 上的 RM 进程也一同退出了,无法正常切换到 hadoop1。于是我执行 yarn rmadmin -transitionToActive hadoop1,输出如下:
Automatic failover is enabled for org.apache.hadoop.yarn.client.RMHAServiceTarget@27d415d9
Refusing to manually manage HA state, since it may cause
a split-brain scenario or other incorrect state.
If you are very sure you know what you are doing, please
specify the --forcemanual flag.
加上 --forcemanual 参数之后,输出如下:
You have specified the --forcemanual flag. This flag is dangerous, as it can induce a split-brain scenario that WILL CORRUPT your HDFS namespace, possibly irrecoverably.
It is recommended not to use this flag, but instead to shut down the cluster and disable automatic failover if you prefer to manually manage your HA state.
You may abort safely by answering 'n' or hitting ^C now.
Are you sure you want to continue? (Y or N) y
16/08/16 13:10:02 WARN ha.HAAdmin: Proceeding with manual HA state management even though
automatic failover is enabled for org.apache.hadoop.yarn.client.RMHAServiceTarget@2f8f5f62
16/08/16 13:10:04 INFO ipc.Client: Retrying connect to server: hadoop1/192.168.80.131:18033. Already tried 0 time(s); retry policy is RetryUpToMaximumCountWithFixedSleep(maxRetries=1, sleepTime=1000 MILLISECONDS)
Operation failed: Call From hadoop1/192.168.80.131 to hadoop1:18033 failed on connection exception: java.net.ConnectException: Connection refused; For more details see: http://wiki.apache.org/hadoop/ConnectionRefused
|