1、环境准备

ip              hostname    备注
172.16.153.10   node1       NameNode DataNode JobHistoryServer NodeManager
172.16.153.11   node2       ResourceManager DataNode NodeManager
172.16.153.12   node3       SecondaryNameNode DataNode NodeManager

分别在 172.16.153.10、172.16.153.11、172.16.153.12 机器上执行如下命令设置主机名(同时需要在所有节点的 /etc/hosts 中添加上表的 ip 与 hostname 映射,否则后续无法通过 node1/node2/node3 互相访问):

hostnamectl set-hostname node1   # 在 172.16.153.10 上执行
hostnamectl set-hostname node2   # 在 172.16.153.11 上执行
hostnamectl set-hostname node3   # 在 172.16.153.12 上执行

所有节点关闭防火墙 :

systemctl stop firewalld
systemctl disable firewalld

SSH免密登录配置(node1到node1、node2、node3,node2到node1、node2、node3;启动脚本也会通过ssh连接本机,所以需要包含本机)

ssh-keygen -t rsa
ssh-copy-id 对应的节点(比如说node1/node2/node3)

2、安装JDK

下载 jdk-8u461-linux-aarch64.tar.gz
上传到node1节点上 /opt/bigdata 下面,进行 tar -zxvf jdk-8u461-linux-aarch64.tar.gz 解压
配置JAVA_HOME环境变量(vim /etc/profile,在文件末尾追加):

export JAVA_HOME=/opt/bigdata/jdk1.8.0_461
export PATH=$PATH:$JAVA_HOME/bin

scp 到其他节点上(例如 : scp -r /opt/bigdata/jdk1.8.0_461 node2:/opt/bigdata)

如下表示安装成功 :

[root@node1 bigdata]# source /etc/profile
[root@node1 bigdata]# java -version
java version "1.8.0_461"
Java(TM) SE Runtime Environment (build 1.8.0_461-b11)
Java HotSpot(TM) 64-Bit Server VM (build 25.461-b11, mixed mode)

3、安装Hadoop

下载 hadoop-3.2.2.tar.gz
上传到node1节点上 /opt/bigdata 下面,进行 tar -zxvf hadoop-3.2.2.tar.gz 解压

3.1、配置hadoop-env.sh

vim /opt/bigdata/hadoop-3.2.2/etc/hadoop/hadoop-env.sh

export JAVA_HOME=/opt/bigdata/jdk1.8.0_461
export HDFS_NAMENODE_USER=root            # 允许以root用户启动(仅为测试方便,默认不允许root启动)
export HDFS_DATANODE_USER=root            # 允许以root用户启动(仅为测试方便,默认不允许root启动)
export HDFS_SECONDARYNAMENODE_USER=root   # 允许以root用户启动(仅为测试方便,默认不允许root启动)
export YARN_RESOURCEMANAGER_USER=root     # 允许以root用户启动(仅为测试方便,默认不允许root启动)
export YARN_NODEMANAGER_USER=root         # 允许以root用户启动(仅为测试方便,默认不允许root启动)

3.2、配置core-site.xml

<configuration>
    <!-- 指定NameNode的地址 -->
    <property>
        <name>fs.defaultFS</name>
        <value>hdfs://node1:8020</value>
    </property>

    <!-- 指定hadoop数据的存储目录 -->
    <property>
        <name>hadoop.tmp.dir</name>
        <value>/opt/bigdata/hadoop-3.2.2/data</value>
    </property>

    <!-- 配置HDFS网页登录使用的静态用户为hadoop -->
    <property>
        <name>hadoop.http.staticuser.user</name>
        <value>hadoop</value>
    </property>
</configuration>

3.3、配置hdfs-site.xml

<configuration>
    <!-- nn web端访问地址-->
    <property>
        <name>dfs.namenode.http-address</name>
        <value>node1:9870</value>
    </property>
    <!-- 2nn web端访问地址-->
    <property>
        <name>dfs.namenode.secondary.http-address</name>
        <value>node3:9868</value>
    </property>
    <property>
        <name>dfs.replication</name>
        <value>3</value>
    </property>
</configuration>

3.4、配置mapred-site.xml

<configuration>
    <!-- 指定MapReduce程序运行在Yarn上 -->
    <property>
        <name>mapreduce.framework.name</name>
        <value>yarn</value>
    </property>
    <!-- 历史服务器端地址 -->
    <property>
        <name>mapreduce.jobhistory.address</name>
        <value>node1:10020</value>
    </property>

    <!-- 历史服务器web端地址 -->
    <property>
        <name>mapreduce.jobhistory.webapp.address</name>
        <value>node1:19888</value>
    </property>
</configuration>

3.5、配置yarn-site.xml

<configuration>

    <!-- 指定MR走shuffle -->
    <property>
        <name>yarn.nodemanager.aux-services</name>
        <value>mapreduce_shuffle</value>
    </property>

    <!-- 指定ResourceManager的地址-->
    <property>
        <name>yarn.resourcemanager.hostname</name>
        <value>node2</value>
    </property>

    <!-- 环境变量的继承 -->
    <property>
        <name>yarn.nodemanager.env-whitelist</name>
        <value>JAVA_HOME,HADOOP_COMMON_HOME,HADOOP_HDFS_HOME,HADOOP_CONF_DIR,CLASSPATH_PREPEND_DISTCACHE,HADOOP_YARN_HOME,HADOOP_MAPRED_HOME</value>
    </property>
    <!-- 开启日志聚集功能 -->
    <property>
       <name>yarn.log-aggregation-enable</name>
       <value>true</value>
    </property>
    <!-- 设置日志聚集服务器地址 -->
    <property>
       <name>yarn.log.server.url</name>
       <value>http://node1:19888/jobhistory/logs</value>
    </property>
    <!-- 设置日志保留时间为7天 -->
    <property>
       <name>yarn.log-aggregation.retain-seconds</name>
       <value>604800</value>
    </property>

    <property>
       <name>yarn.nodemanager.pmem-check-enabled</name>
       <value>false</value>
    </property>
    <property>
       <name>yarn.nodemanager.vmem-check-enabled</name>
       <value>false</value>
    </property>
</configuration>

3.6、配置workers(vim /opt/bigdata/hadoop-3.2.2/etc/hadoop/workers)

node1
node2
node3

3.7、分发hadoop

scp 到其他节点上(例如 : scp -r /opt/bigdata/hadoop-3.2.2 node2:/opt/bigdata)

3.8、配置HADOOP_HOME

vim /etc/profile

export JAVA_HOME=/opt/bigdata/jdk1.8.0_461
export HADOOP_HOME=/opt/bigdata/hadoop-3.2.2
export PATH=$PATH:$JAVA_HOME/bin:$HADOOP_HOME/bin:$HADOOP_HOME/sbin

3.9、启动HDFS

node1节点上(hdfs namenode -format 仅在首次启动集群前执行一次,重复格式化会导致 DataNode 与 NameNode 的 clusterID 不一致)

hdfs namenode -format
start-dfs.sh

如果要单独启动服务(Hadoop 3.x 中 hadoop-daemon.sh 已废弃,推荐使用 hdfs --daemon):
hdfs --daemon start namenode
hdfs --daemon start datanode
hdfs --daemon start secondarynamenode

http://node1:9870/dfshealth.html#tab-overview

3.10、启动YARN

node2节点上

start-yarn.sh

http://node2:8088/cluster

3.11、启动历史服务器

node1节点上

mapred --daemon start historyserver

http://node1:19888/jobhistory

4、执行WordCount程序验证

1、准备数据
hdfs dfs -mkdir /input
hdfs dfs -put /opt/bigdata/hadoop-3.2.2/README.txt /input

2、执行
hadoop jar /opt/bigdata/hadoop-3.2.2/share/hadoop/mapreduce/hadoop-mapreduce-examples-3.2.2.jar wordcount /input/ /output

3、查看output
[root@node1 hadoop-3.2.2]# hdfs dfs -cat /output/*
2025-07-20 17:50:45,648 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
(BIS),  1
(ECCN)  1
(TSU)   1
(see    1
5D002.C.1,      1
740.13) 1
<http://www.wassenaar.org/>     1
Administration  1
Apache  1
BEFORE  1
BIS     1
Bureau  1
Commerce,       1
....

journey
37 声望25 粉丝

引用和评论

0 条评论