准备工作

  • 加速卡为8卡昇腾910B(NPU),CPU为aarch64架构的鲲鹏处理器
[root@localhost ~]# lscpu
架构:                  aarch64
  CPU 运行模式:        64-bit
  字节序:              Little Endian
CPU:                    128
  在线 CPU 列表:       0-127
厂商 ID:               HiSilicon
  BIOS Vendor ID:       HiSilicon
  型号名称:            Kunpeng-920
    BIOS Model name:    HUAWEI Kunpeng 920 7260
    型号:              0
    每个核的线程数:    1
    每个座的核数:      64
    座:                2
    步进:              0x1
    BogoMIPS:          200.00
    标记:              fp asimd evtstrm aes pmull sha1 sha2 crc32 atomics fphp asimdhp cpuid asimdrdm jscvt fcma dcpop asimddp asimdfh
                        m ssbs
Caches (sum of all):  
  L1d:                  8 MiB (128 instances)
  L1i:                  8 MiB (128 instances)
  L2:                   64 MiB (128 instances)
  L3:                   128 MiB (4 instances)
NUMA:       
  NUMA 节点:           4
  NUMA 节点0 CPU:      0-31
  NUMA 节点1 CPU:      32-63
  NUMA 节点2 CPU:      64-95
  NUMA 节点3 CPU:      96-127
Vulnerabilities:  
  Gather data sampling: Not affected
  Itlb multihit:        Not affected
  L1tf:                 Not affected
  Mds:                  Not affected
  Meltdown:             Not affected
  Mmio stale data:      Not affected
  Retbleed:             Not affected
  Spec rstack overflow: Not affected
  Spec store bypass:    Mitigation; Speculative Store Bypass disabled via prctl
  Spectre v1:           Mitigation; __user pointer sanitization
  Spectre v2:           Not affected
  Srbds:                Not affected
  Tsx async abort:      Not affected
[root@localhost ~]# npu-smi info
+------------------------------------------------------------------------------------------------+
| npu-smi 25.5.1                   Version: 25.5.1                                               |
+---------------------------+---------------+----------------------------------------------------+
| NPU   Name                | Health        | Power(W)    Temp(C)           Hugepages-Usage(page)|
| Chip                      | Bus-Id        | AICore(%)   Memory-Usage(MB)  HBM-Usage(MB)        |
+===========================+===============+====================================================+
| 3     910B4-1             | OK            | 76.5        41                0    / 0             |
| 0                         | 0000:2F:00.0  | 0           0    / 0          3202 / 65536         |
+===========================+===============+====================================================+
| 4     910B4-1             | OK            | 77.4        41                0    / 0             |
| 0                         | 0000:2C:00.0  | 0           0    / 0          3202 / 65536         |
+===========================+===============+====================================================+
| 5     910B4-1             | OK            | 84.4        43                0    / 0             |
| 0                         | 0000:2B:00.0  | 0           0    / 0          3202 / 65536         |
+===========================+===============+====================================================+
| 6     910B4-1             | OK            | 79.7        42                0    / 0             |
| 0                         | 0000:33:00.0  | 0           0    / 0          3203 / 65536         |
+===========================+===============+====================================================+
| 7     910B4-1             | OK            | 87.0        44                0    / 0             |
| 0                         | 0000:11:00.0  | 0           0    / 0          3203 / 65536         |
+===========================+===============+====================================================+
| 8     910B4-1             | OK            | 77.7        42                0    / 0             |
| 0                         | 0000:12:00.0  | 0           0    / 0          3202 / 65536         |
+===========================+===============+====================================================+
| 9     910B4-1             | OK            | 76.0        41                0    / 0             |
| 0                         | 0000:1A:00.0  | 0           0    / 0          3202 / 65536         |
+===========================+===============+====================================================+
| 10    910B4-1             | OK            | 79.3        42                0    / 0             |
| 0                         | 0000:19:00.0  | 0           0    / 0          3202 / 65536         |
+===========================+===============+====================================================+
  • 系统为openEuler 22.03 ARM架构
cat /etc/os-release
NAME="openEuler"
VERSION="22.03 (LTS-SP4)"
ID="openEuler"
VERSION_ID="22.03"
PRETTY_NAME="openEuler 22.03 (LTS-SP4)"
ANSI_COLOR="0;31"

一、安装docker compose

修改安装源

cat > /etc/yum.repos.d/docker-ce.repo << 'EOF'
[docker-ce-stable]
name=Docker CE Stable - $basearch
baseurl=https://repo.huaweicloud.com/docker-ce/linux/centos/7/$basearch/stable
enabled=1
gpgcheck=1
gpgkey=https://repo.huaweicloud.com/docker-ce/linux/centos/gpg
EOF

安装docker compose

# The `docker compose` subcommand used later in this guide is provided by the
# docker-compose-plugin package (not the legacy standalone docker-compose).
dnf install -y docker-ce docker-ce-cli containerd.io docker-compose-plugin

启动docker并开启开机自启动

systemctl start docker && systemctl enable docker

二、下载安装910B固件和驱动程序

1、下载固件和驱动

下载页面:社区版-固件与驱动-昇腾社区(https://www.hiascend.com/hardware/firmware-drivers/community)

过滤选项为:npu、run后缀、AArch64

下载链接

Atlas A2 中心推理和训练硬件产品ARM架构驱动包(run格式)

Atlas A2 中心推理和训练硬件产品固件包(run格式)

或用以下指令直接下载(可能会失效):

# Quote the URLs (they contain '?') and use -O so the saved file names do not
# include the query string; the install steps expect the plain *.run names.
wget -O Ascend-hdk-910b-npu-driver_25.5.1_linux-aarch64.run 'https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/Ascend%20HDK/Ascend%20HDK%2025.5.1/Ascend-hdk-910b-npu-driver_25.5.1_linux-aarch64.run?response-content-type=application/octet-stream'
wget -O Ascend-hdk-910b-npu-firmware_7.8.0.6.201.run 'https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/Ascend%20HDK/Ascend%20HDK%2025.5.1/Ascend-hdk-910b-npu-firmware_7.8.0.6.201.run?response-content-type=application/octet-stream'

2、安装依赖

yum install -y make dkms gcc kernel-headers-$(uname -r) kernel-devel-$(uname -r)

3、创建驱动运行用户HwHiAiUser及同名用户组

groupadd HwHiAiUser && useradd -g HwHiAiUser -d /home/HwHiAiUser -m HwHiAiUser -s /bin/bash

4、安装驱动

# Only the downloaded .run packages need the executable bit; avoid `chmod +x *`,
# which would mark every file in the current directory as executable.
chmod +x ./Ascend-hdk-910b-npu-*.run && ./Ascend-hdk-910b-npu-driver_25.5.1_linux-aarch64.run --full --install-for-all

出现 Driver package installed successfully!为安装成功

5、安装固件

./Ascend-hdk-910b-npu-firmware_7.8.0.6.201.run --full

出现如下所示为安装成功

[Firmware] [2026-07-02 10:57:41] [INFO]The firmware of [8] chips are successfully upgraded.
[Firmware] [2026-07-02 10:57:42] [INFO]Firmware package installed successfully! Reboot now or after driver installation for the installation/upgrade to take effect.

安装完成后按照提示重启服务器

6、查看npu信息

npu-smi info

如下所示(8张NPU的Health均为OK)表示NPU工作正常

[root@localhost ~]# npu-smi info
+------------------------------------------------------------------------------------------------+
| npu-smi 25.5.1                   Version: 25.5.1                                               |
+---------------------------+---------------+----------------------------------------------------+
| NPU   Name                | Health        | Power(W)    Temp(C)           Hugepages-Usage(page)|
| Chip                      | Bus-Id        | AICore(%)   Memory-Usage(MB)  HBM-Usage(MB)        |
+===========================+===============+====================================================+
| 3     910B4-1             | OK            | 75.9        40                0    / 0             |
| 0                         | 0000:2F:00.0  | 0           0    / 0          3202 / 65536         |
+===========================+===============+====================================================+
| 4     910B4-1             | OK            | 76.9        41                0    / 0             |
| 0                         | 0000:2C:00.0  | 0           0    / 0          3202 / 65536         |
+===========================+===============+====================================================+
| 5     910B4-1             | OK            | 83.7        41                0    / 0             |
| 0                         | 0000:2B:00.0  | 0           0    / 0          3203 / 65536         |
+===========================+===============+====================================================+
| 6     910B4-1             | OK            | 79.1        40                0    / 0             |
| 0                         | 0000:33:00.0  | 0           0    / 0          3202 / 65536         |
+===========================+===============+====================================================+
| 7     910B4-1             | OK            | 86.0        43                0    / 0             |
| 0                         | 0000:11:00.0  | 0           0    / 0          3202 / 65536         |
+===========================+===============+====================================================+
| 8     910B4-1             | OK            | 77.2        40                0    / 0             |
| 0                         | 0000:12:00.0  | 0           0    / 0          3203 / 65536         |
+===========================+===============+====================================================+
| 9     910B4-1             | OK            | 75.5        40                0    / 0             |
| 0                         | 0000:1A:00.0  | 0           0    / 0          3202 / 65536         |
+===========================+===============+====================================================+
| 10    910B4-1             | OK            | 78.4        40                0    / 0             |
| 0                         | 0000:19:00.0  | 0           0    / 0          3202 / 65536         |
+===========================+===============+====================================================+
+---------------------------+---------------+----------------------------------------------------+
| NPU     Chip              | Process id    | Process name             | Process memory(MB)      |
+===========================+===============+====================================================+
| No running processes found in NPU 3                                                            |
+===========================+===============+====================================================+
| No running processes found in NPU 4                                                            |
+===========================+===============+====================================================+
| No running processes found in NPU 5                                                            |
+===========================+===============+====================================================+
| No running processes found in NPU 6                                                            |
+===========================+===============+====================================================+
| No running processes found in NPU 7                                                            |
+===========================+===============+====================================================+
| No running processes found in NPU 8                                                            |
+===========================+===============+====================================================+
| No running processes found in NPU 9                                                            |
+===========================+===============+====================================================+
| No running processes found in NPU 10                                                           |
+===========================+===============+====================================================+

三、安装vLLM

1、创建配置文件

创建目录

mkdir -p /home/qwen3 && cd /home/qwen3

编写容器内启动脚本 start.sh

touch start.sh && chmod +x start.sh && cat > start.sh << 'EOF'
export VLLM_USE_MODELSCOPE=True
export VLLM_ALLOW_LONG_MAX_MODEL_LEN=1
export PYTORCH_NPU_ALLOC_CONF=expandable_segments:True
export HCCL_BUFFSIZE=512
export OMP_PROC_BIND=false
export OMP_NUM_THREADS=1
export TASK_QUEUE_ENABLE=1
vllm serve /data/model/Qwen3-VL-235B-A22B-Instruct-w8a8-QuaRot \
--host 0.0.0.0 \
--port 8000 \
--tensor-parallel-size 8 \
--data-parallel-size 1 \
--seed 1024 \
--served-model-name qwen3 \
--max-num-seqs 32 \
--max-model-len 131072 \
--max-num-batched-tokens 8096 \
--enable-expert-parallel \
--trust-remote-code \
--gpu-memory-utilization 0.95 \
--hf-overrides '{"rope_parameters": {"rope_type":"yarn","rope_theta":1000000,"factor":4,"original_max_position_embeddings":32768}}' \
--compilation-config '{"cudagraph_mode":"FULL_DECODE_ONLY"}' \
--async-scheduling \
--served-model-name qwen3 \
--enable-auto-tool-choice \
--tool-call-parser hermes
EOF

编写docker-compose文件,以下配置的宿主机目录映射 /mnt/sdb:/data需调整为实际模型存放路径

# Create an empty output.log and write docker-compose.yml in the current directory.
# The quoted 'EOF' delimiter keeps the YAML literal (no shell expansion).
# The compose file runs ./start.sh (mounted at /workspace/start.sh) in a
# privileged, host-networked container and maps the host path /mnt/sdb to /data.
# NOTE(review): output.log is not referenced by the compose file below — confirm it is still needed.
# NOTE(review): the npu-smi output earlier in this guide lists NPU IDs 3-10, while the
# devices below are /dev/davinci0-7 — confirm the device nodes with `ls /dev/davinci*`.
touch output.log && cat > docker-compose.yml << 'EOF'
services:
  qwen:
    image: quay.io/ascend/vllm-ascend:v0.19.1rc1-openeuler
    container_name: qwen
    network_mode: host
    privileged: true
    shm_size: 500g
    devices:
      - /dev/davinci0:/dev/davinci0
      - /dev/davinci1:/dev/davinci1
      - /dev/davinci2:/dev/davinci2
      - /dev/davinci3:/dev/davinci3
      - /dev/davinci4:/dev/davinci4
      - /dev/davinci5:/dev/davinci5
      - /dev/davinci6:/dev/davinci6
      - /dev/davinci7:/dev/davinci7
      - /dev/davinci_manager:/dev/davinci_manager
      - /dev/devmm_svm:/dev/devmm_svm
      - /dev/hisi_hdc:/dev/hisi_hdc
    volumes:
      - /usr/local/dcmi:/usr/local/dcmi
      - /usr/local/Ascend/driver/tools/hccn_tool:/usr/local/Ascend/driver/tools/hccn_tool
      - /usr/local/bin/npu-smi:/usr/local/bin/npu-smi
      - /usr/local/Ascend/driver/lib64/:/usr/local/Ascend/driver/lib64/
      - /usr/local/Ascend/driver/version.info:/usr/local/Ascend/driver/version.info
      - /etc/ascend_install.info:/etc/ascend_install.info
      - /mnt/sdb:/data
      - ./start.sh:/workspace/start.sh
    command: /bin/sh -c "/workspace/start.sh"

EOF

2、启动vLLM容器

使用以下指令启动vLLM

docker compose up -d

使用logs指令查看容器日志

docker compose logs -f qwen

首次启动大约10min,出现以下日志内容,即启动完成

(APIServer pid=225) INFO:     Started server process [225]
(APIServer pid=225) INFO:     Waiting for application startup.
(APIServer pid=225) INFO:     Application startup complete.

3、测试api

使用curl测试模型输出

# vllm serve above is started without --api-key, so no Authorization header is required.
# If you enable --api-key, add: -H "Authorization: Bearer <your-key>" (never publish a real key).
curl http://localhost:8000/v1/chat/completions -H "Content-Type: application/json" -d '{"model": "qwen3", "messages": [{"role": "user", "content": "你好,请介绍一下你自己"}]}'
最后修改:2026 年 07 月 03 日
如果觉得我的文章对你有用,请随意赞赏