Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
114 commits
Select commit Hold shift + click to select a range
e5e8960
ci: add torch-npu docker images with builder/test separation
May 21, 2026
9bff4ba
ci: trigger docker build workflow on push instead of PR
May 21, 2026
5b6508e
ci: add environment QUAY_USERNAME for registry auth
May 21, 2026
b32098c
ci: lowercase CANN chip in docker tag for docker compatibility
May 21, 2026
58e65c5
fix: add COPY for requirements files in Dockerfiles
May 21, 2026
11c3adf
fix: remove unnecessary COPY common/ from builder Dockerfiles
May 21, 2026
93a1e36
fix: move pytorch install from requirements to Dockerfile
May 21, 2026
db1d9f3
ci: add workflow_dispatch tag input, summary step, and docker pull co…
May 21, 2026
f2411b9
ci: fix matrix output to single-line JSON
May 21, 2026
62dc7e5
fix: upgrade pip/setuptools/wheel before installing Python packages i…
May 21, 2026
3164355
fix: pin z3-solver to 4.13.0.0 for aarch64 compatibility
May 21, 2026
f2599ba
refactor: use pre-built ARM builder image and remove pytorch_version …
May 21, 2026
666226c
ci: change trigger from schedule to pull_request event on v2.7.1_imag…
May 21, 2026
88186d1
ci: change trigger from schedule to PR event on .github path changes
May 21, 2026
2201166
refactor: use pre-built test image and remove dependency installation…
May 21, 2026
0b09e42
refactor: centralize docker images and hardcode PYTORCH_VERSION/PYTHO…
May 21, 2026
2c7740e
fix: replace env context with hardcoded values in reusable workflow w…
May 21, 2026
83cac5c
refactor: use local action references and extract version variables
May 22, 2026
52f45a5
fix: remove unused pip deps and fix local action resolution in PR wor…
May 22, 2026
322649f
debug: add CANN environment diagnostics to collect step and collect_a…
May 22, 2026
f9964d1
debug: enhance CANN env diagnostic - find all set_env.sh and try vers…
May 22, 2026
3241b7f
fix: create symlink /usr/local/Ascend/cann for versioned CANN install…
May 22, 2026
a9631d3
fix: remove --quiet from CANN installer, use set -e for immediate fai…
May 22, 2026
80a9e08
refactor: rewrite install_cann.sh with explicit case-per-chip URLs
May 22, 2026
7e2fd1b
chore: add --quiet back to CANN installer .run commands
May 22, 2026
c2cdf66
feat: switch test image default Python to 3.9
May 22, 2026
b5b482b
feat: add Python version to test image tag naming
May 22, 2026
ab5bb8a
fix: use deadsnakes PPA for Python 3.9 on Ubuntu 22.04
May 22, 2026
494ecb5
fix: switch test image to Ubuntu 20.04 for native Python 3.9
May 22, 2026
c7556a8
feat: switch test image to Ubuntu 22.04 + Python 3.10 (native)
May 22, 2026
fe35de3
refactor: rename builder image tag from py2.7.1 to torch2.7.1 for con…
May 22, 2026
a27b6e7
Update docker images to 202605220956
May 22, 2026
afb831d
rename builder tag: py2.7.1 -> torch2.7.1 to avoid ambiguity with Pyt…
May 22, 2026
dc19a79
add pyyaml to builder requirements
May 22, 2026
cfdd376
chore: add pyyaml install step before build
May 22, 2026
34d9b84
refactor: remove retry mechanism and dead code in run_npu_test_shard.py
May 22, 2026
98a7b44
ci: add disk space cleanup step before Docker image build
May 22, 2026
778db30
chore: update build image and remove pyyaml install step
May 22, 2026
0cdb86e
chore: source CANN and NNAL env before collecting test cases
May 23, 2026
366088c
chore: update docker image tags to 202605221602
May 23, 2026
1ad9897
refactor: change A2/A3 CANN installation from .run to apt-get
May 23, 2026
04c92c4
refactor: change A3 CANN installation to combined driver+toolkit .run…
May 23, 2026
c6be290
fix: make dkms and linux-headers optional for Docker container builds
May 23, 2026
062028e
feat: add HwHiAiUser user and group creation before CANN install
May 23, 2026
8c63ab2
refactor: unify A2/A3 CANN packages to ascend-repo 9.1.0-beta.1
May 23, 2026
37197ad
chore: update docker image references in upstream test workflow
May 23, 2026
817d693
fix: add defaults.run.shell: bash to workflow files to fix source com…
May 23, 2026
a4d8b98
fix: add zstandard dependency for distributed checkpoint tests
May 25, 2026
8dd4b66
fix: add pulp dependency and fix collect diagnostics
May 25, 2026
a423160
chore: update test docker image to 202605250326
May 25, 2026
9564241
fix: eliminate duplicate kerer-ai/pytorch checkout to ensure code imm…
May 26, 2026
3ac74e1
Merge remote-tracking branch 'ascend/v2.7.1' into v2.7.1_image
May 26, 2026
80ec595
fix: remove utils.py.patch targeting non-existent torch/_inductor/uti…
May 26, 2026
120559c
fix: add test_ops_jit.py and test_jit.py to CI whitelist
May 26, 2026
9e605b3
fix: remove accidentally committed tatus file (less help page)
May 26, 2026
cbcb7f1
Revert "fix: remove utils.py.patch targeting non-existent torch/_indu…
May 26, 2026
db4e857
fix: restore utils.py.patch and add _inductor/ directory debug output
May 26, 2026
f59b285
fix: convert utils.py.patch CRLF to LF and add \r stripping in patch …
May 26, 2026
dc6996e
fix: correct import npu to from torch_npu import npu in test_alias_an…
May 26, 2026
6f751dc
fix: convert all CRLF patch files to LF and fix incorrect import npu
May 26, 2026
703445d
Revert "fix: convert all CRLF patch files to LF and fix incorrect imp…
May 26, 2026
d1de9aa
refactor: normalize patch CRLF to LF via temp file in torch_env_patch.sh
May 26, 2026
501d3cd
fix: restore utils.py.patch to original CRLF line endings
May 26, 2026
8c9a2bb
refactor: simplify torch_env_patch.sh to match apply_patch.sh style
May 26, 2026
6a50fdc
fix: use set+e/-e and PIPESTATUS to handle patch failure gracefully
May 26, 2026
f96f6e4
fix: tab to space in build.yml ref and use PIPESTATUS in prepare.yml
May 26, 2026
dda93aa
fix: increase collect job timeout from 60 to 120 minutes
May 26, 2026
78ba840
chore: trigger CI re-run
May 26, 2026
9e1aeaa
chore: trigger CI re-run
May 26, 2026
72e6368
chore: trigger CI re-run
May 26, 2026
b83da55
fix: redirect ccache -s stderr to stdout for stats capture
May 27, 2026
ba129f7
Merge remote-tracking branch 'ascend/v2.7.1' into v2.7.1_image
May 27, 2026
91d9ad5
fix: stage collect logs to flat directory for clean artifact layout
May 27, 2026
a26cb16
fix: remove Huawei pip mirror, use official PyTorch index in Dockerfiles
May 27, 2026
b802529
fix: change distributed test default shard count from 2 to 5
May 28, 2026
bc27c10
Merge remote-tracking branch 'ascend/v2.7.1' into v2.7.1_image
May 28, 2026
22fde4b
fix: remove max-parallel limits for distributed and regular test shards
May 28, 2026
c476ab9
fix: reduce per-case idle timeout from 20min to 10min, guard against …
May 28, 2026
112d742
fix: update ccache stats grep pattern for ccache 4.10 output format
May 28, 2026
46ccc0e
Merge remote-tracking branch 'ascend/v2.7.1' into v2.7.1_image
May 29, 2026
eb2c0be
Merge ascend/v2.7.1 into v2.7.1_image
Jun 1, 2026
3d46afe
feat: distributed tests use independent subprocess per case
Jun 1, 2026
bca405f
chore: switch custom test runner to linux-aarch64-a3-16
Jun 1, 2026
ce5fc94
fix: reduce regular/custom test concurrency to 16 to prevent SUSPECT …
Jun 2, 2026
7f69418
fix: include JUnit XML element text in result message for failure/err…
Jun 2, 2026
0a38259
fix: include JUnit XML element text in message and treat xfail as passed
Jun 3, 2026
3c9845b
Merge ascend/v2.7.1 into v2.7.1_image
Jun 3, 2026
16db6e4
fix: use input parameter for test_files instead of hardcoded value
Jun 3, 2026
3fa6615
Merge remote-tracking branch 'ascend/v2.7.1' into v2.7.1_image
Jun 5, 2026
13fa160
debug: add env dump step to trace TORCH_TRANSFER_TO_NPU origin
Jun 5, 2026
244359c
fix(ci): reduce regular test max-workers from 64 to 16
Jun 5, 2026
8e6ca0d
feat(ci): 增加 PR patch 变更自动检测,只跑对应测试
Jun 8, 2026
7577a9a
fix(ci): 修复 detect patch 推导测试文件时 .py.patch → .py.py 双扩展名 bug
Jun 8, 2026
3263589
fix(ci): git diff 两点改三点,排除上游侧变更误报
Jun 8, 2026
ec29afc
fix(ci): 本地 action 引用前补上 checkout 步骤
Jun 8, 2026
dce817b
refactor(ci): 移除 ref 透传参数,统一使用 github.sha
Jun 8, 2026
5626c0d
fix(ci): 恢复外部 action 引用,撤销无效的 checkout 修复
Jun 8, 2026
f49c07c
chore: 切换触发文件 test_minifer → test_type_info
Jun 9, 2026
bbd3cb4
fix(ci): 修复 .diff 文件推导缺少 .py 后缀
Jun 9, 2026
ed7131c
chore: 撤销 test_type_info.py.patch 空行修改
Jun 9, 2026
3dd05f3
Merge branch 'ascend/v2.7.1' into v2.7.1_image
Jun 9, 2026
cf93ea4
fix(ci): 测试执行 step 覆盖平台默认 CI=true
Jun 9, 2026
6b806a7
fix: apply_patch.sh 只扫描 test/ 子目录,跳过 torch/ 环境 patch
Jun 9, 2026
71877b9
Merge branch 'ascend/v2.7.1' into v2.7.1_image
Jun 10, 2026
17096df
Merge branch 'ascend/v2.7.1' into v2.7.1_image
Jun 11, 2026
98033b8
Update disabled_testcases with 854 CI signal crash and timeout cases
Jun 11, 2026
c0313af
Add all reg-2/3/4/5 error and timeout cases to disabled_testcases
Jun 11, 2026
d8037b3
Add remaining 23 reg-1 non-signal error cases to disabled_testcases
Jun 11, 2026
8be758f
fix: use v2.7.1 ref for checkout when triggered from master branch
Jun 12, 2026
f4f1744
fix: log unexpected non-dict JSON lines to file in test shard runner
Jun 12, 2026
b9a9c48
Merge remote-tracking branch 'ascend/v2.7.1' into v2.7.1_image
Jun 15, 2026
89a889e
chore: switch dist test runner from a3-16 to a3-8
Jun 15, 2026
7311523
fix: sync distributed runner to a3-8 and correct worker counts in CI …
Jun 16, 2026
cdee32b
feat: collect /root/ascend CANN底层日志并上传制品
Jun 17, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
91 changes: 91 additions & 0 deletions .ci/docker/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
# torch-npu CI Docker Images

本目录管理 torch-npu 项目的 CI Docker 镜像,包括**构建镜像 (builder)** 和**测试镜像 (test)** 两类,每类分别支持 x86_64 和 aarch64 架构。

## 镜像类型

| 类型 | 基座 | 用途 |
|------|------|------|
| **builder** | manylinux2_28-builder | 编译构建 torch-npu wheel 包,包含完整编译工具链 |
| **test** | ubuntu:22.04 | CI 单元测试运行环境,包含 PyTorch CPU、CANN runtime、triton-ascend 和测试框架 |

## 目录结构

```
.ci/docker/
├── README.md
├── requirements-builder.txt # Builder 镜像 pip 依赖
├── requirements-test.txt # Test 镜像 pip 依赖
├── docker_build.sh # 构建入口脚本
├── common/ # 共享安装脚本
│ ├── install_cann.sh # 安装 CANN toolkit (支持 A1/A2/A3)
│ ├── install_triton.sh # 安装 triton-ascend (需传 Python 版本)
│ └── install_obs.sh # 安装华为 OBS util
├── builder/
│ ├── Dockerfile.x86_64
│ └── Dockerfile.aarch64
└── test/
├── Dockerfile.x86_64
└── Dockerfile.aarch64
```

## 快速构建

```bash
# Builder 镜像 (不含 CANN)
./docker_build.sh torch-npu-builder-x86_64-torch2.7.1
./docker_build.sh torch-npu-builder-aarch64-torch2.7.1

# Test 镜像 (含 CANN)
./docker_build.sh torch-npu-test-x86_64-cann-a1-py3.10-torch2.7.1
./docker_build.sh torch-npu-test-aarch64-cann-a2-py3.10-torch2.7.1
```

## Tag 命名规范

参考上游 PyTorch `pytorch-linux-jammy-cuda12.4-cudnn9-py3-gcc11` 模式,tag 即为最终镜像名:

**Builder**(不含 CANN):
```
torch-npu-builder-<ARCH>-torch<PYTORCH_VERSION>
```
```
./docker_build.sh torch-npu-builder-x86_64-torch2.7.1
# ^ ^ ^ ^
# | | | └── PyTorch 版本 (torch2.7.1)
# | | └── 架构
# | └── 镜像类型
# └── 固定前缀
```

**Test**(含 CANN runtime):
```
torch-npu-test-<ARCH>-cann-<CHIP>-py<PYTHON_VERSION>-torch<PYTORCH_VERSION>
```
```
./docker_build.sh torch-npu-test-x86_64-cann-a1-py3.10-torch2.7.1
# ^ ^ ^ ^ ^ ^ ^
# | | | | | | └── PyTorch 版本
# | | | | | └── torch 前缀
# | | | | └── Python 版本
# | | | └── py 前缀
# | | └── CANN 芯片 (A1/A2/A3)
# | └── 架构
# └── 镜像类型
```

| 字段 | 可选值 |
|------|--------|
| IMAGE_TYPE | builder, test |
| ARCH | x86_64, aarch64 |
| CHIP | A1 (Ascend 910), A2 (Ascend 910b), A3 (Ascend A3);仅 test 镜像的 tag 含此字段,tag 中写作小写 a1/a2/a3 |
| PYTHON_VERSION | 3.10 (仅 test) |
| PYTORCH_VERSION | 2.7.1 |

## CANN 芯片映射

| CANN_CHIP | 芯片 | CANN 版本 |
|-----------|------|----------|
| A1 | Ascend 910 | 9.1.0 (x86_64) / 9.0.0-beta.1 (aarch64) |
| A2 | Ascend 910b | 9.1.0-beta.1 (x86_64 / aarch64) |
| A3 | Ascend A3 | 9.1.0-beta.1 (x86_64 / aarch64) |
71 changes: 71 additions & 0 deletions .ci/docker/builder/Dockerfile.aarch64
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
# Builder image for torch-npu wheels on aarch64.
# Starts from the upstream PyTorch manylinux_2_28 CPU builder and adds, for
# CPython 3.9-3.13, a CPU build of PyTorch plus the packages listed in
# requirements-builder.txt, followed by native build tools (ccache, etcd, ...).
FROM pytorch/manylinux2_28_aarch64-builder:cpu-aarch64-2.7

# PyTorch release installed for every interpreter below (override via --build-arg).
ARG PYTORCH_VERSION=2.7.1

ENV PATH=/usr/local/bin:$PATH
ENV AUDITWHEEL_PLAT=manylinux_2_28_aarch64
# NOTE(review): presumably required by the etcd v3.4.3 arm64 binary installed
# below, which treats arm64 as an unsupported platform - confirm against etcd docs.
ENV ETCD_UNSUPPORTED_ARCH=arm64
ENV PYTORCH_VERSION=${PYTORCH_VERSION}

COPY requirements-builder.txt /opt/buildtools/

# Expose every bundled CPython (and its pip) under /usr/local/bin.
# Unversioned `python3` / `pip3` resolve to CPython 3.10.
RUN cd /usr/local/bin \
    && ln -sf /opt/_internal/cpython-3.9.21/bin/pip3.9 pip3.9 \
    && ln -sf /opt/_internal/cpython-3.10.16/bin/pip3.10 pip3.10 \
    && ln -sf /opt/_internal/cpython-3.11.11/bin/pip3.11 pip3.11 \
    && ln -sf /opt/_internal/cpython-3.12.9/bin/pip3.12 pip3.12 \
    && ln -sf /opt/_internal/cpython-3.13.2/bin/pip3.13 pip3.13 \
    && ln -sf /opt/_internal/cpython-3.10.16/bin/pip3.10 pip3 \
    && ln -sf /opt/_internal/cpython-3.9.21/bin/python3.9 python3.9 \
    && ln -sf /opt/_internal/cpython-3.10.16/bin/python3.10 python3.10 \
    && ln -sf /opt/_internal/cpython-3.11.11/bin/python3.11 python3.11 \
    && ln -sf /opt/_internal/cpython-3.12.9/bin/python3.12 python3.12 \
    && ln -sf /opt/_internal/cpython-3.13.2/bin/python3.13 python3.13 \
    && ln -sf /opt/_internal/cpython-3.10.16/bin/python3.10 python3

# Install PyTorch from the official CPU index, then the build requirements from
# PyPI, once per Python version. auditwheel is pinned and installed only into
# CPython 3.13, whose entry point becomes the global `auditwheel` command.
RUN pip3.9 install --no-cache-dir --index-url https://download.pytorch.org/whl/cpu torch==${PYTORCH_VERSION} \
    && pip3.9 install --no-cache-dir -r /opt/buildtools/requirements-builder.txt \
    && pip3.10 install --no-cache-dir --index-url https://download.pytorch.org/whl/cpu torch==${PYTORCH_VERSION} \
    && pip3.10 install --no-cache-dir -r /opt/buildtools/requirements-builder.txt \
    && pip3.11 install --no-cache-dir --index-url https://download.pytorch.org/whl/cpu torch==${PYTORCH_VERSION} \
    && pip3.11 install --no-cache-dir -r /opt/buildtools/requirements-builder.txt \
    && pip3.12 install --no-cache-dir --index-url https://download.pytorch.org/whl/cpu torch==${PYTORCH_VERSION} \
    && pip3.12 install --no-cache-dir -r /opt/buildtools/requirements-builder.txt \
    && pip3.13 install --no-cache-dir --index-url https://download.pytorch.org/whl/cpu torch==${PYTORCH_VERSION} \
    && pip3.13 install --no-cache-dir auditwheel==5.4.0 \
    && pip3.13 install --no-cache-dir -r /opt/buildtools/requirements-builder.txt \
    && ln -sf /opt/_internal/cpython-3.13.2/bin/auditwheel /usr/local/bin/auditwheel

# Install system build tools: distro packages, ccache 4.10 built from source,
# and the etcd v3.4.3 release binary with its Python client.
# Download artifacts are removed inside this same RUN so they never reach the
# image layer, and pip runs with --no-cache-dir like every other pip call here.
RUN echo "alias ll='ls -l --color=auto'" >> /root/.bashrc \
    && yum install -y vim-common --disablerepo=ius \
    && yum install -y ninja-build binutils lld mold dos2unix gcc gcc-c++ make cmake3 wget tar unzip elfutils java-1.8.0-openjdk-devel \
    && cd /tmp \
    && wget https://github.com/ccache/ccache/releases/download/v4.10/ccache-4.10.tar.gz \
    && tar -xzf ccache-4.10.tar.gz \
    && cd ccache-4.10 \
    && mkdir build \
    && cd build \
    && cmake3 .. \
    && make -j$(nproc) \
    && make install \
    && cd /tmp \
    && rm -rf ccache-4.10* \
    && ccache --version \
    && wget https://github.com/etcd-io/etcd/releases/download/v3.4.3/etcd-v3.4.3-linux-arm64.tar.gz \
    && tar -zxf etcd-v3.4.3-linux-arm64.tar.gz \
    && mv etcd-v3.4.3-linux-arm64/etcd /usr/local/bin/ \
    && rm -rf etcd-v3.4.3-linux-arm64* \
    && pip3.10 install --no-cache-dir python-etcd \
    && etcd --version \
    && yum update -y \
    && yum clean all

# Set timezone to Asia/Shanghai for both the system clock files and login shells.
RUN rm -f /etc/localtime \
    && cp /usr/share/zoneinfo/Asia/Shanghai /etc/localtime \
    && echo 'Asia/Shanghai' >/etc/timezone \
    && echo "export TZ='Asia/Shanghai'" >>/etc/profile

WORKDIR /home
82 changes: 82 additions & 0 deletions .ci/docker/builder/Dockerfile.x86_64
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
# Builder image for torch-npu wheels on x86_64.
# Starts from the upstream PyTorch manylinux_2_28 CPU builder and adds, for
# CPython 3.9-3.13, a CPU build of PyTorch plus the packages listed in
# requirements-builder.txt, followed by native build tools
# (ninja, mold, ccache, etcd, ...).
FROM pytorch/manylinux2_28-builder:cpu-2.7

# PyTorch release installed for every interpreter below (override via --build-arg).
ARG PYTORCH_VERSION=2.7.1

ENV PATH=/usr/local/bin:$PATH
ENV AUDITWHEEL_PLAT=manylinux_2_28_x86_64
ENV PYTORCH_VERSION=${PYTORCH_VERSION}

COPY requirements-builder.txt /opt/buildtools/

# Expose every bundled CPython (and its pip) under /usr/local/bin.
# Unversioned `python3` / `pip3` resolve to CPython 3.10.
RUN cd /usr/local/bin \
    && ln -sf /opt/_internal/cpython-3.9.21/bin/pip3.9 pip3.9 \
    && ln -sf /opt/_internal/cpython-3.10.16/bin/pip3.10 pip3.10 \
    && ln -sf /opt/_internal/cpython-3.11.11/bin/pip3.11 pip3.11 \
    && ln -sf /opt/_internal/cpython-3.12.9/bin/pip3.12 pip3.12 \
    && ln -sf /opt/_internal/cpython-3.13.2/bin/pip3.13 pip3.13 \
    && ln -sf /opt/_internal/cpython-3.10.16/bin/pip3.10 pip3 \
    && ln -sf /opt/_internal/cpython-3.9.21/bin/python3.9 python3.9 \
    && ln -sf /opt/_internal/cpython-3.10.16/bin/python3.10 python3.10 \
    && ln -sf /opt/_internal/cpython-3.11.11/bin/python3.11 python3.11 \
    && ln -sf /opt/_internal/cpython-3.12.9/bin/python3.12 python3.12 \
    && ln -sf /opt/_internal/cpython-3.13.2/bin/python3.13 python3.13 \
    && ln -sf /opt/_internal/cpython-3.10.16/bin/python3.10 python3

# Install PyTorch (+cpu local version) from the official CPU index, then the
# build requirements from PyPI, once per Python version. auditwheel is pinned
# and installed only into CPython 3.13, whose entry point becomes the global
# `auditwheel` command.
RUN pip3.9 install --no-cache-dir --index-url https://download.pytorch.org/whl/cpu torch==${PYTORCH_VERSION}+cpu \
    && pip3.9 install --no-cache-dir -r /opt/buildtools/requirements-builder.txt \
    && pip3.10 install --no-cache-dir --index-url https://download.pytorch.org/whl/cpu torch==${PYTORCH_VERSION}+cpu \
    && pip3.10 install --no-cache-dir -r /opt/buildtools/requirements-builder.txt \
    && pip3.11 install --no-cache-dir --index-url https://download.pytorch.org/whl/cpu torch==${PYTORCH_VERSION}+cpu \
    && pip3.11 install --no-cache-dir -r /opt/buildtools/requirements-builder.txt \
    && pip3.12 install --no-cache-dir --index-url https://download.pytorch.org/whl/cpu torch==${PYTORCH_VERSION}+cpu \
    && pip3.12 install --no-cache-dir -r /opt/buildtools/requirements-builder.txt \
    && pip3.13 install --no-cache-dir --index-url https://download.pytorch.org/whl/cpu torch==${PYTORCH_VERSION}+cpu \
    && pip3.13 install --no-cache-dir auditwheel==5.4.0 \
    && pip3.13 install --no-cache-dir -r /opt/buildtools/requirements-builder.txt \
    && ln -sf /opt/_internal/cpython-3.13.2/bin/auditwheel /usr/local/bin/auditwheel

# Install system build tools.
# - The ius/epel repo definitions are dropped first (best effort, hence `|| true`).
# - ninja is taken from its release zip; mold and ccache are built from source.
# - etcd v3.4.3 is taken from its release tarball, plus the Python client.
# `yum update` runs BEFORE the final cache cleanup so that the packages it
# downloads are not left in the image layer, and the etcd download is removed
# after the binary has been moved into place.
RUN yum remove -y ius-release epel-release 2>/dev/null || true \
    && rm -rf /etc/yum.repos.d/ius*.repo /etc/yum.repos.d/epel*.repo \
    && yum clean all && rm -rf /var/cache/dnf /var/cache/yum \
    && echo "alias ll='ls -l --color=auto'" >> /root/.bashrc \
    && yum install -y vim-common --disablerepo=ius \
    && yum install -y binutils lld dos2unix gcc gcc-c++ make cmake3 wget tar unzip elfutils java-1.8.0-openjdk-devel \
    && cd /tmp \
    && wget -q https://github.com/ninja-build/ninja/releases/download/v1.12.1/ninja-linux.zip \
    && unzip ninja-linux.zip \
    && cp ninja /usr/local/bin/ && chmod +x /usr/local/bin/ninja \
    && cd /tmp \
    && wget -q https://github.com/rui314/mold/archive/refs/tags/v2.32.1.tar.gz \
    && tar -xf v2.32.1.tar.gz \
    && cd mold-2.32.1 \
    && cmake -DCMAKE_BUILD_TYPE=Release -DMOLD_MOSTLY_STATIC=ON . \
    && make -j$(nproc) && make install \
    && cd /tmp \
    && wget https://github.com/ccache/ccache/releases/download/v4.10/ccache-4.10.tar.gz \
    && tar -xzf ccache-4.10.tar.gz \
    && cd ccache-4.10 \
    && mkdir build \
    && cd build \
    && cmake3 .. \
    && make -j$(nproc) \
    && make install \
    && cd /tmp && rm -rf /tmp/* \
    && ninja --version && mold --version && ccache --version \
    && wget https://github.com/etcd-io/etcd/releases/download/v3.4.3/etcd-v3.4.3-linux-amd64.tar.gz \
    && tar -zxf etcd-v3.4.3-linux-amd64.tar.gz \
    && mv etcd-v3.4.3-linux-amd64/etcd /usr/local/bin/ \
    && rm -rf /tmp/etcd-v3.4.3-linux-amd64* \
    && pip3.10 install --no-cache-dir python-etcd \
    && etcd --version \
    && yum update -y \
    && yum clean all \
    && rm -rf /var/cache/dnf /var/cache/yum

# Set timezone to Asia/Shanghai for both the system clock files and login shells.
RUN rm -f /etc/localtime \
    && cp /usr/share/zoneinfo/Asia/Shanghai /etc/localtime \
    && echo 'Asia/Shanghai' >/etc/timezone \
    && echo "export TZ='Asia/Shanghai'" >>/etc/profile

WORKDIR /home
109 changes: 109 additions & 0 deletions .ci/docker/common/install_cann.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
#!/usr/bin/bash
# Install the CANN toolkit, the chip-specific ops package and NNAL for Ascend NPU.
#
# Usage: CANN_CHIP=A1 ./install_cann.sh
#   CANN_CHIP: A1 (Ascend 910), A2 (Ascend 910b), A3 (Ascend A3). Default: A1.
# The architecture (x86_64 / aarch64) is detected with `uname -m`.
#
# Side effects: creates the HwHiAiUser group/user if missing, installs under
# /usr/local/Ascend, and uses a scratch directory ./cann (relative to the
# current working directory) that is deleted again at the end.
#
# NOTE(review): only `set -e` is enabled. The vendor set_env.sh files are
# sourced into this shell, so confirm they tolerate `-u` / `pipefail` before
# turning those on.

set -e

CANN_CHIP="${CANN_CHIP:-A1}"
ARCH=$(uname -m)

ASCEND_ROOT="/usr/local/Ascend"
# Every supported combination sources the same toolkit environment script.
SET_ENV_PATH="${ASCEND_ROOT}/cann/set_env.sh"

BASE_URL="https://pytorch-package.obs.cn-north-4.myhuaweicloud.com/pta/cann-package"
CANN_BASE_URL="https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/CANN/CANN%209.1.T1"

# One explicit arm per <arch>_<chip> so each package URL can be pinned separately.
case "${ARCH}_${CANN_CHIP}" in
  # x86_64
  x86_64_A1)
    TOOLKIT_URL="${BASE_URL}/20260513/Ascend-cann-toolkit_9.1.0_linux-x86_64.run"
    OPS_URL="${BASE_URL}/20260513/Ascend-cann-910-ops_9.1.0_linux-x86_64.run"
    NNAL_URL="${BASE_URL}/20260513/Ascend-cann-nnal_9.1.0_linux-x86_64.run"
    ;;
  x86_64_A2)
    TOOLKIT_URL="${CANN_BASE_URL}/Ascend-cann-toolkit_9.1.0-beta.1_linux-x86_64.run"
    OPS_URL="${CANN_BASE_URL}/Ascend-cann-910b-ops_9.1.0-beta.1_linux-x86_64.run"
    NNAL_URL="${CANN_BASE_URL}/Ascend-cann-nnal_9.1.0-beta.1_linux-x86_64.run"
    ;;
  x86_64_A3)
    TOOLKIT_URL="${CANN_BASE_URL}/Ascend-cann-toolkit_9.1.0-beta.1_linux-x86_64.run"
    OPS_URL="${CANN_BASE_URL}/Ascend-cann-A3-ops_9.1.0-beta.1_linux-x86_64.run"
    NNAL_URL="${CANN_BASE_URL}/Ascend-cann-nnal_9.1.0-beta.1_linux-x86_64.run"
    ;;
  # aarch64
  aarch64_A1)
    # NOTE(review): this arm downloads the 910b ops package although A1 is
    # documented as Ascend 910 (the x86_64 A1 arm uses the 910 ops package).
    # Kept as-is; confirm whether this is intentional.
    TOOLKIT_URL="${BASE_URL}/20260302/Ascend-cann-toolkit_9.0.0-beta.1_linux-aarch64.run"
    OPS_URL="${BASE_URL}/20260302/Ascend-cann-910b-ops_9.0.0-beta.1_linux-aarch64.run"
    NNAL_URL="${BASE_URL}/20260302/Ascend-cann-nnal_9.0.0-beta.1_linux-aarch64.run"
    ;;
  aarch64_A2)
    TOOLKIT_URL="${CANN_BASE_URL}/Ascend-cann-toolkit_9.1.0-beta.1_linux-aarch64.run"
    OPS_URL="${CANN_BASE_URL}/Ascend-cann-910b-ops_9.1.0-beta.1_linux-aarch64.run"
    NNAL_URL="${CANN_BASE_URL}/Ascend-cann-nnal_9.1.0-beta.1_linux-aarch64.run"
    ;;
  aarch64_A3)
    TOOLKIT_URL="${CANN_BASE_URL}/Ascend-cann-toolkit_9.1.0-beta.1_linux-aarch64.run"
    OPS_URL="${CANN_BASE_URL}/Ascend-cann-A3-ops_9.1.0-beta.1_linux-aarch64.run"
    NNAL_URL="${CANN_BASE_URL}/Ascend-cann-nnal_9.1.0-beta.1_linux-aarch64.run"
    ;;
  *)
    echo "Unsupported combination: ${ARCH} + ${CANN_CHIP}"
    exit 1
    ;;
esac

# `curl -O` stores each package under the last path segment of its URL, so the
# installers are addressed by exactly that name instead of by a glob.
TOOLKIT_PKG="${TOOLKIT_URL##*/}"
OPS_PKG="${OPS_URL##*/}"
NNAL_PKG="${NNAL_URL##*/}"

# Download one package into the current directory.
# --fail turns HTTP errors (404/5xx) into a non-zero exit so `set -e` stops the
# script right here, instead of saving the error page under the .run name and
# failing later when it is executed. --retry covers transient network errors.
# Arguments: $1 - package URL
download() {
  curl --fail --location --show-error --retry 3 -O "$1"
}

# Make sure ${ASCEND_ROOT}/cann/set_env.sh is reachable.
# Some CANN versions install to a versioned path (e.g. cann-9.0.0-beta.2)
# instead of ${ASCEND_ROOT}/cann. In that case point `cann` at the first
# cann-* entry. -n replaces an existing (possibly broken) `cann` symlink
# itself rather than creating the new link inside the directory it points to.
link_versioned_cann() {
  local candidate real_dir=""

  if [ -f "${SET_ENV_PATH}" ]; then
    return 0
  fi

  for candidate in "${ASCEND_ROOT}"/cann-*; do
    # An unmatched glob stays literal; -e filters that case out.
    if [ -e "${candidate}" ]; then
      real_dir="${candidate}"
      break
    fi
  done

  if [ -n "${real_dir}" ]; then
    ln -sfn "${real_dir}" "${ASCEND_ROOT}/cann"
    echo "Fixed: linked ${real_dir} -> ${ASCEND_ROOT}/cann"
  fi
}

echo "Installing CANN ${CANN_CHIP} for ${ARCH}..."

echo "=== Creating HwHiAiUser user and group ==="
groupadd -f HwHiAiUser
id -u HwHiAiUser >/dev/null 2>&1 || useradd -g HwHiAiUser -d /home/HwHiAiUser -m HwHiAiUser -s /bin/bash

# Fresh scratch directory for the installers.
rm -rf cann
mkdir -p cann
cd cann

echo "=== Downloading CANN packages ==="
download "${TOOLKIT_URL}"
download "${OPS_URL}"
download "${NNAL_URL}"
echo "Download complete."

chmod +x -- "${TOOLKIT_PKG}" "${OPS_PKG}" "${NNAL_PKG}"

echo "=== Installing CANN toolkit ==="
"./${TOOLKIT_PKG}" --full --quiet --install-path="${ASCEND_ROOT}"
# The link repair has to happen BEFORE set_env.sh is sourced: under `set -e` a
# missing set_env.sh aborts the script, so a repair placed after the `source`
# could never run in the very situation it exists for.
link_versioned_cann
if [ ! -f "${SET_ENV_PATH}" ]; then
  echo "ERROR: ${SET_ENV_PATH} not found after toolkit installation" >&2
  exit 1
fi
# shellcheck source=/dev/null
source "${SET_ENV_PATH}"
echo "toolkit install success"

echo "=== Installing CANN ops ==="
"./${OPS_PKG}" --install --quiet --install-path="${ASCEND_ROOT}"
echo "ops install success"

echo "=== Installing CANN nnal ==="
"./${NNAL_PKG}" --install --quiet --install-path="${ASCEND_ROOT}"
# shellcheck source=/dev/null
source "${ASCEND_ROOT}/nnal/atb/set_env.sh"
echo "nnal install success"

# Re-check once all packages are installed so that runtime sourcing of
# set_env.sh keeps working even if a later installer touched the layout.
link_versioned_cann

# Drop the scratch directory together with the downloaded installers.
cd ..
rm -rf -- cann
echo "CANN ${CANN_CHIP} installation complete."
21 changes: 21 additions & 0 deletions .ci/docker/common/install_obs.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
#!/usr/bin/bash
# Install Huawei OBS util (obsutil) for object storage access.
#
# Downloads the "current" obsutil tarball matching the host architecture,
# unpacks it under /usr/local/obsutil and links the binary to
# /usr/local/bin/obsutil.

set -e

ARCH=$(uname -m)
case "${ARCH}" in
  x86_64) OBS_ARCH="amd64" ;;
  aarch64) OBS_ARCH="arm64" ;;
  *)
    echo "Unsupported architecture: ${ARCH}"
    exit 1
    ;;
esac

OBS_HOME="/usr/local/obsutil"
OBS_TARBALL="obsutil_linux_${OBS_ARCH}.tar.gz"
OBS_URL="https://obs-community.obs.cn-north-1.myhuaweicloud.com/obsutil/current/${OBS_TARBALL}"

# Remove the downloaded archive on every exit path, including failures.
trap 'rm -f -- "${OBS_TARBALL}"' EXIT

# -O pins the output name. Without it wget saves to "<name>.1" when a stale
# archive from an earlier run is present, and the stale file would be the one
# that gets extracted.
wget -q -O "${OBS_TARBALL}" "${OBS_URL}"
mkdir -p "${OBS_HOME}"
tar -zxf "${OBS_TARBALL}" -C "${OBS_HOME}/"

# The binary is expected at obsutil_linux_<arch>_<suffix>/obsutil inside the
# archive. Resolve the glob explicitly: with no match a bare `ln -sf <glob>`
# would create a dangling link to the literal pattern and still report success.
# If several directories exist (repeated installs), the last one in glob order
# is used.
obs_bin=""
for candidate in "${OBS_HOME}/obsutil_linux_${OBS_ARCH}"_*/obsutil; do
  if [ -f "${candidate}" ]; then
    obs_bin="${candidate}"
  fi
done

if [ -z "${obs_bin}" ]; then
  echo "ERROR: obsutil binary not found under ${OBS_HOME} after extraction" >&2
  exit 1
fi

ln -sf "${obs_bin}" /usr/local/bin/obsutil

echo "OBS util installed."
19 changes: 19 additions & 0 deletions .ci/docker/common/install_triton.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
#!/usr/bin/bash
# Install the triton-ascend wheel for NPU.
#
# Usage: ./install_triton.sh <PYTHON_VERSION>
#   PYTHON_VERSION  dotted CPython version the wheel is tagged for,
#                   e.g. 3.10, 3.11, 3.12, 3.13
# Environment:
#   TRITON_VERSION  triton-ascend release to install (default: 3.2.1)
#
# The wheel is installed with whatever interpreter `pip3` belongs to.

set -e

# Fall back to the default release when TRITON_VERSION is unset or empty.
: "${TRITON_VERSION:=3.2.1}"
PYTHON_VERSION="${1:?Usage: $0 <PYTHON_VERSION> (e.g. 3.10)}"

ARCH=$(uname -m)

# CPython ABI tag digits: "3.10" -> "310".
py_tag="${PYTHON_VERSION//./}"

release_base="https://gitcode.com/Ascend/triton-ascend/releases/download/v${TRITON_VERSION}"
wheel_file="triton_ascend-${TRITON_VERSION}-cp${py_tag}-cp${py_tag}-manylinux_2_27_${ARCH}.manylinux_2_28_${ARCH}.whl"

echo "Installing triton-ascend ${TRITON_VERSION} for Python ${PYTHON_VERSION} (${ARCH})..."
pip3 install --no-cache-dir "${release_base}/${wheel_file}"
echo "triton-ascend installed."
Loading
Loading