Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions .github/workflows/build-wheel.yml
Original file line number Diff line number Diff line change
Expand Up @@ -456,6 +456,16 @@ jobs:
path: ${{ env.CUDA_CORE_CYTHON_TESTS_DIR }}/test_*${{ env.PY_EXT_SUFFIX }}
if-no-files-found: error

- name: Build cuda.core test binaries
run: bash ${{ env.CUDA_CORE_TEST_BINARIES_DIR }}/build_test_binaries.sh

- name: Upload cuda.core test binaries
uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
with:
name: ${{ env.CUDA_CORE_ARTIFACT_NAME }}-test-binaries
path: ${{ env.CUDA_CORE_TEST_BINARIES_DIR }}/*.o
if-no-files-found: error

# Note: This overwrites CUDA_PATH etc
- name: Set up mini CTK
uses: ./.github/actions/fetch_ctk
Expand Down
13 changes: 13 additions & 0 deletions .github/workflows/test-wheel-linux.yml
Original file line number Diff line number Diff line change
Expand Up @@ -273,6 +273,19 @@ jobs:
pwd
ls -lahR $CUDA_CORE_CYTHON_TESTS_DIR

- name: Download cuda.core test binaries
uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1
with:
name: ${{ env.CUDA_CORE_ARTIFACT_NAME }}-test-binaries
path: ${{ env.CUDA_CORE_TEST_BINARIES_DIR }}
run-id: ${{ inputs.run-id || github.run_id }}
github-token: ${{ secrets.GITHUB_TOKEN }}

- name: Display structure of downloaded cuda.core test binaries
run: |
pwd
ls -lahR $CUDA_CORE_TEST_BINARIES_DIR

- name: Set up Python ${{ matrix.PY_VER }}
uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
with:
Expand Down
13 changes: 13 additions & 0 deletions .github/workflows/test-wheel-windows.yml
Original file line number Diff line number Diff line change
Expand Up @@ -252,6 +252,19 @@ jobs:
Get-Location
Get-ChildItem -Recurse -Force $env:CUDA_CORE_CYTHON_TESTS_DIR | Select-Object Mode, LastWriteTime, Length, FullName

- name: Download cuda.core test binaries
uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1
with:
name: ${{ env.CUDA_CORE_ARTIFACT_NAME }}-test-binaries
path: ${{ env.CUDA_CORE_TEST_BINARIES_DIR }}
run-id: ${{ inputs.run-id || github.run_id }}
github-token: ${{ secrets.GITHUB_TOKEN }}

- name: Display structure of downloaded cuda.core test binaries
run: |
Get-Location
Get-ChildItem -Recurse -Force $env:CUDA_CORE_TEST_BINARIES_DIR | Select-Object Mode, LastWriteTime, Length, FullName

- name: Set up Python ${{ matrix.PY_VER }}
uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
with:
Expand Down
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@ cache_runtime
cache_nvrtc
cuda_bindings/cuda/bindings/_lib/utils.pxi

# cuda.core test object fixtures built locally / downloaded as CI artifacts
cuda_core/tests/test_binaries/*.o

# CUDA Python specific (auto-generated)
cuda_bindings/cuda/bindings/_bindings/cydriver.pxd
cuda_bindings/cuda/bindings/_bindings/cydriver.pyx
Expand Down
1 change: 1 addition & 0 deletions ci/tools/env-vars
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ CUDA_CORE_ARTIFACT_BASENAME="cuda-core-python${PYTHON_VERSION_FORMATTED}-${HOST_
echo "CUDA_CORE_ARTIFACT_NAME=${CUDA_CORE_ARTIFACT_BASENAME}-${SHA}"
echo "CUDA_CORE_ARTIFACTS_DIR=$(realpath "${REPO_DIR}/cuda_core/dist")"
echo "CUDA_CORE_CYTHON_TESTS_DIR=$(realpath "${REPO_DIR}/cuda_core/tests/cython")"
echo "CUDA_CORE_TEST_BINARIES_DIR=$(realpath "${REPO_DIR}/cuda_core/tests/test_binaries")"
echo "PYTHON_VERSION_FORMATTED=${PYTHON_VERSION_FORMATTED}"
} >> $GITHUB_ENV

Expand Down
14 changes: 14 additions & 0 deletions cuda_core/tests/test_binaries/build_test_binaries.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
#!/bin/bash

# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

# Abort on the first failing command, on use of an unset variable, and on a
# failure anywhere inside a pipeline.
set -o errexit -o nounset -o pipefail

# Build the .o test fixtures next to this script. Invoked at the CI build
# stage; the test suite also runs it on demand when saxpy.o is missing.

# Resolve the directory that contains this script so the build does not
# depend on the caller's working directory.
here="$(dirname "$(realpath "$0")")"
src="${here}/saxpy.cu"
obj="${here}/saxpy.o"

# -dc compiles to relocatable device code (an object meant as linker input).
nvcc -dc -o "${obj}" "${src}"

# Show the produced file in the log.
ls -lah "${obj}"
12 changes: 12 additions & 0 deletions cuda_core/tests/test_binaries/saxpy.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

#include <cstddef>

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since this is device-specific code, let's use `<cuda/std/cstddef>` instead of `<cstddef>`.


// Element-wise out[i] = a * x[i] + y[i] for i in [0, N).
//
// Each thread starts at its global index and advances by the total number of
// launched threads (gridDim.x * blockDim.x), so any grid size covers all N
// elements.
//
// NOTE(review): this file contains no instantiation of the template; confirm
// the compiled object actually contains saxpy code if a test ever needs it.
template<typename T>
__global__ void saxpy(const T a, const T* x, const T* y, T* out, size_t N) {
    // Compute the index and stride in size_t so they cannot wrap before being
    // compared against N.
    const size_t tid = threadIdx.x + static_cast<size_t>(blockIdx.x) * blockDim.x;
    const size_t stride = static_cast<size_t>(gridDim.x) * blockDim.x;
    for (size_t i = tid; i < N; i += stride) {
        // Index with the loop variable, not tid: indexing with tid would only
        // ever touch one element per thread and leave the tail unwritten.
        out[i] = a * x[i] + y[i];
    }
}
49 changes: 49 additions & 0 deletions cuda_core/tests/test_module.py
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,35 @@ def get_saxpy_fatbin(init_cuda):
return bytes(fatbin), sym_map


@pytest.fixture(scope="module")
def get_saxpy_object():
    """Return the contents of the pre-built ``saxpy.o`` as bytes.

    The object file is expected in ``test_binaries/`` next to this module.

    In CI it is produced by the build stage and downloaded before the tests
    run. In local development it is built on demand through
    ``build_test_binaries.sh`` when ``nvcc`` can be found. An existing
    ``saxpy.o`` is never rebuilt, so delete it after editing ``saxpy.cu``.

    The requesting test is skipped when the object is missing and ``nvcc`` is
    unavailable.
    """
    import shutil
    import subprocess
    from pathlib import Path

    fixture_dir = Path(__file__).parent / "test_binaries"
    obj_path = fixture_dir / "saxpy.o"

    # Fast path: the object already exists (CI download or earlier local build).
    if obj_path.is_file():
        return obj_path.read_bytes()

    # NOTE(review): a reviewer suggested
    # cuda.pathfinder.find_nvidia_binary_utility("nvcc") as a more
    # future-proof lookup than shutil.which.
    if shutil.which("nvcc") is None:
        pytest.skip(
            f"saxpy.o not found at {obj_path} and nvcc is unavailable. "
            "In CI this is downloaded from the build stage."
        )

    # check=True turns a failed build into an error instead of a confusing
    # missing-file failure on the read below.
    build_script = fixture_dir / "build_test_binaries.sh"
    subprocess.run(["bash", str(build_script)], check=True)  # noqa: S603, S607

    return obj_path.read_bytes()


def test_get_kernel(init_cuda):
kernel = """extern "C" __global__ void ABC() { }"""

Expand Down Expand Up @@ -330,6 +359,26 @@ def test_object_code_load_fatbin_from_file(get_saxpy_fatbin, tmp_path, convert_p
mod_obj.get_kernel("saxpy<double>") # force loading


def test_object_code_load_object(get_saxpy_object):
    """Wrap in-memory object bytes with ``ObjectCode.from_object``.

    Checks that the bytes and the ``"object"`` code type are reported back
    unchanged, and that asking for a kernel is rejected.
    """
    raw = get_saxpy_object
    assert isinstance(raw, bytes)

    loaded = ObjectCode.from_object(raw)
    assert loaded.code == raw
    assert loaded.code_type == "object"

    # object code is only valid as linker input; get_kernel is unsupported
    # TODO(review): a reviewer asked whether the linking path can be tested,
    # e.g. one kernel written in Python and one device function in C++,
    # linked together.
    expected_error = r'Unsupported code type "object"'
    with pytest.raises(RuntimeError, match=expected_error):
        loaded.get_kernel("saxpy<float>")


def test_object_code_load_object_from_file(get_saxpy_object, tmp_path):
    """Pass a file path string to ``ObjectCode.from_object``.

    The object bytes are written to a temporary file first; the resulting
    ``ObjectCode`` is expected to keep the path string as ``code`` and report
    the ``"object"`` code type.
    """
    on_disk = tmp_path.joinpath("test.o")
    on_disk.write_bytes(get_saxpy_object)

    # from_object receives the path as a plain string, not a Path object.
    path_str = str(on_disk)
    loaded = ObjectCode.from_object(path_str)

    assert loaded.code == path_str
    assert loaded.code_type == "object"


def test_saxpy_arguments(get_saxpy_kernel_cubin, cuda12_4_prerequisite_check):
krn, _ = get_saxpy_kernel_cubin

Expand Down
Loading