Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 3 additions & 5 deletions docs/_tutorials/ds4sci_evoformerattention.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,14 +15,13 @@ tags: training inference

`DS4Sci_EvoformerAttention` is released as part of DeepSpeed >= 0.10.3.

`DS4Sci_EvoformerAttention` is implemented based on [CUTLASS](https://github.com/NVIDIA/cutlass). You need to clone the CUTLASS repository and specify the path to it in the environment variable `CUTLASS_PATH`.
`DS4Sci_EvoformerAttention` is implemented based on [CUTLASS](https://github.com/NVIDIA/cutlass). DeepSpeed automatically looks for CUTLASS in the [nvidia-cutlass](https://pypi.org/project/nvidia-cutlass/) Python package, Python environment and CMake prefixes, compiler include path environment variables, a `cutlass` checkout next to DeepSpeed or in the current working directory, and common system install prefixes such as `/usr/local`.
CUTLASS setup detection can be skipped by setting ```CUTLASS_PATH="DS_IGNORE_CUTLASS_DETECTION"```, which is useful if your compiler environment is already set up correctly (e.g., when compiling inside a conda package that has CUTLASS and the CUDA compilers installed).
The CUTLASS location can be inferred automatically from PyPI's [nvidia-cutlass](https://pypi.org/project/nvidia-cutlass/) package by setting ```CUTLASS_PATH="DS_USE_CUTLASS_PYTHON_BINDINGS"```. Note that this is discouraged because ```nvidia-cutlass``` is no longer maintained and is outdated.
If automatic detection does not find the intended installation, set `CUTLASS_PATH` to either the CUTLASS checkout root or its `include` directory.

You can always simply clone cutlass and setup ```CUTLASS_PATH```:
You can always simply clone cutlass next to DeepSpeed:
```shell
git clone https://github.com/NVIDIA/cutlass
export CUTLASS_PATH=/path/to/cutlass
```
The kernels will be compiled when `DS4Sci_EvoformerAttention` is called for the first time.

Expand All @@ -43,7 +42,6 @@ Evoformer now supports mixed-architecture packaging directly via
Example:

```shell
CUTLASS_PATH=/path/to/cutlass \
TORCH_CUDA_ARCH_LIST='7.0;8.0' \
DS_BUILD_OPS=0 DS_BUILD_EVOFORMER_ATTN=1 \
pip install -e .
Expand Down
154 changes: 123 additions & 31 deletions op_builder/evoformer_attn.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,18 +4,23 @@
# DeepSpeed Team

from .builder import CUDAOpBuilder, installed_cuda_version
import importlib
import os
from pathlib import Path
import sys


class EvoformerAttnBuilder(CUDAOpBuilder):
BUILD_VAR = "DS_BUILD_EVOFORMER_ATTN"
NAME = "evoformer_attn"
CUTLASS_IGNORE = "DS_IGNORE_CUTLASS_DETECTION"
CUTLASS_PYTHON_BINDINGS = "DS_USE_CUTLASS_PYTHON_BINDINGS"

def __init__(self, name=None):
    """Initialise the builder and capture the CUTLASS_PATH environment variable.

    Args:
        name: Op name forwarded to the parent builder; the class-level
            ``NAME`` is used when this is ``None``.
    """
    if name is None:
        name = self.NAME
    super().__init__(name=name)
    # Raw user setting read once at construction time (None when the variable is unset).
    self.cutlass_path = os.environ.get("CUTLASS_PATH")
    # No CUTLASS location has been resolved yet.
    self._resolved_cutlass_path = None

def absolute_name(self):
    """Return the dotted name ``deepspeed.ops.<NAME>_op`` for this builder's op."""
    op_name = self.NAME
    return f"deepspeed.ops.{op_name}_op"
Expand Down Expand Up @@ -57,21 +62,20 @@ def is_compatible(self, verbose=False):
self.warning("Please install torch if trying to pre-compile kernels")
return False

if self.cutlass_path is None:
if verbose:
self.warning("Please specify CUTLASS location directory as environment variable CUTLASS_PATH")
self.warning(
"Possible values are: a path, DS_IGNORE_CUTLASS_DETECTION and DS_USE_CUTLASS_PYTHON_BINDINGS")
return False

if self.cutlass_path != "DS_IGNORE_CUTLASS_DETECTION":
if self.cutlass_path != self.CUTLASS_IGNORE:
try:
self.include_paths()
except (RuntimeError, ImportError):
except (RuntimeError, ImportError) as exc:
if verbose:
self.warning(str(exc))
return False
# Check version in case it is a CUTLASS_PATH points to a CUTLASS checkout
if os.path.exists(f"{self.cutlass_path}/CHANGELOG.md"):
with open(f"{self.cutlass_path}/CHANGELOG.md", "r") as f:
if self._resolved_cutlass_path is not None:
changelog_path = self._resolved_cutlass_path / "CHANGELOG.md"
else:
changelog_path = None
if changelog_path is not None and changelog_path.exists():
with open(changelog_path, "r") as f:
if "3.1.0" not in f.read():
if verbose:
self.warning("Please use CUTLASS version >= 3.1.0")
Expand All @@ -94,26 +98,114 @@ def is_compatible(self, verbose=False):
cuda_okay = False
return super().is_compatible(verbose) and cuda_okay

@staticmethod
def _repo_root():
    """Return the directory that contains this file's parent directory.

    The path of this source file is resolved first, so symlinks do not
    affect the result.
    """
    this_file = Path(__file__).resolve()
    return this_file.parents[1]

@staticmethod
def _dedupe_paths(paths):
    """Return ``paths`` as ``Path`` objects with duplicates removed, keeping first-seen order.

    Each entry is converted to ``Path`` and has ``~`` expanded; two entries
    are considered duplicates when their string forms are equal.
    """
    # A dict preserves insertion order, so the first occurrence of each key wins.
    first_seen = {}
    for raw in paths:
        expanded = Path(raw).expanduser()
        first_seen.setdefault(str(expanded), expanded)
    return list(first_seen.values())

@staticmethod
def _env_paths(*names):
    """Collect the path entries stored in the given environment variables.

    Each variable is split on ``os.pathsep``; unset or empty variables and
    empty entries contribute nothing. Entries are returned as ``Path``
    objects in the order the variables were named.
    """
    collected = []
    for variable in names:
        raw_value = os.environ.get(variable) or ""
        collected += [Path(entry) for entry in raw_value.split(os.pathsep) if entry]
    return collected

@staticmethod
def _python_package_cutlass_paths():
    """Return candidate CUTLASS locations derived from the ``cutlass_library`` module.

    Returns an empty list when ``cutlass_library`` cannot be imported.
    Otherwise the list holds, in order: the module's ``source_path``
    attribute (if present), then ``<package>/source``, the package's parent
    directory and the package directory itself (if the module has a
    ``__file__``).
    """
    try:
        module = importlib.import_module("cutlass_library")
    except ImportError:
        return []

    found = []

    declared_source = getattr(module, "source_path", None)
    if declared_source is not None:
        found.append(Path(declared_source))

    module_file = getattr(module, "__file__", None)
    if module_file is not None:
        pkg_dir = Path(module_file).resolve().parent
        found += [pkg_dir / "source", pkg_dir.parent, pkg_dir]

    return found

def _candidate_cutlass_paths(self):
    """Return the ordered list of locations to probe for CUTLASS headers.

    * ``CUTLASS_PATH`` equal to the Python-bindings sentinel: only the
      locations reported by the ``cutlass_library`` package are used.
    * Any other non-empty ``CUTLASS_PATH``: that single path.
    * Otherwise: Python package locations, prefix-style environment
      variables, interpreter prefixes, compiler include-path variables,
      ``cutlass`` directories near the working directory and this file's
      root, and finally common system prefixes, de-duplicated in that order.

    Raises:
        ImportError: The Python-bindings sentinel was requested but the
            package yielded no candidate locations.
    """
    requested = self.cutlass_path

    if requested == self.CUTLASS_PYTHON_BINDINGS:
        packaged = self._python_package_cutlass_paths()
        if not packaged:
            self.warning("Please pip install nvidia-cutlass")
            raise ImportError("Unable to locate CUTLASS from the nvidia-cutlass Python package")
        return packaged

    # An explicit path is taken as-is; no other location is searched.
    if requested:
        return [Path(requested)]

    base_dir = self._repo_root()
    interpreter_prefixes = self._dedupe_paths(
        [Path(prefix) for prefix in (sys.prefix, sys.exec_prefix, sys.base_prefix)])
    env_prefixes = self._env_paths("CUTLASS_ROOT", "CUTLASS_HOME", "CONDA_PREFIX", "VIRTUAL_ENV",
                                   "CMAKE_PREFIX_PATH", "CUDA_HOME", "CUDA_PATH")
    env_include_dirs = self._env_paths("CPATH", "CPLUS_INCLUDE_PATH", "C_INCLUDE_PATH")

    search_order = list(self._python_package_cutlass_paths())
    search_order += env_prefixes
    search_order += interpreter_prefixes
    search_order += env_include_dirs
    search_order += [
        Path.cwd() / "cutlass",
        base_dir / "cutlass",
        base_dir.parent / "cutlass",
    ]
    search_order += [Path(fixed) for fixed in ("/usr/local/cutlass", "/opt/cutlass", "/usr/local", "/usr")]
    return self._dedupe_paths(search_order)

@staticmethod
def _cutlass_include_dirs(cutlass_path):
    """Return the include directories (as strings) provided by ``cutlass_path``.

    ``cutlass_path`` may be a checkout/prefix containing
    ``include/cutlass/cutlass.h`` or an include directory containing
    ``cutlass/cutlass.h`` directly. The matching ``tools/util/include``
    directory is appended when it exists. An empty list means the location
    does not hold CUTLASS headers.
    """
    location = cutlass_path.expanduser().resolve()
    if not location.is_dir():
        return []

    marker = Path("cutlass") / "cutlass.h"
    if (location / "include" / marker).is_file():
        # `location` is the checkout or install prefix.
        headers_dir, tree_root = location / "include", location
    elif (location / marker).is_file():
        # `location` is itself the include directory; the tree root is one level up.
        headers_dir, tree_root = location, location.parent
    else:
        return []

    found = [str(headers_dir)]
    util_headers = tree_root / "tools" / "util" / "include"
    if util_headers.is_dir():
        found.append(str(util_headers))
    return found

def include_paths(self):
    """Return the CUTLASS include directories to add to the compile command.

    Returns:
        A list of directory strings. The list is empty when ``CUTLASS_PATH``
        is the ignore sentinel, i.e. the user has already made CUTLASS
        visible to the compiler by other means.

    Raises:
        RuntimeError: No candidate location contains CUTLASS headers.
        ImportError: Propagated from ``_candidate_cutlass_paths`` when the
            Python-bindings sentinel is set but the package is unavailable.
    """
    # Assume the user knows best and CUTLASS location is already setup externally
    if self.cutlass_path == self.CUTLASS_IGNORE:
        return []

    for candidate in self._candidate_cutlass_paths():
        include_dirs = self._cutlass_include_dirs(candidate)
        if include_dirs:
            # Remember the tree root rather than the raw candidate: the first
            # include directory's parent is the checkout/prefix both when the
            # candidate was the root and when it was the include directory
            # itself, so files such as CHANGELOG.md can be found in either case.
            self._resolved_cutlass_path = Path(include_dirs[0]).parent
            return include_dirs

    if self.cutlass_path:
        raise RuntimeError(f"CUTLASS_PATH {self.cutlass_path} does not contain CUTLASS headers")

    raise RuntimeError("Unable to locate CUTLASS. Install nvidia-cutlass, clone CUTLASS next to DeepSpeed, "
                       "or set CUTLASS_PATH to the CUTLASS checkout.")
2 changes: 1 addition & 1 deletion tests/benchmarks/DS4Sci_EvoformerAttention_bench.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
This script is to test the performance of the DS4Sci_EvoformerAttention op.
To run the script,
1. Clone the CUTLASS repo. E.g. git clone https://github.com/NVIDIA/cutlass.git
2. Specify the CUTLASS_PATH environment variable. E.g. export CUTLASS_PATH=$(pwd)/cutlass
2. DeepSpeed will detect a local or installed CUTLASS. If needed, set CUTLASS_PATH explicitly.
3. Run the script. E.g. python DS4Sci_EvoformerAttention_bench.py
"""

Expand Down
72 changes: 72 additions & 0 deletions tests/unit/ops/deepspeed4science/test_evoformer_attn_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,22 @@
from pathlib import Path
from unittest.mock import patch

import pytest

from deepspeed.ops.op_builder.builder import CUDAOpBuilder
# Import the concrete builder class instead of the accelerator-dispatched alias.
from deepspeed.ops.op_builder.evoformer_attn import EvoformerAttnBuilder


def make_cutlass_checkout(path):
    """Create a minimal fake CUTLASS tree under ``path`` and return ``path``.

    The tree consists of ``include/cutlass/cutlass.h`` (a one-line marker
    file) and an empty ``tools/util/include`` directory.
    """
    header = path / "include" / "cutlass" / "cutlass.h"
    header.parent.mkdir(parents=True)
    header.write_text("// cutlass marker\n")
    (path / "tools" / "util" / "include").mkdir(parents=True)
    return path


def test_filter_ccs_removes_below_70_and_keeps_ptx_suffix():
builder = EvoformerAttnBuilder()
result = builder.filter_ccs(["6.0", "6.1", "7.0", "8.0+PTX"])
Expand Down Expand Up @@ -44,3 +55,64 @@ def test_no_cuda_arch_in_checkarch():
end = text.index("};", start) + 2
block = text[start:end]
assert "__CUDA_ARCH__" not in block


# With CUTLASS_PATH set to a checkout root, include_paths() must return that
# checkout's include directory followed by its tools/util/include directory.
def test_include_paths_uses_cutlass_path_env(tmp_path):
cutlass_path = make_cutlass_checkout(tmp_path / "cutlass")

with patch.dict("os.environ", {"CUTLASS_PATH": str(cutlass_path)}, clear=False):
builder = EvoformerAttnBuilder()

assert builder.include_paths() == [
str(cutlass_path / "include"),
str(cutlass_path / "tools" / "util" / "include"),
]


# A builder created with an empty environment must pick up a CUTLASS tree
# reported by _python_package_cutlass_paths() and list its include directory first.
def test_include_paths_finds_python_package_candidate_without_env(tmp_path):
cutlass_path = make_cutlass_checkout(tmp_path / "python_package_cutlass")

with patch.dict("os.environ", {}, clear=True):
builder = EvoformerAttnBuilder()

with patch.object(EvoformerAttnBuilder, "_python_package_cutlass_paths", return_value=[cutlass_path]):
assert builder.include_paths()[0] == str(cutlass_path / "include")


# When the Python-package lookup yields nothing, a prefix named in
# CMAKE_PREFIX_PATH that contains the CUTLASS headers must be discovered.
def test_include_paths_finds_cutlass_from_cmake_prefix_path(tmp_path):
cutlass_path = make_cutlass_checkout(tmp_path / "prefix")

with patch.dict("os.environ", {"CMAKE_PREFIX_PATH": str(cutlass_path)}, clear=True):
builder = EvoformerAttnBuilder()
with patch.object(EvoformerAttnBuilder, "_python_package_cutlass_paths", return_value=[]):
assert builder.include_paths()[0] == str(cutlass_path / "include")


# When the Python-package lookup yields nothing, an include directory named in
# CPATH that directly contains cutlass/cutlass.h must be discovered.
def test_include_paths_finds_cutlass_from_compiler_include_path(tmp_path):
cutlass_path = make_cutlass_checkout(tmp_path / "prefix")

with patch.dict("os.environ", {"CPATH": str(cutlass_path / "include")}, clear=True):
builder = EvoformerAttnBuilder()
with patch.object(EvoformerAttnBuilder, "_python_package_cutlass_paths", return_value=[]):
assert builder.include_paths()[0] == str(cutlass_path / "include")


# Pointing CUTLASS_PATH at the checkout's include directory must yield the same
# two directories as pointing it at the checkout root.
def test_include_paths_accepts_cutlass_include_dir_directly(tmp_path):
cutlass_path = make_cutlass_checkout(tmp_path / "cutlass")

with patch.dict("os.environ", {"CUTLASS_PATH": str(cutlass_path / "include")}, clear=False):
builder = EvoformerAttnBuilder()

assert builder.include_paths() == [
str(cutlass_path / "include"),
str(cutlass_path / "tools" / "util" / "include"),
]


# If the only candidate location does not exist and CUTLASS_PATH is unset,
# include_paths() must raise RuntimeError with the "Unable to locate CUTLASS" message.
def test_include_paths_reports_missing_cutlass(tmp_path):
with patch.dict("os.environ", {}, clear=True):
builder = EvoformerAttnBuilder()

with patch.object(builder, "_candidate_cutlass_paths", return_value=[tmp_path / "missing"]):
with pytest.raises(RuntimeError, match="Unable to locate CUTLASS"):
builder.include_paths()
Loading