Skip to content
9 changes: 9 additions & 0 deletions docs/source/transforms.rst
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,15 @@ have values in ``[0, MAX_DTYPE]`` where ``MAX_DTYPE`` is the largest value
that can be represented in that dtype. Typically, images of dtype
``torch.uint8`` are expected to have values in ``[0, 255]``.

.. note::

    ``torch.uint16``, ``torch.uint32``, and ``torch.uint64`` dtypes are not
    officially supported by the torchvision transforms. While some operations
    may work, most transforms expect ``torch.uint8`` or ``torch.float32``
    inputs. If you're working with ``uint16`` images (e.g. from 16-bit medical
    or scientific imaging), consider converting to ``float32`` first using
    :class:`~torchvision.transforms.v2.ToDtype`.
Copy link
Copy Markdown
Contributor

@zy1git zy1git Apr 13, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there any reason we mention the uint16-to-float32 conversion specifically?

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No specific reason; I dropped that bit in the latest commit. The note now just lists the unsupported dtypes.


Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We can put this note after the sentence "Use :class:`~torchvision.transforms.v2.ToDtype` to convert both the dtype and range of the inputs."

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Moved it there in the latest commit.

Use :class:`~torchvision.transforms.v2.ToDtype` to convert both the dtype and
range of the inputs.

Expand Down
8 changes: 4 additions & 4 deletions test/test_transforms.py
Original file line number Diff line number Diff line change
Expand Up @@ -614,8 +614,8 @@ def _get_1_channel_tensor_various_types():
expected_output = img_data_byte.float().div(255.0).numpy()
yield img_data_byte, expected_output, "L"

img_data_short = torch.ShortTensor(1, 4, 4).random_()
expected_output = img_data_short.numpy()
img_data_short = torch.ShortTensor(1, 4, 4).random_(0, 32767)
expected_output = img_data_short.numpy().astype(np.uint16)
yield img_data_short, expected_output, "I;16" if sys.byteorder == "little" else "I;16B"

img_data_int = torch.IntTensor(1, 4, 4).random_()
Expand All @@ -631,8 +631,8 @@ def _get_2d_tensor_various_types():
expected_output = img_data_byte.float().div(255.0).numpy()
yield img_data_byte, expected_output, "L"

img_data_short = torch.ShortTensor(4, 4).random_()
expected_output = img_data_short.numpy()
img_data_short = torch.ShortTensor(4, 4).random_(0, 32767)
expected_output = img_data_short.numpy().astype(np.uint16)
yield img_data_short, expected_output, "I;16" if sys.byteorder == "little" else "I;16B"

img_data_int = torch.IntTensor(4, 4).random_()
Expand Down
12 changes: 12 additions & 0 deletions test/test_transforms_v2.py
Original file line number Diff line number Diff line change
Expand Up @@ -6879,6 +6879,18 @@ def test_functional_error(self):
F.pil_to_tensor(object())


@pytest.mark.parametrize("f", [F.to_tensor, F.pil_to_tensor])
def test_I16_to_tensor(f):
    """Check that a 16-bit unsigned PIL image converts to a ``torch.uint16`` tensor.

    A random ``uint16`` array spanning the full ``[0, 2**16)`` range is wrapped
    in a PIL image and passed through ``f``. The result must have dtype
    ``torch.uint16`` and carry the same sample values as the source array.

    Args:
        f: The conversion function under test (``F.to_tensor`` or
            ``F.pil_to_tensor``).
    """
    # See https://github.com/pytorch/vision/issues/8188
    import sys  # local import so the test does not depend on a module-level one

    data = np.random.randint(0, 2**16, (10, 10), dtype=np.uint16)
    I16_pil_img = PIL.Image.fromarray(data)

    # Native-endian uint16 data maps to a byte-order specific PIL mode, so the
    # expected mode is selected the same way ``to_tensor`` selects its lookup key.
    expected_mode = "I;16" if sys.byteorder == "little" else "I;16B"
    assert I16_pil_img.mode == expected_mode

    # Only ``F.to_tensor`` is expected to warn about deprecation here.
    cm = pytest.warns(UserWarning, match="deprecated") if f is F.to_tensor else contextlib.nullcontext()
    with cm:
        out = f(I16_pil_img)
    assert out.dtype == torch.uint16

    # Check the values too: a correct dtype alone would not reveal rescaled or
    # byte-swapped samples. ``reshape`` drops any leading channel dimension.
    np.testing.assert_array_equal(out.numpy().reshape(data.shape), data)


@needs_cvcuda
class TestToCVCUDATensor:
@pytest.mark.parametrize("image_type", (torch.Tensor, tv_tensors.Image))
Expand Down
2 changes: 1 addition & 1 deletion torchvision/transforms/functional.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,7 +164,7 @@ def to_tensor(pic: Union[PILImage, np.ndarray]) -> Tensor:
return torch.from_numpy(nppic).to(dtype=default_float_dtype)

# handle PIL Image
mode_to_nptype = {"I": np.int32, "I;16" if sys.byteorder == "little" else "I;16B": np.int16, "F": np.float32}
mode_to_nptype = {"I": np.int32, "I;16" if sys.byteorder == "little" else "I;16B": np.uint16, "F": np.float32}
img = torch.from_numpy(np.array(pic, mode_to_nptype.get(pic.mode, np.uint8), copy=True))

if pic.mode == "1":
Expand Down
Loading