From 8864dc0a2ff798510966943980d95c21e368eebb Mon Sep 17 00:00:00 2001 From: knQzx <75641500+knQzx@users.noreply.github.com> Date: Sun, 29 Mar 2026 11:06:54 +0200 Subject: [PATCH 1/8] handle I;16 mode in pil_to_tensor by converting to int32 --- test/test_transforms_v2.py | 12 ++++++++++++ torchvision/transforms/functional.py | 7 +++++-- 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/test/test_transforms_v2.py b/test/test_transforms_v2.py index 1c9bfc772b3..f75caccb42c 100644 --- a/test/test_transforms_v2.py +++ b/test/test_transforms_v2.py @@ -6879,6 +6879,18 @@ def test_functional_error(self): F.pil_to_tensor(object()) +@pytest.mark.parametrize("f", [F.to_tensor, F.pil_to_tensor]) +def test_I16_to_tensor(f): + # See https://github.com/pytorch/vision/issues/8188 + I16_pil_img = PIL.Image.fromarray(np.random.randint(0, 2**16, (10, 10), dtype=np.uint16)) + assert I16_pil_img.mode == "I;16" + + cm = pytest.warns(UserWarning, match="deprecated") if f is F.to_tensor else contextlib.nullcontext() + with cm: + out = f(I16_pil_img) + assert out.dtype == torch.int32 + + @needs_cvcuda class TestToCVCUDATensor: @pytest.mark.parametrize("image_type", (torch.Tensor, tv_tensors.Image)) diff --git a/torchvision/transforms/functional.py b/torchvision/transforms/functional.py index 7b950b0c45b..112f83cc101 100644 --- a/torchvision/transforms/functional.py +++ b/torchvision/transforms/functional.py @@ -164,7 +164,7 @@ def to_tensor(pic: Union[PILImage, np.ndarray]) -> Tensor: return torch.from_numpy(nppic).to(dtype=default_float_dtype) # handle PIL Image - mode_to_nptype = {"I": np.int32, "I;16" if sys.byteorder == "little" else "I;16B": np.int16, "F": np.float32} + mode_to_nptype = {"I": np.int32, "I;16" if sys.byteorder == "little" else "I;16B": np.int32, "F": np.float32} img = torch.from_numpy(np.array(pic, mode_to_nptype.get(pic.mode, np.uint8), copy=True)) if pic.mode == "1": @@ -206,7 +206,10 @@ def pil_to_tensor(pic: Any) -> Tensor: return torch.as_tensor(nppic) # handle PIL Image - img = torch.as_tensor(np.array(pic, copy=True)) + img = np.array(pic, copy=True) + if pic.mode == "I;16": + img = img.astype(np.int32) + img = torch.as_tensor(img) img = img.view(pic.size[1], pic.size[0], F_pil.get_image_num_channels(pic)) # put it from HWC to CHW format img = img.permute((2, 0, 1)) From 088b3397b34a8a6ac018bfec7b0fd234dfc866d2 Mon Sep 17 00:00:00 2001 From: knQzx <75641500+knQzx@users.noreply.github.com> Date: Sun, 29 Mar 2026 11:20:01 +0200 Subject: [PATCH 2/8] retrigger cla check From 2ad73ef114e22039d7b1d451e49d45f55beb279d Mon Sep 17 00:00:00 2001 From: knQzx <75641500+knQzx@users.noreply.github.com> Date: Mon, 30 Mar 2026 18:43:06 +0200 Subject: [PATCH 3/8] use uint16 instead of int32 for I;16 mode as suggested --- test/test_transforms_v2.py | 2 +- torchvision/transforms/functional.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/test/test_transforms_v2.py b/test/test_transforms_v2.py index f75caccb42c..78c95240a22 100644 --- a/test/test_transforms_v2.py +++ b/test/test_transforms_v2.py @@ -6888,7 +6888,7 @@ def test_I16_to_tensor(f): cm = pytest.warns(UserWarning, match="deprecated") if f is F.to_tensor else contextlib.nullcontext() with cm: out = f(I16_pil_img) - assert out.dtype == torch.int32 + assert out.dtype == torch.uint16 @needs_cvcuda diff --git a/torchvision/transforms/functional.py b/torchvision/transforms/functional.py index 112f83cc101..fc0a612d592 100644 --- a/torchvision/transforms/functional.py +++ b/torchvision/transforms/functional.py @@ -164,7 +164,7 @@ def to_tensor(pic: Union[PILImage, np.ndarray]) -> Tensor: return torch.from_numpy(nppic).to(dtype=default_float_dtype) # handle PIL Image - mode_to_nptype = {"I": np.int32, "I;16" if sys.byteorder == "little" else "I;16B": np.int32, "F": np.float32} + mode_to_nptype = {"I": np.int32, "I;16" if sys.byteorder == "little" else "I;16B": np.uint16, "F": np.float32} img = torch.from_numpy(np.array(pic, mode_to_nptype.get(pic.mode, np.uint8), copy=True)) if pic.mode == "1": @@ -208,7 +208,7 @@ def pil_to_tensor(pic: Any) -> Tensor: # handle PIL Image img = np.array(pic, copy=True) if pic.mode == "I;16": - img = img.astype(np.int32) + img = img.astype(np.uint16) img = torch.as_tensor(img) img = img.view(pic.size[1], pic.size[0], F_pil.get_image_num_channels(pic)) # put it from HWC to CHW format From 71cf6220f639674c189788d91e8c244cb54e66b1 Mon Sep 17 00:00:00 2001 From: knQzx <75641500+knQzx@users.noreply.github.com> Date: Wed, 1 Apr 2026 21:13:35 +0200 Subject: [PATCH 4/8] fix test expected output to match uint16 round-trip through I;16 PIL mode --- test/test_transforms.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/test_transforms.py b/test/test_transforms.py index d93800d59bc..32bf139e8a9 100644 --- a/test/test_transforms.py +++ b/test/test_transforms.py @@ -615,7 +615,7 @@ def _get_1_channel_tensor_various_types(): yield img_data_byte, expected_output, "L" img_data_short = torch.ShortTensor(1, 4, 4).random_() - expected_output = img_data_short.numpy() + expected_output = img_data_short.numpy().view(np.uint16) yield img_data_short, expected_output, "I;16" if sys.byteorder == "little" else "I;16B" img_data_int = torch.IntTensor(1, 4, 4).random_() @@ -632,7 +632,7 @@ def _get_2d_tensor_various_types(): yield img_data_byte, expected_output, "L" img_data_short = torch.ShortTensor(4, 4).random_() - expected_output = img_data_short.numpy() + expected_output = img_data_short.numpy().view(np.uint16) yield img_data_short, expected_output, "I;16" if sys.byteorder == "little" else "I;16B" img_data_int = torch.IntTensor(4, 4).random_() From 1fe1709a960ed7062ad4ff5699fe5ab394a5d3da Mon Sep 17 00:00:00 2001 From: knQzx <75641500+knQzx@users.noreply.github.com> Date: Wed, 1 Apr 2026 21:25:05 +0200 Subject: [PATCH 5/8] use non-negative values for I;16 test data to match unsigned semantics --- test/test_transforms.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/test/test_transforms.py b/test/test_transforms.py index 32bf139e8a9..5b2658c99a4 100644 --- a/test/test_transforms.py +++ b/test/test_transforms.py @@ -614,8 +614,8 @@ def _get_1_channel_tensor_various_types(): expected_output = img_data_byte.float().div(255.0).numpy() yield img_data_byte, expected_output, "L" - img_data_short = torch.ShortTensor(1, 4, 4).random_() - expected_output = img_data_short.numpy().view(np.uint16) + img_data_short = torch.ShortTensor(1, 4, 4).random_(0, 32767) + expected_output = img_data_short.numpy().astype(np.uint16) yield img_data_short, expected_output, "I;16" if sys.byteorder == "little" else "I;16B" img_data_int = torch.IntTensor(1, 4, 4).random_() @@ -631,8 +631,8 @@ def _get_2d_tensor_various_types(): expected_output = img_data_byte.float().div(255.0).numpy() yield img_data_byte, expected_output, "L" - img_data_short = torch.ShortTensor(4, 4).random_() - expected_output = img_data_short.numpy().view(np.uint16) + img_data_short = torch.ShortTensor(4, 4).random_(0, 32767) + expected_output = img_data_short.numpy().astype(np.uint16) yield img_data_short, expected_output, "I;16" if sys.byteorder == "little" else "I;16B" img_data_int = torch.IntTensor(4, 4).random_() From 3fbe21e9d1cac5e4307ea7b2120f851ebf81712e Mon Sep 17 00:00:00 2001 From: knQzx <75641500+knQzx@users.noreply.github.com> Date: Thu, 2 Apr 2026 15:36:37 +0200 Subject: [PATCH 6/8] remove redundant astype in pil_to_tensor --- torchvision/transforms/functional.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/torchvision/transforms/functional.py b/torchvision/transforms/functional.py index fc0a612d592..dacdd283bfb 100644 --- a/torchvision/transforms/functional.py +++ b/torchvision/transforms/functional.py @@ -206,10 +206,7 @@ def pil_to_tensor(pic: Any) -> Tensor: return torch.as_tensor(nppic) # handle PIL Image - img = np.array(pic, copy=True) - if pic.mode == "I;16": - img = img.astype(np.uint16) - img = torch.as_tensor(img) + img = torch.as_tensor(np.array(pic, copy=True)) img = img.view(pic.size[1], pic.size[0], F_pil.get_image_num_channels(pic)) # put it from HWC to CHW format img = img.permute((2, 0, 1)) From aefaae12effd48dc781dbe3283f63c7aa2180b75 Mon Sep 17 00:00:00 2001 From: knQzx <75641500+knQzx@users.noreply.github.com> Date: Thu, 2 Apr 2026 16:15:58 +0200 Subject: [PATCH 7/8] add docs note about uint16/32/64 not being officially supported --- docs/source/transforms.rst | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/docs/source/transforms.rst b/docs/source/transforms.rst index 529815ead9a..e360aee4a64 100644 --- a/docs/source/transforms.rst +++ b/docs/source/transforms.rst @@ -99,6 +99,15 @@ have values in ``[0, MAX_DTYPE]`` where ``MAX_DTYPE`` is the largest value that can be represented in that dtype. Typically, images of dtype ``torch.uint8`` are expected to have values in ``[0, 255]``. +.. note:: + + ``torch.uint16``, ``torch.uint32``, and ``torch.uint64`` dtypes are not + officially supported by the torchvision transforms. While some operations + may work, most transforms expect ``torch.uint8`` or ``torch.float32`` + inputs. If you're working with uint16 images (e.g. from 16-bit medical or + scientific imaging), consider converting to ``float32`` first using + :class:`~torchvision.transforms.v2.ToDtype`. + Use :class:`~torchvision.transforms.v2.ToDtype` to convert both the dtype and range of the inputs. From 2813bc12bbc0e6f517bc678d34a73f0c2adb24e1 Mon Sep 17 00:00:00 2001 From: knQzx <75641500+knQzx@users.noreply.github.com> Date: Tue, 14 Apr 2026 23:15:28 +0200 Subject: [PATCH 8/8] move uint16 note after ToDtype line and drop float32 specifics --- docs/source/transforms.rst | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/docs/source/transforms.rst b/docs/source/transforms.rst index e360aee4a64..bd2b9cf540d 100644 --- a/docs/source/transforms.rst +++ b/docs/source/transforms.rst @@ -99,17 +99,15 @@ have values in ``[0, MAX_DTYPE]`` where ``MAX_DTYPE`` is the largest value that can be represented in that dtype. Typically, images of dtype ``torch.uint8`` are expected to have values in ``[0, 255]``. +Use :class:`~torchvision.transforms.v2.ToDtype` to convert both the dtype and +range of the inputs. + .. note:: ``torch.uint16``, ``torch.uint32``, and ``torch.uint64`` dtypes are not officially supported by the torchvision transforms. While some operations may work, most transforms expect ``torch.uint8`` or ``torch.float32`` - inputs. If you're working with uint16 images (e.g. from 16-bit medical or - scientific imaging), consider converting to ``float32`` first using - :class:`~torchvision.transforms.v2.ToDtype`. - -Use :class:`~torchvision.transforms.v2.ToDtype` to convert both the dtype and -range of the inputs. + inputs. .. _v1_or_v2: