From 9ff258cd28b3be2eaaf15132b1cbf52c6a2d8fca Mon Sep 17 00:00:00 2001 From: ddl-rliu <140021987+ddl-rliu@users.noreply.github.com> Date: Mon, 6 Apr 2026 15:33:18 -0700 Subject: [PATCH] Revert "Add FileDownloadConfig annotation for FlyteFile inputs (1.16.4) (#9)" This reverts commit e30a8edfa72124787aa041c9dffa0d9c315a7a72. --- flytekit/core/type_engine.py | 48 ------------------- flytekit/models/core/types.py | 40 ++-------------- flytekit/types/file/file.py | 35 ++------------ pyproject.toml | 3 +- tests/flytekit/unit/core/test_flyte_file.py | 30 +----------- tests/flytekit/unit/models/core/test_types.py | 22 --------- 6 files changed, 9 insertions(+), 169 deletions(-) diff --git a/flytekit/core/type_engine.py b/flytekit/core/type_engine.py index d865cef300..58ba0b8556 100644 --- a/flytekit/core/type_engine.py +++ b/flytekit/core/type_engine.py @@ -10,7 +10,6 @@ import json import mimetypes import os -import re import sys import textwrap import threading @@ -105,53 +104,6 @@ def get_batch_size(t: Type) -> Optional[int]: return None -class FileDownloadConfig: - """ - This is used to annotate a FlyteFile when we want to download the file with a specific extension. For example, - - ```python - # ContainerTask - def t1(file: Annotated[FlyteFile, FileDownloadConfig(file_extension="csv")]): - ... # copilot downloads the file to e.g. /inputs/file.csv - - versus... - - def t1(file: FlyteFile["csv"]): - ... # copilot downloads the file to e.g. /inputs/file - ``` - - file_extension: (Default is "") The file extension (e.g. "csv", "parquet") to use during copilot download. - enable_legacy_filename: (Default is False) When true and file_extension is non-empty, the copilot download phase - writes the blob to both the full path (with extension) and the old path (without extension), preserving backward compatibility for - workflows with tasks that may read from both. - """ - - def __init__(self, file_extension: str = "", enable_legacy_filename: bool = False): - self._file_extension = file_extension - self._enable_legacy_filename = enable_legacy_filename - - if self._file_extension is not "": - pattern = r"^[a-zA-Z0-9]+(\.[a-zA-Z0-9]+)*$" - if not re.match(pattern, self._file_extension): - raise ValueError(f"Invalid file extension: {self._file_extension}") - - @property - def file_extension(self) -> str: - return self._file_extension - - @property - def enable_legacy_filename(self) -> bool: - return self._enable_legacy_filename - - -def get_file_download_config(t: Type) -> Optional[FileDownloadConfig]: - if is_annotated(t): - for arg in get_args(t): - if isinstance(arg, FileDownloadConfig): - return arg - return None - - def modify_literal_uris(lit: Literal): """ Modifies the literal object recursively to replace the URIs with the native paths in case they are of diff --git a/flytekit/models/core/types.py b/flytekit/models/core/types.py index e01068f95e..4508961bbc 100644 --- a/flytekit/models/core/types.py +++ b/flytekit/models/core/types.py @@ -38,19 +38,13 @@ class BlobDimensionality(object): SINGLE = _types_pb2.BlobType.SINGLE MULTIPART = _types_pb2.BlobType.MULTIPART - def __init__(self, format, dimensionality, file_extension="", enable_legacy_filename=False): + def __init__(self, format, dimensionality): """ :param Text format: A string describing the format of the underlying blob data. :param int dimensionality: An integer from BlobType.BlobDimensionality enum - :param Text file_extension: The file extension (e.g. "csv", "parquet") to use - during copilot download, e.g. "csv", "parquet". Empty by default. - :param bool enable_legacy_filename: When True and file_extension is set, the copilot - download phase writes the blob to both the extended path and the base path. """ self._format = format self._dimensionality = dimensionality - self._file_extension = file_extension - self._enable_legacy_filename = enable_legacy_filename @property def format(self): @@ -68,34 +62,11 @@ def dimensionality(self): """ return self._dimensionality - @property - def file_extension(self): - """ - The file extension (e.g. "csv", "parquet") to use during copilot download. - Default is "", which means no extension is appended. - :rtype: Text - """ - return self._file_extension - - @property - def enable_legacy_filename(self): - """ - When True and file_extension is set, the copilot download writes the blob to - both the full path (with extension) and the old path (without extension). - :rtype: bool - """ - return self._enable_legacy_filename - def to_flyte_idl(self): """ :rtype: flyteidl.core.types_pb2.BlobType """ - return _types_pb2.BlobType( - format=self.format, - dimensionality=self.dimensionality, - file_extension=self._file_extension, - enable_legacy_filename=self._enable_legacy_filename, - ) + return _types_pb2.BlobType(format=self.format, dimensionality=self.dimensionality) @classmethod def from_flyte_idl(cls, proto): @@ -103,9 +74,4 @@ def from_flyte_idl(cls, proto): :param flyteidl.core.types_pb2.BlobType proto: :rtype: BlobType """ - return cls( - format=proto.format, - dimensionality=proto.dimensionality, - file_extension=proto.file_extension, - enable_legacy_filename=proto.enable_legacy_filename, - ) + return cls(format=proto.format, dimensionality=proto.dimensionality) diff --git a/flytekit/types/file/file.py b/flytekit/types/file/file.py index 53309f7dab..780188f9e5 100644 --- a/flytekit/types/file/file.py +++ b/flytekit/types/file/file.py @@ -24,7 +24,6 @@ AsyncTypeTransformer, TypeEngine, TypeTransformerFailedError, - get_file_download_config, get_underlying_type, ) from flytekit.exceptions.user import FlyteAssertion @@ -463,26 +462,8 @@ def get_format(t: typing.Union[typing.Type[FlyteFile], os.PathLike]) -> str: return "" return cast(FlyteFile, t).extension() - @staticmethod - def get_file_extension(t: typing.Union[typing.Type[FlyteFile], os.PathLike]) -> str: - if t is os.PathLike: - return "" - file_download_config = get_file_download_config(t) - if file_download_config is None: - return "" - return file_download_config.file_extension or "" - - @staticmethod - def get_enable_legacy_filename(t: typing.Union[typing.Type[FlyteFile], os.PathLike]) -> str: - if t is os.PathLike: - return False - file_download_config = get_file_download_config(t) - if file_download_config is None: - return False - return file_download_config.enable_legacy_filename or False - - def _blob_type(self, format: str, file_extension: str = "", enable_legacy_filename: bool = False) -> BlobType: - return BlobType(format=format, dimensionality=BlobType.BlobDimensionality.SINGLE, file_extension=file_extension, enable_legacy_filename=enable_legacy_filename) + def _blob_type(self, format: str) -> BlobType: + return BlobType(format=format, dimensionality=BlobType.BlobDimensionality.SINGLE) def assert_type( self, t: typing.Union[typing.Type[FlyteFile], os.PathLike], v: typing.Union[FlyteFile, os.PathLike, str] @@ -495,11 +476,7 @@ def assert_type( ) def get_literal_type(self, t: typing.Union[typing.Type[FlyteFile], os.PathLike]) -> LiteralType: - return LiteralType(blob=self._blob_type( - format=FlyteFilePathTransformer.get_format(t), - file_extension=FlyteFilePathTransformer.get_file_extension(t), - enable_legacy_filename=FlyteFilePathTransformer.get_enable_legacy_filename(t), - )) + return LiteralType(blob=self._blob_type(format=FlyteFilePathTransformer.get_format(t))) def get_mime_type_from_extension(self, extension: str) -> typing.Union[str, typing.Sequence[str]]: extension_to_mime_type = { @@ -573,11 +550,7 @@ async def async_to_literal( raise ValueError(f"Incorrect type {python_type}, must be either a FlyteFile or os.PathLike") # information used by all cases - meta = BlobMetadata(type=self._blob_type( - format=FlyteFilePathTransformer.get_format(python_type), - file_extension=FlyteFilePathTransformer.get_file_extension(python_type), - enable_legacy_filename=FlyteFilePathTransformer.get_enable_legacy_filename(python_type), - )) + meta = BlobMetadata(type=self._blob_type(format=FlyteFilePathTransformer.get_format(python_type))) if isinstance(python_val, FlyteFile): # Cast the source path to str type to avoid error raised when the source path is used as the blob uri, diff --git a/pyproject.toml b/pyproject.toml index 32c9842585..7ed9400d37 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -20,8 +20,7 @@ dependencies = [ "diskcache>=5.2.1", "docker>=4.0.0", "docstring-parser>=0.9.0", - # Points to the 1.16.4-domino branch - "flyteidl @ git+https://github.com/dominodatalab/flyte.git@f638b2661963885651dd3be8cc9b958010f041cf#subdirectory=flyteidl", + "flyteidl>=1.15.4b0,<2.0.0a0", "fsspec>=2023.3.0", # Bug in 2025.5.0, 2025.5.0post1 https://github.com/fsspec/gcsfs/issues/687 # Bug in 2024.2.0 https://github.com/fsspec/gcsfs/pull/643 diff --git a/tests/flytekit/unit/core/test_flyte_file.py b/tests/flytekit/unit/core/test_flyte_file.py index 91158fb229..fb0903c567 100644 --- a/tests/flytekit/unit/core/test_flyte_file.py +++ b/tests/flytekit/unit/core/test_flyte_file.py @@ -17,7 +17,7 @@ from flytekit.core.hash import HashMethod from flytekit.core.launch_plan import LaunchPlan from flytekit.core.task import task -from flytekit.core.type_engine import FileDownloadConfig, TypeEngine +from flytekit.core.type_engine import TypeEngine from flytekit.core.workflow import workflow from flytekit.models.core.types import BlobType from flytekit.models.literals import LiteralMap, Blob, BlobMetadata @@ -764,34 +764,6 @@ def test_headers(): assert len(FlyteFilePathTransformer.get_additional_headers(".gz")) == 1 -def test_transform_flytefile_with_file_download_config(): - csv_file_no_config = FlyteFile["csv"] - lt = FlyteFilePathTransformer().get_literal_type(csv_file_no_config) - assert lt.blob.file_extension == "" - assert lt.blob.enable_legacy_filename == False - - legacy_file = Annotated[FlyteFile["csv"], FileDownloadConfig(file_extension="csv", enable_legacy_filename=True)] - lt = FlyteFilePathTransformer().get_literal_type(legacy_file) - assert lt.blob.file_extension == "csv" - assert lt.blob.enable_legacy_filename == True - - -def test_file_download_config_valid_compound_extension(): - config = FileDownloadConfig(file_extension="tar.gz") - assert config.file_extension == "tar.gz" - - -@pytest.mark.parametrize("bad_ext", [ - ".csv", - "my file", - "../../escape", - "csv!", -]) -def test_file_download_config_rejects_invalid_extensions(bad_ext): - with pytest.raises(ValueError, match="Invalid file extension"): - FileDownloadConfig(file_extension=bad_ext) - - def test_new_remote_file(): nf = FlyteFile.new_remote_file(name="foo.txt") assert isinstance(nf, FlyteFile) diff --git a/tests/flytekit/unit/models/core/test_types.py b/tests/flytekit/unit/models/core/test_types.py index bf4124eb67..21d6cea396 100644 --- a/tests/flytekit/unit/models/core/test_types.py +++ b/tests/flytekit/unit/models/core/test_types.py @@ -15,33 +15,11 @@ def test_blob_type(): ) assert o.format == "csv" assert o.dimensionality == _types.BlobType.BlobDimensionality.SINGLE - assert o.file_extension == "" - assert o.enable_legacy_filename == False o2 = _types.BlobType.from_flyte_idl(o.to_flyte_idl()) assert o == o2 assert o2.format == "csv" assert o2.dimensionality == _types.BlobType.BlobDimensionality.SINGLE - assert o2.file_extension == "" - assert o2.enable_legacy_filename == False - - o = _types.BlobType( - format="csv", - dimensionality=_types.BlobType.BlobDimensionality.SINGLE, - file_extension="csv", - enable_legacy_filename=True, - ) - assert o.format == "csv" - assert o.dimensionality == _types.BlobType.BlobDimensionality.SINGLE - assert o.file_extension == "csv" - assert o.enable_legacy_filename == True - - o2 = _types.BlobType.from_flyte_idl(o.to_flyte_idl()) - assert o == o2 - assert o2.format == "csv" - assert o2.dimensionality == _types.BlobType.BlobDimensionality.SINGLE - assert o2.file_extension == "csv" - assert o2.enable_legacy_filename == True def test_enum_type():