Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion neuracore/core/endpoint.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,8 @@
from neuracore.core.get_latest_sync_point import get_latest_sync_point
from neuracore.core.utils.download import download_with_progress
from neuracore.ml.logging.endpoint_log_streamer import EndpointLogStreamer
from neuracore.ml.preprocessing.base import PreprocessingConfiguration
from neuracore.ml.utils.endpoint_storage_handler import EndpointStorageHandler
from neuracore.ml.utils.preprocessing_utils import PreprocessingConfiguration

from .auth import get_auth
from .const import API_URL, PING_ENDPOINT, PREDICT_ENDPOINT, SET_CHECKPOINT_ENDPOINT
Expand Down
6 changes: 2 additions & 4 deletions neuracore/core/utils/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,10 +29,8 @@
)
from neuracore.core.exceptions import InsufficientSynchronizedPointError
from neuracore.ml.logging.json_line_formatter import JsonLineLogFormatter
from neuracore.ml.utils.preprocessing_utils import (
PreprocessingConfiguration,
resolve_preprocessing_config,
)
from neuracore.ml.preprocessing.base import PreprocessingConfiguration
from neuracore.ml.utils.preprocessing_utils import resolve_preprocessing_config

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: since it's already in the utils file, no need to call it preprocessing_utils


logger = logging.getLogger(__name__)

Expand Down
6 changes: 2 additions & 4 deletions neuracore/ml/datasets/pytorch_synchronized_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,12 +29,10 @@
)
from neuracore.ml import BatchedTrainingSamples
from neuracore.ml.datasets.pytorch_neuracore_dataset import PytorchNeuracoreDataset
from neuracore.ml.preprocessing.base import PreprocessingConfiguration
from neuracore.ml.utils.json_serialization import JsonValue, to_json_serializable
from neuracore.ml.utils.memory_monitor import MemoryMonitor
from neuracore.ml.utils.preprocessing_utils import (
PreprocessingConfiguration,
apply_preprocessing_methods,
)
from neuracore.ml.utils.preprocessing_utils import apply_preprocessing_methods

logger = logging.getLogger(__name__)

Expand Down
3 changes: 2 additions & 1 deletion neuracore/ml/preprocessing/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
"""Preprocessing runtime utilities."""

from .base import PreprocessingMethod
from .base import PreprocessingConfiguration, PreprocessingMethod

__all__ = [
"PreprocessingConfiguration",
"PreprocessingMethod",
]
29 changes: 29 additions & 0 deletions neuracore/ml/preprocessing/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,3 +37,32 @@ def to_dict(self) -> dict[str, Any]:
params[param_name] = getattr(self, param_name, None)

return {"_target_": target_name, **params}

def __str__(self) -> str:
    """Format the method as ``ClassName(param=value, ...)``.

    The parameters come from ``self.to_dict()``; the ``"_target_"`` entry
    is left out and every remaining value is shown with ``repr``.
    """
    rendered_params = []
    for key, val in self.to_dict().items():
        # Only the actual parameters are listed, not the "_target_" entry.
        if key == "_target_":
            continue
        rendered_params.append(f"{key}={val!r}")
    joined = ", ".join(rendered_params)
    return f"{self.__class__.__name__}({joined})"

def __repr__(self) -> str:
    """Return the same text as ``__str__`` so debug output stays readable."""
    return str(self)
Comment on lines +41 to +49

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I feel __str__ and __repr__ should be flipped here.

__repr__ should tell you more about the class, while __str__ is supposed to be an easy-to-interpret string representation.



class PreprocessingConfiguration(dict[DataType, list[PreprocessingMethod]]):
    """Dictionary mapping each data type to its list of preprocessing methods."""

    def __str__(self) -> str:
        """Render the pipeline as text, one data type per line.

        An empty configuration renders as ``PreprocessingConfiguration({})``.
        Otherwise entries are ordered by the data type's ``value`` so the
        output does not depend on insertion order.
        """
        if len(self) == 0:
            return "PreprocessingConfiguration({})"

        ordered_types = sorted(self.keys(), key=lambda dt: dt.value)
        entries = []
        for dt in ordered_types:
            rendered_methods = ", ".join(str(step) for step in self[dt])
            entries.append(f" {dt.value}: [{rendered_methods}]")
        body = "\n".join(entries)
        return "PreprocessingConfiguration({\n" + body + "\n})"

    def __repr__(self) -> str:
        """Return the same rendering as ``__str__``."""
        return str(self)
6 changes: 2 additions & 4 deletions neuracore/ml/train.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
from neuracore.ml.logging.cloud_training_logger import CloudTrainingLogger
from neuracore.ml.logging.json_line_formatter import JsonLineLogFormatter
from neuracore.ml.logging.tensorboard_training_logger import TensorboardTrainingLogger
from neuracore.ml.preprocessing.base import PreprocessingConfiguration
from neuracore.ml.trainers.batch_autotuner import (
find_optimal_batch_size,
is_valid_batch_size,
Expand All @@ -49,10 +50,7 @@
from neuracore.ml.utils.algorithm_loader import AlgorithmLoader
from neuracore.ml.utils.algorithm_storage_handler import AlgorithmStorageHandler
from neuracore.ml.utils.device_utils import cpu_count, get_default_device
from neuracore.ml.utils.preprocessing_utils import (
PreprocessingConfiguration,
resolve_preprocessing_config,
)
from neuracore.ml.utils.preprocessing_utils import resolve_preprocessing_config
from neuracore.ml.utils.training_config import (
resolve_to_complete_config,
resolve_user_input_config,
Expand Down
14 changes: 8 additions & 6 deletions neuracore/ml/utils/nc_archive.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,13 +18,11 @@
from omegaconf import OmegaConf

from neuracore.ml.core.neuracore_model import NeuracoreModel
from neuracore.ml.preprocessing.base import PreprocessingConfiguration
from neuracore.ml.utils.algorithm_loader import AlgorithmLoader
from neuracore.ml.utils.device_utils import get_default_device
from neuracore.ml.utils.json_serialization import to_json_serializable
from neuracore.ml.utils.preprocessing_utils import (
PreprocessingConfiguration,
resolve_preprocessing_config,
)
from neuracore.ml.utils.preprocessing_utils import resolve_preprocessing_config

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -387,7 +385,9 @@ def load_model_from_nc_archive(
_archive_path(extracted_files, "output_cross_embodiment_description")
) as f:
output_cross_embodiment_description = json.load(f)
input_preprocessing_config: PreprocessingConfiguration = {}
input_preprocessing_config: PreprocessingConfiguration = (
PreprocessingConfiguration()
)
if "input_preprocessing_config" in extracted_files:
with open(
_archive_path(extracted_files, "input_preprocessing_config")
Expand All @@ -401,7 +401,9 @@ def load_model_from_nc_archive(
logger.warning(
"Input preprocessing config in model archive is empty"
)
output_preprocessing_config: PreprocessingConfiguration = {}
output_preprocessing_config: PreprocessingConfiguration = (
PreprocessingConfiguration()
)
if "output_preprocessing_config" in extracted_files:
with open(
_archive_path(extracted_files, "output_preprocessing_config")
Expand Down
2 changes: 1 addition & 1 deletion neuracore/ml/utils/policy_inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,10 @@
)
from neuracore.core.utils.http_session import thread_local_session
from neuracore.ml import BatchedInferenceInputs
from neuracore.ml.preprocessing.base import PreprocessingConfiguration
from neuracore.ml.utils.device_utils import get_default_device
from neuracore.ml.utils.nc_archive import load_model_from_nc_archive
from neuracore.ml.utils.preprocessing_utils import (
PreprocessingConfiguration,
apply_preprocessing_methods,
validate_preprocessing_configuration,
)
Expand Down
11 changes: 6 additions & 5 deletions neuracore/ml/utils/preprocessing_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,10 @@
if TYPE_CHECKING:
from neuracore_types import BatchedNCData

from neuracore.ml.preprocessing.base import PreprocessingMethod

PreprocessingConfiguration = dict[DataType, list[PreprocessingMethod]]
from neuracore.ml.preprocessing.base import (
PreprocessingConfiguration,
PreprocessingMethod,
)


def validate_preprocessing_configuration(
Expand Down Expand Up @@ -56,10 +57,10 @@ def resolve_preprocessing_config(
from hydra.utils import instantiate

preprocessing_methods = instantiate(config_dict, _convert_="all")
resolved_config = {
resolved_config = PreprocessingConfiguration({
DataType(data_type): methods
for data_type, methods in preprocessing_methods.items()
}
})
validate_preprocessing_configuration(preprocessing_config=resolved_config)
return resolved_config

Expand Down
10 changes: 7 additions & 3 deletions neuracore/ml/utils/training_storage_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@
from neuracore.core.config.get_current_org import get_current_org
from neuracore.core.const import API_URL
from neuracore.core.utils.http_session import thread_local_session
from neuracore.ml.preprocessing.base import PreprocessingConfiguration
from neuracore.ml.utils.nc_archive import create_nc_archive
from neuracore.ml.utils.preprocessing_utils import PreprocessingConfiguration
from neuracore.ml.utils.upload_storage_mixin import UploadStorageMixin

logger = logging.getLogger(__name__)
Expand All @@ -30,8 +30,12 @@ def __init__(
algorithm_config: dict = {},
input_cross_embodiment_description: dict[str, Any] = {},
output_cross_embodiment_description: dict[str, Any] = {},
input_preprocessing_config: PreprocessingConfiguration = {},
output_preprocessing_config: PreprocessingConfiguration = {},
input_preprocessing_config: PreprocessingConfiguration = (
PreprocessingConfiguration()
),
output_preprocessing_config: PreprocessingConfiguration = (
PreprocessingConfiguration()
),
) -> None:
"""Initialize the storage handler.

Expand Down
22 changes: 13 additions & 9 deletions neuracore/ml/utils/validate.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,9 @@

import neuracore as nc
from neuracore.ml.logging.json_line_formatter import JsonLineLogFormatter
from neuracore.ml.preprocessing.base import PreprocessingConfiguration
from neuracore.ml.preprocessing.methods.resize_pad import ResizePad
from neuracore.ml.utils.device_utils import get_default_device
from neuracore.ml.utils.preprocessing_utils import PreprocessingConfiguration

from ..core.ml_types import BatchedTrainingOutputs, BatchedTrainingSamples
from ..datasets.pytorch_dummy_dataset import MAX_LEN_PER_DATA_TYPE, PytorchDummyDataset
Expand Down Expand Up @@ -150,14 +150,18 @@ def run_validation(
logger.info(f"Supported output data types: {supported_output_data_types}")

# Build validation preprocessing configuration
input_preprocessing_config: PreprocessingConfiguration = {
DataType.RGB_IMAGES: [ResizePad(size=(224, 224))],
DataType.DEPTH_IMAGES: [ResizePad(size=(224, 224))],
}
output_preprocessing_config: PreprocessingConfiguration = {
DataType.RGB_IMAGES: [ResizePad(size=(224, 224))],
DataType.DEPTH_IMAGES: [ResizePad(size=(224, 224))],
}
input_preprocessing_config: PreprocessingConfiguration = (
PreprocessingConfiguration({
DataType.RGB_IMAGES: [ResizePad(size=(224, 224))],
DataType.DEPTH_IMAGES: [ResizePad(size=(224, 224))],
})
)
output_preprocessing_config: PreprocessingConfiguration = (
PreprocessingConfiguration({
DataType.RGB_IMAGES: [ResizePad(size=(224, 224))],
DataType.DEPTH_IMAGES: [ResizePad(size=(224, 224))],
})
)

# Create dummy cross-embodiment descriptions
input_cross_embodiment_description = {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,8 @@
PytorchSynchronizedDataset,
_cacheable_cross_embodiment_description,
)
from neuracore.ml.preprocessing.base import PreprocessingConfiguration
from neuracore.ml.preprocessing.methods.resize_pad import ResizePad
from neuracore.ml.utils.preprocessing_utils import PreprocessingConfiguration

DATA_ITEMS = 3

Expand Down
23 changes: 23 additions & 0 deletions tests/unit/ml/preprocessing/test_preprocessing_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,3 +40,26 @@ def test_resolve_preprocessing_config_to_dict_is_json_serializable():
# before the fix.
serialized = json.dumps(method.to_dict())
assert '"size"' in serialized


def test_preprocessing_configuration_str_is_human_readable():
    """Check the full rendering of a resolved preprocessing configuration.

    The configuration is declared with RGB_IMAGES first, but the rendering
    sorts entries by data type value, so DEPTH_IMAGES must come first.
    """
    pytest.importorskip("hydra")
    pytest.importorskip("torch")
    resize_pad_target = "neuracore.ml.preprocessing.methods.resize_pad.ResizePad"
    cfg = OmegaConf.create({
        "RGB_IMAGES": [{"_target_": resize_pad_target, "size": [224, 224]}],
        "DEPTH_IMAGES": [{"_target_": resize_pad_target, "size": [224, 224]}],
    })
    resolved = resolve_preprocessing_config(cfg)

    rendered = str(resolved)

    # Cheap checks first so a regression gives a targeted failure message.
    assert "object at 0x" not in rendered
    assert "DataType." not in rendered

    # Pin the whole rendering: header, one line per data type in sorted
    # order, and footer. Lines are stripped so the check does not depend on
    # the exact indentation used for entries.
    assert [line.strip() for line in rendered.splitlines()] == [
        "PreprocessingConfiguration({",
        "DEPTH_IMAGES: [ResizePad(size=[224, 224])]",
        "RGB_IMAGES: [ResizePad(size=[224, 224])]",
        "})",
    ]
Comment on lines +61 to +65

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's a good idea to assert the whole string here for strict correctness.

Loading