diff --git a/cookiecutter.json b/cookiecutter.json deleted file mode 100644 index 8df2587f..00000000 --- a/cookiecutter.json +++ /dev/null @@ -1,3 +0,0 @@ -{ - "__please_use_databricks_cli_for_project_creation": "Please refer to README.md" -} diff --git a/template/update_layout.tmpl b/template/update_layout.tmpl index a666e23c..006aec52 100644 --- a/template/update_layout.tmpl +++ b/template/update_layout.tmpl @@ -35,22 +35,22 @@ # Remove Delta and Feature Store code in cases of MLflow Recipes. {{ if (eq .input_include_mlflow_recipes `yes`) }} # delta_paths - {{ skip (printf `%s/%s/%s` $root_dir $project_name_alphanumeric_underscore `training/notebooks/Train.py`) }} + {{ skip (printf `%s/%s/%s` $root_dir $project_name_alphanumeric_underscore `training/Train.py`) }} # feature_store_paths {{ skip (printf `%s/%s/%s` $root_dir $project_name_alphanumeric_underscore `feature_engineering`) }} {{ skip (printf `%s/%s/%s` $root_dir $project_name_alphanumeric_underscore `tests/feature_engineering`) }} - {{ skip (printf `%s/%s/%s` $root_dir $project_name_alphanumeric_underscore `training/notebooks/TrainWithFeatureStore.py`) }} + {{ skip (printf `%s/%s/%s` $root_dir $project_name_alphanumeric_underscore `training/TrainWithFeatureStore.py`) }} {{ skip (printf `%s/%s/%s` $root_dir $project_name_alphanumeric_underscore `resources/feature-engineering-workflow-resource.yml`) }} # Remove Delta and MLflow Recipes code in cases of Feature Store. {{ else if (eq .input_include_feature_store `yes`) }} # delta_paths - {{ skip (printf `%s/%s/%s` $root_dir $project_name_alphanumeric_underscore `training/notebooks/Train.py`) }} + {{ skip (printf `%s/%s/%s` $root_dir $project_name_alphanumeric_underscore `training/Train.py`) }} # recipe_paths {{ skip (printf `%s/%s/%s` $root_dir $project_name_alphanumeric_underscore `training/profiles`) }} {{ skip (printf `%s/%s/%s` $root_dir $project_name_alphanumeric_underscore `training/steps`) }} {{ skip (printf `%s/%s/%s` $root_dir $project_name_alphanumeric_underscore `training/data`) }} {{ skip (printf `%s/%s/%s` $root_dir $project_name_alphanumeric_underscore `training/__init__.py`) }} - {{ skip (printf `%s/%s/%s` $root_dir $project_name_alphanumeric_underscore `training/notebooks/TrainWithMLflowRecipes.py`) }} + {{ skip (printf `%s/%s/%s` $root_dir $project_name_alphanumeric_underscore `training/TrainWithMLflowRecipes.py`) }} {{ skip (printf `%s/%s/%s` $root_dir $project_name_alphanumeric_underscore `training/recipe.yaml`) }} {{ skip (printf `%s/%s/%s` $root_dir $project_name_alphanumeric_underscore `training/README.md`) }} {{ skip (printf `%s/%s/%s` $root_dir $project_name_alphanumeric_underscore `tests/training/ingest_test.py`) }} @@ -65,7 +65,7 @@ {{ skip (printf `%s/%s/%s` $root_dir $project_name_alphanumeric_underscore `training/steps`) }} {{ skip (printf `%s/%s/%s` $root_dir $project_name_alphanumeric_underscore `training/data`) }} {{ skip (printf `%s/%s/%s` $root_dir $project_name_alphanumeric_underscore `training/__init__.py`) }} - {{ skip (printf `%s/%s/%s` $root_dir $project_name_alphanumeric_underscore `training/notebooks/TrainWithMLflowRecipes.py`) }} + {{ skip (printf `%s/%s/%s` $root_dir $project_name_alphanumeric_underscore `training/TrainWithMLflowRecipes.py`) }} {{ skip (printf `%s/%s/%s` $root_dir $project_name_alphanumeric_underscore `training/recipe.yaml`) }} {{ skip (printf `%s/%s/%s` $root_dir $project_name_alphanumeric_underscore `training/README.md`) }} {{ skip (printf `%s/%s/%s` $root_dir $project_name_alphanumeric_underscore `tests/training/ingest_test.py`) }} @@ -76,7 +76,7 @@ # feature_store_paths {{ skip (printf `%s/%s/%s` $root_dir $project_name_alphanumeric_underscore `feature_engineering`) }} {{ skip (printf `%s/%s/%s` $root_dir $project_name_alphanumeric_underscore `tests/feature_engineering`) }} - {{ skip (printf `%s/%s/%s` $root_dir $project_name_alphanumeric_underscore `training/notebooks/TrainWithFeatureStore.py`) }} + {{ skip (printf `%s/%s/%s` $root_dir $project_name_alphanumeric_underscore `training/TrainWithFeatureStore.py`) }} {{ skip (printf `%s/%s/%s` $root_dir $project_name_alphanumeric_underscore `resources/feature-engineering-workflow-resource.yml`) }} {{ end }} diff --git a/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/README.md.tmpl b/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/README.md.tmpl index cdb7f98e..b84e027a 100644 --- a/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/README.md.tmpl +++ b/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/README.md.tmpl @@ -146,7 +146,7 @@ In each module, there is `compute_features_fn` method that you need to implement The output dataframe will be persisted in a [time-series Feature Store table]({{ template `generate_doc_link` (map (pair "cloud" .input_cloud) (pair "path" "machine-learning/feature-store/time-series.html")) }}). See the example modules' documentation for more information. * Python unit tests for feature computation modules in `tests/feature_engineering` folder. -* Feature engineering notebook, `feature_engineering/notebooks/GenerateAndWriteFeatures.py`, that reads input dataframes, dynamically loads feature computation modules, executes their `compute_features_fn` method and writes the outputs to a Feature Store table (creating it if missing). +* Feature engineering notebook, `feature_engineering/GenerateAndWriteFeatures.py`, that reads input dataframes, dynamically loads feature computation modules, executes their `compute_features_fn` method and writes the outputs to a Feature Store table (creating it if missing). * Training notebook that [trains]({{ template `generate_doc_link` (map (pair "cloud" .input_cloud) (pair "path" "machine-learning/feature-store/train-models-with-feature-store.html")) }} ) a regression model by creating a training dataset using the Feature Store client. * Model deployment and batch inference notebooks that deploy and use the trained model. * An automated integration test is provided (in `.github/workflows/{{ .input_project_name }}-run-tests.yml`) that executes a multi task run on Databricks involving the feature engineering and model training notebooks. @@ -200,7 +200,7 @@ Otherwise, e.g. if iterating on ML code for a new project, follow the steps belo You can iterate on the feature transform modules locally in your favorite IDE before running them on Databricks. #### Running code on Databricks -You can iterate on ML code by running the provided `feature_engineering/notebooks/GenerateAndWriteFeatures.py` notebook on Databricks using +You can iterate on ML code by running the provided `feature_engineering/GenerateAndWriteFeatures.py` notebook on Databricks using [Repos]({{ template `generate_doc_link` (map (pair "cloud" .input_cloud) (pair "path" "repos/index.html")) }}). This notebook drives execution of the feature transforms code defined under ``features``. You can use multiple browser tabs to edit logic in `features` and run the feature engineering pipeline in the `GenerateAndWriteFeatures.py` notebook. diff --git a/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/deployment/batch_inference/notebooks/BatchInference.py.tmpl b/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/deployment/batch_inference/BatchInference.py.tmpl similarity index 85% rename from template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/deployment/batch_inference/notebooks/BatchInference.py.tmpl rename to template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/deployment/batch_inference/BatchInference.py.tmpl index 99bff2b9..8f893a6b 100644 --- a/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/deployment/batch_inference/notebooks/BatchInference.py.tmpl +++ b/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/deployment/batch_inference/BatchInference.py.tmpl @@ -39,14 +39,7 @@ dbutils.widgets.text( # COMMAND ---------- -import os - -notebook_path = '/Workspace/' + os.path.dirname(dbutils.notebook.entry_point.getDbutils().notebook().getContext().notebookPath().get()) -%cd $notebook_path - -# COMMAND ---------- - -# MAGIC %pip install -r ../../../requirements.txt +# MAGIC %pip install -r ../../requirements.txt # COMMAND ---------- @@ -54,18 +47,7 @@ dbutils.library.restartPython() # COMMAND ---------- -import sys -import os -notebook_path = '/Workspace/' + os.path.dirname(dbutils.notebook.entry_point.getDbutils().notebook().getContext().notebookPath().get()) -%cd $notebook_path -%cd .. -sys.path.append("../..") - -# COMMAND ---------- - # DBTITLE 1,Define input and output variables -{{- if (eq .input_include_models_in_unity_catalog "no") }} -from utils import get_deployed_model_stage_for_env{{end}} env = dbutils.widgets.get("env") input_table_name = dbutils.widgets.get("input_table_name") @@ -75,7 +57,7 @@ assert input_table_name != "", "input_table_name notebook parameter must be spec assert output_table_name != "", "output_table_name notebook parameter must be specified" assert model_name != "", "model_name notebook parameter must be specified" {{- if (eq .input_include_models_in_unity_catalog "no") }} -stage = get_deployed_model_stage_for_env(env) +stage = {"dev": "Staging", "staging": "Staging", "prod": "Production", "test": "Production"}[env] model_uri = f"models:/{model_name}/{stage}"{{else}} alias = "champion" model_uri = f"models:/{model_name}@{alias}"{{end}} diff --git a/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/deployment/model_deployment/notebooks/ModelDeployment.py.tmpl b/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/deployment/model_deployment/ModelDeployment.py.tmpl similarity index 90% rename from template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/deployment/model_deployment/notebooks/ModelDeployment.py.tmpl rename to template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/deployment/model_deployment/ModelDeployment.py.tmpl index 64f526ce..951ba945 100644 --- a/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/deployment/model_deployment/notebooks/ModelDeployment.py.tmpl +++ b/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/deployment/model_deployment/ModelDeployment.py.tmpl @@ -29,15 +29,6 @@ dbutils.widgets.dropdown("env", "None", ["None", "staging", "prod"], "Environmen # COMMAND ---------- -import os -import sys -notebook_path = '/Workspace/' + os.path.dirname(dbutils.notebook.entry_point.getDbutils().notebook().getContext().notebookPath().get()) -%cd $notebook_path -%cd .. -sys.path.append("../..") - -# COMMAND ---------- - from deploy import deploy model_uri = dbutils.jobs.taskValues.get("Train", "model_uri", debugValue="") diff --git a/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/deployment/model_deployment/deploy.py.tmpl b/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/deployment/model_deployment/deploy.py.tmpl index f60c9617..c2677996 100644 --- a/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/deployment/model_deployment/deploy.py.tmpl +++ b/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/deployment/model_deployment/deploy.py.tmpl @@ -1,8 +1,4 @@ import sys -import pathlib - -sys.path.append(str(pathlib.Path(__file__).parent.parent.parent.resolve())) -{{if (eq .input_include_models_in_unity_catalog "no")}}from utils import get_deployed_model_stage_for_env{{end}} from mlflow.tracking import MlflowClient {{ if (eq .input_include_models_in_unity_catalog "no") }} @@ -19,7 +15,7 @@ def deploy(model_uri, env): _, model_name, version = model_uri.split("/") client = MlflowClient() mv = client.get_model_version(model_name, version) - target_stage = get_deployed_model_stage_for_env(env) + target_stage = {"dev": "Staging", "staging": "Staging", "prod": "Production", "test": "Production"}[env] if mv.current_stage != target_stage: client.transition_model_version_stage( name=model_name, diff --git a/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/feature_engineering/notebooks/GenerateAndWriteFeatures.py.tmpl b/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/feature_engineering/GenerateAndWriteFeatures.py.tmpl similarity index 96% rename from template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/feature_engineering/notebooks/GenerateAndWriteFeatures.py.tmpl rename to template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/feature_engineering/GenerateAndWriteFeatures.py.tmpl index c8ac612c..262dae93 100644 --- a/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/feature_engineering/notebooks/GenerateAndWriteFeatures.py.tmpl +++ b/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/feature_engineering/GenerateAndWriteFeatures.py.tmpl @@ -63,13 +63,6 @@ dbutils.widgets.text( # COMMAND ---------- -import os -notebook_path = '/Workspace/' + os.path.dirname(dbutils.notebook.entry_point.getDbutils().notebook().getContext().notebookPath().get()) -%cd $notebook_path -%cd ../features - -# COMMAND ---------- - # DBTITLE 1,Define input and output variables input_table_path = dbutils.widgets.get("input_table_path") output_table_name = dbutils.widgets.get("output_table_name") diff --git a/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/monitoring/notebooks/MonitoredMetricViolationCheck.py.tmpl b/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/monitoring/MonitoredMetricViolationCheck.py.tmpl similarity index 91% rename from template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/monitoring/notebooks/MonitoredMetricViolationCheck.py.tmpl rename to template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/monitoring/MonitoredMetricViolationCheck.py.tmpl index bfec9085..7db01bf1 100644 --- a/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/monitoring/notebooks/MonitoredMetricViolationCheck.py.tmpl +++ b/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/monitoring/MonitoredMetricViolationCheck.py.tmpl @@ -40,15 +40,6 @@ dbutils.widgets.text( # COMMAND ---------- -import os -import sys -notebook_path = '/Workspace/' + os.path.dirname(dbutils.notebook.entry_point.getDbutils().notebook().getContext().notebookPath().get()) -%cd $notebook_path -%cd .. -sys.path.append("../..") - -# COMMAND ---------- - from metric_violation_check_query import sql_query table_name_under_monitor = dbutils.widgets.get("table_name_under_monitor") diff --git a/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/monitoring/metric_violation_check_query.py.tmpl b/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/monitoring/metric_violation_check_query.py.tmpl index 1dc40225..7be5eb29 100644 --- a/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/monitoring/metric_violation_check_query.py.tmpl +++ b/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/monitoring/metric_violation_check_query.py.tmpl @@ -1,10 +1,5 @@ # This file is used for the main SQL query that checks the last {num_evaluation_windows} metric violations and whether at least {num_violation_windows} of those runs violate the condition. -import sys -import pathlib - -sys.path.append(str(pathlib.Path(__file__).parent.parent.parent.resolve())) - """The SQL query is divided into three main parts. The first part selects the top {num_evaluation_windows} values of the metric to be monitored, ordered by the time window, and saves as recent_metrics. ```sql diff --git a/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/resources/README.md.tmpl b/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/resources/README.md.tmpl index a76b3ec0..231b1b44 100644 --- a/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/resources/README.md.tmpl +++ b/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/resources/README.md.tmpl @@ -163,7 +163,7 @@ Its central purpose is to evaluate a registered model and validate its quality b Model validation contains three components: * [model-workflow-resource.yml](./model-workflow-resource.yml) contains the resource config and input parameters for model validation. * [validation.py](../validation/validation.py) defines custom metrics and validation thresholds that are referenced by the above resource config files. -* [notebooks/ModelValidation](../validation/notebooks/ModelValidation.py) contains the validation job implementation. In most cases you don't need to modify this file. +* [ModelValidation](../validation/ModelValidation.py) contains the validation job implementation. In most cases you don't need to modify this file. To set up and enable model validation, update [validation.py](../validation/validation.py) to return desired custom metrics and validation thresholds, then resolve the `TODOs` in the ModelValidation task of [model-workflow-resource.yml](./model-workflow-resource.yml). @@ -177,9 +177,9 @@ Its central purpose is to track production model performances, feature distribut Monitoring contains four components: * [metric_violation_check_query.py](../monitoring/metric_violation_check_query.py) defines a query that checks for violation of the monitored metric. -* [notebooks/MonitoredMetricViolationCheck](../monitoring/notebooks/MonitoredMetricViolationCheck.py) acts as an entry point, executing the violation check query against the monitored inference table. +* [MonitoredMetricViolationCheck](../monitoring/MonitoredMetricViolationCheck.py) acts as an entry point, executing the violation check query against the monitored inference table. It emits a boolean value based on the query result. -* [monitoring-resource.yml](./monitoring-resource.yml) contains the resource config, inputs parameters for monitoring, and orchestrates model retraining based on monitoring. It first runs the [notebooks/MonitoredMetricViolationCheck](../monitoring/notebooks/MonitoredMetricViolationCheck.py) +* [monitoring-resource.yml](./monitoring-resource.yml) contains the resource config, inputs parameters for monitoring, and orchestrates model retraining based on monitoring. It first runs the [MonitoredMetricViolationCheck](../monitoring/MonitoredMetricViolationCheck.py) entry point then decides whether to execute the model retraining workflow. To set up and enable monitoring: @@ -226,7 +226,7 @@ resources: - task_key: batch_inference_job <<: *new_cluster notebook_task: - notebook_path: ../deployment/batch_inference/notebooks/BatchInference.py + notebook_path: ../deployment/batch_inference/BatchInference.py base_parameters: env: ${bundle.target} input_table_name: batch_inference_input_table_name @@ -234,12 +234,12 @@ resources: ``` The example above defines a Databricks job with name `${bundle.target}-{{ .input_project_name }}-batch-inference-job` -that runs the notebook under `{{template `project_name_alphanumeric_underscore` .}}/deployment/batch_inference/notebooks/BatchInference.py` to regularly apply your ML model for batch inference. +that runs the notebook under `{{template `project_name_alphanumeric_underscore` .}}/deployment/batch_inference/BatchInference.py` to regularly apply your ML model for batch inference. At the start of the resource definition, we declared an anchor `new_cluster` that will be referenced and used later. For more information about anchors in yaml schema, please refer to the [yaml documentation](https://yaml.org/spec/1.2.2/#3222-anchors-and-aliases). We specify a `batch_inference_job` under `resources/jobs` to define a databricks workflow with internal key `batch_inference_job` and job name `{bundle.target}-{{ .input_project_name }}-batch-inference-job`. -The workflow contains a single task with task key `batch_inference_job`. The task runs notebook `../deployment/batch_inference/notebooks/BatchInference.py` with provided parameters `env` and `input_table_name` passing to the notebook. +The workflow contains a single task with task key `batch_inference_job`. The task runs notebook `../deployment/batch_inference/BatchInference.py` with provided parameters `env` and `input_table_name` passing to the notebook. After setting up databricks CLI, you can run command `databricks bundle schema` to learn more about databricks CLI bundles schema. The notebook_path is the relative path starting from the resource yaml file. @@ -281,7 +281,7 @@ resources: - task_key: batch_inference_job <<: *new_cluster notebook_task: - notebook_path: ../deployment/batch_inference/notebooks/BatchInference.py + notebook_path: ../deployment/batch_inference/BatchInference.py base_parameters: env: ${bundle.target} input_table_name: ${var.batch_inference_input_table} diff --git a/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/resources/batch-inference-workflow-resource.yml.tmpl b/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/resources/batch-inference-workflow-resource.yml.tmpl index e11b6306..74b9fdaa 100644 --- a/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/resources/batch-inference-workflow-resource.yml.tmpl +++ b/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/resources/batch-inference-workflow-resource.yml.tmpl @@ -20,7 +20,7 @@ resources: - task_key: batch_inference_job <<: *new_cluster notebook_task: - notebook_path: ../deployment/batch_inference/notebooks/BatchInference.py + notebook_path: ../deployment/batch_inference/BatchInference.py base_parameters: env: ${bundle.target} {{ if (eq .input_include_feature_store `yes`) }}{{ if (eq .input_include_models_in_unity_catalog `yes`) }}input_table_name: ${bundle.target}.{{ .input_schema_name }}.feature_store_inference_input # TODO: create input table for inference diff --git a/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/resources/feature-engineering-workflow-resource.yml.tmpl b/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/resources/feature-engineering-workflow-resource.yml.tmpl index 8818181e..fc9d8df4 100644 --- a/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/resources/feature-engineering-workflow-resource.yml.tmpl +++ b/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/resources/feature-engineering-workflow-resource.yml.tmpl @@ -23,7 +23,7 @@ resources: - task_key: PickupFeatures job_cluster_key: write_feature_table_job_cluster notebook_task: - notebook_path: ../feature_engineering/notebooks/GenerateAndWriteFeatures.py + notebook_path: ../feature_engineering/GenerateAndWriteFeatures.py base_parameters: # TODO modify these arguments to reflect your setup. input_table_path: /databricks-datasets/nyctaxi-with-zipcodes/subsampled @@ -40,7 +40,7 @@ resources: - task_key: DropoffFeatures job_cluster_key: write_feature_table_job_cluster notebook_task: - notebook_path: ../feature_engineering/notebooks/GenerateAndWriteFeatures.py + notebook_path: ../feature_engineering/GenerateAndWriteFeatures.py base_parameters: # TODO: modify these arguments to reflect your setup. input_table_path: /databricks-datasets/nyctaxi-with-zipcodes/subsampled diff --git a/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/resources/model-workflow-resource.yml.tmpl b/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/resources/model-workflow-resource.yml.tmpl index 3c063b75..84deaebd 100644 --- a/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/resources/model-workflow-resource.yml.tmpl +++ b/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/resources/model-workflow-resource.yml.tmpl @@ -23,7 +23,7 @@ resources: - task_key: Train job_cluster_key: model_training_job_cluster {{ if and (eq .input_include_feature_store `no`) (eq .input_include_mlflow_recipes `no`) }}notebook_task: - notebook_path: ../training/notebooks/Train.py + notebook_path: ../training/Train.py base_parameters: env: ${bundle.target} # TODO: Update training_data_path @@ -34,7 +34,7 @@ resources: # git source information of current ML resource deployment. It will be persisted as part of the workflow run git_source_info: url:${bundle.git.origin_url}; branch:${bundle.git.branch}; commit:${bundle.git.commit} {{ else if (eq .input_include_feature_store `yes`) }}notebook_task: - notebook_path: ../training/notebooks/TrainWithFeatureStore.py + notebook_path: ../training/TrainWithFeatureStore.py base_parameters: env: ${bundle.target} # TODO: Update training_data_path @@ -49,7 +49,7 @@ resources: # git source information of current ML resource deployment. It will be persisted as part of the workflow run git_source_info: url:${bundle.git.origin_url}; branch:${bundle.git.branch}; commit:${bundle.git.commit} {{- else -}}notebook_task: - notebook_path: ../training/notebooks/TrainWithMLflowRecipes.py + notebook_path: ../training/TrainWithMLflowRecipes.py base_parameters: env: ${bundle.target} # git source information of current ML resource deployment. It will be persisted as part of the workflow run @@ -59,7 +59,7 @@ resources: depends_on: - task_key: Train notebook_task: - notebook_path: ../validation/notebooks/ModelValidation.py + notebook_path: ../validation/ModelValidation.py base_parameters: {{- if (eq .input_include_mlflow_recipes `yes`) }} env: ${bundle.target}{{ end }} @@ -105,7 +105,7 @@ resources: depends_on: - task_key: ModelValidation notebook_task: - notebook_path: ../deployment/model_deployment/notebooks/ModelDeployment.py + notebook_path: ../deployment/model_deployment/ModelDeployment.py base_parameters: env: ${bundle.target} # git source information of current ML resource deployment. It will be persisted as part of the workflow run diff --git a/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/resources/monitoring-resource.yml.tmpl b/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/resources/monitoring-resource.yml.tmpl index c68d64c7..de14e471 100644 --- a/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/resources/monitoring-resource.yml.tmpl +++ b/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/resources/monitoring-resource.yml.tmpl @@ -41,7 +41,7 @@ resources: - task_key: monitored_metric_violation_check <<: *new_cluster notebook_task: - notebook_path: ../monitoring/notebooks/MonitoredMetricViolationCheck.py + notebook_path: ../monitoring/MonitoredMetricViolationCheck.py base_parameters: env: ${bundle.target} table_name_under_monitor: {{ .input_inference_table_name }} diff --git a/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/training/README.md.tmpl b/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/training/README.md.tmpl index efc2e449..ed004fec 100644 --- a/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/training/README.md.tmpl +++ b/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/training/README.md.tmpl @@ -72,7 +72,7 @@ Otherwise, e.g. if iterating on ML code for a new project, follow the steps belo `dbx sync repo --profile {{ .input_project_name }}-dev --source . --dest-repo your-repo-name`, where `your-repo-name` should be the last segment of the full repo name (`/Repos/username/your-repo-name`) #### Running code on Databricks -You can iterate on ML code by running the provided `{{template `project_name_alphanumeric_underscore` .}}/training/notebooks/TrainWithMLflowRecipes.py` notebook on Databricks using +You can iterate on ML code by running the provided `{{template `project_name_alphanumeric_underscore` .}}/training/TrainWithMLflowRecipes.py` notebook on Databricks using [Repos]({{ template `generate_doc_link` (map (pair "cloud" .input_cloud) (pair "path" "repos/index.html")) }}). This notebook drives execution of the ML code defined under ``{{template `project_name_alphanumeric_underscore` .}}/training/steps``. You can use multiple browser tabs to edit logic in `steps` and run the training recipe in the `TrainWithMLflowRecipes.py` notebook. diff --git a/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/training/notebooks/Train.py.tmpl b/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/training/Train.py.tmpl similarity index 95% rename from template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/training/notebooks/Train.py.tmpl rename to template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/training/Train.py.tmpl index eb909157..3286e277 100644 --- a/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/training/notebooks/Train.py.tmpl +++ b/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/training/Train.py.tmpl @@ -26,13 +26,7 @@ # COMMAND ---------- -import os -notebook_path = '/Workspace/' + os.path.dirname(dbutils.notebook.entry_point.getDbutils().notebook().getContext().notebookPath().get()) -%cd $notebook_path - -# COMMAND ---------- - -# MAGIC %pip install -r ../../requirements.txt +# MAGIC %pip install -r ../requirements.txt # COMMAND ---------- diff --git a/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/training/notebooks/TrainWithFeatureStore.py.tmpl b/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/training/TrainWithFeatureStore.py.tmpl similarity index 98% rename from template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/training/notebooks/TrainWithFeatureStore.py.tmpl rename to template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/training/TrainWithFeatureStore.py.tmpl index aa2e35f3..d19d3584 100644 --- a/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/training/notebooks/TrainWithFeatureStore.py.tmpl +++ b/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/training/TrainWithFeatureStore.py.tmpl @@ -26,13 +26,7 @@ # COMMAND ---------- -import os -notebook_path = '/Workspace/' + os.path.dirname(dbutils.notebook.entry_point.getDbutils().notebook().getContext().notebookPath().get()) -%cd $notebook_path - -# COMMAND ---------- - -# MAGIC %pip install -r ../../requirements.txt +# MAGIC %pip install -r ../requirements.txt # COMMAND ---------- diff --git a/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/training/notebooks/TrainWithMLflowRecipes.py.tmpl b/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/training/TrainWithMLflowRecipes.py.tmpl similarity index 91% rename from template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/training/notebooks/TrainWithMLflowRecipes.py.tmpl rename to template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/training/TrainWithMLflowRecipes.py.tmpl index c06cc6c4..b2a9069f 100644 --- a/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/training/notebooks/TrainWithMLflowRecipes.py.tmpl +++ b/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/training/TrainWithMLflowRecipes.py.tmpl @@ -42,13 +42,7 @@ # COMMAND ---------- -import os -notebook_path = '/Workspace/' + os.path.dirname(dbutils.notebook.entry_point.getDbutils().notebook().getContext().notebookPath().get()) -%cd $notebook_path - -# COMMAND ---------- - -# MAGIC %pip install -r ../../requirements.txt +# MAGIC %pip install -r ../requirements.txt # COMMAND ---------- @@ -56,13 +50,6 @@ dbutils.library.restartPython() # COMMAND ---------- -import os -notebook_path = '/Workspace/' + os.path.dirname(dbutils.notebook.entry_point.getDbutils().notebook().getContext().notebookPath().get()) -%cd $notebook_path -%cd ../ - -# COMMAND ---------- - from mlflow.recipes import Recipe try: diff --git a/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/validation/notebooks/ModelValidation.py.tmpl b/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/validation/ModelValidation.py.tmpl similarity index 97% rename from template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/validation/notebooks/ModelValidation.py.tmpl rename to template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/validation/ModelValidation.py.tmpl index c2852616..ed8436be 100644 --- a/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/validation/notebooks/ModelValidation.py.tmpl +++ b/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/validation/ModelValidation.py.tmpl @@ -43,13 +43,7 @@ # COMMAND ---------- -import os -notebook_path = '/Workspace/' + os.path.dirname(dbutils.notebook.entry_point.getDbutils().notebook().getContext().notebookPath().get()) -%cd $notebook_path - -# COMMAND ---------- - -# MAGIC %pip install -r ../../requirements.txt +# MAGIC %pip install -r ../requirements.txt # COMMAND ---------- @@ -57,13 +51,6 @@ dbutils.library.restartPython() # COMMAND ---------- -import os -notebook_path = '/Workspace/' + os.path.dirname(dbutils.notebook.entry_point.getDbutils().notebook().getContext().notebookPath().get()) -%cd $notebook_path -%cd ../ - -# COMMAND ---------- - {{ if (eq .input_include_mlflow_recipes `yes`) }}dbutils.widgets.dropdown( "env", "prod", ["staging", "prod"], "Environment(for input data)" ){{ end -}}