-
Notifications
You must be signed in to change notification settings - Fork 186
Update keras, tf and new model usage, numpy 2.0 updates #1206
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 23 commits
d6b1ec1
9920fab
7592895
a3592fb
3ecaf6b
0a2efd3
fb321a2
2207920
96344db
0458e73
2fe4ddd
c41303e
0615268
f08af16
e5f4041
052d058
3965667
f1046a9
8f1b4e0
fdc671e
34c47fe
57066fb
5de7abe
e1afcf7
0b00aed
8edd1dc
03b4fa1
ffbac1a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -17,6 +17,7 @@ | |
| from .. import dp_logging | ||
| from . import labeler_utils | ||
| from .base_model import AutoSubRegistrationMeta, BaseModel, BaseTrainableModel | ||
| from .character_level_cnn_model import ArgMaxLayer | ||
|
|
||
| _file_dir = os.path.dirname(os.path.abspath(__file__)) | ||
|
|
||
|
|
@@ -29,6 +30,8 @@ class CharLoadTFModel(BaseTrainableModel, metaclass=AutoSubRegistrationMeta): | |
|
|
||
| # boolean if the label mapping requires the mapping for index 0 reserved | ||
| requires_zero_mapping = False | ||
| _SOFTMAX_OUTPUT = "softmax_output" | ||
| _ARGMAX_OUTPUT = "argmax_output" | ||
|
|
||
| def __init__( | ||
| self, model_path: str, label_mapping: dict[str, int], parameters: dict = None | ||
|
|
@@ -61,6 +64,35 @@ def __init__( | |
|
|
||
| BaseModel.__init__(self, label_mapping, parameters) | ||
|
|
||
| @classmethod | ||
| def _create_model_outputs( | ||
| cls, softmax_output: tf.Tensor, argmax_output: tf.Tensor | None = None | ||
| ) -> dict[str, tf.Tensor]: | ||
| """Return normalized dict outputs for training and inference.""" | ||
| if argmax_output is None: | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Ensures the model outputs are normalized into a dict keyed by output name (softmax and argmax). |
||
| argmax_output = ArgMaxLayer(name=cls._ARGMAX_OUTPUT)(softmax_output) | ||
| return { | ||
| cls._SOFTMAX_OUTPUT: softmax_output, | ||
| cls._ARGMAX_OUTPUT: argmax_output, | ||
| } | ||
|
|
||
| @classmethod | ||
| def _normalize_model_outputs(cls, model: tf.keras.Model) -> tf.keras.Model: | ||
| """Convert list-style outputs to the normalized dict structure.""" | ||
| return labeler_utils.normalize_tf_model_outputs( | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Converts the previous list-style outputs to the normalized dict structure, for the consistency that Keras 3 requires. |
||
| model, | ||
| [cls._SOFTMAX_OUTPUT, cls._ARGMAX_OUTPUT], | ||
| lambda softmax_output, extra_outputs: cls._create_model_outputs( | ||
| softmax_output, extra_outputs[0] | ||
| ), | ||
| ) | ||
|
|
||
| def _new_softmax_head_name(self) -> str: | ||
| """Return a layer name unique within the current model graph.""" | ||
| return labeler_utils.get_tf_rebuild_layer_name( | ||
| self._model, f"{self._SOFTMAX_OUTPUT}_rebuild" | ||
| ) | ||
|
|
||
| def __eq__(self, other: object) -> bool: | ||
| """ | ||
| Check if two models are equal with one another. | ||
|
|
@@ -215,15 +247,34 @@ def load_from_disk(cls, dirpath: str) -> CharLoadTFModel: | |
| tf_model = tf.keras.models.load_model(dirpath) | ||
|
|
||
| loaded_model = cls(dirpath, label_mapping, parameters) | ||
| loaded_model._model = tf_model | ||
| loaded_model._model = cls._normalize_model_outputs(tf_model) | ||
|
|
||
| # load self | ||
| loaded_model._model_num_labels = loaded_model.num_labels | ||
| loaded_model._model_default_ind = loaded_model.label_mapping[ | ||
| loaded_model._parameters["default_label"] | ||
| ] | ||
| loaded_model._compile_model(loaded_model.num_labels) | ||
| return loaded_model | ||
|
|
||
| def _compile_model(self, num_labels: int) -> None: | ||
| """Compile the model with dict-based losses and metrics.""" | ||
| losses = { | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Ensures the losses and metrics use the dict-based form, keyed by output name. |
||
| self._SOFTMAX_OUTPUT: "categorical_crossentropy", | ||
| self._ARGMAX_OUTPUT: None, | ||
| } | ||
| f1_score_training = labeler_utils.F1Score( | ||
| num_classes=num_labels, average="micro" | ||
| ) | ||
| metrics = { | ||
| self._SOFTMAX_OUTPUT: [ | ||
| "categorical_crossentropy", | ||
| "acc", | ||
| f1_score_training, | ||
| ] | ||
| } | ||
| self._model.compile(loss=losses, optimizer="adam", metrics=metrics) | ||
|
|
||
| def _construct_model(self) -> None: | ||
| """ | ||
| Model constructor for the data labeler. | ||
|
|
@@ -237,46 +288,28 @@ def _construct_model(self) -> None: | |
| model_loc = self._parameters["model_path"] | ||
|
|
||
| self._model: tf.keras.Model = tf.keras.models.load_model(model_loc) | ||
| self._model = tf.keras.Model(self._model.inputs, self._model.outputs) | ||
| softmax_output_layer_name = self._model.output_names[0] | ||
| self._model = self._normalize_model_outputs(self._model) | ||
| softmax_output = self._model.output[self._SOFTMAX_OUTPUT] | ||
| softmax_layer = softmax_output._keras_history[0] | ||
| softmax_output_layer_name = softmax_layer.name | ||
| softmax_layer_ind = cast( | ||
| int, | ||
| labeler_utils.get_tf_layer_index_from_name( | ||
| self._model, softmax_output_layer_name | ||
| ), | ||
| ) | ||
| softmax_layer = self._model.get_layer(softmax_output_layer_name) | ||
|
|
||
| new_softmax_layer = softmax_layer.output | ||
| new_softmax_layer = softmax_output | ||
| if softmax_layer.weights[0].shape[-1] != num_labels: | ||
| new_softmax_layer = tf.keras.layers.Dense( | ||
| num_labels, activation="softmax", name="softmax_output" | ||
| num_labels, | ||
| activation="softmax", | ||
| name=self._new_softmax_head_name(), | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Allows the softmax layer name to change on each rebuild, per Keras requirements (the name must be unique within the current model graph). |
||
| )(self._model.layers[softmax_layer_ind - 1].output) | ||
|
|
||
| # Add argmax layer to get labels directly as an output | ||
| argmax_layer = tf.keras.ops.argmax(new_softmax_layer, axis=2) | ||
|
|
||
| argmax_outputs = [new_softmax_layer, argmax_layer] | ||
| self._model = tf.keras.Model(self._model.inputs, argmax_outputs) | ||
| self._model = tf.keras.Model(self._model.inputs, self._model.outputs) | ||
|
|
||
| # Compile the model w/ metrics | ||
| softmax_output_layer_name = self._model.output_names[0] | ||
| losses = {softmax_output_layer_name: "categorical_crossentropy"} | ||
|
|
||
| # use f1 score metric | ||
| f1_score_training = labeler_utils.F1Score( | ||
| num_classes=num_labels, average="micro" | ||
| ) | ||
| metrics = { | ||
| softmax_output_layer_name: [ | ||
| "categorical_crossentropy", | ||
| "acc", | ||
| f1_score_training, | ||
| ] | ||
| } | ||
|
|
||
| self._model.compile(loss=losses, optimizer="adam", metrics=metrics) | ||
| output_dict = self._create_model_outputs(new_softmax_layer) | ||
| self._model = tf.keras.Model(self._model.inputs, output_dict) | ||
| self._compile_model(num_labels) | ||
|
|
||
| self._epoch_id = 0 | ||
| self._model_num_labels = num_labels | ||
|
|
@@ -305,32 +338,14 @@ def _reconstruct_model(self) -> None: | |
| # Add the final Softmax layer to the previous spot | ||
| # self._model.layers[-2] to skip: original softmax | ||
| final_softmax_layer = tf.keras.layers.Dense( | ||
| num_labels, activation="softmax", name="softmax_output" | ||
| num_labels, | ||
| activation="softmax", | ||
| name=self._new_softmax_head_name(), | ||
| )(self._model.layers[-2].output) | ||
|
|
||
| # Add argmax layer to get labels directly as an output | ||
| argmax_layer = tf.keras.ops.argmax(final_softmax_layer, axis=2) | ||
|
|
||
| argmax_outputs = [final_softmax_layer, argmax_layer] | ||
| self._model = tf.keras.Model(self._model.inputs, argmax_outputs) | ||
|
|
||
| # Compile the model | ||
| softmax_output_layer_name = self._model.output_names[0] | ||
| losses = {softmax_output_layer_name: "categorical_crossentropy"} | ||
|
|
||
| # use f1 score metric | ||
| f1_score_training = labeler_utils.F1Score( | ||
| num_classes=num_labels, average="micro" | ||
| ) | ||
| metrics = { | ||
| softmax_output_layer_name: [ | ||
| "categorical_crossentropy", | ||
| "acc", | ||
| f1_score_training, | ||
| ] | ||
| } | ||
|
|
||
| self._model.compile(loss=losses, optimizer="adam", metrics=metrics) | ||
| output_dict = self._create_model_outputs(final_softmax_layer) | ||
| self._model = tf.keras.Model(self._model.inputs, output_dict) | ||
| self._compile_model(num_labels) | ||
|
|
||
| self._epoch_id = 0 | ||
| self._model_num_labels = num_labels | ||
|
|
@@ -381,42 +396,60 @@ def fit( | |
| f1_report: dict = {} | ||
|
|
||
| self._model.reset_metrics() | ||
| softmax_output_layer_name = self._model.output_names[0] | ||
|
|
||
| start_time = time.time() | ||
| batch_id = 0 | ||
| target_output = self._SOFTMAX_OUTPUT | ||
| for x_train, y_train in train_data: | ||
| model_results = self._model.train_on_batch( | ||
| x_train, {softmax_output_layer_name: y_train} | ||
| x_train, | ||
| {target_output: y_train}, | ||
| return_dict=True, | ||
| ) | ||
| acc_value = next( | ||
| (value for key, value in model_results.items() if key.endswith("acc")), | ||
| np.nan, | ||
| ) | ||
| f1_value = next( | ||
| (value for key, value in model_results.items() if "f1" in key.lower()), | ||
| np.nan, | ||
| ) | ||
|
Comment on lines +413 to +420
Contributor
Author
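There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Needed because of the dict-based output: train_on_batch is now called with return_dict=True, so the accuracy and F1 values are looked up by key rather than by position. |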
||
| sys.stdout.flush() | ||
| if verbose: | ||
| sys.stdout.write( | ||
| "\rEPOCH %d, batch_id %d: loss: %f - acc: %f - " | ||
| "f1_score %f" % (self._epoch_id, batch_id, *model_results[1:]) | ||
| "f1_score %f" | ||
| % ( | ||
| self._epoch_id, | ||
| batch_id, | ||
| model_results.get("loss", np.nan), | ||
| acc_value, | ||
| f1_value, | ||
| ) | ||
| ) | ||
| batch_id += 1 | ||
|
|
||
| for i, metric_label in enumerate(self._model.metrics_names): | ||
| history[metric_label] = model_results[i] | ||
| history.update(model_results) | ||
|
|
||
| if val_data: | ||
| f1, f1_report = self._validate_training(val_data) # type: ignore | ||
| history["f1_report"] = f1_report | ||
|
|
||
| val_f1 = f1_report["weighted avg"]["f1-score"] if f1_report else np.NAN | ||
| val_f1 = f1_report["weighted avg"]["f1-score"] if f1_report else np.nan | ||
| val_precision = ( | ||
| f1_report["weighted avg"]["precision"] if f1_report else np.NAN | ||
| f1_report["weighted avg"]["precision"] if f1_report else np.nan | ||
| ) | ||
| val_recall = f1_report["weighted avg"]["recall"] if f1_report else np.NAN | ||
| val_recall = f1_report["weighted avg"]["recall"] if f1_report else np.nan | ||
| epoch_time = time.time() - start_time | ||
| logger.info( | ||
| "\rEPOCH %d (%ds), loss: %f - acc: %f - f1_score %f -- " | ||
| "val_f1: %f - val_precision: %f - val_recall %f" | ||
| % ( | ||
| self._epoch_id, | ||
| epoch_time, | ||
| *model_results[1:], | ||
| model_results.get("loss", np.nan), | ||
| acc_value, | ||
| f1_value, | ||
| val_f1, | ||
| val_precision, | ||
| val_recall, | ||
|
|
@@ -463,7 +496,7 @@ def _validate_training( | |
| y_val_pred.append( | ||
| self._model.predict( | ||
| x_val, batch_size=batch_size_test, verbose=verbose_keras | ||
| )[1] | ||
| )[self._ARGMAX_OUTPUT] | ||
| ) | ||
| y_val_test.append(np.argmax(y_val, axis=-1)) | ||
| batch_id += 1 | ||
|
|
@@ -536,10 +569,10 @@ def predict( | |
| if show_confidences: | ||
| confidences[ | ||
| allocation_index : allocation_index + num_samples_in_batch | ||
| ] = model_output[0].numpy() | ||
| ] = model_output[self._SOFTMAX_OUTPUT].numpy() | ||
| predictions[ | ||
| allocation_index : allocation_index + num_samples_in_batch | ||
| ] = model_output[1].numpy() | ||
| ] = model_output[self._ARGMAX_OUTPUT].numpy() | ||
|
|
||
| allocation_index += num_samples_in_batch | ||
|
|
||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
normalize layer names