From d6b1ec129489c00681de65145d15991c1818e568 Mon Sep 17 00:00:00 2001
From: Jeremy Goodsitt <jeremy.goodsitt@ipcopilot.ai>
Date: Wed, 6 May 2026 14:21:04 -0500
Subject: [PATCH 01/28] refactor: move from deprecated pkg_resources

---
 dataprofiler/labelers/base_data_labeler.py                     | 3 +++
 dataprofiler/labelers/data_labelers.py                         | 2 ++
 dataprofiler/labelers/data_processing.py                       | 2 ++
 dataprofiler/tests/labelers/test_char_tf_load_model.py         | 2 ++
 dataprofiler/tests/labelers/test_character_level_cnn_model.py  | 2 ++
 dataprofiler/tests/labelers/test_column_name_model.py          | 2 ++
 dataprofiler/tests/labelers/test_data_processing.py            | 2 ++
 .../labelers/test_integration_column_name_data_labeler.py      | 2 ++
 .../tests/labelers/test_integration_regex_data_labeler.py      | 2 ++
 dataprofiler/tests/labelers/test_regex_model.py                | 2 ++
 10 files changed, 21 insertions(+)

diff --git a/dataprofiler/labelers/base_data_labeler.py b/dataprofiler/labelers/base_data_labeler.py
index f9a4a0ab..43585e26 100644
--- a/dataprofiler/labelers/base_data_labeler.py
+++ b/dataprofiler/labelers/base_data_labeler.py
@@ -2,10 +2,12 @@
 
 from __future__ import annotations
 
+import importlib.resources
 import json
 import os
 import sys
 import warnings
+from pathlib import Path
 from typing import cast
 
 import numpy as np
@@ -17,6 +19,7 @@
 from . import data_processing, utils
 from .base_model import BaseModel
 
+
 default_labeler_dir = utils.find_resources_dir("labelers")
 
 
diff --git a/dataprofiler/labelers/data_labelers.py b/dataprofiler/labelers/data_labelers.py
index 961b45e6..5dda6e62 100644
--- a/dataprofiler/labelers/data_labelers.py
+++ b/dataprofiler/labelers/data_labelers.py
@@ -2,7 +2,9 @@
 
 from __future__ import annotations
 
+import importlib.resources
 import os
+from pathlib import Path
 
 import pandas as pd
 
diff --git a/dataprofiler/labelers/data_processing.py b/dataprofiler/labelers/data_processing.py
index ba17a3bd..107a7872 100644
--- a/dataprofiler/labelers/data_processing.py
+++ b/dataprofiler/labelers/data_processing.py
@@ -4,6 +4,7 @@
 
 import abc
 import copy
+import importlib
 import inspect
 import json
 import math
@@ -12,6 +13,7 @@
 import types
 import warnings
 from collections import Counter
+from pathlib import Path
 from typing import Any, Generator, Iterable, TypeVar, cast
 
 import numpy as np
diff --git a/dataprofiler/tests/labelers/test_char_tf_load_model.py b/dataprofiler/tests/labelers/test_char_tf_load_model.py
index 40879e57..ec3c3058 100644
--- a/dataprofiler/tests/labelers/test_char_tf_load_model.py
+++ b/dataprofiler/tests/labelers/test_char_tf_load_model.py
@@ -1,7 +1,9 @@
+import importlib.resources
 import json
 import os
 import unittest
 from io import StringIO
+from pathlib import Path
 from unittest import mock
 
 import numpy as np
diff --git a/dataprofiler/tests/labelers/test_character_level_cnn_model.py b/dataprofiler/tests/labelers/test_character_level_cnn_model.py
index cbc35b13..32f6ac05 100644
--- a/dataprofiler/tests/labelers/test_character_level_cnn_model.py
+++ b/dataprofiler/tests/labelers/test_character_level_cnn_model.py
@@ -1,7 +1,9 @@
+import importlib
 import json
 import os
 import unittest
 from io import StringIO
+from pathlib import Path
 from unittest import mock
 
 import numpy as np
diff --git a/dataprofiler/tests/labelers/test_column_name_model.py b/dataprofiler/tests/labelers/test_column_name_model.py
index dfd4274e..97c81c95 100644
--- a/dataprofiler/tests/labelers/test_column_name_model.py
+++ b/dataprofiler/tests/labelers/test_column_name_model.py
@@ -1,8 +1,10 @@
+import importlib
 import json
 import os
 import sys
 import unittest
 from io import StringIO
+from pathlib import Path
 from unittest import mock
 
 import numpy as np
diff --git a/dataprofiler/tests/labelers/test_data_processing.py b/dataprofiler/tests/labelers/test_data_processing.py
index 7624ccca..7db9d900 100644
--- a/dataprofiler/tests/labelers/test_data_processing.py
+++ b/dataprofiler/tests/labelers/test_data_processing.py
@@ -1,9 +1,11 @@
+import importlib
 import json
 import os
 import random
 import re
 import unittest
 from io import StringIO
+from pathlib import Path
 from unittest import mock
 
 import numpy as np
diff --git a/dataprofiler/tests/labelers/test_integration_column_name_data_labeler.py b/dataprofiler/tests/labelers/test_integration_column_name_data_labeler.py
index 8b19731f..05524e18 100644
--- a/dataprofiler/tests/labelers/test_integration_column_name_data_labeler.py
+++ b/dataprofiler/tests/labelers/test_integration_column_name_data_labeler.py
@@ -1,4 +1,6 @@
+import importlib
 import unittest
+from pathlib import Path
 
 import numpy as np
 
diff --git a/dataprofiler/tests/labelers/test_integration_regex_data_labeler.py b/dataprofiler/tests/labelers/test_integration_regex_data_labeler.py
index df72b99e..37aaae33 100644
--- a/dataprofiler/tests/labelers/test_integration_regex_data_labeler.py
+++ b/dataprofiler/tests/labelers/test_integration_regex_data_labeler.py
@@ -1,5 +1,7 @@
+import importlib
 import os
 import unittest
+from pathlib import Path
 
 import numpy as np
 
diff --git a/dataprofiler/tests/labelers/test_regex_model.py b/dataprofiler/tests/labelers/test_regex_model.py
index 66ac6448..7ff7481d 100644
--- a/dataprofiler/tests/labelers/test_regex_model.py
+++ b/dataprofiler/tests/labelers/test_regex_model.py
@@ -1,7 +1,9 @@
+import importlib
 import json
 import os
 import unittest
 from io import StringIO
+from pathlib import Path
 from unittest import mock
 
 import numpy as np

From 9920fabc14fe184d2d9350a1ad7af7031365c38b Mon Sep 17 00:00:00 2001
From: Jeremy Goodsitt <jeremy.goodsitt@ipcopilot.ai>
Date: Wed, 6 May 2026 14:22:37 -0500
Subject: [PATCH 02/28] fix: to use func

---
 dataprofiler/labelers/base_data_labeler.py                   | 2 --
 dataprofiler/labelers/data_labelers.py                       | 2 --
 dataprofiler/labelers/utils.py                               | 1 +
 dataprofiler/tests/labelers/test_char_tf_load_model.py       | 2 --
 .../tests/labelers/test_character_level_cnn_model.py         | 2 --
 dataprofiler/tests/labelers/test_column_name_model.py        | 2 --
 dataprofiler/tests/labelers/test_data_processing.py          | 5 +++--
 .../labelers/test_integration_column_name_data_labeler.py    | 2 --
 .../tests/labelers/test_integration_regex_data_labeler.py    | 2 --
 dataprofiler/tests/labelers/test_regex_model.py              | 2 --
 10 files changed, 4 insertions(+), 18 deletions(-)

diff --git a/dataprofiler/labelers/base_data_labeler.py b/dataprofiler/labelers/base_data_labeler.py
index 43585e26..b2038b43 100644
--- a/dataprofiler/labelers/base_data_labeler.py
+++ b/dataprofiler/labelers/base_data_labeler.py
@@ -2,12 +2,10 @@
 
 from __future__ import annotations
 
-import importlib.resources
 import json
 import os
 import sys
 import warnings
-from pathlib import Path
 from typing import cast
 
 import numpy as np
diff --git a/dataprofiler/labelers/data_labelers.py b/dataprofiler/labelers/data_labelers.py
index 5dda6e62..961b45e6 100644
--- a/dataprofiler/labelers/data_labelers.py
+++ b/dataprofiler/labelers/data_labelers.py
@@ -2,9 +2,7 @@
 
 from __future__ import annotations
 
-import importlib.resources
 import os
-from pathlib import Path
 
 import pandas as pd
 
diff --git a/dataprofiler/labelers/utils.py b/dataprofiler/labelers/utils.py
index 99553869..d393fa6a 100644
--- a/dataprofiler/labelers/utils.py
+++ b/dataprofiler/labelers/utils.py
@@ -2,6 +2,7 @@
 
 import importlib.resources
 import sys
+import sysconfig
 import warnings
 from pathlib import Path
 from typing import Any, Callable, List
diff --git a/dataprofiler/tests/labelers/test_char_tf_load_model.py b/dataprofiler/tests/labelers/test_char_tf_load_model.py
index ec3c3058..40879e57 100644
--- a/dataprofiler/tests/labelers/test_char_tf_load_model.py
+++ b/dataprofiler/tests/labelers/test_char_tf_load_model.py
@@ -1,9 +1,7 @@
-import importlib.resources
 import json
 import os
 import unittest
 from io import StringIO
-from pathlib import Path
 from unittest import mock
 
 import numpy as np
diff --git a/dataprofiler/tests/labelers/test_character_level_cnn_model.py b/dataprofiler/tests/labelers/test_character_level_cnn_model.py
index 32f6ac05..cbc35b13 100644
--- a/dataprofiler/tests/labelers/test_character_level_cnn_model.py
+++ b/dataprofiler/tests/labelers/test_character_level_cnn_model.py
@@ -1,9 +1,7 @@
-import importlib
 import json
 import os
 import unittest
 from io import StringIO
-from pathlib import Path
 from unittest import mock
 
 import numpy as np
diff --git a/dataprofiler/tests/labelers/test_column_name_model.py b/dataprofiler/tests/labelers/test_column_name_model.py
index 97c81c95..dfd4274e 100644
--- a/dataprofiler/tests/labelers/test_column_name_model.py
+++ b/dataprofiler/tests/labelers/test_column_name_model.py
@@ -1,10 +1,8 @@
-import importlib
 import json
 import os
 import sys
 import unittest
 from io import StringIO
-from pathlib import Path
 from unittest import mock
 
 import numpy as np
diff --git a/dataprofiler/tests/labelers/test_data_processing.py b/dataprofiler/tests/labelers/test_data_processing.py
index 7db9d900..5ec15594 100644
--- a/dataprofiler/tests/labelers/test_data_processing.py
+++ b/dataprofiler/tests/labelers/test_data_processing.py
@@ -1,11 +1,12 @@
-import importlib
+pass
 import json
 import os
 import random
 import re
 import unittest
 from io import StringIO
-from pathlib import Path
+
+pass
 from unittest import mock
 
 import numpy as np
diff --git a/dataprofiler/tests/labelers/test_integration_column_name_data_labeler.py b/dataprofiler/tests/labelers/test_integration_column_name_data_labeler.py
index 05524e18..8b19731f 100644
--- a/dataprofiler/tests/labelers/test_integration_column_name_data_labeler.py
+++ b/dataprofiler/tests/labelers/test_integration_column_name_data_labeler.py
@@ -1,6 +1,4 @@
-import importlib
 import unittest
-from pathlib import Path
 
 import numpy as np
 
diff --git a/dataprofiler/tests/labelers/test_integration_regex_data_labeler.py b/dataprofiler/tests/labelers/test_integration_regex_data_labeler.py
index 37aaae33..df72b99e 100644
--- a/dataprofiler/tests/labelers/test_integration_regex_data_labeler.py
+++ b/dataprofiler/tests/labelers/test_integration_regex_data_labeler.py
@@ -1,7 +1,5 @@
-import importlib
 import os
 import unittest
-from pathlib import Path
 
 import numpy as np
 
diff --git a/dataprofiler/tests/labelers/test_regex_model.py b/dataprofiler/tests/labelers/test_regex_model.py
index 7ff7481d..66ac6448 100644
--- a/dataprofiler/tests/labelers/test_regex_model.py
+++ b/dataprofiler/tests/labelers/test_regex_model.py
@@ -1,9 +1,7 @@
-import importlib
 import json
 import os
 import unittest
 from io import StringIO
-from pathlib import Path
 from unittest import mock
 
 import numpy as np

From 7592895357dd4bab54dac9917cd4b4c9b809e9c4 Mon Sep 17 00:00:00 2001
From: Jeremy Goodsitt <jeremy.goodsitt@ipcopilot.ai>
Date: Wed, 6 May 2026 14:22:43 -0500
Subject: [PATCH 03/28] fix: add missing change

---
 dataprofiler/labelers/data_processing.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/dataprofiler/labelers/data_processing.py b/dataprofiler/labelers/data_processing.py
index 107a7872..ba17a3bd 100644
--- a/dataprofiler/labelers/data_processing.py
+++ b/dataprofiler/labelers/data_processing.py
@@ -4,7 +4,6 @@
 
 import abc
 import copy
-import importlib
 import inspect
 import json
 import math
@@ -13,7 +12,6 @@
 import types
 import warnings
 from collections import Counter
-from pathlib import Path
 from typing import Any, Generator, Iterable, TypeVar, cast
 
 import numpy as np

From a3592fb2b351899aa4a30af9be08775e11088d65 Mon Sep 17 00:00:00 2001
From: Jeremy Goodsitt <jeremy.goodsitt@ipcopilot.ai>
Date: Wed, 6 May 2026 14:23:21 -0500
Subject: [PATCH 04/28] refactor: resources to be in package

---
 dataprofiler/labelers/utils.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/dataprofiler/labelers/utils.py b/dataprofiler/labelers/utils.py
index d393fa6a..99553869 100644
--- a/dataprofiler/labelers/utils.py
+++ b/dataprofiler/labelers/utils.py
@@ -2,7 +2,6 @@
 
 import importlib.resources
 import sys
-import sysconfig
 import warnings
 from pathlib import Path
 from typing import Any, Callable, List

From 3ecaf6bc1cd0a05145d1e9a4b9a5d3b91ff635a3 Mon Sep 17 00:00:00 2001
From: Jeremy Goodsitt <jeremy.goodsitt@ipcopilot.ai>
Date: Wed, 6 May 2026 14:24:42 -0500
Subject: [PATCH 05/28] fix: compare welch p-value with assertAlmostEqual in diff tests

---
 dataprofiler/tests/profilers/test_float_column_profile.py | 5 +++++
 dataprofiler/tests/profilers/test_int_column_profile.py   | 5 +++++
 dataprofiler/tests/profilers/test_text_column_profile.py  | 5 +++++
 3 files changed, 15 insertions(+)

diff --git a/dataprofiler/tests/profilers/test_float_column_profile.py b/dataprofiler/tests/profilers/test_float_column_profile.py
index d9ec122c..6061302c 100644
--- a/dataprofiler/tests/profilers/test_float_column_profile.py
+++ b/dataprofiler/tests/profilers/test_float_column_profile.py
@@ -1777,6 +1777,11 @@ def test_diff(self):
             profile_diff.pop("median_absolute_deviation"),
             places=2,
         )
+        self.assertAlmostEqual(
+            expected_diff.get("t-test").get("welch").pop("p-value"),
+            profile_diff.get("t-test").get("welch").pop("p-value"),
+            places=10,
+        )
         self.assertDictEqual(expected_diff, profile_diff)
 
         # Assert type error is properly called
diff --git a/dataprofiler/tests/profilers/test_int_column_profile.py b/dataprofiler/tests/profilers/test_int_column_profile.py
index 960e5318..bb5b136a 100644
--- a/dataprofiler/tests/profilers/test_int_column_profile.py
+++ b/dataprofiler/tests/profilers/test_int_column_profile.py
@@ -1097,6 +1097,11 @@ def test_diff(self):
             profile_diff.pop("median_absolute_deviation"),
             places=2,
         )
+        self.assertAlmostEqual(
+            expected_diff.get("t-test").get("welch").pop("p-value"),
+            profile_diff.get("t-test").get("welch").pop("p-value"),
+            places=10,
+        )
         self.assertDictEqual(expected_diff, profile_diff)
 
         # Assert type error is properly called
diff --git a/dataprofiler/tests/profilers/test_text_column_profile.py b/dataprofiler/tests/profilers/test_text_column_profile.py
index 699e35cb..092426bf 100644
--- a/dataprofiler/tests/profilers/test_text_column_profile.py
+++ b/dataprofiler/tests/profilers/test_text_column_profile.py
@@ -662,6 +662,11 @@ def test_diff(self):
             profile_diff.pop("median_absolute_deviation"),
             places=2,
         )
+        self.assertAlmostEqual(
+            expected_diff.get("t-test").get("welch").pop("p-value"),
+            profile_diff.get("t-test").get("welch").pop("p-value"),
+            places=10,
+        )
         self.assertDictEqual(expected_diff, profile_diff)
 
     @mock.patch("time.time", return_value=0.0)

From 0a2efd3f791b53eabd3f677bc748b303e69ed4f4 Mon Sep 17 00:00:00 2001
From: Jeremy Goodsitt <jeremy.goodsitt@ipcopilot.ai>
Date: Wed, 6 May 2026 14:24:52 -0500
Subject: [PATCH 06/28] feat: refactor to pass in a path or string or None

---
 dataprofiler/labelers/base_data_labeler.py | 1 -
 dataprofiler/labelers/utils.py             | 1 +
 2 files changed, 1 insertion(+), 1 deletion(-)

diff --git a/dataprofiler/labelers/base_data_labeler.py b/dataprofiler/labelers/base_data_labeler.py
index b2038b43..f9a4a0ab 100644
--- a/dataprofiler/labelers/base_data_labeler.py
+++ b/dataprofiler/labelers/base_data_labeler.py
@@ -17,7 +17,6 @@
 from . import data_processing, utils
 from .base_model import BaseModel
 
-
 default_labeler_dir = utils.find_resources_dir("labelers")
 
 
diff --git a/dataprofiler/labelers/utils.py b/dataprofiler/labelers/utils.py
index 99553869..e10d138f 100644
--- a/dataprofiler/labelers/utils.py
+++ b/dataprofiler/labelers/utils.py
@@ -3,6 +3,7 @@
 import importlib.resources
 import sys
 import warnings
+from importlib.resources.abc import Traversable
 from pathlib import Path
 from typing import Any, Callable, List
 

From fb321a26ece1425acc81da9f678af4e1bce3ee0b Mon Sep 17 00:00:00 2001
From: Jeremy Goodsitt <jeremy.goodsitt@ipcopilot.ai>
Date: Wed, 6 May 2026 14:24:52 -0500
Subject: [PATCH 07/28] fix: import for older versions

---
 dataprofiler/labelers/utils.py | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/dataprofiler/labelers/utils.py b/dataprofiler/labelers/utils.py
index e10d138f..02a9744f 100644
--- a/dataprofiler/labelers/utils.py
+++ b/dataprofiler/labelers/utils.py
@@ -3,9 +3,16 @@
 import importlib.resources
 import sys
 import warnings
-from importlib.resources.abc import Traversable
 from pathlib import Path
-from typing import Any, Callable, List
+from typing import TYPE_CHECKING, Any, Callable, List
+
+if TYPE_CHECKING:
+    try:
+        # Newer Pythons / newer typeshed
+        from importlib.resources.abc import Traversable
+    except ModuleNotFoundError:
+        # Older Pythons
+        from importlib.abc import Traversable
 
 try:
     # Newer Pythons / newer typeshed

From 2207920a5827d214d2251e7653796a508714a0a0 Mon Sep 17 00:00:00 2001
From: Jeremy Goodsitt <jeremy.goodsitt@ipcopilot.ai>
Date: Wed, 6 May 2026 14:24:52 -0500
Subject: [PATCH 08/28] fix: Traversable must be done at runtime

---
 dataprofiler/labelers/utils.py | 17 ++++++++---------
 1 file changed, 8 insertions(+), 9 deletions(-)

diff --git a/dataprofiler/labelers/utils.py b/dataprofiler/labelers/utils.py
index 02a9744f..9f39d21b 100644
--- a/dataprofiler/labelers/utils.py
+++ b/dataprofiler/labelers/utils.py
@@ -4,15 +4,14 @@
 import sys
 import warnings
 from pathlib import Path
-from typing import TYPE_CHECKING, Any, Callable, List
-
-if TYPE_CHECKING:
-    try:
-        # Newer Pythons / newer typeshed
-        from importlib.resources.abc import Traversable
-    except ModuleNotFoundError:
-        # Older Pythons
-        from importlib.abc import Traversable
+from typing import Any, Callable, List
+
+try:
+    # Newer Pythons / newer typeshed
+    from importlib.resources.abc import Traversable
+except ModuleNotFoundError:
+    # Older Pythons
+    from importlib.abc import Traversable
 
 try:
     # Newer Pythons / newer typeshed

From 96344db2be1b302fccbbc5077b30370df4b60d93 Mon Sep 17 00:00:00 2001
From: Jeremy Goodsitt <jeremy.goodsitt@ipcopilot.ai>
Date: Wed, 6 May 2026 14:24:52 -0500
Subject: [PATCH 09/28] refactor: keras reqs and others

---
 requirements-dev.txt  | 2 +-
 requirements-ml.txt   | 7 +++----
 requirements-test.txt | 2 +-
 3 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/requirements-dev.txt b/requirements-dev.txt
index 8c7c7868..163dae50 100644
--- a/requirements-dev.txt
+++ b/requirements-dev.txt
@@ -1,7 +1,7 @@
 check-manifest>=0.50
 black>=24.3.0
 isort==5.12.0
-pre-commit==2.19.0
+pre-commit==4.3.0
 tox==3.25.1
 tox-conda==0.10.2
 types-setuptools==67.7.0.1
diff --git a/requirements-ml.txt b/requirements-ml.txt
index 31f9ca63..4abb91c4 100644
--- a/requirements-ml.txt
+++ b/requirements-ml.txt
@@ -1,7 +1,6 @@
 scikit-learn>=0.23.2
-keras<=3.4.0
+keras<=3.11.0
 rapidfuzz>=2.6.1
-tensorflow>=2.16.0; sys.platform != 'darwin'
-tensorflow>=2.16.0; sys_platform == 'darwin' and platform_machine != 'arm64'
-tensorflow-macos>=2.16.0; sys_platform == 'darwin' and platform_machine == 'arm64'
+tensorflow>=2.16.0
+tensorflow-metal; sys_platform == 'darwin' and platform_machine == 'arm64'
 tqdm>=4.0.0
diff --git a/requirements-test.txt b/requirements-test.txt
index 725b2384..cf127b60 100644
--- a/requirements-test.txt
+++ b/requirements-test.txt
@@ -6,4 +6,4 @@ pytest-cov>=2.8.1
 pytest-xdist>=2.1.0
 pytest-forked>=1.3.0
 toolz>=0.10.0
-memray>=1.7.0,<1.12.0
+memray>=1.18.0

From 0458e73f65b3665bec3efbf6889d83cc5605a333 Mon Sep 17 00:00:00 2001
From: Jeremy Goodsitt <jeremy.goodsitt@ipcopilot.ai>
Date: Wed, 6 May 2026 14:24:52 -0500
Subject: [PATCH 10/28] refactor: losses for keras and tests

---
 .pre-commit-config.yaml                       | 12 ++--
 dataprofiler/labelers/base_model.py           |  2 +-
 dataprofiler/labelers/char_load_tf_model.py   |  5 +-
 .../labelers/character_level_cnn_model.py     | 66 ++++++++-----------
 dataprofiler/labelers/data_processing.py      |  4 +-
 .../test_character_level_cnn_model.py         |  2 +-
 .../tests/labelers/test_data_labelers.py      |  2 +-
 requirements-ml.txt                           |  1 -
 8 files changed, 38 insertions(+), 56 deletions(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 1cd76047..902e256f 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -21,7 +21,7 @@ repos:
   # Flake8: complexity and style checking
   # https://flake8.pycqa.org/en/latest/user/using-hooks.html
   - repo: https://github.com/pycqa/flake8
-    rev: 4.0.1
+    rev: 7.3.0
     hooks:
       - id: flake8
         additional_dependencies: [flake8-docstrings]
@@ -58,7 +58,7 @@ repos:
             'chardet>=3.0.4,<7.0.0',
             fastavro>=1.0.0.post1,
             python-snappy>=0.7.1,
-            charset-normalizer>=1.3.6,
+            'charset-normalizer>=1.3.6,<7.0.0',
             psutil>=4.0.0,
             scipy>=1.4.1,
             requests>=2.28.1,
@@ -82,11 +82,9 @@ repos:
 
             # requirements-ml.txt
             scikit-learn>=0.23.2,
-            'keras>=2.4.3,<=3.4.0',
+            'keras>=3.11.0',
             rapidfuzz>=2.6.1,
-            "tensorflow>=2.6.4,<2.15.0; sys.platform != 'darwin'",
-            "tensorflow>=2.6.4,<2.15.0; sys_platform == 'darwin' and platform_machine != 'arm64'",
-            "tensorflow-macos>=2.6.4,<2.15.0; sys_platform == 'darwin' and platform_machine == 'arm64'",
+            "tensorflow>=2.16.0",
             tqdm>=4.0.0,
 
             # requirements-reports.txt
@@ -101,7 +99,7 @@ repos:
             pytest-xdist>=2.1.0,
             pytest-forked>=1.3.0,
             toolz>=0.10.0,
-            'memray>=1.7.0,<1.12.0',
+            'memray>=1.18.0',
           ]
   # Check-manifest: ensures required non-Python files are included in MANIFEST.in
   # https://github.com/mgedmin/check-manifest/blob/master/.pre-commit-hooks.yaml
diff --git a/dataprofiler/labelers/base_model.py b/dataprofiler/labelers/base_model.py
index 032c2ea3..08b453ec 100644
--- a/dataprofiler/labelers/base_model.py
+++ b/dataprofiler/labelers/base_model.py
@@ -78,7 +78,7 @@ def __eq__(self, other: object) -> bool:
         :rtype: bool
         """
         if (
-            type(self) != type(other)
+            type(self) is not type(other)
             or not isinstance(other, BaseModel)
             or self._parameters != other._parameters
             or self._label_mapping != other._label_mapping
diff --git a/dataprofiler/labelers/char_load_tf_model.py b/dataprofiler/labelers/char_load_tf_model.py
index a4a44e03..ecd04f6e 100644
--- a/dataprofiler/labelers/char_load_tf_model.py
+++ b/dataprofiler/labelers/char_load_tf_model.py
@@ -262,8 +262,7 @@ def _construct_model(self) -> None:
 
         # Compile the model w/ metrics
         softmax_output_layer_name = self._model.output_names[0]
-        losses = {softmax_output_layer_name: "categorical_crossentropy"}
-
+        losses = ["categorical_crossentropy", None, None]
         # use f1 score metric
         f1_score_training = labeler_utils.F1Score(
             num_classes=num_labels, average="micro"
@@ -316,7 +315,7 @@ def _reconstruct_model(self) -> None:
 
         # Compile the model
         softmax_output_layer_name = self._model.output_names[0]
-        losses = {softmax_output_layer_name: "categorical_crossentropy"}
+        losses = ["categorical_crossentropy", None, None]
 
         # use f1 score metric
         f1_score_training = labeler_utils.F1Score(
diff --git a/dataprofiler/labelers/character_level_cnn_model.py b/dataprofiler/labelers/character_level_cnn_model.py
index 2cbb7051..f732c8e0 100644
--- a/dataprofiler/labelers/character_level_cnn_model.py
+++ b/dataprofiler/labelers/character_level_cnn_model.py
@@ -450,6 +450,7 @@ def load_from_disk(cls, dirpath: str) -> CharacterLevelCnnModel:
         loaded_model._model_default_ind = loaded_model.label_mapping[
             loaded_model._parameters["default_label"]
         ]
+        loaded_model._compile_loss(loaded_model._model, loaded_model.num_labels)
         return loaded_model
 
     @staticmethod
@@ -475,6 +476,28 @@ def _argmax_threshold_layer(
         # matrix.
         return ThreshArgMaxLayer(threshold, num_labels, default_ind)
 
+    @staticmethod
+    def _compile_loss(model: tf.keras.Model, num_labels: int) -> None:
+        """Compiles the loss for the given model and number of labels."""
+        # Compile the model
+        softmax_output_layer_name = model.output_names[0]
+        # losses = {softmax_output_layer_name: "categorical_crossentropy"}
+        losses = ["categorical_crossentropy", None, None]
+
+        # use f1 score metric
+        f1_score_training = labeler_utils.F1Score(
+            num_classes=num_labels, average="micro"
+        )
+        metrics = {
+            softmax_output_layer_name: [
+                "categorical_crossentropy",
+                "acc",
+                f1_score_training,
+            ]
+        }
+
+        model.compile(loss=losses, optimizer="adam", metrics=metrics)
+
     def _construct_model(self) -> None:
         """
         Construct model for the data labeler.
@@ -570,24 +593,7 @@ def _construct_model(self) -> None:
             final_predicted_layer(argmax_layer, self._model.outputs[0]),
         ]
         self._model = tf.keras.Model(self._model.inputs, argmax_outputs)
-
-        # Compile the model
-        softmax_output_layer_name = self._model.output_names[0]
-        losses = {softmax_output_layer_name: "categorical_crossentropy"}
-
-        # use f1 score metric
-        f1_score_training = labeler_utils.F1Score(
-            num_classes=num_labels, average="micro"
-        )
-        metrics = {
-            softmax_output_layer_name: [
-                "categorical_crossentropy",
-                "acc",
-                f1_score_training,
-            ]
-        }
-
-        self._model.compile(loss=losses, optimizer="adam", metrics=metrics)
+        self._compile_loss(self._model, num_labels)
 
         self._epoch_id = 0
         self._model_num_labels = num_labels
@@ -632,24 +638,7 @@ def _reconstruct_model(self) -> None:
             final_predicted_layer(argmax_layer, final_softmax_layer),
         ]
         self._model = tf.keras.Model(self._model.inputs, argmax_outputs)
-
-        # Compile the model
-        softmax_output_layer_name = self._model.output_names[0]
-        losses = {softmax_output_layer_name: "categorical_crossentropy"}
-
-        # use f1 score metric
-        f1_score_training = labeler_utils.F1Score(
-            num_classes=num_labels, average="micro"
-        )
-        metrics = {
-            softmax_output_layer_name: [
-                "categorical_crossentropy",
-                "acc",
-                f1_score_training,
-            ]
-        }
-
-        self._model.compile(loss=losses, optimizer="adam", metrics=metrics)
+        self._compile_loss(self._model, num_labels)
         self._epoch_id = 0
         self._model_num_labels = num_labels
         self._model_default_ind = default_ind
@@ -699,14 +688,11 @@ def fit(
         f1_report: dict = {}
 
         self._model.reset_metrics()
-        softmax_output_layer_name = self._model.output_names[0]
 
         start_time = time.time()
         batch_id = 0
         for x_train, y_train in train_data:
-            model_results = self._model.train_on_batch(
-                x_train, {softmax_output_layer_name: y_train}
-            )
+            model_results = self._model.train_on_batch(x_train, y_train)
             sys.stdout.flush()
             if verbose:
                 sys.stdout.write(
diff --git a/dataprofiler/labelers/data_processing.py b/dataprofiler/labelers/data_processing.py
index ba17a3bd..2ee1cb81 100644
--- a/dataprofiler/labelers/data_processing.py
+++ b/dataprofiler/labelers/data_processing.py
@@ -73,7 +73,7 @@ def __eq__(self, other: object) -> bool:
         :rtype: bool
         """
         if (
-            type(self) != type(other)
+            type(self) is not type(other)
             or not isinstance(other, BaseDataProcessor)
             or self._parameters != other._parameters
         ):
@@ -1589,7 +1589,7 @@ def __eq__(self, other: object) -> bool:
         :rtype: bool
         """
         if (
-            type(self) != type(other)
+            type(self) is not type(other)
             or not isinstance(other, StructCharPostprocessor)
             or self._parameters["default_label"] != other._parameters["default_label"]
             or self._parameters["pad_label"] != other._parameters["pad_label"]
diff --git a/dataprofiler/tests/labelers/test_character_level_cnn_model.py b/dataprofiler/tests/labelers/test_character_level_cnn_model.py
index cbc35b13..79d1b3f7 100644
--- a/dataprofiler/tests/labelers/test_character_level_cnn_model.py
+++ b/dataprofiler/tests/labelers/test_character_level_cnn_model.py
@@ -430,7 +430,7 @@ def test_save(self, mock_open, *mocks):
         StringIO.close(mock_file)
 
     @mock.patch("tensorflow.keras.Model.save", return_value=None)
-    @mock.patch("tensorflow.keras.models.load_model", return_value=mock.Mock())
+    @mock.patch("tensorflow.keras.models.load_model", return_value=mock.MagicMock())
     @mock.patch("builtins.open", side_effect=mock_open)
     def test_load(self, *mocks):
         dir = os.path.join(_resource_labeler_dir, "unstructured_model/")
diff --git a/dataprofiler/tests/labelers/test_data_labelers.py b/dataprofiler/tests/labelers/test_data_labelers.py
index b0cd4c7e..e7ef0383 100644
--- a/dataprofiler/tests/labelers/test_data_labelers.py
+++ b/dataprofiler/tests/labelers/test_data_labelers.py
@@ -399,7 +399,7 @@ def test_has_public_functions(self, *args):
 
     @staticmethod
     def _setup_mock_load_model(mock_load_model):
-        mock_load_model.return_value = mock.Mock()
+        mock_load_model.return_value = mock.MagicMock()
 
     def test_load_labeler(self, mock_open, mock_load_model):
 
diff --git a/requirements-ml.txt b/requirements-ml.txt
index 4abb91c4..c403d5b1 100644
--- a/requirements-ml.txt
+++ b/requirements-ml.txt
@@ -2,5 +2,4 @@ scikit-learn>=0.23.2
 keras<=3.11.0
 rapidfuzz>=2.6.1
 tensorflow>=2.16.0
-tensorflow-metal; sys_platform == 'darwin' and platform_machine == 'arm64'
 tqdm>=4.0.0

From 2fe4dddedaca89ed376c262513af1e7892611291 Mon Sep 17 00:00:00 2001
From: Jeremy Goodsitt <jeremy.goodsitt@ipcopilot.ai>
Date: Wed, 6 May 2026 14:24:52 -0500
Subject: [PATCH 11/28] fix: remove unneeded global

---
 dataprofiler/plugins/decorators.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dataprofiler/plugins/decorators.py b/dataprofiler/plugins/decorators.py
index c781f430..f099c1aa 100644
--- a/dataprofiler/plugins/decorators.py
+++ b/dataprofiler/plugins/decorators.py
@@ -1,4 +1,5 @@
 """Contains function for generating plugins data."""
+
 from collections import defaultdict
 from typing import Any, DefaultDict, Dict
 
@@ -21,7 +22,6 @@ def __inner_factory_function(fn):
         :param fn: Plugin function
         :return: function
         """
-        global plugins_dict
         plugins_dict[typ][name] = fn
         return fn
 

From c41303e98dc92fc15e43d27917bf5608b904cd87 Mon Sep 17 00:00:00 2001
From: Jeremy Goodsitt <jeremy.goodsitt@ipcopilot.ai>
Date: Wed, 6 May 2026 14:29:50 -0500
Subject: [PATCH 12/28] fix: accidentally duplicated test on rebase

---
 dataprofiler/tests/profilers/test_float_column_profile.py | 5 -----
 dataprofiler/tests/profilers/test_int_column_profile.py   | 5 -----
 dataprofiler/tests/profilers/test_text_column_profile.py  | 5 -----
 3 files changed, 15 deletions(-)

diff --git a/dataprofiler/tests/profilers/test_float_column_profile.py b/dataprofiler/tests/profilers/test_float_column_profile.py
index 6061302c..d9ec122c 100644
--- a/dataprofiler/tests/profilers/test_float_column_profile.py
+++ b/dataprofiler/tests/profilers/test_float_column_profile.py
@@ -1777,11 +1777,6 @@ def test_diff(self):
             profile_diff.pop("median_absolute_deviation"),
             places=2,
         )
-        self.assertAlmostEqual(
-            expected_diff.get("t-test").get("welch").pop("p-value"),
-            profile_diff.get("t-test").get("welch").pop("p-value"),
-            places=10,
-        )
         self.assertDictEqual(expected_diff, profile_diff)
 
         # Assert type error is properly called
diff --git a/dataprofiler/tests/profilers/test_int_column_profile.py b/dataprofiler/tests/profilers/test_int_column_profile.py
index bb5b136a..960e5318 100644
--- a/dataprofiler/tests/profilers/test_int_column_profile.py
+++ b/dataprofiler/tests/profilers/test_int_column_profile.py
@@ -1097,11 +1097,6 @@ def test_diff(self):
             profile_diff.pop("median_absolute_deviation"),
             places=2,
         )
-        self.assertAlmostEqual(
-            expected_diff.get("t-test").get("welch").pop("p-value"),
-            profile_diff.get("t-test").get("welch").pop("p-value"),
-            places=10,
-        )
         self.assertDictEqual(expected_diff, profile_diff)
 
         # Assert type error is properly called
diff --git a/dataprofiler/tests/profilers/test_text_column_profile.py b/dataprofiler/tests/profilers/test_text_column_profile.py
index 092426bf..699e35cb 100644
--- a/dataprofiler/tests/profilers/test_text_column_profile.py
+++ b/dataprofiler/tests/profilers/test_text_column_profile.py
@@ -662,11 +662,6 @@ def test_diff(self):
             profile_diff.pop("median_absolute_deviation"),
             places=2,
         )
-        self.assertAlmostEqual(
-            expected_diff.get("t-test").get("welch").pop("p-value"),
-            profile_diff.get("t-test").get("welch").pop("p-value"),
-            places=10,
-        )
         self.assertDictEqual(expected_diff, profile_diff)
 
     @mock.patch("time.time", return_value=0.0)

From 06152683b4f14d2f851af0ff9a184321c59865cf Mon Sep 17 00:00:00 2001
From: Jeremy Goodsitt <jeremy.goodsitt@ipcopilot.ai>
Date: Wed, 6 May 2026 14:36:50 -0500
Subject: [PATCH 13/28] fix: rebase duplicates

---
 dataprofiler/labelers/utils.py                      | 7 -------
 dataprofiler/tests/labelers/test_data_processing.py | 3 ---
 2 files changed, 10 deletions(-)

diff --git a/dataprofiler/labelers/utils.py b/dataprofiler/labelers/utils.py
index 9f39d21b..99553869 100644
--- a/dataprofiler/labelers/utils.py
+++ b/dataprofiler/labelers/utils.py
@@ -13,13 +13,6 @@
     # Older Pythons
     from importlib.abc import Traversable
 
-try:
-    # Newer Pythons / newer typeshed
-    from importlib.resources.abc import Traversable
-except ModuleNotFoundError:
-    # Older Pythons
-    from importlib.abc import Traversable
-
 
 def warn_missing_module(labeler_function: str, module_name: str) -> None:
     """
diff --git a/dataprofiler/tests/labelers/test_data_processing.py b/dataprofiler/tests/labelers/test_data_processing.py
index 5ec15594..7624ccca 100644
--- a/dataprofiler/tests/labelers/test_data_processing.py
+++ b/dataprofiler/tests/labelers/test_data_processing.py
@@ -1,12 +1,9 @@
-pass
 import json
 import os
 import random
 import re
 import unittest
 from io import StringIO
-
-pass
 from unittest import mock
 
 import numpy as np

From f08af16c9c76d566dee45c4759b8a2cc38133aa3 Mon Sep 17 00:00:00 2001
From: Jeremy Goodsitt <jeremy.goodsitt@ipcopilot.ai>
Date: Wed, 6 May 2026 14:46:00 -0500
Subject: [PATCH 14/28] fix: require keras>=3.11.0 instead of keras<=3.11.0

---
 requirements-ml.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/requirements-ml.txt b/requirements-ml.txt
index c403d5b1..b3005a5c 100644
--- a/requirements-ml.txt
+++ b/requirements-ml.txt
@@ -1,5 +1,5 @@
 scikit-learn>=0.23.2
-keras<=3.11.0
+keras>=3.11.0
 rapidfuzz>=2.6.1
 tensorflow>=2.16.0
 tqdm>=4.0.0

From e5f404141570b339f3caa84079f289cc251bb593 Mon Sep 17 00:00:00 2001
From: Jeremy Goodsitt <jeremy.goodsitt@ipcopilot.ai>
Date: Wed, 6 May 2026 15:01:01 -0500
Subject: [PATCH 15/28] refactor: relax keras requirement to >3.4.0

---
 .pre-commit-config.yaml | 2 +-
 requirements-ml.txt     | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 902e256f..4dcd82ce 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -82,7 +82,7 @@ repos:
 
             # requirements-ml.txt
             scikit-learn>=0.23.2,
-            'keras>=3.11.0',
+            'keras>3.4.0',
             rapidfuzz>=2.6.1,
             "tensorflow>=2.16.0",
             tqdm>=4.0.0,
diff --git a/requirements-ml.txt b/requirements-ml.txt
index b3005a5c..0c02d6bc 100644
--- a/requirements-ml.txt
+++ b/requirements-ml.txt
@@ -1,5 +1,5 @@
 scikit-learn>=0.23.2
-keras>=3.11.0
+keras>3.4.0
 rapidfuzz>=2.6.1
 tensorflow>=2.16.0
 tqdm>=4.0.0

From 052d0581a8c739cf194c75e86786d6ab5f616ee2 Mon Sep 17 00:00:00 2001
From: Jeremy Goodsitt <jeremy.goodsitt@ipcopilot.ai>
Date: Mon, 11 May 2026 13:07:06 -0500
Subject: [PATCH 16/28] refactor: numpy2 and mypy

---
 .pre-commit-config.yaml                       |  4 ++--
 dataprofiler/data_readers/data_utils.py       |  2 +-
 dataprofiler/labelers/char_load_tf_model.py   |  6 +++---
 .../labelers/character_level_cnn_model.py     |  6 +++---
 .../labelers/classification_report_utils.py   | 12 ++++++-----
 dataprofiler/labelers/data_processing.py      | 16 +++++++-------
 dataprofiler/labelers/labeler_utils.py        | 15 +++++++------
 .../profilers/data_labeler_column_profile.py  |  2 +-
 dataprofiler/profilers/histogram_utils.py     | 21 ++++++++++++++-----
 .../profilers/numerical_column_stats.py       | 17 ++++++++-------
 dataprofiler/profilers/profiler_utils.py      |  2 +-
 requirements.txt                              |  2 +-
 12 files changed, 61 insertions(+), 44 deletions(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 4dcd82ce..7fd5ca82 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -50,7 +50,7 @@ repos:
             # requirements.txt
             h5py>=2.10.0,
             wheel>=0.33.1,
-            numpy<2.0.0,
+            numpy>=1.0.0,
             'pandas>=1.1.2,<3.0.0',
             python-dateutil>=2.7.5,
             pytz>=2020.1,
@@ -107,7 +107,7 @@ repos:
     rev: "0.48"
     hooks:
       - id: check-manifest
-        additional_dependencies: ['h5py', 'wheel', 'future', 'numpy<2.0.0', 'pandas',
+        additional_dependencies: ['h5py', 'wheel', 'future', 'numpy>=1.0.0', 'pandas',
         'python-dateutil', 'pytz', 'pyarrow', 'chardet', 'fastavro',
         'python-snappy', 'charset-normalizer', 'psutil', 'scipy', 'requests',
         'networkx','typing-extensions', 'HLL', 'datasketches', 'boto3']
diff --git a/dataprofiler/data_readers/data_utils.py b/dataprofiler/data_readers/data_utils.py
index 611d25dc..6e213810 100644
--- a/dataprofiler/data_readers/data_utils.py
+++ b/dataprofiler/data_readers/data_utils.py
@@ -334,7 +334,7 @@ def reservoir(file: TextIOWrapper, sample_nrows: int) -> list:
         except StopIteration:
             break
         # Append new, replace old with dummy, and keep track of order
-        remove_index = rng.integers(0, sample_nrows)
+        remove_index = int(rng.integers(0, sample_nrows))
         values[indices[remove_index]] = str(None)
         indices[remove_index] = len(values)
         values.append(newval)
diff --git a/dataprofiler/labelers/char_load_tf_model.py b/dataprofiler/labelers/char_load_tf_model.py
index ecd04f6e..be60f358 100644
--- a/dataprofiler/labelers/char_load_tf_model.py
+++ b/dataprofiler/labelers/char_load_tf_model.py
@@ -403,11 +403,11 @@ def fit(
             f1, f1_report = self._validate_training(val_data)  # type: ignore
             history["f1_report"] = f1_report
 
-            val_f1 = f1_report["weighted avg"]["f1-score"] if f1_report else np.NAN
+            val_f1 = f1_report["weighted avg"]["f1-score"] if f1_report else np.nan
             val_precision = (
-                f1_report["weighted avg"]["precision"] if f1_report else np.NAN
+                f1_report["weighted avg"]["precision"] if f1_report else np.nan
             )
-            val_recall = f1_report["weighted avg"]["recall"] if f1_report else np.NAN
+            val_recall = f1_report["weighted avg"]["recall"] if f1_report else np.nan
             epoch_time = time.time() - start_time
             logger.info(
                 "\rEPOCH %d (%ds), loss: %f - acc: %f - f1_score %f -- "
diff --git a/dataprofiler/labelers/character_level_cnn_model.py b/dataprofiler/labelers/character_level_cnn_model.py
index f732c8e0..3fe135ac 100644
--- a/dataprofiler/labelers/character_level_cnn_model.py
+++ b/dataprofiler/labelers/character_level_cnn_model.py
@@ -708,11 +708,11 @@ def fit(
             f1, f1_report = self._validate_training(val_data)  # type: ignore
             history["f1_report"] = f1_report
 
-            val_f1 = f1_report["weighted avg"]["f1-score"] if f1_report else np.NAN
+            val_f1 = f1_report["weighted avg"]["f1-score"] if f1_report else np.nan
             val_precision = (
-                f1_report["weighted avg"]["precision"] if f1_report else np.NAN
+                f1_report["weighted avg"]["precision"] if f1_report else np.nan
             )
-            val_recall = f1_report["weighted avg"]["recall"] if f1_report else np.NAN
+            val_recall = f1_report["weighted avg"]["recall"] if f1_report else np.nan
             epoch_time = time.time() - start_time
             logger.info(
                 "\rEPOCH %d (%ds), loss: %f - acc: %f - f1_score %f -- "
diff --git a/dataprofiler/labelers/classification_report_utils.py b/dataprofiler/labelers/classification_report_utils.py
index 28e742e3..3146e829 100644
--- a/dataprofiler/labelers/classification_report_utils.py
+++ b/dataprofiler/labelers/classification_report_utils.py
@@ -31,8 +31,8 @@ def convert_confusion_matrix_to_MCM(conf_matrix: list | np.ndarray) -> np.ndarra
     """
     if not isinstance(conf_matrix, np.ndarray):
         conf_matrix = np.array(conf_matrix)
-    num_labels = conf_matrix.shape[0]
-    num_samples = np.sum(conf_matrix)
+    num_labels = len(conf_matrix)
+    num_samples: int = int(np.sum(conf_matrix))
     MCM = np.zeros((num_labels, 2, 2), dtype=np.int64)
 
     # True Positives
@@ -205,6 +205,8 @@ def precision_recall_fscore_support(
     f_score = (1 + beta2) * precision * recall / denom
 
     # Average the results
+    weights: np.ndarray | None
+    support: np.ndarray | None = true_sum
     if average == "weighted":
         weights = true_sum
         if weights.sum() == 0:
@@ -219,9 +221,9 @@ def precision_recall_fscore_support(
         precision = np.average(precision, weights=weights)
         recall = np.average(recall, weights=weights)
         f_score = np.average(f_score, weights=weights)
-        true_sum = None  # return no support
+        support = None  # return no support
 
-    return precision, recall, f_score, true_sum
+    return precision, recall, f_score, support
 
 
 def classification_report(
@@ -300,7 +302,7 @@ def classification_report(
     """
     # ALTERATION: replaced the _check_targets with this if statement since
     # no y_true, y_pred
-    y_type = "multiclass" if conf_matrix.shape[0] > 2 else "binary"
+    y_type = "multiclass" if len(conf_matrix) > 2 else "binary"
 
     labels_given = True
     if labels is None:
diff --git a/dataprofiler/labelers/data_processing.py b/dataprofiler/labelers/data_processing.py
index 2ee1cb81..fe67c69a 100644
--- a/dataprofiler/labelers/data_processing.py
+++ b/dataprofiler/labelers/data_processing.py
@@ -692,7 +692,7 @@ def process(
         :return batch_data: A dict containing samples of size batch_size
         :rtype batch_data: dicts
         """
-        num_dim = sum([dim > 1 for dim in data.shape])
+        num_dim = sum(dim > 1 for dim in np.shape(data))
         if num_dim > 1:
             raise ValueError(
                 "Multidimensional data given to "
@@ -1213,8 +1213,8 @@ def match_sentence_lengths(
         :type inplace: bool
         :return: dict(pred=...) or dict(pred=..., conf=...)
         """
-        pred_buffer = []
-        conf_buffer = []
+        pred_buffer: np.ndarray = np.array([])
+        conf_buffer: np.ndarray = np.array([])
         result_ind = 0
         buffer_add_inds = np.cumsum(list(map(len, results["pred"]))).tolist()
         separator_len = len(flatten_separator)
@@ -1469,14 +1469,14 @@ def process(
                     "If `labels` are specified, `label_mapping` "
                     "must also be specified."
                 )
-            if data.shape != labels.shape:
+            if np.shape(data) != np.shape(labels):
                 raise ValueError(
                     f"Data and labels given to "
                     f"StructCharPreprocessor are of different "
-                    f"shapes, {data.shape} != {labels.shape}"
+                    f"shapes, {np.shape(data)} != {np.shape(labels)}"
                 )
 
-        num_dim = sum([dim > 1 for dim in data.shape])
+        num_dim = sum(dim > 1 for dim in np.shape(data))
         if num_dim > 1:
             warnings.warn(
                 "Data given to StructCharPreprocessor was "
@@ -1681,8 +1681,8 @@ def match_sentence_lengths(
         :type inplace: bool
         :return: dict(pred=...) or dict(pred=..., conf=...)
         """
-        pred_buffer = []
-        conf_buffer = []
+        pred_buffer: np.ndarray = np.array([])
+        conf_buffer: np.ndarray = np.array([])
         result_ind = 0
         buffer_add_inds = np.cumsum(list(map(len, results["pred"]))).tolist()
         separator_len = len(flatten_separator)
diff --git a/dataprofiler/labelers/labeler_utils.py b/dataprofiler/labelers/labeler_utils.py
index 3a24886f..efebaec4 100644
--- a/dataprofiler/labelers/labeler_utils.py
+++ b/dataprofiler/labelers/labeler_utils.py
@@ -78,8 +78,8 @@ class 1       1.00      0.67      0.80         3
 
 
 def evaluate_accuracy(
-    predicted_entities_in_index: list[list[int]],
-    true_entities_in_index: list[list[int]],
+    predicted_entities_in_index: list[list[int]] | np.ndarray,
+    true_entities_in_index: list[list[int]] | np.ndarray,
     num_labels: int,
     entity_rev_dict: dict[int, str],
     verbose: bool = True,
@@ -119,13 +119,16 @@ def evaluate_accuracy(
             if x[1] not in omitted_labels
         ]
 
-    max_len = len(predicted_entities_in_index[0])
-    true_labels_padded = np.zeros((len(true_entities_in_index), max_len))
-    for i, true_labels_row in enumerate(true_entities_in_index):
+    predicted_entities = [np.asarray(row) for row in predicted_entities_in_index]
+    true_entities = [np.asarray(row) for row in true_entities_in_index]
+
+    max_len = len(predicted_entities[0])
+    true_labels_padded = np.zeros((len(true_entities), max_len))
+    for i, true_labels_row in enumerate(true_entities):
         true_labels_padded[i][: len(true_labels_row)] = true_labels_row
 
     true_labels_flatten = np.hstack(true_labels_padded)  # type: ignore
-    predicted_labels_flatten = np.hstack(predicted_entities_in_index)
+    predicted_labels_flatten = np.hstack(predicted_entities)
 
     all_labels: list[str] = []
     if entity_rev_dict:
diff --git a/dataprofiler/profilers/data_labeler_column_profile.py b/dataprofiler/profilers/data_labeler_column_profile.py
index d9bfe1ee..81f9c0ce 100644
--- a/dataprofiler/profilers/data_labeler_column_profile.py
+++ b/dataprofiler/profilers/data_labeler_column_profile.py
@@ -427,7 +427,7 @@ def _update_predictions(
         start_index = 0
         if self.data_labeler.model.requires_zero_mapping:
             start_index = 1
-        for i in range(rank_predictions.shape[0]):
+        for i in range(len(rank_predictions)):
             sorted_rank = rank_predictions[i][-self._top_k_voting :]
             sorted_rank = sorted_rank[np.argsort(predictions["conf"][i][sorted_rank])]
             for rank_position, value in enumerate(sorted_rank):
diff --git a/dataprofiler/profilers/histogram_utils.py b/dataprofiler/profilers/histogram_utils.py
index df230c4c..d1b42297 100644
--- a/dataprofiler/profilers/histogram_utils.py
+++ b/dataprofiler/profilers/histogram_utils.py
@@ -11,11 +11,22 @@
 from typing import List, Optional, Tuple, Union
 
 import numpy as np
-from numpy.lib.histograms import (  # type: ignore[attr-defined]
-    _get_outer_edges,
-    _hist_bin_selectors,
-    _unsigned_subtract,
-)
+
+try:
+    # numpy v2+
+    from numpy.lib._histograms_impl import (  # type: ignore[attr-defined]
+        _get_outer_edges,
+        _hist_bin_selectors,
+        _unsigned_subtract,
+    )
+
+except ModuleNotFoundError:
+    # numpy v1+
+    from numpy.lib.histograms import (
+        _get_outer_edges,
+        _hist_bin_selectors,
+        _unsigned_subtract,
+    )
 
 
 def _get_maximum_from_profile(profile):
diff --git a/dataprofiler/profilers/numerical_column_stats.py b/dataprofiler/profilers/numerical_column_stats.py
index fa0666a6..a9e18d29 100644
--- a/dataprofiler/profilers/numerical_column_stats.py
+++ b/dataprofiler/profilers/numerical_column_stats.py
@@ -6,7 +6,7 @@
 import copy
 import itertools
 import warnings
-from typing import Any, Callable, Dict, List, TypeVar, cast
+from typing import Any, Callable, Dict, List, TypeAlias, TypeVar, cast
 
 import numpy as np
 import numpy.typing as npt
@@ -32,6 +32,7 @@ def __init__(self, function: Callable) -> None:
 
 
 NumericStatsMixinT = TypeVar("NumericStatsMixinT", bound="NumericStatsMixin")
+Numeric: TypeAlias = int | float | np.float64 | np.int64
 
 
 class NumericStatsMixin(BaseColumnProfiler[NumericStatsMixinT], metaclass=abc.ABCMeta):
@@ -56,10 +57,10 @@ def __init__(self, options: NumericalOptions = None) -> None:
                 "NumericalStatsMixin parameter 'options' must be "
                 "of type NumericalOptions."
             )
-        self.min: int | float | np.float64 | np.int64 | None = None
-        self.max: int | float | np.float64 | np.int64 | None = None
+        self.min: Numeric | None = None
+        self.max: Numeric | None = None
         self._top_k_modes: int = 5  # By default, return at max 5 modes
-        self.sum: int | float | np.float64 | np.int64 = np.float64(0)
+        self.sum: Numeric = np.float64(0)
         self._biased_variance: float | np.float64 = np.nan
         self._biased_skewness: float | np.float64 = np.nan
         self._biased_kurtosis: float | np.float64 = np.nan
@@ -298,14 +299,14 @@ def _add_helper(
             )
         if "min" in self.__calculations.keys():
             if other1.min is not None and other2.min is not None:
-                self.min = min(other1.min, other2.min)
+                self.min = min(other1.min, other2.min)  # type: ignore[type-var]
             elif other2.min is None:
                 self.min = other1.min
             else:
                 self.min = other2.min
         if "max" in self.__calculations.keys():
             if other1.max is not None and other2.max is not None:
-                self.max = max(other1.max, other2.max)
+                self.max = max(other1.max, other2.max)  # type: ignore[type-var]
             elif other2.max is None:
                 self.max = other1.max
             else:
@@ -1403,7 +1404,7 @@ def _assimilate_histogram(
         dest_hist_entity_count_per_bin: np.ndarray,
         dest_hist_bin_edges: np.ndarray,
         dest_hist_num_bin: int,
-    ) -> tuple[dict[str, np.ndarray[Any, Any]], float]:
+    ) -> tuple[dict[str, np.ndarray], float]:
         """
         Assimilates a histogram into another histogram using specifications.
 
@@ -1821,7 +1822,7 @@ def _get_variance(
         # Suppress any numpy warnings as we have a custom warning for invalid
         # or infinite data already
         with np.errstate(all="ignore"):
-            batch_biased_variance = np.var(df_series)  # Obtains biased variance
+            batch_biased_variance = cast(float | np.float64, np.var(df_series))
         subset_properties["biased_variance"] = batch_biased_variance
         sum_value = subset_properties["sum"]
         batch_count = subset_properties["match_count"]
diff --git a/dataprofiler/profilers/profiler_utils.py b/dataprofiler/profilers/profiler_utils.py
index e38e1b04..100131f7 100644
--- a/dataprofiler/profilers/profiler_utils.py
+++ b/dataprofiler/profilers/profiler_utils.py
@@ -602,7 +602,7 @@ def find_diff_of_matrices(
         mat1 = np.array(matrix1, dtype=np.float64)
         mat2 = np.array(matrix2, dtype=np.float64)
 
-        if mat1.shape == mat2.shape:
+        if np.shape(mat1) == np.shape(mat2):
             diff: np.ndarray = mat1 - mat2
             if ((diff == 0) | np.isnan(diff)).all():
                 return "unchanged"
diff --git a/requirements.txt b/requirements.txt
index 1036c433..355018fe 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,6 +1,6 @@
 h5py>=2.10.0
 wheel>=0.33.1
-numpy<2.0.0
+numpy>=1.0.0
 pandas>=1.1.2,<3.0.0
 python-dateutil>=2.7.5
 pytz>=2020.1

From 3965667b5e8099adeab8953c79d666aa6212281c Mon Sep 17 00:00:00 2001
From: Jeremy Goodsitt <jeremy.goodsitt@ipcopilot.ai>
Date: Mon, 11 May 2026 13:29:55 -0500
Subject: [PATCH 17/28] fix: mypy 3.10

---
 dataprofiler/labelers/data_processing.py         | 4 ++--
 dataprofiler/profilers/numerical_column_stats.py | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/dataprofiler/labelers/data_processing.py b/dataprofiler/labelers/data_processing.py
index fe67c69a..33d916cc 100644
--- a/dataprofiler/labelers/data_processing.py
+++ b/dataprofiler/labelers/data_processing.py
@@ -1216,7 +1216,7 @@ def match_sentence_lengths(
         pred_buffer: np.ndarray = np.array([])
         conf_buffer: np.ndarray = np.array([])
         result_ind = 0
-        buffer_add_inds = np.cumsum(list(map(len, results["pred"]))).tolist()
+        buffer_add_inds: list[int] = np.cumsum(list(map(len, results["pred"]))).tolist()
         separator_len = len(flatten_separator)
 
         if not inplace:
@@ -1684,7 +1684,7 @@ def match_sentence_lengths(
         pred_buffer: np.ndarray = np.array([])
         conf_buffer: np.ndarray = np.array([])
         result_ind = 0
-        buffer_add_inds = np.cumsum(list(map(len, results["pred"]))).tolist()
+        buffer_add_inds: list[int] = np.cumsum(list(map(len, results["pred"]))).tolist()
         separator_len = len(flatten_separator)
 
         if not inplace:
diff --git a/dataprofiler/profilers/numerical_column_stats.py b/dataprofiler/profilers/numerical_column_stats.py
index a9e18d29..1f799961 100644
--- a/dataprofiler/profilers/numerical_column_stats.py
+++ b/dataprofiler/profilers/numerical_column_stats.py
@@ -541,7 +541,7 @@ def mean(self) -> float | np.float64:
         """Return mean value."""
         if self.match_count == 0:
             return 0.0
-        return self.sum / self.match_count
+        return cast(float | np.float64, self.sum / self.match_count)
 
     @property
     def mode(self) -> list[float]:

From f1046a93da358e0e45f136c46f292e52f02853cb Mon Sep 17 00:00:00 2001
From: Jeremy Goodsitt <jeremy.goodsitt@ipcopilot.ai>
Date: Mon, 11 May 2026 13:50:37 -0500
Subject: [PATCH 18/28] fix: keras loss/target arguments and scalar float conversion of min/max

---
 dataprofiler/labelers/char_load_tf_model.py      |  9 +++------
 dataprofiler/profilers/numerical_column_stats.py | 14 ++++++++++++--
 2 files changed, 15 insertions(+), 8 deletions(-)

diff --git a/dataprofiler/labelers/char_load_tf_model.py b/dataprofiler/labelers/char_load_tf_model.py
index be60f358..eef03c78 100644
--- a/dataprofiler/labelers/char_load_tf_model.py
+++ b/dataprofiler/labelers/char_load_tf_model.py
@@ -262,7 +262,7 @@ def _construct_model(self) -> None:
 
         # Compile the model w/ metrics
         softmax_output_layer_name = self._model.output_names[0]
-        losses = ["categorical_crossentropy", None, None]
+        losses = ["categorical_crossentropy", None]
         # use f1 score metric
         f1_score_training = labeler_utils.F1Score(
             num_classes=num_labels, average="micro"
@@ -315,7 +315,7 @@ def _reconstruct_model(self) -> None:
 
         # Compile the model
         softmax_output_layer_name = self._model.output_names[0]
-        losses = ["categorical_crossentropy", None, None]
+        losses = ["categorical_crossentropy", None]
 
         # use f1 score metric
         f1_score_training = labeler_utils.F1Score(
@@ -380,14 +380,11 @@ def fit(
         f1_report: dict = {}
 
         self._model.reset_metrics()
-        softmax_output_layer_name = self._model.output_names[0]
 
         start_time = time.time()
         batch_id = 0
         for x_train, y_train in train_data:
-            model_results = self._model.train_on_batch(
-                x_train, {softmax_output_layer_name: y_train}
-            )
+            model_results = self._model.train_on_batch(x_train, y_train)
             sys.stdout.flush()
             if verbose:
                 sys.stdout.write(
diff --git a/dataprofiler/profilers/numerical_column_stats.py b/dataprofiler/profilers/numerical_column_stats.py
index 1f799961..40272704 100644
--- a/dataprofiler/profilers/numerical_column_stats.py
+++ b/dataprofiler/profilers/numerical_column_stats.py
@@ -35,6 +35,16 @@ def __init__(self, function: Callable) -> None:
 Numeric: TypeAlias = int | float | np.float64 | np.int64
 
 
+def _as_float_scalar(value: Numeric | np.ndarray) -> float:
+    """Convert a scalar-like numeric value to a Python float."""
+    array_value = np.asarray(value)
+    if array_value.ndim == 0:
+        return float(array_value)
+    if array_value.size == 1:
+        return float(array_value.item())
+    raise TypeError("Expected a scalar numeric value.")
+
+
 class NumericStatsMixin(BaseColumnProfiler[NumericStatsMixinT], metaclass=abc.ABCMeta):
     """
     Abstract numerical column profile subclass of BaseColumnProfiler.
@@ -199,7 +209,7 @@ def _add_helper_merge_profile_histograms(
         # calculate the min of the first edge and the max of the last edge
         # between two arrays
         global_min_of_histogram_edges = (
-            float(self.min)
+            _as_float_scalar(self.min)
             if self.min is not None
             else min(
                 other1._stored_histogram["histogram"]["bin_edges"][0],
@@ -208,7 +218,7 @@ def _add_helper_merge_profile_histograms(
         )
 
         global_max_of_histogram_edges = (
-            float(self.max)
+            _as_float_scalar(self.max)
             if self.max is not None
             else max(
                 other1._stored_histogram["histogram"]["bin_edges"][-1],

From 8f1b4e0a923f5ea09a8e618b4ad5b7cc234a1fdb Mon Sep 17 00:00:00 2001
From: Jeremy Goodsitt <jeremy.goodsitt@ipcopilot.ai>
Date: Mon, 11 May 2026 14:05:09 -0500
Subject: [PATCH 19/28] fix: convert t-test and variance inputs with _as_float_scalar

---
 dataprofiler/profilers/numerical_column_stats.py | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/dataprofiler/profilers/numerical_column_stats.py b/dataprofiler/profilers/numerical_column_stats.py
index 40272704..6e61f7a6 100644
--- a/dataprofiler/profilers/numerical_column_stats.py
+++ b/dataprofiler/profilers/numerical_column_stats.py
@@ -634,7 +634,12 @@ def _perform_t_test(
             )
             invalid_stats = True
         if np.isnan(
-            [float(mean1), float(mean2), float(var1), float(var2)]
+            [
+                _as_float_scalar(mean1),
+                _as_float_scalar(mean2),
+                _as_float_scalar(var1),
+                _as_float_scalar(var2),
+            ]
         ).any() or None in [
             mean1,
             mean2,
@@ -1836,7 +1841,9 @@ def _get_variance(
         subset_properties["biased_variance"] = batch_biased_variance
         sum_value = subset_properties["sum"]
         batch_count = subset_properties["match_count"]
-        batch_mean = 0.0 if not batch_count else float(sum_value) / batch_count
+        batch_mean = (
+            0.0 if not batch_count else _as_float_scalar(sum_value) / batch_count
+        )
         subset_properties["mean"] = batch_mean
         self._biased_variance = self._merge_biased_variance(
             self.match_count,

From fdc671edbcd96200eb8fe14564db01cf5c9c964f Mon Sep 17 00:00:00 2001
From: Jeremy Goodsitt <jeremy.goodsitt@ipcopilot.ai>
Date: Mon, 11 May 2026 14:25:29 -0500
Subject: [PATCH 20/28] refactor: move as_float_scalar to profiler_utils and use it in histogram_utils

---
 dataprofiler/profilers/histogram_utils.py     | 16 +++++++++---
 .../profilers/numerical_column_stats.py       | 26 +++++++------------
 dataprofiler/profilers/profiler_utils.py      | 12 +++++++++
 3 files changed, 33 insertions(+), 21 deletions(-)

diff --git a/dataprofiler/profilers/histogram_utils.py b/dataprofiler/profilers/histogram_utils.py
index d1b42297..30c3ecd4 100644
--- a/dataprofiler/profilers/histogram_utils.py
+++ b/dataprofiler/profilers/histogram_utils.py
@@ -12,6 +12,8 @@
 
 import numpy as np
 
+from . import profiler_utils
+
 try:
     # numpy v2+
     from numpy.lib._histograms_impl import (  # type: ignore[attr-defined]
@@ -90,7 +92,7 @@ def _ptp(maximum: float, minimum: float):
 
     :return: the difference between the maximum and minimum
     """
-    return np.subtract(maximum, minimum)
+    return profiler_utils.as_float_scalar(np.subtract(maximum, minimum))
 
 
 def _calc_doane_bin_width_from_profile(profile):
@@ -191,7 +193,9 @@ def _calc_fd_bin_width_from_profile(profile):
 
     :return: An estimate of the optimal bin width for the given data.
     """
-    iqr = np.subtract(profile._get_percentile([75]), profile._get_percentile([25]))
+    iqr = profiler_utils.as_float_scalar(
+        np.subtract(profile._get_percentile([75]), profile._get_percentile([25]))
+    )
     dataset_size = _get_dataset_size_from_profile(profile)
 
     return 2.0 * iqr * dataset_size ** (-1.0 / 3.0)
@@ -300,7 +304,9 @@ def _get_bin_edges(
             n_equal_bins = 1
         else:
             # Do not call selectors on empty arrays
-            width = _hist_bin_selectors[bin_name](a, (first_edge, last_edge))
+            width = profiler_utils.as_float_scalar(
+                _hist_bin_selectors[bin_name](a, (first_edge, last_edge))
+            )
             if width:
                 n_equal_bins = int(
                     np.ceil(_unsigned_subtract(last_edge, first_edge) / width)
@@ -351,7 +357,9 @@ def _calculate_bins_from_profile(profile, bin_method):
         n_equal_bins = 1
     else:
         # Do not call selectors on empty arrays
-        width = _hist_bin_width_selectors_for_profile[bin_method](profile)
+        width = profiler_utils.as_float_scalar(
+            _hist_bin_width_selectors_for_profile[bin_method](profile)
+        )
         if width and not np.isnan(width):
             n_equal_bins = int(np.ceil(_ptp(maximum, minimum) / width))
         else:
diff --git a/dataprofiler/profilers/numerical_column_stats.py b/dataprofiler/profilers/numerical_column_stats.py
index 6e61f7a6..b0197ed4 100644
--- a/dataprofiler/profilers/numerical_column_stats.py
+++ b/dataprofiler/profilers/numerical_column_stats.py
@@ -35,16 +35,6 @@ def __init__(self, function: Callable) -> None:
 Numeric: TypeAlias = int | float | np.float64 | np.int64
 
 
-def _as_float_scalar(value: Numeric | np.ndarray) -> float:
-    """Convert a scalar-like numeric value to a Python float."""
-    array_value = np.asarray(value)
-    if array_value.ndim == 0:
-        return float(array_value)
-    if array_value.size == 1:
-        return float(array_value.item())
-    raise TypeError("Expected a scalar numeric value.")
-
-
 class NumericStatsMixin(BaseColumnProfiler[NumericStatsMixinT], metaclass=abc.ABCMeta):
     """
     Abstract numerical column profile subclass of BaseColumnProfiler.
@@ -209,7 +199,7 @@ def _add_helper_merge_profile_histograms(
         # calculate the min of the first edge and the max of the last edge
         # between two arrays
         global_min_of_histogram_edges = (
-            _as_float_scalar(self.min)
+            profiler_utils.as_float_scalar(self.min)
             if self.min is not None
             else min(
                 other1._stored_histogram["histogram"]["bin_edges"][0],
@@ -218,7 +208,7 @@ def _add_helper_merge_profile_histograms(
         )
 
         global_max_of_histogram_edges = (
-            _as_float_scalar(self.max)
+            profiler_utils.as_float_scalar(self.max)
             if self.max is not None
             else max(
                 other1._stored_histogram["histogram"]["bin_edges"][-1],
@@ -635,10 +625,10 @@ def _perform_t_test(
             invalid_stats = True
         if np.isnan(
             [
-                _as_float_scalar(mean1),
-                _as_float_scalar(mean2),
-                _as_float_scalar(var1),
-                _as_float_scalar(var2),
+                profiler_utils.as_float_scalar(mean1),
+                profiler_utils.as_float_scalar(mean2),
+                profiler_utils.as_float_scalar(var1),
+                profiler_utils.as_float_scalar(var2),
             ]
         ).any() or None in [
             mean1,
@@ -1842,7 +1832,9 @@ def _get_variance(
         sum_value = subset_properties["sum"]
         batch_count = subset_properties["match_count"]
         batch_mean = (
-            0.0 if not batch_count else _as_float_scalar(sum_value) / batch_count
+            0.0
+            if not batch_count
+            else profiler_utils.as_float_scalar(sum_value) / batch_count
         )
         subset_properties["mean"] = batch_mean
         self._biased_variance = self._merge_biased_variance(
diff --git a/dataprofiler/profilers/profiler_utils.py b/dataprofiler/profilers/profiler_utils.py
index 100131f7..2cc9846b 100644
--- a/dataprofiler/profilers/profiler_utils.py
+++ b/dataprofiler/profilers/profiler_utils.py
@@ -39,6 +39,18 @@
 from .. import rng_utils
 
 
+def as_float_scalar(
+    value: int | float | np.integer | np.floating | np.ndarray | list[float],
+) -> float:
+    """Convert a scalar-like value to a Python float."""
+    array_value = np.asarray(value)
+    if array_value.ndim == 0:
+        return float(array_value)
+    if array_value.size == 1:
+        return float(array_value.item())
+    raise TypeError("Expected a scalar-like numeric value.")
+
+
 def recursive_dict_update(d: dict, update_d: dict) -> dict:
     """
     Recursive updates nested dictionaries. Updating d with update_d.

From 34c47fe2ddaa0a2a2df3e056d3aa466903242cd9 Mon Sep 17 00:00:00 2001
From: Jeremy Goodsitt <jeremy.goodsitt@ipcopilot.ai>
Date: Mon, 11 May 2026 15:04:04 -0500
Subject: [PATCH 21/28] fix: guard as_float_scalar against None in histogram and t-test paths

---
 dataprofiler/profilers/histogram_utils.py     | 12 +++++-----
 .../profilers/numerical_column_stats.py       | 24 +++++++++----------
 2 files changed, 17 insertions(+), 19 deletions(-)

diff --git a/dataprofiler/profilers/histogram_utils.py b/dataprofiler/profilers/histogram_utils.py
index 30c3ecd4..a5d2defa 100644
--- a/dataprofiler/profilers/histogram_utils.py
+++ b/dataprofiler/profilers/histogram_utils.py
@@ -304,9 +304,9 @@ def _get_bin_edges(
             n_equal_bins = 1
         else:
             # Do not call selectors on empty arrays
-            width = profiler_utils.as_float_scalar(
-                _hist_bin_selectors[bin_name](a, (first_edge, last_edge))
-            )
+            width = _hist_bin_selectors[bin_name](a, (first_edge, last_edge))
+            if width is not None:
+                width = profiler_utils.as_float_scalar(width)
             if width:
                 n_equal_bins = int(
                     np.ceil(_unsigned_subtract(last_edge, first_edge) / width)
@@ -357,9 +357,9 @@ def _calculate_bins_from_profile(profile, bin_method):
         n_equal_bins = 1
     else:
         # Do not call selectors on empty arrays
-        width = profiler_utils.as_float_scalar(
-            _hist_bin_width_selectors_for_profile[bin_method](profile)
-        )
+        width = _hist_bin_width_selectors_for_profile[bin_method](profile)
+        if width is not None:
+            width = profiler_utils.as_float_scalar(width)
         if width and not np.isnan(width):
             n_equal_bins = int(np.ceil(_ptp(maximum, minimum) / width))
         else:
diff --git a/dataprofiler/profilers/numerical_column_stats.py b/dataprofiler/profilers/numerical_column_stats.py
index b0197ed4..5135bcfd 100644
--- a/dataprofiler/profilers/numerical_column_stats.py
+++ b/dataprofiler/profilers/numerical_column_stats.py
@@ -623,19 +623,17 @@ def _perform_t_test(
                 RuntimeWarning,
             )
             invalid_stats = True
-        if np.isnan(
-            [
-                profiler_utils.as_float_scalar(mean1),
-                profiler_utils.as_float_scalar(mean2),
-                profiler_utils.as_float_scalar(var1),
-                profiler_utils.as_float_scalar(var2),
-            ]
-        ).any() or None in [
-            mean1,
-            mean2,
-            var1,
-            var2,
-        ]:
+        if (
+            None in [mean1, mean2, var1, var2]
+            or np.isnan(
+                [
+                    profiler_utils.as_float_scalar(mean1),
+                    profiler_utils.as_float_scalar(mean2),
+                    profiler_utils.as_float_scalar(var1),
+                    profiler_utils.as_float_scalar(var2),
+                ]
+            ).any()
+        ):
             warnings.warn(
                 "Null value(s) found in mean and/or variance values. "
                 "T-test cannot be performed.",

From 57066fbad78e4a048c73a1e577db02df5cf0ed0b Mon Sep 17 00:00:00 2001
From: Jeremy Goodsitt <jeremy.goodsitt@ipcopilot.ai>
Date: Fri, 22 May 2026 11:04:43 -0500
Subject: [PATCH 22/28] fix: remove comment

---
 dataprofiler/labelers/character_level_cnn_model.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/dataprofiler/labelers/character_level_cnn_model.py b/dataprofiler/labelers/character_level_cnn_model.py
index 3fe135ac..f1e925d5 100644
--- a/dataprofiler/labelers/character_level_cnn_model.py
+++ b/dataprofiler/labelers/character_level_cnn_model.py
@@ -481,7 +481,6 @@ def _compile_loss(model: tf.keras.Model, num_labels: int) -> None:
         """Compiles the loss for the given model and number of labels."""
         # Compile the model
         softmax_output_layer_name = model.output_names[0]
-        # losses = {softmax_output_layer_name: "categorical_crossentropy"}
         losses = ["categorical_crossentropy", None, None]
 
         # use f1 score metric

From 5de7abea54ae44547864040fb962ddf03076cc77 Mon Sep 17 00:00:00 2001
From: Jeremy Goodsitt <jeremy.goodsitt@ipcopilot.ai>
Date: Fri, 22 May 2026 11:29:50 -0500
Subject: [PATCH 23/28] refactor: use named dict model outputs so losses stay dict-mapped

---
 dataprofiler/labelers/char_load_tf_model.py   | 159 ++++++++++-------
 .../labelers/character_level_cnn_model.py     | 163 +++++++++++++-----
 dataprofiler/labelers/labeler_utils.py        |  58 +++++++
 .../test_character_level_cnn_model.py         |   9 +-
 4 files changed, 283 insertions(+), 106 deletions(-)

diff --git a/dataprofiler/labelers/char_load_tf_model.py b/dataprofiler/labelers/char_load_tf_model.py
index eef03c78..c8c40d70 100644
--- a/dataprofiler/labelers/char_load_tf_model.py
+++ b/dataprofiler/labelers/char_load_tf_model.py
@@ -17,6 +17,7 @@
 from .. import dp_logging
 from . import labeler_utils
 from .base_model import AutoSubRegistrationMeta, BaseModel, BaseTrainableModel
+from .character_level_cnn_model import ArgMaxLayer
 
 _file_dir = os.path.dirname(os.path.abspath(__file__))
 
@@ -29,6 +30,8 @@ class CharLoadTFModel(BaseTrainableModel, metaclass=AutoSubRegistrationMeta):
 
     # boolean if the label mapping requires the mapping for index 0 reserved
     requires_zero_mapping = False
+    _SOFTMAX_OUTPUT = "softmax_output"
+    _ARGMAX_OUTPUT = "argmax_output"
 
     def __init__(
         self, model_path: str, label_mapping: dict[str, int], parameters: dict = None
@@ -61,6 +64,35 @@ def __init__(
 
         BaseModel.__init__(self, label_mapping, parameters)
 
+    @classmethod
+    def _create_model_outputs(
+        cls, softmax_output: tf.Tensor, argmax_output: tf.Tensor | None = None
+    ) -> dict[str, tf.Tensor]:
+        """Return normalized dict outputs for training and inference."""
+        if argmax_output is None:
+            argmax_output = ArgMaxLayer(name=cls._ARGMAX_OUTPUT)(softmax_output)
+        return {
+            cls._SOFTMAX_OUTPUT: softmax_output,
+            cls._ARGMAX_OUTPUT: argmax_output,
+        }
+
+    @classmethod
+    def _normalize_model_outputs(cls, model: tf.keras.Model) -> tf.keras.Model:
+        """Convert list-style outputs to the normalized dict structure."""
+        return labeler_utils.normalize_tf_model_outputs(
+            model,
+            [cls._SOFTMAX_OUTPUT, cls._ARGMAX_OUTPUT],
+            lambda softmax_output, extra_outputs: cls._create_model_outputs(
+                softmax_output, extra_outputs[0]
+            ),
+        )
+
+    def _new_softmax_head_name(self) -> str:
+        """Return a layer name unique within the current model graph."""
+        return labeler_utils.get_tf_rebuild_layer_name(
+            self._model, f"{self._SOFTMAX_OUTPUT}_rebuild"
+        )
+
     def __eq__(self, other: object) -> bool:
         """
         Check if two models are equal with one another.
@@ -215,15 +247,34 @@ def load_from_disk(cls, dirpath: str) -> CharLoadTFModel:
             tf_model = tf.keras.models.load_model(dirpath)
 
         loaded_model = cls(dirpath, label_mapping, parameters)
-        loaded_model._model = tf_model
+        loaded_model._model = cls._normalize_model_outputs(tf_model)
 
         # load self
         loaded_model._model_num_labels = loaded_model.num_labels
         loaded_model._model_default_ind = loaded_model.label_mapping[
             loaded_model._parameters["default_label"]
         ]
+        loaded_model._compile_model(loaded_model.num_labels)
         return loaded_model
 
+    def _compile_model(self, num_labels: int) -> None:
+        """Compile the model with dict-based losses and metrics."""
+        losses = {
+            self._SOFTMAX_OUTPUT: "categorical_crossentropy",
+            self._ARGMAX_OUTPUT: None,
+        }
+        f1_score_training = labeler_utils.F1Score(
+            num_classes=num_labels, average="micro"
+        )
+        metrics = {
+            self._SOFTMAX_OUTPUT: [
+                "categorical_crossentropy",
+                "acc",
+                f1_score_training,
+            ]
+        }
+        self._model.compile(loss=losses, optimizer="adam", metrics=metrics)
+
     def _construct_model(self) -> None:
         """
         Model constructor for the data labeler.
@@ -237,45 +288,28 @@ def _construct_model(self) -> None:
         model_loc = self._parameters["model_path"]
 
         self._model: tf.keras.Model = tf.keras.models.load_model(model_loc)
-        self._model = tf.keras.Model(self._model.inputs, self._model.outputs)
-        softmax_output_layer_name = self._model.output_names[0]
+        self._model = self._normalize_model_outputs(self._model)
+        softmax_output = self._model.output[self._SOFTMAX_OUTPUT]
+        softmax_layer = softmax_output._keras_history[0]
+        softmax_output_layer_name = softmax_layer.name
         softmax_layer_ind = cast(
             int,
             labeler_utils.get_tf_layer_index_from_name(
                 self._model, softmax_output_layer_name
             ),
         )
-        softmax_layer = self._model.get_layer(softmax_output_layer_name)
 
-        new_softmax_layer = softmax_layer.output
+        new_softmax_layer = softmax_output
         if softmax_layer.weights[0].shape[-1] != num_labels:
             new_softmax_layer = tf.keras.layers.Dense(
-                num_labels, activation="softmax", name="softmax_output"
+                num_labels,
+                activation="softmax",
+                name=self._new_softmax_head_name(),
             )(self._model.layers[softmax_layer_ind - 1].output)
 
-        # Add argmax layer to get labels directly as an output
-        argmax_layer = tf.keras.ops.argmax(new_softmax_layer, axis=2)
-
-        argmax_outputs = [new_softmax_layer, argmax_layer]
-        self._model = tf.keras.Model(self._model.inputs, argmax_outputs)
-        self._model = tf.keras.Model(self._model.inputs, self._model.outputs)
-
-        # Compile the model w/ metrics
-        softmax_output_layer_name = self._model.output_names[0]
-        losses = ["categorical_crossentropy", None]
-        # use f1 score metric
-        f1_score_training = labeler_utils.F1Score(
-            num_classes=num_labels, average="micro"
-        )
-        metrics = {
-            softmax_output_layer_name: [
-                "categorical_crossentropy",
-                "acc",
-                f1_score_training,
-            ]
-        }
-
-        self._model.compile(loss=losses, optimizer="adam", metrics=metrics)
+        output_dict = self._create_model_outputs(new_softmax_layer)
+        self._model = tf.keras.Model(self._model.inputs, output_dict)
+        self._compile_model(num_labels)
 
         self._epoch_id = 0
         self._model_num_labels = num_labels
@@ -304,32 +338,14 @@ def _reconstruct_model(self) -> None:
         # Add the final Softmax layer to the previous spot
         # self._model.layers[-2] to skip: original softmax
         final_softmax_layer = tf.keras.layers.Dense(
-            num_labels, activation="softmax", name="softmax_output"
+            num_labels,
+            activation="softmax",
+            name=self._new_softmax_head_name(),
         )(self._model.layers[-2].output)
 
-        # Add argmax layer to get labels directly as an output
-        argmax_layer = tf.keras.ops.argmax(final_softmax_layer, axis=2)
-
-        argmax_outputs = [final_softmax_layer, argmax_layer]
-        self._model = tf.keras.Model(self._model.inputs, argmax_outputs)
-
-        # Compile the model
-        softmax_output_layer_name = self._model.output_names[0]
-        losses = ["categorical_crossentropy", None]
-
-        # use f1 score metric
-        f1_score_training = labeler_utils.F1Score(
-            num_classes=num_labels, average="micro"
-        )
-        metrics = {
-            softmax_output_layer_name: [
-                "categorical_crossentropy",
-                "acc",
-                f1_score_training,
-            ]
-        }
-
-        self._model.compile(loss=losses, optimizer="adam", metrics=metrics)
+        output_dict = self._create_model_outputs(final_softmax_layer)
+        self._model = tf.keras.Model(self._model.inputs, output_dict)
+        self._compile_model(num_labels)
 
         self._epoch_id = 0
         self._model_num_labels = num_labels
@@ -383,18 +399,37 @@ def fit(
 
         start_time = time.time()
         batch_id = 0
+        target_output = self._SOFTMAX_OUTPUT
         for x_train, y_train in train_data:
-            model_results = self._model.train_on_batch(x_train, y_train)
+            model_results = self._model.train_on_batch(
+                x_train,
+                {target_output: y_train},
+                return_dict=True,
+            )
+            acc_value = next(
+                (value for key, value in model_results.items() if key.endswith("acc")),
+                np.nan,
+            )
+            f1_value = next(
+                (value for key, value in model_results.items() if "f1" in key.lower()),
+                np.nan,
+            )
             sys.stdout.flush()
             if verbose:
                 sys.stdout.write(
                     "\rEPOCH %d, batch_id %d: loss: %f - acc: %f - "
-                    "f1_score %f" % (self._epoch_id, batch_id, *model_results[1:])
+                    "f1_score %f"
+                    % (
+                        self._epoch_id,
+                        batch_id,
+                        model_results.get("loss", np.nan),
+                        acc_value,
+                        f1_value,
+                    )
                 )
             batch_id += 1
 
-        for i, metric_label in enumerate(self._model.metrics_names):
-            history[metric_label] = model_results[i]
+        history.update(model_results)
 
         if val_data:
             f1, f1_report = self._validate_training(val_data)  # type: ignore
@@ -412,7 +447,9 @@ def fit(
                 % (
                     self._epoch_id,
                     epoch_time,
-                    *model_results[1:],
+                    model_results.get("loss", np.nan),
+                    acc_value,
+                    f1_value,
                     val_f1,
                     val_precision,
                     val_recall,
@@ -459,7 +496,7 @@ def _validate_training(
             y_val_pred.append(
                 self._model.predict(
                     x_val, batch_size=batch_size_test, verbose=verbose_keras
-                )[1]
+                )[self._ARGMAX_OUTPUT]
             )
             y_val_test.append(np.argmax(y_val, axis=-1))
             batch_id += 1
@@ -532,10 +569,10 @@ def predict(
             if show_confidences:
                 confidences[
                     allocation_index : allocation_index + num_samples_in_batch
-                ] = model_output[0].numpy()
+                ] = model_output[self._SOFTMAX_OUTPUT].numpy()
             predictions[
                 allocation_index : allocation_index + num_samples_in_batch
-            ] = model_output[1].numpy()
+            ] = model_output[self._ARGMAX_OUTPUT].numpy()
 
             allocation_index += num_samples_in_batch
 
diff --git a/dataprofiler/labelers/character_level_cnn_model.py b/dataprofiler/labelers/character_level_cnn_model.py
index f1e925d5..9ae1eefe 100644
--- a/dataprofiler/labelers/character_level_cnn_model.py
+++ b/dataprofiler/labelers/character_level_cnn_model.py
@@ -120,6 +120,7 @@ def get_config(self):
 
     def call(self, argmax_layer: tf.Tensor, confidence_layer: tf.Tensor) -> tf.Tensor:
         """Apply the threshold argmax to the input tensor."""
+        argmax_layer = tf.cast(argmax_layer, tf.int64)
         threshold_at_argmax = tf.gather(self.thresh_vec, argmax_layer)
 
         confidence_max_layer = tf.keras.backend.max(confidence_layer, axis=2)
@@ -146,6 +147,19 @@ def call(self, argmax_layer: tf.Tensor, confidence_layer: tf.Tensor) -> tf.Tenso
         return final_predicted_layer
 
 
+@tf.keras.utils.register_keras_serializable(package="CharacterLevelCnnModel")
+class ArgMaxLayer(tf.keras.layers.Layer):
+    """Keras layer returning integer argmax indices."""
+
+    def call(self, confidence_layer: tf.Tensor) -> tf.Tensor:
+        """Return argmax indices as int64."""
+        return tf.cast(tf.keras.ops.argmax(confidence_layer, axis=2), tf.int64)
+
+    def compute_output_shape(self, input_shape):
+        """Return the confidence tensor shape without the class dimension."""
+        return input_shape[:-1]
+
+
 @tf.keras.utils.register_keras_serializable(package="CharacterLevelCnnModel")
 class EncodingLayer(tf.keras.layers.Layer):
     """Encodes strings to integers."""
@@ -206,6 +220,9 @@ class CharacterLevelCnnModel(BaseTrainableModel, metaclass=AutoSubRegistrationMe
 
     # boolean if the label mapping requires the mapping for index 0 reserved
     requires_zero_mapping: bool = True
+    _SOFTMAX_OUTPUT = "softmax_output"
+    _ARGMAX_OUTPUT = "argmax_output"
+    _THRESH_OUTPUT = "thresh_argmax_output"
 
     def __init__(self, label_mapping: dict[str, int], parameters: dict = None) -> None:
         """
@@ -242,6 +259,54 @@ def __init__(self, label_mapping: dict[str, int], parameters: dict = None) -> No
 
         BaseModel.__init__(self, label_mapping, parameters)
 
+    @classmethod
+    def _create_model_outputs(
+        cls,
+        softmax_output: tf.Tensor,
+        default_ind: int,
+        num_labels: int,
+        argmax_output: tf.Tensor | None = None,
+        threshold_output: tf.Tensor | None = None,
+    ) -> dict[str, tf.Tensor]:
+        """Return normalized dict outputs for training and inference."""
+        if argmax_output is None:
+            argmax_output = ArgMaxLayer(name=cls._ARGMAX_OUTPUT)(softmax_output)
+        if threshold_output is None:
+            threshold_output = ThreshArgMaxLayer(
+                threshold_=0.0,
+                num_labels_=num_labels,
+                default_ind=default_ind,
+                name=cls._THRESH_OUTPUT,
+            )(argmax_output, softmax_output)
+        return {
+            cls._SOFTMAX_OUTPUT: softmax_output,
+            cls._ARGMAX_OUTPUT: argmax_output,
+            cls._THRESH_OUTPUT: threshold_output,
+        }
+
+    @classmethod
+    def _normalize_model_outputs(
+        cls, model: tf.keras.Model, default_ind: int, num_labels: int
+    ) -> tf.keras.Model:
+        """Convert list-style outputs to the normalized dict structure."""
+        return labeler_utils.normalize_tf_model_outputs(
+            model,
+            [cls._SOFTMAX_OUTPUT, cls._ARGMAX_OUTPUT, cls._THRESH_OUTPUT],
+            lambda softmax_output, extra_outputs: cls._create_model_outputs(
+                softmax_output,
+                default_ind,
+                num_labels,
+                extra_outputs[0],
+                extra_outputs[1],
+            ),
+        )
+
+    def _new_softmax_head_name(self) -> str:
+        """Return a layer name unique within the current model graph."""
+        return labeler_utils.get_tf_rebuild_layer_name(
+            self._model, f"{self._SOFTMAX_OUTPUT}_rebuild"
+        )
+
     def __eq__(self, other: object) -> bool:
         """
         Check if two models are equal with one another.
@@ -444,6 +509,12 @@ def load_from_disk(cls, dirpath: str) -> CharacterLevelCnnModel:
             loaded_model._construct_model()
             tf1_weights.append(loaded_model._model.weights[-1].value())
             loaded_model._model.set_weights(tf1_weights)
+        else:
+            loaded_model._model = cls._normalize_model_outputs(
+                tf_model,
+                loaded_model.label_mapping[loaded_model._parameters["default_label"]],
+                loaded_model.num_labels,
+            )
 
         # load self
         loaded_model._model_num_labels = loaded_model.num_labels
@@ -479,22 +550,21 @@ def _argmax_threshold_layer(
     @staticmethod
     def _compile_loss(model: tf.keras.Model, num_labels: int) -> None:
         """Compiles the loss for the given model and number of labels."""
-        # Compile the model
-        softmax_output_layer_name = model.output_names[0]
-        losses = ["categorical_crossentropy", None, None]
-
-        # use f1 score metric
+        losses = {
+            CharacterLevelCnnModel._SOFTMAX_OUTPUT: "categorical_crossentropy",
+            CharacterLevelCnnModel._ARGMAX_OUTPUT: None,
+            CharacterLevelCnnModel._THRESH_OUTPUT: None,
+        }
         f1_score_training = labeler_utils.F1Score(
             num_classes=num_labels, average="micro"
         )
         metrics = {
-            softmax_output_layer_name: [
+            CharacterLevelCnnModel._SOFTMAX_OUTPUT: [
                 "categorical_crossentropy",
                 "acc",
                 f1_score_training,
             ]
         }
-
         model.compile(loss=losses, optimizer="adam", metrics=metrics)
 
     def _construct_model(self) -> None:
@@ -577,21 +647,18 @@ def _construct_model(self) -> None:
                 self._model.add(tf.keras.layers.Dropout(self._parameters["dropout"]))
 
         # Add the final Softmax layer
-        self._model.add(tf.keras.layers.Dense(num_labels, activation="softmax"))
-
-        # Add argmax layer to get labels directly as an output
-        argmax_layer = tf.keras.ops.argmax(self._model.outputs[0], axis=2)
-
-        # Create confidence layers
-        final_predicted_layer = ThreshArgMaxLayer(
-            threshold_=0.0, num_labels_=num_labels, default_ind=default_ind
+        self._model.add(
+            tf.keras.layers.Dense(
+                num_labels,
+                activation="softmax",
+                name=self._SOFTMAX_OUTPUT,
+            )
         )
 
-        argmax_outputs = self._model.outputs + [
-            argmax_layer,
-            final_predicted_layer(argmax_layer, self._model.outputs[0]),
-        ]
-        self._model = tf.keras.Model(self._model.inputs, argmax_outputs)
+        output_dict = self._create_model_outputs(
+            self._model.outputs[0], default_ind, num_labels
+        )
+        self._model = tf.keras.Model(self._model.inputs, output_dict)
         self._compile_loss(self._model, num_labels)
 
         self._epoch_id = 0
@@ -621,22 +688,15 @@ def _reconstruct_model(self) -> None:
         # Add the final Softmax layer to the previous spot
         # self._model.layers[-3] to skip: thresh and original softmax
         final_softmax_layer = tf.keras.layers.Dense(
-            num_labels, activation="softmax", name="dense_2"
+            num_labels,
+            activation="softmax",
+            name=self._new_softmax_head_name(),
         )(self._model.layers[-3].output)
 
-        # Add argmax layer to get labels directly as an output
-        argmax_layer = tf.keras.ops.argmax(final_softmax_layer, axis=2)
-
-        # Create confidence layers
-        final_predicted_layer = ThreshArgMaxLayer(
-            threshold_=0.0, num_labels_=num_labels, default_ind=default_ind
+        output_dict = self._create_model_outputs(
+            final_softmax_layer, default_ind, num_labels
         )
-
-        argmax_outputs = [final_softmax_layer] + [
-            argmax_layer,
-            final_predicted_layer(argmax_layer, final_softmax_layer),
-        ]
-        self._model = tf.keras.Model(self._model.inputs, argmax_outputs)
+        self._model = tf.keras.Model(self._model.inputs, output_dict)
         self._compile_loss(self._model, num_labels)
         self._epoch_id = 0
         self._model_num_labels = num_labels
@@ -690,18 +750,37 @@ def fit(
 
         start_time = time.time()
         batch_id = 0
+        target_output = self._SOFTMAX_OUTPUT
         for x_train, y_train in train_data:
-            model_results = self._model.train_on_batch(x_train, y_train)
+            model_results = self._model.train_on_batch(
+                x_train,
+                {target_output: y_train},
+                return_dict=True,
+            )
+            acc_value = next(
+                (value for key, value in model_results.items() if key.endswith("acc")),
+                np.nan,
+            )
+            f1_value = next(
+                (value for key, value in model_results.items() if "f1" in key.lower()),
+                np.nan,
+            )
             sys.stdout.flush()
             if verbose:
                 sys.stdout.write(
                     "\rEPOCH %d, batch_id %d: loss: %f - acc: %f - "
-                    "f1_score %f" % (self._epoch_id, batch_id, *model_results[1:])
+                    "f1_score %f"
+                    % (
+                        self._epoch_id,
+                        batch_id,
+                        model_results.get("loss", np.nan),
+                        acc_value,
+                        f1_value,
+                    )
                 )
             batch_id += 1
 
-        for i, metric_label in enumerate(self._model.metrics_names):
-            history[metric_label] = model_results[i]
+        history.update(model_results)
 
         if val_data:
             f1, f1_report = self._validate_training(val_data)  # type: ignore
@@ -719,7 +798,9 @@ def fit(
                 % (
                     self._epoch_id,
                     epoch_time,
-                    *model_results[1:],
+                    model_results.get("loss", np.nan),
+                    acc_value,
+                    f1_value,
                     val_f1,
                     val_precision,
                     val_recall,
@@ -768,7 +849,7 @@ def _validate_training(
                     tf.convert_to_tensor(x_val),
                     batch_size=batch_size_test,
                     verbose=verbose_keras,
-                )[1]
+                )[self._ARGMAX_OUTPUT]
             )
             y_val_test.append(np.argmax(y_val, axis=-1))
             batch_id += 1
@@ -861,10 +942,10 @@ def predict(
             if show_confidences:
                 confidences[
                     allocation_index : allocation_index + num_samples_in_batch
-                ] = model_output[0].numpy()
+                ] = model_output[self._SOFTMAX_OUTPUT].numpy()
             predictions[
                 allocation_index : allocation_index + num_samples_in_batch
-            ] = model_output[1].numpy()
+            ] = model_output[self._ARGMAX_OUTPUT].numpy()
             sentence_lengths[
                 allocation_index : allocation_index + num_samples_in_batch
             ] = list(map(lambda x: len(x[0]), batch_data))
diff --git a/dataprofiler/labelers/labeler_utils.py b/dataprofiler/labelers/labeler_utils.py
index efebaec4..2a74b87e 100644
--- a/dataprofiler/labelers/labeler_utils.py
+++ b/dataprofiler/labelers/labeler_utils.py
@@ -231,6 +231,64 @@ def get_tf_layer_index_from_name(model: tf.keras.Model, layer_name: str) -> int
     return None
 
 
+def normalize_tf_model_outputs(
+    model: tf.keras.Model,
+    output_names: list[str],
+    create_outputs_fn: Callable[
+        [tf.Tensor, list[tf.Tensor | None]], dict[str, tf.Tensor]
+    ],
+) -> tf.keras.Model:
+    """Convert a model's outputs into a named dict-output structure when possible."""
+    try:
+        model_output = model.output
+    except (AttributeError, IndexError):
+        model_output = None
+
+    try:
+        model_outputs_list = list(model.outputs)
+    except (AttributeError, IndexError, TypeError):
+        model_outputs_list = []
+
+    if isinstance(model_output, dict):
+        if set(model_output) == set(output_names):
+            return model
+        softmax_output = model_output.get(
+            output_names[0], next(iter(model_output.values()))
+        )
+        extra_outputs = [model_output.get(name) for name in output_names[1:]]
+    else:
+        if not model_outputs_list:
+            try:
+                last_output = model.layers[-1].output
+            except (AttributeError, IndexError):
+                return model
+            if not hasattr(last_output, "_keras_history"):
+                return model
+            model_outputs_list = [last_output]
+        softmax_output = model_outputs_list[0]
+        extra_outputs = [
+            model_outputs_list[index] if len(model_outputs_list) > index else None
+            for index in range(1, len(output_names))
+        ]
+
+    try:
+        output_dict = create_outputs_fn(softmax_output, extra_outputs)
+        return tf.keras.Model(model.inputs, output_dict)
+    except (AttributeError, TypeError, ValueError):
+        return model
+
+
+def get_tf_rebuild_layer_name(model: tf.keras.Model, base_name: str) -> str:
+    """Return a layer name unique within the current model graph."""
+    existing_names = {layer.name for layer in getattr(model, "layers", [])}
+    if base_name not in existing_names:
+        return base_name
+    suffix = 1
+    while f"{base_name}_{suffix}" in existing_names:
+        suffix += 1
+    return f"{base_name}_{suffix}"
+
+
 def hide_tf_logger_warnings() -> None:
     """Filter out a set of warnings from the tf logger."""
 
diff --git a/dataprofiler/tests/labelers/test_character_level_cnn_model.py b/dataprofiler/tests/labelers/test_character_level_cnn_model.py
index 79d1b3f7..554b61c8 100644
--- a/dataprofiler/tests/labelers/test_character_level_cnn_model.py
+++ b/dataprofiler/tests/labelers/test_character_level_cnn_model.py
@@ -446,8 +446,8 @@ def test_model_construct(self):
         cnn_model.details()
 
         expected_layers = [
-            "input_1",
-            "lambda",
+            "input_layer",
+            "encoding_layer",
             "embedding",
             "conv1d",
             "dropout",
@@ -465,8 +465,9 @@ def test_model_construct(self):
             "dropout_4",
             "dense_1",
             "dropout_5",
-            "dense_2",
-            "thresh_arg_max_layer",
+            "softmax_output",
+            "argmax_output",
+            "thresh_argmax_output",
         ]
         model_layers = [layer.name for layer in cnn_model._model.layers]
         self.assertEqual(len(expected_layers), len(model_layers))

From e1afcf7efdcb84c9854c96c891a61cff57a4258f Mon Sep 17 00:00:00 2001
From: Jeremy Goodsitt <jeremy.goodsitt@ipcopilot.ai>
Date: Fri, 22 May 2026 12:04:15 -0500
Subject: [PATCH 24/28] fix: int pre-commit

---
 dataprofiler/profilers/int_column_profile.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dataprofiler/profilers/int_column_profile.py b/dataprofiler/profilers/int_column_profile.py
index 014465c7..4b8ab728 100644
--- a/dataprofiler/profilers/int_column_profile.py
+++ b/dataprofiler/profilers/int_column_profile.py
@@ -163,7 +163,7 @@ def update(self, df_series: pd.Series) -> IntColumn:
         df_series = df_series.reset_index(drop=True)
         is_each_row_int = self._is_each_row_int(df_series)
         sample_size = len(is_each_row_int)
-        match_int_count = np.sum(is_each_row_int)
+        match_int_count: int = int(np.sum(is_each_row_int))
         profile = dict(match_count=match_int_count, sample_size=sample_size)
 
         BaseColumnProfiler._perform_property_calcs(

From 0b00aed1cd3f4d30d9e4aa4910b27a4a558cf622 Mon Sep 17 00:00:00 2001
From: Jeremy Goodsitt <jeremy.goodsitt@ipcopilot.ai>
Date: Fri, 22 May 2026 12:17:07 -0500
Subject: [PATCH 25/28] fix: train labeling

---
 dataprofiler/labelers/char_load_tf_model.py        | 7 +++++--
 dataprofiler/labelers/character_level_cnn_model.py | 8 ++++++--
 2 files changed, 11 insertions(+), 4 deletions(-)

diff --git a/dataprofiler/labelers/char_load_tf_model.py b/dataprofiler/labelers/char_load_tf_model.py
index c8c40d70..eb7c1b53 100644
--- a/dataprofiler/labelers/char_load_tf_model.py
+++ b/dataprofiler/labelers/char_load_tf_model.py
@@ -399,11 +399,14 @@ def fit(
 
         start_time = time.time()
         batch_id = 0
-        target_output = self._SOFTMAX_OUTPUT
         for x_train, y_train in train_data:
+            y_train_dict = {
+                self._SOFTMAX_OUTPUT: y_train,
+                self._ARGMAX_OUTPUT: None,
+            }
             model_results = self._model.train_on_batch(
                 x_train,
-                {target_output: y_train},
+                y_train_dict,
                 return_dict=True,
             )
             acc_value = next(
diff --git a/dataprofiler/labelers/character_level_cnn_model.py b/dataprofiler/labelers/character_level_cnn_model.py
index 9ae1eefe..20c4777c 100644
--- a/dataprofiler/labelers/character_level_cnn_model.py
+++ b/dataprofiler/labelers/character_level_cnn_model.py
@@ -750,11 +750,15 @@ def fit(
 
         start_time = time.time()
         batch_id = 0
-        target_output = self._SOFTMAX_OUTPUT
         for x_train, y_train in train_data:
+            y_train_dict = {
+                self._SOFTMAX_OUTPUT: y_train,
+                self._ARGMAX_OUTPUT: None,
+                self._THRESH_OUTPUT: None,
+            }
             model_results = self._model.train_on_batch(
                 x_train,
-                {target_output: y_train},
+                y_train_dict,
                 return_dict=True,
             )
             acc_value = next(

From 8edd1dc72746fac275e948bbd0733f9e2b5eaf63 Mon Sep 17 00:00:00 2001
From: Jeremy Goodsitt <jeremy.goodsitt@ipcopilot.ai>
Date: Fri, 22 May 2026 12:48:25 -0500
Subject: [PATCH 26/28] refactor notes, reqs, and change log

---
 .pre-commit-config.yaml                   | 36 +++++++++++++----------
 CHANGELOG.md                              | 10 +++++++
 MANIFEST.in                               |  1 +
 dataprofiler/profilers/histogram_utils.py |  5 ++++
 requirements-ml.txt                       |  2 +-
 requirements.txt                          |  2 +-
 6 files changed, 38 insertions(+), 18 deletions(-)
 create mode 100644 CHANGELOG.md

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 7fd5ca82..cd3dc8be 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -2,7 +2,7 @@ repos:
   # Black: format Python code
   # https://github.com/psf/black/blob/master/.pre-commit-hooks.yaml
   - repo: https://github.com/psf/black
-    rev: 22.3.0
+    rev: 24.3.0
     hooks:
       - id: black
         exclude: (versioneer.py|dataprofiler/_version.py|_docs/)
@@ -50,29 +50,31 @@ repos:
             # requirements.txt
             h5py>=2.10.0,
             wheel>=0.33.1,
-            numpy>=1.0.0,
+            'numpy>=1.22.0,<3.0.0',
             'pandas>=1.1.2,<3.0.0',
             python-dateutil>=2.7.5,
             pytz>=2020.1,
             pyarrow>=1.0.1,
             'chardet>=3.0.4,<7.0.0',
-            fastavro>=1.0.0.post1,
+            fastavro>=1.1.0,
             python-snappy>=0.7.1,
-            'charset-normalizer>=1.3.6,<7.0.0',
+            charset-normalizer>=1.3.6,
             psutil>=4.0.0,
-            scipy>=1.4.1,
-            requests>=2.28.1,
+            scipy>=1.10.0,
+            requests>=2.32.4,
             networkx>=2.5.1,
             typing-extensions>=3.10.0.2,
             HLL>=2.0.3,
             datasketches>=4.1.0,
-            boto3>=1.28.61,
+            packaging>=23.0,
+            boto3>=1.37.15,
+            urllib3>=2.5.0,
 
             # requirements-dev.txt
-            check-manifest>=0.48,
-            black==22.3.0,
+            check-manifest>=0.50,
+            black>=24.3.0,
             isort==5.12.0,
-            pre-commit==2.19.0,
+            pre-commit==4.3.0,
             tox==3.25.1,
             types-setuptools==67.7.0.1,
             types-python-dateutil==2.8.19.12,
@@ -82,7 +84,7 @@ repos:
 
             # requirements-ml.txt
             scikit-learn>=0.23.2,
-            'keras>3.4.0',
+            'keras>3.4.0,<4.0.0',
             rapidfuzz>=2.6.1,
             "tensorflow>=2.16.0",
             tqdm>=4.0.0,
@@ -104,13 +106,15 @@ repos:
   # Check-manifest: ensures required non-Python files are included in MANIFEST.in
   # https://github.com/mgedmin/check-manifest/blob/master/.pre-commit-hooks.yaml
   - repo: https://github.com/mgedmin/check-manifest
-    rev: "0.48"
+    rev: "0.50"
     hooks:
       - id: check-manifest
-        additional_dependencies: ['h5py', 'wheel', 'future', 'numpy>=1.0.0', 'pandas',
-        'python-dateutil', 'pytz', 'pyarrow', 'chardet', 'fastavro',
-        'python-snappy', 'charset-normalizer', 'psutil', 'scipy', 'requests',
-        'networkx','typing-extensions', 'HLL', 'datasketches', 'boto3']
+        additional_dependencies: ['h5py', 'wheel', 'future', 'numpy>=1.22.0,<3.0.0',
+        'pandas', 'python-dateutil', 'pytz', 'pyarrow', 'chardet',
+        'fastavro>=1.1.0', 'python-snappy', 'charset-normalizer', 'psutil',
+        'scipy>=1.10.0', 'requests>=2.32.4', 'networkx', 'typing-extensions',
+        'HLL', 'datasketches', 'packaging>=23.0', 'boto3>=1.37.15',
+        'urllib3>=2.5.0']
   # Pyupgrade - standardize and modernize Python syntax for newer versions of the language
   - repo: https://github.com/asottile/pyupgrade
     rev: v3.3.0
diff --git a/CHANGELOG.md b/CHANGELOG.md
new file mode 100644
index 00000000..c7abf30e
--- /dev/null
+++ b/CHANGELOG.md
@@ -0,0 +1,10 @@
+# Changelog
+
+## Unreleased
+
+- Added compatibility support for NumPy 2.0 while constraining `numpy` to
+  `>=1.22.0,<3.0.0` to avoid future breakage from NumPy 3.
+- Added compatibility support for Keras versions newer than 3.4.0 while
+  constraining `keras` to `>3.4.0,<4.0.0` to avoid future breakage from Keras 4.
+- Updated the pre-commit configuration to align hook versions and hook
+  dependencies with the current project requirements.
diff --git a/MANIFEST.in b/MANIFEST.in
index bafea077..f5ba8819 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -2,6 +2,7 @@ global-exclude .DS_Store
 global-exclude */__pycache__/*
 
 include *.txt
+include CHANGELOG.md
 include CODEOWNERS
 recursive-include dataprofiler *.avro
 recursive-include dataprofiler *.csv
diff --git a/dataprofiler/profilers/histogram_utils.py b/dataprofiler/profilers/histogram_utils.py
index a5d2defa..b7aed9ad 100644
--- a/dataprofiler/profilers/histogram_utils.py
+++ b/dataprofiler/profilers/histogram_utils.py
@@ -7,6 +7,7 @@
 A copy of the license for numpy is available here:
 https://github.com/numpy/numpy/blob/main/LICENSE.txt
 """
+
 import operator
 from typing import List, Optional, Tuple, Union
 
@@ -16,6 +17,10 @@
 
 try:
     # numpy v2+
+    # NOTE: `numpy.lib._histograms_impl` is a private module, so this import may
+    # need to be revisited if NumPy exposes a public replacement for these
+    # helpers. NumPy's 2.4.0 release notes discuss public APIs replacing modules
+    # that moved private in 2.x: https://numpy.org/doc/stable/release/2.4.0-notes.html
     from numpy.lib._histograms_impl import (  # type: ignore[attr-defined]
         _get_outer_edges,
         _hist_bin_selectors,
diff --git a/requirements-ml.txt b/requirements-ml.txt
index 0c02d6bc..c8b373f2 100644
--- a/requirements-ml.txt
+++ b/requirements-ml.txt
@@ -1,5 +1,5 @@
 scikit-learn>=0.23.2
-keras>3.4.0
+keras>3.4.0,<4.0.0
 rapidfuzz>=2.6.1
 tensorflow>=2.16.0
 tqdm>=4.0.0
diff --git a/requirements.txt b/requirements.txt
index 355018fe..3db3daad 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,6 +1,6 @@
 h5py>=2.10.0
 wheel>=0.33.1
-numpy>=1.0.0
+numpy>=1.22.0,<3.0.0
 pandas>=1.1.2,<3.0.0
 python-dateutil>=2.7.5
 pytz>=2020.1

From 03b4fa1366109636cedbafe7973f21e90bfd643b Mon Sep 17 00:00:00 2001
From: Jeremy Goodsitt <jeremy.goodsitt@ipcopilot.ai>
Date: Fri, 22 May 2026 12:56:00 -0500
Subject: [PATCH 27/28] fix: pre-commit

---
 dataprofiler/__init__.py                      |  1 +
 dataprofiler/_typing.py                       |  1 +
 dataprofiler/data_readers/avro_data.py        |  1 +
 dataprofiler/data_readers/base_data.py        |  1 +
 dataprofiler/data_readers/csv_data.py         |  1 +
 dataprofiler/data_readers/data_utils.py       |  1 +
 .../data_readers/filepath_or_buffer.py        |  1 +
 dataprofiler/data_readers/graph_data.py       |  1 +
 dataprofiler/data_readers/json_data.py        |  7 +--
 dataprofiler/data_readers/parquet_data.py     |  1 +
 .../data_readers/structured_mixins.py         |  1 +
 dataprofiler/labelers/__init__.py             |  1 +
 dataprofiler/labelers/base_model.py           |  1 +
 dataprofiler/labelers/char_load_tf_model.py   |  7 +--
 .../labelers/character_level_cnn_model.py     |  7 +--
 .../labelers/classification_report_utils.py   |  1 +
 dataprofiler/labelers/column_name_model.py    |  1 +
 dataprofiler/labelers/data_processing.py      | 16 ++++---
 dataprofiler/labelers/labeler_utils.py        |  1 +
 dataprofiler/labelers/regex_model.py          |  1 +
 dataprofiler/profilers/__init__.py            |  1 +
 .../profilers/categorical_column_profile.py   | 45 ++++++++++---------
 .../profilers/column_profile_compilers.py     |  1 +
 .../profilers/data_labeler_column_profile.py  |  1 +
 .../profilers/datetime_column_profile.py      |  5 ++-
 .../profilers/float_column_profile.py         |  1 +
 dataprofiler/profilers/graph_profiler.py      |  1 +
 dataprofiler/profilers/helpers/__init__.py    |  1 +
 .../profilers/helpers/report_helpers.py       |  1 +
 dataprofiler/profilers/int_column_profile.py  |  1 +
 dataprofiler/profilers/json_decoder.py        | 13 +++---
 .../profilers/numerical_column_stats.py       | 11 ++---
 .../profilers/order_column_profile.py         |  1 +
 dataprofiler/profilers/profile_builder.py     | 38 +++++++++-------
 dataprofiler/profilers/profiler_utils.py      |  7 ++-
 dataprofiler/profilers/text_column_profile.py |  1 +
 .../profilers/unstructured_labeler_profile.py |  1 +
 .../profilers/unstructured_text_profile.py    |  1 +
 dataprofiler/reports/graphs.py                |  1 +
 dataprofiler/reports/utils.py                 |  1 +
 dataprofiler/rng_utils.py                     |  1 +
 dataprofiler/settings.py                      |  1 +
 dataprofiler/tests/plugins/test_plugins.py    | 10 ++---
 .../profilers/test_float_column_profile.py    | 18 +++++++-
 .../tests/profilers/test_profiler_utils.py    |  1 -
 .../structured_space_time_analysis.py         |  1 +
 dataprofiler/tests/test_rng_utils.py          |  1 +
 47 files changed, 138 insertions(+), 81 deletions(-)

diff --git a/dataprofiler/__init__.py b/dataprofiler/__init__.py
index ed54be24..6b29c5b7 100644
--- a/dataprofiler/__init__.py
+++ b/dataprofiler/__init__.py
@@ -1,4 +1,5 @@
 """Package for dataprofiler."""
+
 from . import settings
 from ._version import get_versions
 from .data_readers.data import Data
diff --git a/dataprofiler/_typing.py b/dataprofiler/_typing.py
index fa362d1b..b7a62388 100644
--- a/dataprofiler/_typing.py
+++ b/dataprofiler/_typing.py
@@ -1,4 +1,5 @@
 """Contains typing aliases."""
+
 from typing import Dict, List, NewType, Union
 
 import numpy as np
diff --git a/dataprofiler/data_readers/avro_data.py b/dataprofiler/data_readers/avro_data.py
index 720b9d1f..7f15bdec 100644
--- a/dataprofiler/data_readers/avro_data.py
+++ b/dataprofiler/data_readers/avro_data.py
@@ -1,4 +1,5 @@
 """Contains class for saving and loading spreadsheet data."""
+
 from io import BytesIO, StringIO
 from typing import Any, Dict, List, Optional, Union
 
diff --git a/dataprofiler/data_readers/base_data.py b/dataprofiler/data_readers/base_data.py
index 27d8d5de..e6e85d3d 100644
--- a/dataprofiler/data_readers/base_data.py
+++ b/dataprofiler/data_readers/base_data.py
@@ -1,4 +1,5 @@
 """Contains abstract class for data loading and saving."""
+
 import locale
 import sys
 from collections import OrderedDict
diff --git a/dataprofiler/data_readers/csv_data.py b/dataprofiler/data_readers/csv_data.py
index 7e13d407..cb1a2e2d 100644
--- a/dataprofiler/data_readers/csv_data.py
+++ b/dataprofiler/data_readers/csv_data.py
@@ -1,4 +1,5 @@
 """Contains class that saves and loads spreadsheet data."""
+
 import csv
 import random
 import re
diff --git a/dataprofiler/data_readers/data_utils.py b/dataprofiler/data_readers/data_utils.py
index 6e213810..833d650e 100644
--- a/dataprofiler/data_readers/data_utils.py
+++ b/dataprofiler/data_readers/data_utils.py
@@ -1,4 +1,5 @@
 """Contains functions for data readers."""
+
 import json
 import logging
 import os
diff --git a/dataprofiler/data_readers/filepath_or_buffer.py b/dataprofiler/data_readers/filepath_or_buffer.py
index 56c21e28..201f690e 100644
--- a/dataprofiler/data_readers/filepath_or_buffer.py
+++ b/dataprofiler/data_readers/filepath_or_buffer.py
@@ -1,4 +1,5 @@
 """Contains functions and classes for handling filepaths and buffers."""
+
 from io import BytesIO, StringIO, TextIOWrapper
 from typing import IO, Any, Optional, Type, Union, cast
 
diff --git a/dataprofiler/data_readers/graph_data.py b/dataprofiler/data_readers/graph_data.py
index 337408a6..3cc83b04 100644
--- a/dataprofiler/data_readers/graph_data.py
+++ b/dataprofiler/data_readers/graph_data.py
@@ -1,4 +1,5 @@
 """Contains class for identifying, reading, and loading graph data."""
+
 import csv
 from typing import Dict, List, Optional, Union, cast
 
diff --git a/dataprofiler/data_readers/json_data.py b/dataprofiler/data_readers/json_data.py
index 93e5d7e6..cc71c57e 100644
--- a/dataprofiler/data_readers/json_data.py
+++ b/dataprofiler/data_readers/json_data.py
@@ -1,4 +1,5 @@
 """Contains class to save and load json data."""
+
 import json
 import re
 import warnings
@@ -71,9 +72,9 @@ def __init__(
 
         self._data_formats["records"] = self._get_data_as_records
         self._data_formats["json"] = self._get_data_as_json
-        self._data_formats[
-            "flattened_dataframe"
-        ] = self._get_data_as_flattened_dataframe
+        self._data_formats["flattened_dataframe"] = (
+            self._get_data_as_flattened_dataframe
+        )
         self._selected_data_format: str = options.get(
             "data_format", "flattened_dataframe"
         )
diff --git a/dataprofiler/data_readers/parquet_data.py b/dataprofiler/data_readers/parquet_data.py
index 4fa567b8..b679431b 100644
--- a/dataprofiler/data_readers/parquet_data.py
+++ b/dataprofiler/data_readers/parquet_data.py
@@ -1,4 +1,5 @@
 """Contains class to save and load parquet data."""
+
 from io import BytesIO, StringIO
 from typing import Any, Dict, List, Optional, Union
 
diff --git a/dataprofiler/data_readers/structured_mixins.py b/dataprofiler/data_readers/structured_mixins.py
index 3587291f..6b1da157 100644
--- a/dataprofiler/data_readers/structured_mixins.py
+++ b/dataprofiler/data_readers/structured_mixins.py
@@ -1,4 +1,5 @@
 """Contains mixin data class for loading datasets of tye SpreadSheet."""
+
 from logging import Logger
 from typing import Any, Dict, List, Optional, Union, cast
 
diff --git a/dataprofiler/labelers/__init__.py b/dataprofiler/labelers/__init__.py
index 1b2302fc..a355ead2 100644
--- a/dataprofiler/labelers/__init__.py
+++ b/dataprofiler/labelers/__init__.py
@@ -26,6 +26,7 @@
         2. structured_model
         3. regex_model
 """
+
 # import data labelers
 # import models
 from .base_data_labeler import BaseDataLabeler, TrainableDataLabeler
diff --git a/dataprofiler/labelers/base_model.py b/dataprofiler/labelers/base_model.py
index 08b453ec..c5d7aef5 100644
--- a/dataprofiler/labelers/base_model.py
+++ b/dataprofiler/labelers/base_model.py
@@ -1,4 +1,5 @@
 """Contains abstract classes for labeling data."""
+
 from __future__ import annotations
 
 import abc
diff --git a/dataprofiler/labelers/char_load_tf_model.py b/dataprofiler/labelers/char_load_tf_model.py
index eb7c1b53..8adbfa0b 100644
--- a/dataprofiler/labelers/char_load_tf_model.py
+++ b/dataprofiler/labelers/char_load_tf_model.py
@@ -1,4 +1,5 @@
 """Contains class for training data labeler model."""
+
 from __future__ import annotations
 
 import copy
@@ -573,9 +574,9 @@ def predict(
                 confidences[
                     allocation_index : allocation_index + num_samples_in_batch
                 ] = model_output[self._SOFTMAX_OUTPUT].numpy()
-            predictions[
-                allocation_index : allocation_index + num_samples_in_batch
-            ] = model_output[self._ARGMAX_OUTPUT].numpy()
+            predictions[allocation_index : allocation_index + num_samples_in_batch] = (
+                model_output[self._ARGMAX_OUTPUT].numpy()
+            )
 
             allocation_index += num_samples_in_batch
 
diff --git a/dataprofiler/labelers/character_level_cnn_model.py b/dataprofiler/labelers/character_level_cnn_model.py
index 20c4777c..7d78900a 100644
--- a/dataprofiler/labelers/character_level_cnn_model.py
+++ b/dataprofiler/labelers/character_level_cnn_model.py
@@ -1,4 +1,5 @@
 """Contains classes for char data labeling."""
+
 from __future__ import annotations
 
 import copy
@@ -947,9 +948,9 @@ def predict(
                 confidences[
                     allocation_index : allocation_index + num_samples_in_batch
                 ] = model_output[self._SOFTMAX_OUTPUT].numpy()
-            predictions[
-                allocation_index : allocation_index + num_samples_in_batch
-            ] = model_output[self._ARGMAX_OUTPUT].numpy()
+            predictions[allocation_index : allocation_index + num_samples_in_batch] = (
+                model_output[self._ARGMAX_OUTPUT].numpy()
+            )
             sentence_lengths[
                 allocation_index : allocation_index + num_samples_in_batch
             ] = list(map(lambda x: len(x[0]), batch_data))
diff --git a/dataprofiler/labelers/classification_report_utils.py b/dataprofiler/labelers/classification_report_utils.py
index 3146e829..840c236f 100644
--- a/dataprofiler/labelers/classification_report_utils.py
+++ b/dataprofiler/labelers/classification_report_utils.py
@@ -1,4 +1,5 @@
 """Contains functions for classification."""
+
 from __future__ import annotations
 
 import warnings
diff --git a/dataprofiler/labelers/column_name_model.py b/dataprofiler/labelers/column_name_model.py
index d698cfd6..1732983c 100644
--- a/dataprofiler/labelers/column_name_model.py
+++ b/dataprofiler/labelers/column_name_model.py
@@ -1,4 +1,5 @@
 """Contains class for column name data labeling model."""
+
 from __future__ import annotations
 
 import json
diff --git a/dataprofiler/labelers/data_processing.py b/dataprofiler/labelers/data_processing.py
index 33d916cc..70c980c3 100644
--- a/dataprofiler/labelers/data_processing.py
+++ b/dataprofiler/labelers/data_processing.py
@@ -176,9 +176,11 @@ def process(
         labels: np.ndarray | None = None,
         label_mapping: dict[str, int] | None = None,
         batch_size: int = 32,
-    ) -> Generator[tuple[np.ndarray, np.ndarray] | np.ndarray, None, None] | tuple[
-        np.ndarray, np.ndarray
-    ] | np.ndarray:
+    ) -> (
+        Generator[tuple[np.ndarray, np.ndarray] | np.ndarray, None, None]
+        | tuple[np.ndarray, np.ndarray]
+        | np.ndarray
+    ):
         """Preprocess data."""
         raise NotImplementedError()
 
@@ -1950,9 +1952,11 @@ def _validate_parameters(self, parameters: dict) -> None:
                 # being changed and is already set
                 aggregation_func = parameters.get(
                     "aggregation_func",
-                    self._parameters.get("aggregation_func")
-                    if hasattr(self, "_parameters")
-                    else None,
+                    (
+                        self._parameters.get("aggregation_func")
+                        if hasattr(self, "_parameters")
+                        else None
+                    ),
                 )
                 if value is None and aggregation_func == "priority":
                     errors.append(
diff --git a/dataprofiler/labelers/labeler_utils.py b/dataprofiler/labelers/labeler_utils.py
index 2a74b87e..3a1097ce 100644
--- a/dataprofiler/labelers/labeler_utils.py
+++ b/dataprofiler/labelers/labeler_utils.py
@@ -1,4 +1,5 @@
 """Contains functions for the data labeler."""
+
 from __future__ import annotations
 
 import logging
diff --git a/dataprofiler/labelers/regex_model.py b/dataprofiler/labelers/regex_model.py
index c6a690c1..dd74da71 100644
--- a/dataprofiler/labelers/regex_model.py
+++ b/dataprofiler/labelers/regex_model.py
@@ -1,4 +1,5 @@
 """Contains class for regex data labeling model."""
+
 from __future__ import annotations
 
 import copy
diff --git a/dataprofiler/profilers/__init__.py b/dataprofiler/profilers/__init__.py
index 4b068fcb..14834794 100644
--- a/dataprofiler/profilers/__init__.py
+++ b/dataprofiler/profilers/__init__.py
@@ -1,4 +1,5 @@
 """Package for providing statistics and predictions for a given dataset."""
+
 from . import json_decoder
 from .base_column_profilers import BaseColumnProfiler
 from .categorical_column_profile import CategoricalColumn
diff --git a/dataprofiler/profilers/categorical_column_profile.py b/dataprofiler/profilers/categorical_column_profile.py
index 1ca63090..d64f5aa5 100644
--- a/dataprofiler/profilers/categorical_column_profile.py
+++ b/dataprofiler/profilers/categorical_column_profile.py
@@ -1,4 +1,5 @@
 """Contains class for categorical column profiler."""
+
 from __future__ import annotations
 
 import math
@@ -277,28 +278,28 @@ def diff(self, other_profile: CategoricalColumn, options: dict = None) -> dict:
 
         # These stats are only diffed if both profiles are categorical
         if self.is_match and other_profile.is_match:
-            differences["statistics"][
-                "chi2-test"
-            ] = profiler_utils.perform_chi_squared_test_for_homogeneity(
-                self._categories,
-                self.sample_size,
-                other_profile._categories,
-                other_profile.sample_size,
+            differences["statistics"]["chi2-test"] = (
+                profiler_utils.perform_chi_squared_test_for_homogeneity(
+                    self._categories,
+                    self.sample_size,
+                    other_profile._categories,
+                    other_profile.sample_size,
+                )
             )
-            differences["statistics"][
-                "categories"
-            ] = profiler_utils.find_diff_of_lists_and_sets(
-                self.categories, other_profile.categories
+            differences["statistics"]["categories"] = (
+                profiler_utils.find_diff_of_lists_and_sets(
+                    self.categories, other_profile.categories
+                )
             )
-            differences["statistics"][
-                "gini_impurity"
-            ] = profiler_utils.find_diff_of_numbers(
-                self.gini_impurity, other_profile.gini_impurity
+            differences["statistics"]["gini_impurity"] = (
+                profiler_utils.find_diff_of_numbers(
+                    self.gini_impurity, other_profile.gini_impurity
+                )
             )
-            differences["statistics"][
-                "unalikeability"
-            ] = profiler_utils.find_diff_of_numbers(
-                self.unalikeability, other_profile.unalikeability
+            differences["statistics"]["unalikeability"] = (
+                profiler_utils.find_diff_of_numbers(
+                    self.unalikeability, other_profile.unalikeability
+                )
             )
             cat_count1 = dict(
                 sorted(self._categories.items(), key=itemgetter(1), reverse=True)
@@ -326,9 +327,9 @@ def diff(self, other_profile: CategoricalColumn, options: dict = None) -> dict:
                     )
                 differences["statistics"]["psi"] = total_psi
 
-            differences["statistics"][
-                "categorical_count"
-            ] = profiler_utils.find_diff_of_dicts(self_cat_count, other_cat_count)
+            differences["statistics"]["categorical_count"] = (
+                profiler_utils.find_diff_of_dicts(self_cat_count, other_cat_count)
+            )
 
         return differences
 
diff --git a/dataprofiler/profilers/column_profile_compilers.py b/dataprofiler/profilers/column_profile_compilers.py
index 07edf13d..cfeb8c69 100644
--- a/dataprofiler/profilers/column_profile_compilers.py
+++ b/dataprofiler/profilers/column_profile_compilers.py
@@ -1,4 +1,5 @@
 """For generating a report."""
+
 from __future__ import annotations
 
 import abc
diff --git a/dataprofiler/profilers/data_labeler_column_profile.py b/dataprofiler/profilers/data_labeler_column_profile.py
index 81f9c0ce..3ce6257f 100644
--- a/dataprofiler/profilers/data_labeler_column_profile.py
+++ b/dataprofiler/profilers/data_labeler_column_profile.py
@@ -1,4 +1,5 @@
 """Contains class for for profiling data labeler col."""
+
 from __future__ import annotations
 
 import operator
diff --git a/dataprofiler/profilers/datetime_column_profile.py b/dataprofiler/profilers/datetime_column_profile.py
index af99283a..1042ea0c 100644
--- a/dataprofiler/profilers/datetime_column_profile.py
+++ b/dataprofiler/profilers/datetime_column_profile.py
@@ -1,4 +1,5 @@
 """Contains class for profiling datetime column."""
+
 from __future__ import annotations
 
 import datetime
@@ -216,7 +217,7 @@ def _validate_datetime(date: str, date_format: str) -> datetime.datetime | float
         :return: either the str converted into a date format, or Nan
         """
         try:
-            converted_date: (datetime.datetime | float) = datetime.datetime.strptime(
+            converted_date: datetime.datetime | float = datetime.datetime.strptime(
                 date, date_format
             )
         except (ValueError, TypeError):
@@ -237,7 +238,7 @@ def _replace_day_suffix(date: str, pattern: re.Pattern) -> str | float:
         """
         try:
             new_date: str | float = pattern.sub(r"\1", date)
-        except (TypeError):
+        except TypeError:
             new_date = np.nan
         return new_date
 
diff --git a/dataprofiler/profilers/float_column_profile.py b/dataprofiler/profilers/float_column_profile.py
index bc426a44..3d6ede32 100644
--- a/dataprofiler/profilers/float_column_profile.py
+++ b/dataprofiler/profilers/float_column_profile.py
@@ -1,4 +1,5 @@
 """Float profile analysis for individual col within structured profiling."""
+
 from __future__ import annotations
 
 import copy
diff --git a/dataprofiler/profilers/graph_profiler.py b/dataprofiler/profilers/graph_profiler.py
index 0680a29a..345a0f2e 100644
--- a/dataprofiler/profilers/graph_profiler.py
+++ b/dataprofiler/profilers/graph_profiler.py
@@ -1,4 +1,5 @@
 """Class and functions to calculate and profile properties of graph data."""
+
 from __future__ import annotations
 
 import importlib
diff --git a/dataprofiler/profilers/helpers/__init__.py b/dataprofiler/profilers/helpers/__init__.py
index 43393433..2c72b2f3 100644
--- a/dataprofiler/profilers/helpers/__init__.py
+++ b/dataprofiler/profilers/helpers/__init__.py
@@ -1,4 +1,5 @@
 """This package provides helper functions for generating reports."""
+
 from .report_helpers import _prepare_report, calculate_quantiles
 
 __all__ = [
diff --git a/dataprofiler/profilers/helpers/report_helpers.py b/dataprofiler/profilers/helpers/report_helpers.py
index 0588252c..44ac8fb1 100644
--- a/dataprofiler/profilers/helpers/report_helpers.py
+++ b/dataprofiler/profilers/helpers/report_helpers.py
@@ -1,4 +1,5 @@
 """Contains helper functions for generating report."""
+
 from __future__ import annotations
 
 import math
diff --git a/dataprofiler/profilers/int_column_profile.py b/dataprofiler/profilers/int_column_profile.py
index 4b8ab728..ae4ed575 100644
--- a/dataprofiler/profilers/int_column_profile.py
+++ b/dataprofiler/profilers/int_column_profile.py
@@ -1,4 +1,5 @@
 """Int profile analysis for individual col within structured profiling."""
+
 from __future__ import annotations
 
 import numpy as np
diff --git a/dataprofiler/profilers/json_decoder.py b/dataprofiler/profilers/json_decoder.py
index fb4ff8cb..eb09db0d 100644
--- a/dataprofiler/profilers/json_decoder.py
+++ b/dataprofiler/profilers/json_decoder.py
@@ -1,4 +1,5 @@
 """Contains methods to decode components of a Profiler."""
+
 from __future__ import annotations
 
 import warnings
@@ -116,9 +117,9 @@ def get_structured_col_profiler_class(class_name: str) -> type[StructuredColProf
     :type class_name: str representing name of class
     :return: subclass of StructuredColProfiler object
     """
-    struct_col_profiler_class: None | (
-        type[StructuredColProfiler]
-    ) = _structured_col_profiler.get(class_name)
+    struct_col_profiler_class: None | (type[StructuredColProfiler]) = (
+        _structured_col_profiler.get(class_name)
+    )
     if struct_col_profiler_class is None:
         raise ValueError(
             f"Invalid structured col profiler class {class_name} " f"failed to load."
@@ -153,9 +154,9 @@ def load_column_profile(
         JSON
 
     """
-    column_profiler_cls: type[
-        BaseColumnProfiler[BaseColumnProfiler]
-    ] = get_column_profiler_class(serialized_json["class"])
+    column_profiler_cls: type[BaseColumnProfiler[BaseColumnProfiler]] = (
+        get_column_profiler_class(serialized_json["class"])
+    )
     return column_profiler_cls.load_from_dict(serialized_json["data"], config)
 
 
diff --git a/dataprofiler/profilers/numerical_column_stats.py b/dataprofiler/profilers/numerical_column_stats.py
index 5135bcfd..9ec2190a 100644
--- a/dataprofiler/profilers/numerical_column_stats.py
+++ b/dataprofiler/profilers/numerical_column_stats.py
@@ -258,9 +258,9 @@ def _add_helper_merge_profile_histograms(
 
         if self.user_set_histogram_bin is None:
             for method in self.histogram_bin_method_names:
-                self.histogram_methods[method][
-                    "suggested_bin_count"
-                ] = histogram_utils._calculate_bins_from_profile(self, method)
+                self.histogram_methods[method]["suggested_bin_count"] = (
+                    histogram_utils._calculate_bins_from_profile(self, method)
+                )
 
         self._get_quantiles()
 
@@ -1044,10 +1044,7 @@ def _merge_biased_kurtosis(
             / N**3
         )
         third_term = (
-            6
-            * delta**2
-            * (match_count1**2 * M2_2 + match_count2**2 * M2_1)
-            / N**2
+            6 * delta**2 * (match_count1**2 * M2_2 + match_count2**2 * M2_1) / N**2
         )
         fourth_term = 4 * delta * (match_count1 * M3_2 - match_count2 * M3_1) / N
         M4 = first_term + second_term + third_term + fourth_term
diff --git a/dataprofiler/profilers/order_column_profile.py b/dataprofiler/profilers/order_column_profile.py
index 30826232..0a437431 100644
--- a/dataprofiler/profilers/order_column_profile.py
+++ b/dataprofiler/profilers/order_column_profile.py
@@ -1,4 +1,5 @@
 """Index profile analysis for individual col within structured profiling."""
+
 from __future__ import annotations
 
 from abc import abstractmethod
diff --git a/dataprofiler/profilers/profile_builder.py b/dataprofiler/profilers/profile_builder.py
index 6e512658..7d904b6a 100644
--- a/dataprofiler/profilers/profile_builder.py
+++ b/dataprofiler/profilers/profile_builder.py
@@ -1919,10 +1919,10 @@ def diff(  # type: ignore[override]
             col_name = other_profile._profile[i].name
             other_profile_schema[col_name].append(i)
 
-        report["global_stats"][
-            "profile_schema"
-        ] = profiler_utils.find_diff_of_dicts_with_diff_keys(
-            self_profile_schema, other_profile_schema
+        report["global_stats"]["profile_schema"] = (
+            profiler_utils.find_diff_of_dicts_with_diff_keys(
+                self_profile_schema, other_profile_schema
+            )
         )
 
         # Only find the diff of columns if the schemas are exactly the same
@@ -2101,9 +2101,9 @@ def report(self, report_options: dict = None) -> dict:
                 self.options.null_replication_metrics.is_enabled
                 and i in self._null_replication_metrics
             ):
-                report["data_stats"][i][
-                    "null_replication_metrics"
-                ] = self._null_replication_metrics[i]
+                report["data_stats"][i]["null_replication_metrics"] = (
+                    self._null_replication_metrics[i]
+                )
 
         return _prepare_report(report, output_format, omit_keys)
 
@@ -2610,9 +2610,11 @@ def _update_null_replication_metrics(self, clean_samples: dict) -> None:
 
         total_row_sum = np.asarray(
             [
-                get_data_type_profiler(profile).sum
-                if get_data_type(profile) not in [None, "datetime"]
-                else np.nan
+                (
+                    get_data_type_profiler(profile).sum
+                    if get_data_type(profile) not in [None, "datetime"]
+                    else np.nan
+                )
                 for profile in self._profile
             ]
         )
@@ -2704,17 +2706,21 @@ def _merge_null_replication_metrics(self, other: StructuredProfiler) -> dict:
 
         self_row_sum = np.asarray(
             [
-                get_data_type_profiler(profile).sum
-                if get_data_type(profile)
-                else np.nan
+                (
+                    get_data_type_profiler(profile).sum
+                    if get_data_type(profile)
+                    else np.nan
+                )
                 for profile in self._profile
             ]
         )
         other_row_sum = np.asarray(
             [
-                get_data_type_profiler(profile).sum
-                if get_data_type(profile)
-                else np.nan
+                (
+                    get_data_type_profiler(profile).sum
+                    if get_data_type(profile)
+                    else np.nan
+                )
                 for profile in other._profile
             ]
         )
diff --git a/dataprofiler/profilers/profiler_utils.py b/dataprofiler/profilers/profiler_utils.py
index 2cc9846b..7986cec0 100644
--- a/dataprofiler/profilers/profiler_utils.py
+++ b/dataprofiler/profilers/profiler_utils.py
@@ -1,4 +1,5 @@
 """Contains functions for profilers."""
+
 from __future__ import annotations
 
 import collections
@@ -429,13 +430,11 @@ def __sub__(self: T, other: T) -> Any:
 def find_diff_of_numbers(
     stat1: int | float | np.float64 | np.int64 | None,
     stat2: int | float | np.float64 | np.int64 | None,
-) -> Any:
-    ...
+) -> Any: ...
 
 
 @overload
-def find_diff_of_numbers(stat1: T | None, stat2: T | None) -> Any:
-    ...
+def find_diff_of_numbers(stat1: T | None, stat2: T | None) -> Any: ...
 
 
 def find_diff_of_numbers(stat1, stat2):
diff --git a/dataprofiler/profilers/text_column_profile.py b/dataprofiler/profilers/text_column_profile.py
index bea8dbd6..eb79643f 100644
--- a/dataprofiler/profilers/text_column_profile.py
+++ b/dataprofiler/profilers/text_column_profile.py
@@ -1,4 +1,5 @@
 """Text profile analysis for individual col within structured profiling.."""
+
 from __future__ import annotations
 
 import itertools
diff --git a/dataprofiler/profilers/unstructured_labeler_profile.py b/dataprofiler/profilers/unstructured_labeler_profile.py
index 1c7b16c0..22789c4e 100644
--- a/dataprofiler/profilers/unstructured_labeler_profile.py
+++ b/dataprofiler/profilers/unstructured_labeler_profile.py
@@ -1,4 +1,5 @@
 """Profile analysis for applying labels within unstructured profiling."""
+
 from __future__ import annotations
 
 from collections import defaultdict
diff --git a/dataprofiler/profilers/unstructured_text_profile.py b/dataprofiler/profilers/unstructured_text_profile.py
index 96b7d062..3f1b6dd7 100644
--- a/dataprofiler/profilers/unstructured_text_profile.py
+++ b/dataprofiler/profilers/unstructured_text_profile.py
@@ -1,4 +1,5 @@
 """For profiling unstructured text data."""
+
 from __future__ import annotations
 
 import itertools
diff --git a/dataprofiler/reports/graphs.py b/dataprofiler/reports/graphs.py
index 1f0b4301..4e630a1c 100644
--- a/dataprofiler/reports/graphs.py
+++ b/dataprofiler/reports/graphs.py
@@ -1,4 +1,5 @@
 """Contains functions for generating graph data report."""
+
 # !/usr/bin/env python3
 from __future__ import annotations
 
diff --git a/dataprofiler/reports/utils.py b/dataprofiler/reports/utils.py
index a10b8fe5..975dc7d8 100644
--- a/dataprofiler/reports/utils.py
+++ b/dataprofiler/reports/utils.py
@@ -1,4 +1,5 @@
 """Contains functions for checking for installations/dependencies."""
+
 import sys
 import warnings
 from typing import Any, Callable, List, TypeVar, cast
diff --git a/dataprofiler/rng_utils.py b/dataprofiler/rng_utils.py
index 32906665..2fd14f0f 100644
--- a/dataprofiler/rng_utils.py
+++ b/dataprofiler/rng_utils.py
@@ -1,4 +1,5 @@
 """Create a random number generator using a manual seed DATAPROFILER_SEED."""
+
 import os
 import warnings
 
diff --git a/dataprofiler/settings.py b/dataprofiler/settings.py
index 1ba017f4..a81c3477 100644
--- a/dataprofiler/settings.py
+++ b/dataprofiler/settings.py
@@ -1,2 +1,3 @@
 """Configure settings for dataprofiler."""
+
 _seed = None
diff --git a/dataprofiler/tests/plugins/test_plugins.py b/dataprofiler/tests/plugins/test_plugins.py
index ec148a52..9368975d 100644
--- a/dataprofiler/tests/plugins/test_plugins.py
+++ b/dataprofiler/tests/plugins/test_plugins.py
@@ -28,8 +28,8 @@ def test_plugin():
     @mock.patch("dataprofiler.plugins.__init__.os.path.isdir")
     @mock.patch("dataprofiler.plugins.__init__.os.listdir")
     def test_load_plugin(self, mock_listdir, mock_isdir, mock_importlib_util):
-        mock_listdir.side_effect = (
-            lambda folder_dir: ["__pycache__", "py"]
+        mock_listdir.side_effect = lambda folder_dir: (
+            ["__pycache__", "py"]
             if folder_dir.endswith("plugins")
             else ["stillnotrealpy", "a.json", None]
         )
@@ -38,10 +38,8 @@ def test_load_plugin(self, mock_listdir, mock_isdir, mock_importlib_util):
         load_plugins()
         mock_importlib_util.spec_from_file_location.assert_not_called()
 
-        mock_listdir.side_effect = (
-            lambda folder_dir: ["folder"]
-            if folder_dir.endswith("plugins")
-            else ["file.py"]
+        mock_listdir.side_effect = lambda folder_dir: (
+            ["folder"] if folder_dir.endswith("plugins") else ["file.py"]
         )
         mock_spec = mock.Mock()
         mock_importlib_util.spec_from_file_location.return_value = mock_spec
diff --git a/dataprofiler/tests/profilers/test_float_column_profile.py b/dataprofiler/tests/profilers/test_float_column_profile.py
index d9ec122c..19fe4c8a 100644
--- a/dataprofiler/tests/profilers/test_float_column_profile.py
+++ b/dataprofiler/tests/profilers/test_float_column_profile.py
@@ -835,7 +835,14 @@ def test_total_histogram_bin_variance(self):
 
     def test_histogram_loss(self):
         # run time is small
-        (diff_var, avg_diffvar, total_var, avg_totalvar, run_time, avg_runtime,) = (
+        (
+            diff_var,
+            avg_diffvar,
+            total_var,
+            avg_totalvar,
+            run_time,
+            avg_runtime,
+        ) = (
             0.3,
             0.2,
             0.1,
@@ -855,7 +862,14 @@ def test_histogram_loss(self):
         self.assertEqual(expected_loss, est_loss)
 
         # run time is big
-        (diff_var, avg_diffvar, total_var, avg_totalvar, run_time, avg_runtime,) = (
+        (
+            diff_var,
+            avg_diffvar,
+            total_var,
+            avg_totalvar,
+            run_time,
+            avg_runtime,
+        ) = (
             0.3,
             0.2,
             0.1,
diff --git a/dataprofiler/tests/profilers/test_profiler_utils.py b/dataprofiler/tests/profilers/test_profiler_utils.py
index 4eee1963..0ea0c0fc 100644
--- a/dataprofiler/tests/profilers/test_profiler_utils.py
+++ b/dataprofiler/tests/profilers/test_profiler_utils.py
@@ -472,7 +472,6 @@ def test_odd_merge_profile_list(self, mock_data_labeler, *mocks):
 
 
 class TestAutoMultiProcessToggle(unittest.TestCase):
-
     """
     Validate profile_utils.auto_multiprocess_toggle is properly working.
     """
diff --git a/dataprofiler/tests/space_time_analysis/structured_space_time_analysis.py b/dataprofiler/tests/space_time_analysis/structured_space_time_analysis.py
index df57854f..5af7bc2a 100644
--- a/dataprofiler/tests/space_time_analysis/structured_space_time_analysis.py
+++ b/dataprofiler/tests/space_time_analysis/structured_space_time_analysis.py
@@ -1,4 +1,5 @@
 """Contains space and time analysis tests for the Dataprofiler"""
+
 import json
 import os
 import random
diff --git a/dataprofiler/tests/test_rng_utils.py b/dataprofiler/tests/test_rng_utils.py
index 6ee2ed35..8a4c4d22 100644
--- a/dataprofiler/tests/test_rng_utils.py
+++ b/dataprofiler/tests/test_rng_utils.py
@@ -1,4 +1,5 @@
 """Validates that generator intakes DATAPROFILER_SEED properly."""
+
 import os
 import unittest
 import unittest.mock

From ffbac1aff2c6fbb36f297b993dab5a6d135e7ab4 Mon Sep 17 00:00:00 2001
From: Jeremy Goodsitt <jeremy.goodsitt@ipcopilot.ai>
Date: Fri, 22 May 2026 14:44:22 -0500
Subject: [PATCH 28/28] refactor: add unit tests validating usage of the old
 load format

---
 .../tests/labelers/test_char_tf_load_model.py | 20 ++++++++++++
 .../test_character_level_cnn_model.py         | 32 +++++++++++++++++++
 2 files changed, 52 insertions(+)

diff --git a/dataprofiler/tests/labelers/test_char_tf_load_model.py b/dataprofiler/tests/labelers/test_char_tf_load_model.py
index 40879e57..6160b8fa 100644
--- a/dataprofiler/tests/labelers/test_char_tf_load_model.py
+++ b/dataprofiler/tests/labelers/test_char_tf_load_model.py
@@ -227,6 +227,26 @@ def test_predict(self, *mocks):
         self.assertIn("conf", result)
         self.assertEqual((2, 2, model.num_labels), np.array(result["conf"]).shape)
 
+    def test_normalize_old_list_output_model(self, *mocks):
+        inputs = tf.keras.Input(shape=(2,), dtype=tf.int64)
+        embedded = tf.keras.layers.Embedding(input_dim=100, output_dim=8)(inputs)
+        softmax_output = tf.keras.layers.Dense(
+            self.label_mapping["ADDRESS"] + 1,
+            activation="softmax",
+        )(embedded)
+        argmax_output = tf.keras.layers.Lambda(
+            lambda x: tf.cast(tf.argmax(x, axis=2), tf.int64)
+        )(softmax_output)
+        old_format_model = tf.keras.Model(inputs, [softmax_output, argmax_output])
+        # The model above returns its outputs as a list; run it through normalization.
+        normalized_model = CharLoadTFModel._normalize_model_outputs(old_format_model)
+        # Normalized outputs must be a dict whose keys are exactly these two constants.
+        self.assertIsInstance(normalized_model.output, dict)
+        self.assertSetEqual(
+            set(normalized_model.output.keys()),
+            {CharLoadTFModel._SOFTMAX_OUTPUT, CharLoadTFModel._ARGMAX_OUTPUT},
+        )
+
     def test_fit_and_predict(self, *mocks):
         # model
         model = CharLoadTFModel(self.model_path, self.label_mapping)
diff --git a/dataprofiler/tests/labelers/test_character_level_cnn_model.py b/dataprofiler/tests/labelers/test_character_level_cnn_model.py
index 554b61c8..311da006 100644
--- a/dataprofiler/tests/labelers/test_character_level_cnn_model.py
+++ b/dataprofiler/tests/labelers/test_character_level_cnn_model.py
@@ -10,8 +10,10 @@
 
 from dataprofiler.labelers import utils as labeler_utils
 from dataprofiler.labelers.character_level_cnn_model import (
+    ArgMaxLayer,
     CharacterLevelCnnModel,
     EncodingLayer,
+    ThreshArgMaxLayer,
 )
 
 _file_dir = os.path.dirname(os.path.abspath(__file__))
@@ -253,6 +255,36 @@ def test_validation_evaluate_and_classification_report(self, *mocks):
         self.assertIsNotNone(f1_report)
         self.assertEqual(11, f1_report["ADDRESS"]["support"])
 
+    def test_normalize_old_list_output_model(self):
+        default_ind = self.label_mapping["UNKNOWN"]
+        num_labels = max(self.label_mapping.values()) + 1
+        inputs = tf.keras.Input(shape=(2, 4))
+        hidden = tf.keras.layers.Dense(8, activation="relu")(inputs)
+        softmax_output = tf.keras.layers.Dense(num_labels, activation="softmax")(hidden)
+        argmax_output = ArgMaxLayer()(softmax_output)
+        threshold_output = ThreshArgMaxLayer(
+            threshold_=0.0,
+            num_labels_=num_labels,
+            default_ind=default_ind,
+        )(argmax_output, softmax_output)
+        old_format_model = tf.keras.Model(
+            inputs, [softmax_output, argmax_output, threshold_output]
+        )
+        # The model above returns its outputs as a list; run it through normalization.
+        normalized_model = CharacterLevelCnnModel._normalize_model_outputs(
+            old_format_model, default_ind, num_labels
+        )
+        # Normalized outputs must be a dict whose keys are exactly these three constants.
+        self.assertIsInstance(normalized_model.output, dict)
+        self.assertSetEqual(
+            set(normalized_model.output.keys()),
+            {
+                CharacterLevelCnnModel._SOFTMAX_OUTPUT,
+                CharacterLevelCnnModel._ARGMAX_OUTPUT,
+                CharacterLevelCnnModel._THRESH_OUTPUT,
+            },
+        )
+
     def test_fit_and_predict_with_new_labels(self):
         # Initialize model
         cnn_model = CharacterLevelCnnModel(self.label_mapping)