From c287260e378c17c32e89d5c1b04af7e04978b1f6 Mon Sep 17 00:00:00 2001 From: Khushiyant Date: Tue, 10 Sep 2024 21:19:58 +0530 Subject: [PATCH 1/8] feat(add): numpy ndarray--template-less init --- python/cppyy/_cpython_cppyy.py | 46 ++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/python/cppyy/_cpython_cppyy.py b/python/cppyy/_cpython_cppyy.py index 8d6f1e0f..104aee0f 100644 --- a/python/cppyy/_cpython_cppyy.py +++ b/python/cppyy/_cpython_cppyy.py @@ -114,6 +114,20 @@ def __call__(self, *args): # most common cases are covered if args: args0 = args[0] + + if type(args0).__module__ == 'numpy' and type(args0).__name__ == 'ndarray': + import numpy as np + # Get the data type of the array + t = args0.dtype.type + if np.issubdtype(t, np.integer): + t = 'int' + elif np.issubdtype(t, np.floating): + t = 'double' + elif np.issubdtype(t, np.complexfloating): + t = 'std::complex' + + # Handle arrays of arbitrary dimension recursively + return _np_vector(args0, t) if args0 and (type(args0) is tuple or type(args0) is list): t = type(args0[0]) if t is float: t = 'double' @@ -209,3 +223,35 @@ def _end_capture_stderr(): pass return "C++ issued an error message that could not be decoded (%s)" % str(original_error) return "" + +def _np_vector(arr, dtype): + import cppyy + + def _build_nested_vector_type(ndim, dtype): + vector_t = cppyy.gbl.std.vector[dtype] + for _ in range(ndim - 1): + vector_t = cppyy.gbl.std.vector[vector_t] + return vector_t + + ndim = arr.ndim + + if ndim == 1: + vector_t = cppyy.gbl.std.vector[dtype] + vector = vector_t() + vector.reserve(arr.size) # Pre-allocate memory for better performance + + for elem in arr: + vector.push_back(elem.item()) # Convert NumPy type to native Python type + return vector + + # Build nested vector types dynamically + nested_vector_type = _build_nested_vector_type(ndim, dtype) + nested_vector = nested_vector_type() + nested_vector.reserve(arr.shape[0]) # Pre-allocate outer vector + + # Recursively process sub-arrays + for subarr in arr: + inner_vector = _np_vector(subarr, dtype) + nested_vector.push_back(inner_vector) + + return nested_vector \ No newline at end of file From eaf4ff33aa56edac8b2d87ad47f9c89a55858187 Mon Sep 17 00:00:00 2001 From: Khushiyant Date: Tue, 10 Sep 2024 22:54:36 +0530 Subject: [PATCH 2/8] chore: add np bulk insert, dict lookup --- python/cppyy/_cpython_cppyy.py | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/python/cppyy/_cpython_cppyy.py b/python/cppyy/_cpython_cppyy.py index 104aee0f..087ca680 100644 --- a/python/cppyy/_cpython_cppyy.py +++ b/python/cppyy/_cpython_cppyy.py @@ -122,7 +122,7 @@ def __call__(self, *args): if np.issubdtype(t, np.integer): t = 'int' elif np.issubdtype(t, np.floating): - t = 'double' + t = 'doublex' elif np.issubdtype(t, np.complexfloating): t = 'std::complex' @@ -227,10 +227,18 @@ def _end_capture_stderr(): def _np_vector(arr, dtype): import cppyy + vector_type_cache = {} + def _build_nested_vector_type(ndim, dtype): + key = (ndim, dtype) + if key in vector_type_cache: + return vector_type_cache[key] + vector_t = cppyy.gbl.std.vector[dtype] for _ in range(ndim - 1): vector_t = cppyy.gbl.std.vector[vector_t] + + vector_type_cache[key] = vector_t return vector_t ndim = arr.ndim @@ -240,8 +248,11 @@ def _build_nested_vector_type(ndim, dtype): vector = vector_t() vector.reserve(arr.size) # Pre-allocate memory for better performance - for elem in arr: - vector.push_back(elem.item()) # Convert NumPy type to native Python type + try: + vector.insert(vector.end(), arr.flat) + except TypeError: + for elem in arr: + vector.push_back(elem.item()) return vector # Build nested vector types dynamically From 8f5d781400f3a9c72bf6633b1d3afec3f165805f Mon Sep 17 00:00:00 2001 From: Khushiyant Date: Wed, 11 Sep 2024 11:14:02 +0530 Subject: [PATCH 3/8] chore: remove imports--numpy, cppyy tests: add template and dtpye less tests chore: remove cppyy import chore: remove numpy--import recover: deleted valgrind--file --- python/cppyy/_cpython_cppyy.py | 34 ++++++++++++------------- test/test_stltypes.py | 46 ++++++++++++++++++++++++++++++++++ 2 files changed, 62 insertions(+), 18 deletions(-) diff --git a/python/cppyy/_cpython_cppyy.py b/python/cppyy/_cpython_cppyy.py index 087ca680..72a59aba 100644 --- a/python/cppyy/_cpython_cppyy.py +++ b/python/cppyy/_cpython_cppyy.py @@ -115,16 +115,18 @@ def __call__(self, *args): if args: args0 = args[0] - if type(args0).__module__ == 'numpy' and type(args0).__name__ == 'ndarray': - import numpy as np - # Get the data type of the array - t = args0.dtype.type - if np.issubdtype(t, np.integer): - t = 'int' - elif np.issubdtype(t, np.floating): - t = 'doublex' - elif np.issubdtype(t, np.complexfloating): - t = 'std::complex' + if ( + type(args0).__module__ == "numpy" + and type(args0).__name__ == "ndarray" + and hasattr(args0, "dtype") + ): + t = args0.dtype.type.__name__ + if t.startswith("int"): + t = "int" + elif t.startswith("float"): + t = "double" + elif t.startswith("complex"): + t = "std::complex" # Handle arrays of arbitrary dimension recursively return _np_vector(args0, t) @@ -225,8 +227,6 @@ def _end_capture_stderr(): return "" def _np_vector(arr, dtype): - import cppyy - vector_type_cache = {} def _build_nested_vector_type(ndim, dtype): @@ -234,9 +234,9 @@ def _build_nested_vector_type(ndim, dtype): if key in vector_type_cache: return vector_type_cache[key] - vector_t = cppyy.gbl.std.vector[dtype] + vector_t = gbl.std.vector[dtype] for _ in range(ndim - 1): - vector_t = cppyy.gbl.std.vector[vector_t] + vector_t = gbl.std.vector[vector_t] vector_type_cache[key] = vector_t return vector_t @@ -244,9 +244,9 @@ def _build_nested_vector_type(ndim, dtype): ndim = arr.ndim if ndim == 1: - vector_t = cppyy.gbl.std.vector[dtype] + vector_t = gbl.std.vector[dtype] vector = vector_t() - vector.reserve(arr.size) # Pre-allocate memory for better performance + vector.reserve(arr.size) try: vector.insert(vector.end(), arr.flat) @@ -255,12 +255,10 @@ def _build_nested_vector_type(ndim, dtype): vector.push_back(elem.item()) return vector - # Build nested vector types dynamically nested_vector_type = _build_nested_vector_type(ndim, dtype) nested_vector = nested_vector_type() nested_vector.reserve(arr.shape[0]) # Pre-allocate outer vector - # Recursively process sub-arrays for subarr in arr: inner_vector = _np_vector(subarr, dtype) nested_vector.push_back(inner_vector) diff --git a/test/test_stltypes.py b/test/test_stltypes.py index 771cc941..9f5fc197 100644 --- a/test/test_stltypes.py +++ b/test/test_stltypes.py @@ -789,6 +789,52 @@ def test23_copy_conversion(self): for f, d in zip(x, v): assert f == d + def test24_numpy_template_less(self): + import cppyy + + try: + import numpy as np + except ImportError: + skip("numpy is not installed") + + rng = np.random.default_rng(seed=42) + x = rng.random((10, 3, 3, 3)) # It is default to dtype=np.float32 + v = cppyy.gbl.std.vector(x) + + assert len(v) == 10 + assert type(v[0][0][0][0]) is float + + + def test25_numpy_dtype_and_template_less(self): + import cppyy + + try: + import numpy as np + except ImportError: + skip("numpy is not installed") + + rng = np.random.default_rng(seed=42) + x = rng.integers(low=0, high=100, size=(10, 3, 3, 3)) + v = cppyy.gbl.std.vector(x) + + assert len(v) == 10 + assert type(v[0][0][0][0]) is int + + + def test26_numpy_passing_dtype(self): + import cppyy + + try: + import numpy as np + except ImportError: + skip("numpy is not installed") + + rng = np.random.default_rng(seed=42) + x = rng.integers(low=0, high=100, size=(10, 3, 3, 3), dtype=np.int32) + v = cppyy.gbl.std.vector(x) + + assert len(v) == 10 + assert type(v[0][0][0][0]) is int class TestSTLSTRING: def setup_class(cls): From 8ea5d7c350121cd2b63b1aa674e89a53be2a4f88 Mon Sep 17 00:00:00 2001 From: Khushiyant Date: Tue, 22 Oct 2024 23:03:44 +0530 Subject: [PATCH 4/8] chore: remove type indeference, add optimisation steps --- python/cppyy/_cpython_cppyy.py | 86 +++++++++++++++++----------------- test/test_stltypes.py | 34 +------------- 2 files changed, 44 insertions(+), 76 deletions(-) diff --git a/python/cppyy/_cpython_cppyy.py b/python/cppyy/_cpython_cppyy.py index 72a59aba..0bc146d1 100644 --- a/python/cppyy/_cpython_cppyy.py +++ b/python/cppyy/_cpython_cppyy.py @@ -120,16 +120,8 @@ def __call__(self, *args): and type(args0).__name__ == "ndarray" and hasattr(args0, "dtype") ): - t = args0.dtype.type.__name__ - if t.startswith("int"): - t = "int" - elif t.startswith("float"): - t = "double" - elif t.startswith("complex"): - t = "std::complex" - # Handle arrays of arbitrary dimension recursively - return _np_vector(args0, t) + return _np_vector(args0) if args0 and (type(args0) is tuple or type(args0) is list): t = type(args0[0]) if t is float: t = 'double' @@ -226,41 +218,49 @@ def _end_capture_stderr(): return "C++ issued an error message that could not be decoded (%s)" % str(original_error) return "" -def _np_vector(arr, dtype): - vector_type_cache = {} - - def _build_nested_vector_type(ndim, dtype): - key = (ndim, dtype) - if key in vector_type_cache: - return vector_type_cache[key] - - vector_t = gbl.std.vector[dtype] - for _ in range(ndim - 1): - vector_t = gbl.std.vector[vector_t] - - vector_type_cache[key] = vector_t - return vector_t - - ndim = arr.ndim - - if ndim == 1: - vector_t = gbl.std.vector[dtype] - vector = vector_t() - vector.reserve(arr.size) - - try: - vector.insert(vector.end(), arr.flat) - except TypeError: - for elem in arr: +def _np_vector(arr): + def _build_nested_vector_type(ndim, base_type, cache={}): + key = (ndim, base_type) + if key not in cache: + vector_t = gbl.std.vector[base_type] + for _ in range(ndim - 1): + vector_t = gbl.std.vector[vector_t] + cache[key] = vector_t + return cache[key] + + def convert(arr): + ndim = arr.ndim + if arr.size > 0: + base_type = type(arr.flat[0].item()) + else: + base_type = float + + if ndim == 1: + vector = _build_nested_vector_type(1, base_type)() + vector.reserve(arr.size) + + if arr.flags["C_CONTIGUOUS"]: + try: + vector.insert(vector.end(), arr.flat) + return vector + except TypeError: + pass + + for elem in arr.flat: vector.push_back(elem.item()) - return vector + return vector + + nested_vector = _build_nested_vector_type(ndim, base_type)() + nested_vector.reserve(arr.shape[0]) - nested_vector_type = _build_nested_vector_type(ndim, dtype) - nested_vector = nested_vector_type() - nested_vector.reserve(arr.shape[0]) # Pre-allocate outer vector + if arr.flags["C_CONTIGUOUS"]: + arr_view = arr.reshape(-1, *arr.shape[1:]) + for subarr in arr_view: + nested_vector.push_back(convert(subarr)) + else: + for subarr in arr: + nested_vector.push_back(convert(subarr)) - for subarr in arr: - inner_vector = _np_vector(subarr, dtype) - nested_vector.push_back(inner_vector) + return nested_vector - return nested_vector \ No newline at end of file + return convert(arr) \ No newline at end of file diff --git a/test/test_stltypes.py b/test/test_stltypes.py index 9f5fc197..243b3428 100644 --- a/test/test_stltypes.py +++ b/test/test_stltypes.py @@ -789,23 +789,7 @@ def test23_copy_conversion(self): for f, d in zip(x, v): assert f == d - def test24_numpy_template_less(self): - import cppyy - - try: - import numpy as np - except ImportError: - skip("numpy is not installed") - - rng = np.random.default_rng(seed=42) - x = rng.random((10, 3, 3, 3)) # It is default to dtype=np.float32 - v = cppyy.gbl.std.vector(x) - - assert len(v) == 10 - assert type(v[0][0][0][0]) is float - - - def test25_numpy_dtype_and_template_less(self): + def test25_ndarray_template_less(self): import cppyy try: @@ -820,22 +804,6 @@ def test25_numpy_dtype_and_template_less(self): assert len(v) == 10 assert type(v[0][0][0][0]) is int - - def test26_numpy_passing_dtype(self): - import cppyy - - try: - import numpy as np - except ImportError: - skip("numpy is not installed") - - rng = np.random.default_rng(seed=42) - x = rng.integers(low=0, high=100, size=(10, 3, 3, 3), dtype=np.int32) - v = cppyy.gbl.std.vector(x) - - assert len(v) == 10 - assert type(v[0][0][0][0]) is int - class TestSTLSTRING: def setup_class(cls): cls.test_dct = test_dct From 9750babdefef80b56ad94b0390979e380fd0baaf Mon Sep 17 00:00:00 2001 From: Khushiyant Date: Thu, 24 Oct 2024 23:01:36 +0530 Subject: [PATCH 5/8] chore: remove contigous array logic --- python/cppyy/_cpython_cppyy.py | 25 ++++++------------------- 1 file changed, 6 insertions(+), 19 deletions(-) diff --git a/python/cppyy/_cpython_cppyy.py b/python/cppyy/_cpython_cppyy.py index 0bc146d1..3650564b 100644 --- a/python/cppyy/_cpython_cppyy.py +++ b/python/cppyy/_cpython_cppyy.py @@ -238,29 +238,16 @@ def convert(arr): if ndim == 1: vector = _build_nested_vector_type(1, base_type)() vector.reserve(arr.size) - - if arr.flags["C_CONTIGUOUS"]: - try: - vector.insert(vector.end(), arr.flat) - return vector - except TypeError: - pass - for elem in arr.flat: vector.push_back(elem.item()) return vector - nested_vector = _build_nested_vector_type(ndim, base_type)() - nested_vector.reserve(arr.shape[0]) - - if arr.flags["C_CONTIGUOUS"]: - arr_view = arr.reshape(-1, *arr.shape[1:]) - for subarr in arr_view: - nested_vector.push_back(convert(subarr)) - else: - for subarr in arr: - nested_vector.push_back(convert(subarr)) + vector_type = _build_nested_vector_type(ndim, base_type) + result = vector_type() + result.reserve(arr.shape[0]) + for subarr in arr: + result.push_back(convert(subarr)) - return nested_vector + return result return convert(arr) \ No newline at end of file From 7ec37a08234c4b6fa4d3574af580d898536cc48a Mon Sep 17 00:00:00 2001 From: Khushiyant Date: Thu, 24 Oct 2024 23:02:08 +0530 Subject: [PATCH 6/8] test: add non-int test --- test/test_stltypes.py | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/test/test_stltypes.py b/test/test_stltypes.py index 243b3428..0ceb90e2 100644 --- a/test/test_stltypes.py +++ b/test/test_stltypes.py @@ -789,7 +789,7 @@ def test23_copy_conversion(self): for f, d in zip(x, v): assert f == d - def test25_ndarray_template_less(self): + def test25_int_ndarray_template_less(self): import cppyy try: @@ -804,6 +804,21 @@ def test25_ndarray_template_less(self): assert len(v) == 10 assert type(v[0][0][0][0]) is int + def test26_float_ndarray_template_less(self): + import cppyy + + try: + import numpy as np + except ImportError: + skip("numpy is not installed") + + rng = np.random.default_rng(seed=42) + x = rng.random(size=(10, 3, 3, 3)) + v = cppyy.gbl.std.vector(x) + + assert len(v) == 10 + assert type(v[0][0][0][0]) is float + class TestSTLSTRING: def setup_class(cls): cls.test_dct = test_dct From b47ebc1b96156995cdef5c1becdfd0859a0b03f8 Mon Sep 17 00:00:00 2001 From: Khushiyant Date: Thu, 24 Oct 2024 23:25:54 +0530 Subject: [PATCH 7/8] chore: add explicit init--double, long --- python/cppyy/_cpython_cppyy.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/python/cppyy/_cpython_cppyy.py b/python/cppyy/_cpython_cppyy.py index 3650564b..eb597ee1 100644 --- a/python/cppyy/_cpython_cppyy.py +++ b/python/cppyy/_cpython_cppyy.py @@ -219,7 +219,9 @@ def _end_capture_stderr(): return "" def _np_vector(arr): - def _build_nested_vector_type(ndim, base_type, cache={}): + CPP_EXPLICIT_TYPES = {"float64": "double", "int64": "long"} + + def build_nested_vector_type(ndim, base_type, cache={}): key = (ndim, base_type) if key not in cache: vector_t = gbl.std.vector[base_type] @@ -231,18 +233,20 @@ def _build_nested_vector_type(ndim, base_type, cache={}): def convert(arr): ndim = arr.ndim if arr.size > 0: - base_type = type(arr.flat[0].item()) + base_type = CPP_EXPLICIT_TYPES.get( + arr.dtype.type.__name__, type(arr.flat[0].item()) + ) else: base_type = float if ndim == 1: - vector = _build_nested_vector_type(1, base_type)() + vector = build_nested_vector_type(1, base_type)() vector.reserve(arr.size) for elem in arr.flat: vector.push_back(elem.item()) return vector - vector_type = _build_nested_vector_type(ndim, base_type) + vector_type = build_nested_vector_type(ndim, base_type) result = vector_type() result.reserve(arr.shape[0]) for subarr in arr: From 1961a85d4c85c5b972164210ccdc609627c879c0 Mon Sep 17 00:00:00 2001 From: Khushiyant Date: Mon, 28 Oct 2024 22:22:15 +0530 Subject: [PATCH 8/8] chore: add test for multiple dtype --- test/test_stltypes.py | 68 +++++++++++++++++++++++++------------------ 1 file changed, 39 insertions(+), 29 deletions(-) diff --git a/test/test_stltypes.py b/test/test_stltypes.py index 0ceb90e2..673396a0 100644 --- a/test/test_stltypes.py +++ b/test/test_stltypes.py @@ -789,35 +789,45 @@ def test23_copy_conversion(self): for f, d in zip(x, v): assert f == d - def test25_int_ndarray_template_less(self): - import cppyy - - try: - import numpy as np - except ImportError: - skip("numpy is not installed") - - rng = np.random.default_rng(seed=42) - x = rng.integers(low=0, high=100, size=(10, 3, 3, 3)) - v = cppyy.gbl.std.vector(x) - - assert len(v) == 10 - assert type(v[0][0][0][0]) is int - - def test26_float_ndarray_template_less(self): - import cppyy - - try: - import numpy as np - except ImportError: - skip("numpy is not installed") - - rng = np.random.default_rng(seed=42) - x = rng.random(size=(10, 3, 3, 3)) - v = cppyy.gbl.std.vector(x) - - assert len(v) == 10 - assert type(v[0][0][0][0]) is float +def test_ndarray_template_less(self): + import cppyy + + try: + import numpy as np + except ImportError: + self.skipTest("numpy is not installed") + dtype_mappings = { + np.int32: "int", + np.int64: "long", + np.float32: "float", + np.float64: "double", + } + + shapes = [ + (10,), # 1D array + (5, 5), # 2D array + (4, 4, 4), # 3D array + (2, 3, 3, 3), # 4D array + ] + + for np_dtype, cpp_dtype in dtype_mappings.items(): + for shape in shapes: + rng = np.random.default_rng(seed=42) + + if np.issubdtype(np_dtype, np.integer): + x = rng.integers(low=0, high=100, size=shape, dtype=np_dtype) + else: + x = rng.random(size=shape).astype(np_dtype) + + cpp_vector = cppyy.gbl.std.vector(x) + assert len(cpp_vector) == shape[0] + + if len(shape) > 1: + assert len(cpp_vector[0]) == shape[1] + if len(shape) > 2: + assert len(cpp_vector[0][0]) == shape[2] + if len(shape) > 3: + assert len(cpp_vector[0][0][0]) == shape[3] class TestSTLSTRING: def setup_class(cls):