From ffb17a9ee02d984c981e7a58edb311aa38e3f217 Mon Sep 17 00:00:00 2001 From: Francesc Alted Date: Sat, 23 Nov 2024 14:02:45 +0100 Subject: [PATCH 001/166] Getting ready for release 2.10.2 --- ANNOUNCE.rst | 28 +++++++++++----------------- RELEASE_NOTES.rst | 9 ++++++++- VERSION | 2 +- 3 files changed, 20 insertions(+), 19 deletions(-) diff --git a/ANNOUNCE.rst b/ANNOUNCE.rst index 4725189..12edc8d 100644 --- a/ANNOUNCE.rst +++ b/ANNOUNCE.rst @@ -1,34 +1,28 @@ ========================= -Announcing NumExpr 2.10.1 +Announcing NumExpr 2.10.2 ========================= Hi everyone, -NumExpr 2.10.1 continues to stabilize the support for NumPy 2.0.0. -Also, the default number of 'safe' threads has been upgraded to 16 -(instead of previous 8). Finally, preliminary support for Python 3.13; -thanks to Karolina Surma. +NumExpr 2.10.2 provides wheels for Python 2.13 for first time. +Also, there is better support for CPUs that do not have a power +of 2 number of cores. Finally, numexpr is allowed to run with +the multithreading package in Python. Project documentation is available at: http://numexpr.readthedocs.io/ -Changes from 2.10.0 to 2.10.1 +Changes from 2.10.1 to 2.10.2 ----------------------------- -* The default number of 'safe' threads has been upgraded to 16 (instead of - previous 8). That means that if your CPU has > 16 cores, the default is - to use 16. You can always override this with the "NUMEXPR_MAX_THREADS" - environment variable. +* Better support for CPUs that do not have a power of 2 number of + cores. See #479 and #490. Thanks to @avalentino. -* NumPy 1.23 is now the minimum supported. +* Allow numexpr to run with the multithreading package in Python. + See PR #496. Thanks to @emmaai -* Preliminary support for Python 3.13. Thanks to Karolina Surma. - -* Fix tests on nthreads detection (closes: #479). Thanks to @avalentino. - -* The build process has been modernized and now uses the `pyproject.toml` - file for more of the configuration options. 
+* Wheels for Python 3.13 are now provided. What's Numexpr? --------------- diff --git a/RELEASE_NOTES.rst b/RELEASE_NOTES.rst index c51964f..f0a64ab 100644 --- a/RELEASE_NOTES.rst +++ b/RELEASE_NOTES.rst @@ -5,7 +5,14 @@ Release notes for NumExpr 2.10 series Changes from 2.10.1 to 2.10.2 ----------------------------- -* **Under development.** +* Better support for CPUs that do not have a power of 2 number of + cores. See #479 and #490. Thanks to @avalentino. + +* Allow numexpr to run with the multithreading package in Python. + See PR #496. Thanks to @emmaai + +* Wheels for Python 3.13 are now provided. + Changes from 2.10.0 to 2.10.1 ----------------------------- diff --git a/VERSION b/VERSION index 05a16b0..c6436a8 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -2.10.2.dev0 +2.10.2 From e576f0486e01a9cf5ecbb08ced436918def29877 Mon Sep 17 00:00:00 2001 From: Francesc Alted Date: Sat, 23 Nov 2024 14:40:04 +0100 Subject: [PATCH 002/166] Post 2.10.2 release actions done --- RELEASE_NOTES.rst | 6 ++++++ VERSION | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/RELEASE_NOTES.rst b/RELEASE_NOTES.rst index f0a64ab..3d0db54 100644 --- a/RELEASE_NOTES.rst +++ b/RELEASE_NOTES.rst @@ -2,6 +2,12 @@ Release notes for NumExpr 2.10 series ===================================== +Changes from 2.10.2 to 2.10.3 +----------------------------- + +* **Under development.** + + Changes from 2.10.1 to 2.10.2 ----------------------------- diff --git a/VERSION b/VERSION index c6436a8..c23e78a 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -2.10.2 +2.10.3.dev0 From 98f5e891713851d546001a9174b5f0c099debc22 Mon Sep 17 00:00:00 2001 From: Francesc Alted Date: Sat, 23 Nov 2024 14:41:22 +0100 Subject: [PATCH 003/166] Remove building musllinux wheels for next release --- .github/workflows/build.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index d7b0afc..23736fc 100644 --- 
a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -11,8 +11,8 @@ env: CIBW_TEST_COMMAND: python -c "import sys, numexpr; sys.exit(0 if numexpr.test().wasSuccessful() else 1)" CIBW_TEST_SKIP: "*macosx*arm64*" # Building for musllinux and aarch64 takes way too much time. - # NumPy is adding musllinux for just x86_64 too, so this is not too bad. - CIBW_SKIP: "*musllinux*aarch64*" + # Moreover, NumPy is not providing musllinux for x86_64 either, so it's not worth it. + CIBW_SKIP: "*musllinux*aarch64* *musllinux*x86_64*" jobs: build_wheels: From 2378606e4d8a0daf1bdc97512256953bffc8403f Mon Sep 17 00:00:00 2001 From: Francesc Alted Date: Sat, 23 Nov 2024 17:36:40 +0100 Subject: [PATCH 004/166] Next build will deprecate Python 3.9 --- .github/workflows/build.yml | 4 ++-- RELEASE_NOTES.rst | 2 +- pyproject.toml | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 23736fc..ac7cc83 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -28,8 +28,8 @@ jobs: matrix: os: [ubuntu-latest, windows-latest, macos-latest] arch: [x86_64, aarch64] - cibw_build: ["cp3{9,10,11,12,13}-*"] - p_ver: ["3.9-3.13"] + cibw_build: ["cp3{10,11,12,13}-*"] + p_ver: ["3.10-3.13"] exclude: - os: windows-latest arch: aarch64 diff --git a/RELEASE_NOTES.rst b/RELEASE_NOTES.rst index 3d0db54..f919b11 100644 --- a/RELEASE_NOTES.rst +++ b/RELEASE_NOTES.rst @@ -5,7 +5,7 @@ Release notes for NumExpr 2.10 series Changes from 2.10.2 to 2.10.3 ----------------------------- -* **Under development.** +* Python 3.10 is now the minimum supported version. 
Changes from 2.10.1 to 2.10.2 diff --git a/pyproject.toml b/pyproject.toml index 9f2f025..e95cd82 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -26,12 +26,12 @@ classifiers = [ "Operating System :: Microsoft :: Windows", "Operating System :: Unix", "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", ] -requires-python = ">=3.9" +requires-python = ">=3.10" # Follow guidelines from https://scientific-python.org/specs/spec-0000/ dependencies = [ "numpy>=1.23.0", From b1a916c26c5b01ececbf2058f377f4643597c51e Mon Sep 17 00:00:00 2001 From: Teng Liu <27rabbitlt@gmail.com> Date: Fri, 20 Dec 2024 16:36:51 +0100 Subject: [PATCH 005/166] fix imaginary evaluation 1.1e1j --- numexpr/necompiler.py | 2 +- numexpr/tests/test_numexpr.py | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/numexpr/necompiler.py b/numexpr/necompiler.py index a693c4d..75bdaa2 100644 --- a/numexpr/necompiler.py +++ b/numexpr/necompiler.py @@ -265,7 +265,7 @@ def __str__(self): _flow_pat = r'[\;\[\:]' _dunder_pat = r'(^|[^\w])__[\w]+__($|[^\w])' -_attr_pat = r'\.\b(?!(real|imag|(\d*[eE]?[+-]?\d+)|\d*j)\b)' +_attr_pat = r'\.\b(?!(real|imag|(\d*[eE]?[+-]?\d+)|(\d*[eE]?[+-]?\d+j)|(\d*j))\b)' _blacklist_re = re.compile(f'{_flow_pat}|{_dunder_pat}|{_attr_pat}') def stringToExpression(s, types, context, sanitize: bool=True): diff --git a/numexpr/tests/test_numexpr.py b/numexpr/tests/test_numexpr.py index 2aa56ca..62210b4 100644 --- a/numexpr/tests/test_numexpr.py +++ b/numexpr/tests/test_numexpr.py @@ -583,6 +583,9 @@ def test_sanitize(self): evaluate('1.5j') evaluate('3.j') + #pass imaginary with scientific notation + evaluate('1.2e3+4.5e6j') + # pass forbidden characters within quotes x = np.array(['a', 'b'], dtype=bytes) evaluate("x == 'b:'") From b7c50ff8a6f388a50639e90b2f80e0b5cf14106f 
Mon Sep 17 00:00:00 2001 From: Francesc Alted Date: Thu, 6 Feb 2025 18:40:55 +0100 Subject: [PATCH 006/166] Use upload-artifact v4 --- .github/workflows/build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index ac7cc83..42e6a9d 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -63,7 +63,7 @@ jobs: python -m pip install build python -m build --sdist --outdir wheelhouse . - - uses: actions/upload-artifact@v3 + - uses: actions/upload-artifact@v4 with: path: ./wheelhouse/* From be928864031ede0bf466fca625881b7e50a210bc Mon Sep 17 00:00:00 2001 From: Francesc Alted Date: Fri, 7 Feb 2025 13:53:55 +0100 Subject: [PATCH 007/166] Cosmetic fixes in docstrings --- numexpr/necompiler.py | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/numexpr/necompiler.py b/numexpr/necompiler.py index 75bdaa2..98aee4c 100644 --- a/numexpr/necompiler.py +++ b/numexpr/necompiler.py @@ -884,7 +884,7 @@ def validate(ex: str, except KeyError: compiled_ex = _numexpr_cache[numexpr_key] = NumExpr(ex, signature, sanitize=sanitize, **context) kwargs = {'out': out, 'order': order, 'casting': casting, - 'ex_uses_vml': ex_uses_vml} + 'ex_uses_vml': ex_uses_vml} _numexpr_last.set(ex=compiled_ex, argnames=names, kwargs=kwargs) except Exception as e: return e @@ -943,9 +943,9 @@ def evaluate(ex: str, * 'unsafe' means any data conversions may be done. sanitize: bool - Both `validate` and by extension `evaluate` call `eval(ex)`, which is - potentially dangerous on unsanitized inputs. As such, NumExpr by default - performs simple sanitization, banning the character ':;[', the + `validate` (and by extension `evaluate`) call `eval(ex)`, which is + potentially dangerous on non-sanitized inputs. As such, NumExpr by default + performs simple sanitization, banning the characters ':;[', the dunder '__[\w+]__', and attribute access to all but '.real' and '.imag'. 
Using `None` defaults to `True` unless the environment variable @@ -956,15 +956,9 @@ def evaluate(ex: str, The calling frame depth. Unless you are a NumExpr developer you should not set this value. - Note - ---- - Both `validate` and by extension `evaluate` call `eval(ex)`, which is - potentially dangerous on unsanitized inputs. As such, NumExpr does some - sanitization, banning the character ':;[', the dunder '__', and attribute - access to all but '.r' for real and '.i' for imag access to complex numbers. """ # We could avoid code duplication if we called validate and then re_evaluate - # here, but they we have difficulties with the `sys.getframe(2)` call in + # here, but we have difficulties with the `sys.getframe(2)` call in # `getArguments` e = validate(ex, local_dict=local_dict, global_dict=global_dict, out=out, order=order, casting=casting, From 36aa11b736c40e7b7b09a386951a72dfc65dd149 Mon Sep 17 00:00:00 2001 From: Francesc Alted Date: Mon, 3 Mar 2025 12:54:00 +0100 Subject: [PATCH 008/166] Add a funding option for Francesc --- .github/FUNDING.yml | 1 + 1 file changed, 1 insertion(+) create mode 100644 .github/FUNDING.yml diff --git a/.github/FUNDING.yml b/.github/FUNDING.yml new file mode 100644 index 0000000..af04286 --- /dev/null +++ b/.github/FUNDING.yml @@ -0,0 +1 @@ +github: FrancescAlted From 33ee71b0c6f13224f3031cd8b42921c748ce9ede Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edgar=20Andr=C3=A9s=20Margffoy=20Tuay?= Date: Mon, 3 Mar 2025 11:37:42 -0500 Subject: [PATCH 009/166] Test numexpr against pytest-run-parallel on 3.13t --- .gitignore | 1 + numexpr/interpreter.cpp | 6 ++++-- numexpr/necompiler.py | 37 +++++++++++++++++++++++------------ numexpr/tests/test_numexpr.py | 10 +++++++++- 4 files changed, 39 insertions(+), 15 deletions(-) diff --git a/.gitignore b/.gitignore index 928bf15..7bf6f98 100644 --- a/.gitignore +++ b/.gitignore @@ -7,6 +7,7 @@ artifact/ numexpr.egg-info/ *.pyc *.swp +*.so *~ doc/_build site.cfg diff --git 
a/numexpr/interpreter.cpp b/numexpr/interpreter.cpp index edebd71..32f6c37 100644 --- a/numexpr/interpreter.cpp +++ b/numexpr/interpreter.cpp @@ -556,7 +556,7 @@ stringcontains(const char *haystack_start, const char *needle_start, npy_intp ma size_t si = 0; size_t min_len = min(needle_len, haystack_len); - while (*haystack && *needle && si < min_len) + while (si < min_len && *haystack && *needle) { ok &= *haystack++ == *needle++; si++; @@ -573,7 +573,7 @@ stringcontains(const char *haystack_start, const char *needle_start, npy_intp ma } /* calc haystack length */ - while (*haystack && si < haystack_len) { + while (si < haystack_len && *haystack) { haystack++; si++; } @@ -652,6 +652,7 @@ int vm_engine_iter_task(NpyIter *iter, npy_intp *memsteps, /* Then finish off the rest */ if (block_size > 0) do { + block_size = *size_ptr; #define REDUCTION_INNER_LOOP #define BLOCK_SIZE block_size #include "interp_body.cpp" @@ -698,6 +699,7 @@ vm_engine_iter_outer_reduce_task(NpyIter *iter, npy_intp *memsteps, /* Then finish off the rest */ if (block_size > 0) do { + block_size = *size_ptr; #define BLOCK_SIZE block_size #define NO_OUTPUT_BUFFERING // Because it's a reduction #include "interp_body.cpp" diff --git a/numexpr/necompiler.py b/numexpr/necompiler.py index 98aee4c..296c41b 100644 --- a/numexpr/necompiler.py +++ b/numexpr/necompiler.py @@ -774,9 +774,12 @@ def getArguments(names, local_dict=None, global_dict=None, _frame_depth: int=2): # Dictionaries for caching variable names and compiled expressions -_names_cache = CacheDict(256) -_numexpr_cache = CacheDict(256) -_numexpr_last = ContextDict() +# _names_cache = CacheDict(256) +_names_cache = threading.local() +# _numexpr_cache = CacheDict(256) +_numexpr_cache = threading.local() +# _numexpr_last = ContextDict() +_numexpr_last = threading.local() evaluate_lock = threading.Lock() def validate(ex: str, @@ -853,6 +856,14 @@ def validate(ex: str, """ global _numexpr_last + if not hasattr(_numexpr_last, 'l'): + _numexpr_last.l 
= ContextDict() + + if not hasattr(_names_cache, 'c'): + _names_cache.c = CacheDict(256) + + if not hasattr(_numexpr_cache, 'c'): + _numexpr_cache.c = CacheDict(256) try: @@ -868,9 +879,9 @@ def validate(ex: str, # Get the names for this expression context = getContext(kwargs) expr_key = (ex, tuple(sorted(context.items()))) - if expr_key not in _names_cache: - _names_cache[expr_key] = getExprNames(ex, context, sanitize=sanitize) - names, ex_uses_vml = _names_cache[expr_key] + if expr_key not in _names_cache.c: + _names_cache.c[expr_key] = getExprNames(ex, context, sanitize=sanitize) + names, ex_uses_vml = _names_cache.c[expr_key] arguments = getArguments(names, local_dict, global_dict, _frame_depth=_frame_depth) # Create a signature @@ -880,12 +891,12 @@ def validate(ex: str, # Look up numexpr if possible. numexpr_key = expr_key + (tuple(signature),) try: - compiled_ex = _numexpr_cache[numexpr_key] + compiled_ex = _numexpr_cache.c[numexpr_key] except KeyError: - compiled_ex = _numexpr_cache[numexpr_key] = NumExpr(ex, signature, sanitize=sanitize, **context) + compiled_ex = _numexpr_cache.c[numexpr_key] = NumExpr(ex, signature, sanitize=sanitize, **context) kwargs = {'out': out, 'order': order, 'casting': casting, 'ex_uses_vml': ex_uses_vml} - _numexpr_last.set(ex=compiled_ex, argnames=names, kwargs=kwargs) + _numexpr_last.l.set(ex=compiled_ex, argnames=names, kwargs=kwargs) except Exception as e: return e return None @@ -987,13 +998,15 @@ def re_evaluate(local_dict: Optional[Dict] = None, not set this value. 
""" global _numexpr_last + if not hasattr(_numexpr_last, 'l'): + _numexpr_last.l = ContextDict() try: - compiled_ex = _numexpr_last['ex'] + compiled_ex = _numexpr_last.l['ex'] except KeyError: raise RuntimeError("A previous evaluate() execution was not found, please call `validate` or `evaluate` once before `re_evaluate`") - argnames = _numexpr_last['argnames'] + argnames = _numexpr_last.l['argnames'] args = getArguments(argnames, local_dict, global_dict, _frame_depth=_frame_depth) - kwargs = _numexpr_last['kwargs'] + kwargs = _numexpr_last.l['kwargs'] with evaluate_lock: return compiled_ex(*args, **kwargs) diff --git a/numexpr/tests/test_numexpr.py b/numexpr/tests/test_numexpr.py index 62210b4..3970bab 100644 --- a/numexpr/tests/test_numexpr.py +++ b/numexpr/tests/test_numexpr.py @@ -16,6 +16,7 @@ import warnings from contextlib import contextmanager import subprocess +import pytest import numpy as np from numpy import ( @@ -318,6 +319,7 @@ def test_refcount(self): evaluate('1') assert sys.getrefcount(a) == 2 + @pytest.mark.thread_unsafe def test_locals_clears_globals(self): # Check for issue #313, whereby clearing f_locals also clear f_globals # if in the top-frame. 
This cannot be done inside `unittest` as it is always @@ -341,6 +343,7 @@ def test_locals_clears_globals(self): +@pytest.mark.thread_unsafe class test_numexpr2(test_numexpr): """Testing with 2 threads""" nthreads = 2 @@ -512,6 +515,7 @@ def test_illegal_value(self): else: self.fail() + @pytest.mark.thread_unsafe def test_sanitize(self): with _environment('NUMEXPR_SANITIZE', '1'): # Forbid dunder @@ -590,7 +594,7 @@ def test_sanitize(self): x = np.array(['a', 'b'], dtype=bytes) evaluate("x == 'b:'") - + @pytest.mark.thread_unsafe def test_no_sanitize(self): try: # Errors on compile() after eval() evaluate('import os;', sanitize=False) @@ -677,6 +681,7 @@ def test_ex_uses_vml(self): if 'sparc' not in platform.machine(): # Execution order set here so as to not use too many threads # during the rest of the execution. See #33 for details. + @pytest.mark.thread_unsafe def test_changing_nthreads_00_inc(self): a = linspace(-1, 1, 1000000) b = ((.25 * a + .75) * a - 1.5) * a - 2 @@ -685,6 +690,7 @@ def test_changing_nthreads_00_inc(self): c = evaluate("((.25*a + .75)*a - 1.5)*a - 2") assert_array_almost_equal(b, c) + @pytest.mark.thread_unsafe def test_changing_nthreads_01_dec(self): a = linspace(-1, 1, 1000000) b = ((.25 * a + .75) * a - 1.5) * a - 2 @@ -1123,6 +1129,7 @@ def _environment(key, value): del os.environ[key] # Test cases for the threading configuration +@pytest.mark.thread_unsafe class test_threading_config(TestCase): def test_max_threads_unset(self): # Has to be done in a subprocess as `importlib.reload` doesn't let us @@ -1306,6 +1313,7 @@ def _worker(qout=None): # Case test for subprocesses (via multiprocessing module) class test_subprocess(TestCase): + @pytest.mark.thread_unsafe def test_multiprocess(self): try: import multiprocessing as mp From 8680084f04fc5c4b8a5fc015d9f0cf8174c62cbd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edgar=20Andr=C3=A9s=20Margffoy=20Tuay?= Date: Wed, 26 Feb 2025 11:13:08 -0500 Subject: [PATCH 010/166] Mock pytest in case is not 
available --- numexpr/tests/test_numexpr.py | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/numexpr/tests/test_numexpr.py b/numexpr/tests/test_numexpr.py index 3970bab..98ae459 100644 --- a/numexpr/tests/test_numexpr.py +++ b/numexpr/tests/test_numexpr.py @@ -16,7 +16,6 @@ import warnings from contextlib import contextmanager import subprocess -import pytest import numpy as np from numpy import ( @@ -37,6 +36,13 @@ from numexpr.utils import detect_number_of_cores import unittest +from unittest.mock import MagicMock + +try: + import pytest + pytest_available = True +except ImportError: + pytest_available = False TestCase = unittest.TestCase @@ -45,6 +51,15 @@ MAX_THREADS = 16 +if not pytest_available: + def identity(f): + return f + + pytest = MagicMock() + pytest.mark = MagicMock() + pytest.mark.thread_unsafe = identity + + class test_numexpr(TestCase): """Testing with 1 thread""" nthreads = 1 From 8af34da07df516ae8935d7e893bc8199b799eb51 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edgar=20Andr=C3=A9s=20Margffoy=20Tuay?= Date: Wed, 26 Feb 2025 14:39:01 -0500 Subject: [PATCH 011/166] Build free-threaded wheels --- .github/workflows/build.yml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 42e6a9d..8b3142f 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -24,12 +24,13 @@ jobs: CIBW_BUILD: ${{ matrix.cibw_build }} CIBW_ARCHS_LINUX: ${{ matrix.arch }} CIBW_ARCHS_MACOS: "x86_64 arm64" + CIBW_FREE_THREADED_SUPPORT: true strategy: matrix: os: [ubuntu-latest, windows-latest, macos-latest] arch: [x86_64, aarch64] - cibw_build: ["cp3{10,11,12,13}-*"] - p_ver: ["3.10-3.13"] + cibw_build: ["cp3{10,11,12,13,13t}-*"] + p_ver: ["3.10-3.13+3.13t"] exclude: - os: windows-latest arch: aarch64 From 706cb9d40715f6684d47fde8f60979e17762528b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edgar=20Andr=C3=A9s=20Margffoy=20Tuay?= Date: Fri, 28 Feb 
2025 16:33:45 -0500 Subject: [PATCH 012/166] Use CIBW_ENABLE --- .github/workflows/build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 8b3142f..e3edadd 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -24,7 +24,7 @@ jobs: CIBW_BUILD: ${{ matrix.cibw_build }} CIBW_ARCHS_LINUX: ${{ matrix.arch }} CIBW_ARCHS_MACOS: "x86_64 arm64" - CIBW_FREE_THREADED_SUPPORT: true + CIBW_ENABLE: true strategy: matrix: os: [ubuntu-latest, windows-latest, macos-latest] From 318145546c9564ecd0c8289662cebef1957a1d46 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edgar=20Andr=C3=A9s=20Margffoy=20Tuay?= Date: Fri, 28 Feb 2025 17:26:12 -0500 Subject: [PATCH 013/166] Use pytest for testing --- .github/workflows/build.yml | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index e3edadd..31c3a96 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -6,9 +6,15 @@ permissions: contents: read env: - CIBW_BEFORE_BUILD: pip install setuptools oldest-supported-numpy + CIBW_BEFORE_BUILD: pip install setuptools oldest-supported-numpy pytest + CIBW_ENVIRONMENT: > + IS_FREETHREADED=$(python -c "import sysconfig;print(sysconfig.get_config_var('Py_GIL_DISABLED'))") + PYTEST_RUN_PARALLEL=$([ "$IS_FREETHREADED" == "1" ] && echo "pytest-run-parallel" || echo "") + PARALLEL_THREADS=$([ "$IS_FREETHREADED" == "1" ] && echo "--parallel-threads=4" || echo "") + CIBW_BEFORE_BUILD_LINUX: > + pip install setuptools oldest-supported-numpy pytest $PYTEST_RUN_PARALLEL CIBW_BUILD_VERBOSITY: 1 - CIBW_TEST_COMMAND: python -c "import sys, numexpr; sys.exit(0 if numexpr.test().wasSuccessful() else 1)" + CIBW_TEST_COMMAND: pytest $PARALLEL_THREADS numexpr/tests CIBW_TEST_SKIP: "*macosx*arm64*" # Building for musllinux and aarch64 takes way too much time. 
# Moreover, NumPy is not providing musllinux for x86_64 either, so it's not worth it. From 61076a277e704e6a43ecadaf4c311e2ad816fc74 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edgar=20Andr=C3=A9s=20Margffoy=20Tuay?= Date: Mon, 3 Mar 2025 11:19:05 -0500 Subject: [PATCH 014/166] Update env variable value --- .github/workflows/build.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 31c3a96..6fc4ee4 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -7,7 +7,7 @@ permissions: env: CIBW_BEFORE_BUILD: pip install setuptools oldest-supported-numpy pytest - CIBW_ENVIRONMENT: > + CIBW_ENVIRONMENT_LINUX: > IS_FREETHREADED=$(python -c "import sysconfig;print(sysconfig.get_config_var('Py_GIL_DISABLED'))") PYTEST_RUN_PARALLEL=$([ "$IS_FREETHREADED" == "1" ] && echo "pytest-run-parallel" || echo "") PARALLEL_THREADS=$([ "$IS_FREETHREADED" == "1" ] && echo "--parallel-threads=4" || echo "") @@ -30,7 +30,7 @@ jobs: CIBW_BUILD: ${{ matrix.cibw_build }} CIBW_ARCHS_LINUX: ${{ matrix.arch }} CIBW_ARCHS_MACOS: "x86_64 arm64" - CIBW_ENABLE: true + CIBW_ENABLE: cpython-freethreading strategy: matrix: os: [ubuntu-latest, windows-latest, macos-latest] From 1d15ad415311f0c0676c09628e1cbe93c58ab650 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edgar=20Andr=C3=A9s=20Margffoy=20Tuay?= Date: Tue, 4 Mar 2025 18:00:21 -0500 Subject: [PATCH 015/166] Move free-threaded builds to an independent job --- .github/workflows/build.yml | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 6fc4ee4..11f6afd 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -7,14 +7,9 @@ permissions: env: CIBW_BEFORE_BUILD: pip install setuptools oldest-supported-numpy pytest - CIBW_ENVIRONMENT_LINUX: > - IS_FREETHREADED=$(python -c "import 
sysconfig;print(sysconfig.get_config_var('Py_GIL_DISABLED'))") - PYTEST_RUN_PARALLEL=$([ "$IS_FREETHREADED" == "1" ] && echo "pytest-run-parallel" || echo "") - PARALLEL_THREADS=$([ "$IS_FREETHREADED" == "1" ] && echo "--parallel-threads=4" || echo "") - CIBW_BEFORE_BUILD_LINUX: > - pip install setuptools oldest-supported-numpy pytest $PYTEST_RUN_PARALLEL + CIBW_BEFORE_TEST: pip install pytest CIBW_BUILD_VERBOSITY: 1 - CIBW_TEST_COMMAND: pytest $PARALLEL_THREADS numexpr/tests + CIBW_TEST_COMMAND: pytest numexpr/tests CIBW_TEST_SKIP: "*macosx*arm64*" # Building for musllinux and aarch64 takes way too much time. # Moreover, NumPy is not providing musllinux for x86_64 either, so it's not worth it. @@ -35,7 +30,7 @@ jobs: matrix: os: [ubuntu-latest, windows-latest, macos-latest] arch: [x86_64, aarch64] - cibw_build: ["cp3{10,11,12,13,13t}-*"] + cibw_build: ["cp3{10,11,12,13}-*", "cp313t-*"] p_ver: ["3.10-3.13+3.13t"] exclude: - os: windows-latest @@ -60,6 +55,13 @@ jobs: if: ${{ matrix.arch == 'aarch64' }} name: Set up QEMU + - name: Setup free-threading variables + shell: bash -l {0} + run: | + echo "CIBW_BEFORE_BUILD=pip install setuptools numpy" >> "$GITHUB_ENV" + echo "CIBW_BEFORE_TEST=pip install pytest pytest-run-parallel" >> "$GITHUB_ENV" + echo "CIBW_TEST_COMMAND=pytest --parallel-threads=4 numexpr/tests" >> "$GITHUB_ENV" + - name: Build wheels run: | python -m cibuildwheel --output-dir wheelhouse From 40f04d21c0d76b50847c9dd01ed2281fec1b9ee5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edgar=20Andr=C3=A9s=20Margffoy=20Tuay?= Date: Wed, 5 Mar 2025 12:30:49 -0500 Subject: [PATCH 016/166] Set free-threading variables only under free-threaded conditions --- .github/workflows/build.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 11f6afd..f6a68b4 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -17,7 +17,7 @@ env: jobs: build_wheels: - name: Build wheels 
on ${{ matrix.os }} for ${{ matrix.arch }} - ${{ matrix.p_ver }} + name: Build wheels on ${{ matrix.os }} for ${{ matrix.arch }} - ${{ matrix.cibw_build }} runs-on: ${{ matrix.os }} permissions: contents: write @@ -27,6 +27,7 @@ jobs: CIBW_ARCHS_MACOS: "x86_64 arm64" CIBW_ENABLE: cpython-freethreading strategy: + fail-fast: false matrix: os: [ubuntu-latest, windows-latest, macos-latest] arch: [x86_64, aarch64] @@ -56,6 +57,7 @@ jobs: name: Set up QEMU - name: Setup free-threading variables + if: ${{ endsWith(matrix.cibw_build, 't-*') }} shell: bash -l {0} run: | echo "CIBW_BEFORE_BUILD=pip install setuptools numpy" >> "$GITHUB_ENV" From 0fb95ec22cd1b070bd46b428cb404a4534d1f5c2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edgar=20Andr=C3=A9s=20Margffoy=20Tuay?= Date: Wed, 5 Mar 2025 12:32:07 -0500 Subject: [PATCH 017/166] Execute pytest with --pyargs --- .github/workflows/build.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index f6a68b4..0761a58 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -9,7 +9,7 @@ env: CIBW_BEFORE_BUILD: pip install setuptools oldest-supported-numpy pytest CIBW_BEFORE_TEST: pip install pytest CIBW_BUILD_VERBOSITY: 1 - CIBW_TEST_COMMAND: pytest numexpr/tests + CIBW_TEST_COMMAND: pytest --pyargs numexpr CIBW_TEST_SKIP: "*macosx*arm64*" # Building for musllinux and aarch64 takes way too much time. # Moreover, NumPy is not providing musllinux for x86_64 either, so it's not worth it. 
@@ -62,7 +62,7 @@ jobs: run: | echo "CIBW_BEFORE_BUILD=pip install setuptools numpy" >> "$GITHUB_ENV" echo "CIBW_BEFORE_TEST=pip install pytest pytest-run-parallel" >> "$GITHUB_ENV" - echo "CIBW_TEST_COMMAND=pytest --parallel-threads=4 numexpr/tests" >> "$GITHUB_ENV" + echo "CIBW_TEST_COMMAND=pytest --parallel-threads=4 --pyargs numexpr" >> "$GITHUB_ENV" - name: Build wheels run: | From e75d15f718f861d914618fb7abd505591a22d1e0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edgar=20Andr=C3=A9s=20Margffoy=20Tuay?= Date: Wed, 5 Mar 2025 13:00:12 -0500 Subject: [PATCH 018/166] Add section in README regarding free-threading --- README.rst | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/README.rst b/README.rst index 9033d51..264fd2b 100644 --- a/README.rst +++ b/README.rst @@ -159,6 +159,24 @@ Usage array([ True, False, False], dtype=bool) +Free-threading support +---------------------- +Starting on CPython 3.13 onwards there is a new distribution that disables the +Global Interpreter Lock (GIL) altogether, thus increasing the performance yields +under multi-threaded conditions on a single interpreter, as opposed to having to use +multiprocessing. + +Whilst numexpr has been demonstrated to work under free-threaded +CPython, considerations need to be taken when using numexpr native parallel +implementation vs using Python threads directly in order to prevent oversubscription, +we recommend either using the main CPython interpreter thread to spawn multiple C threads +using the parallel numexpr API, or spawning multiple CPython threads that do not use +the parallel API. 
+ +For more information about free-threaded CPython, we recommend visiting the following +`community Wiki ` + + Documentation ------------- From 5fe38b22ea0714e6d3240da00191656bb3e7f5dd Mon Sep 17 00:00:00 2001 From: Francesc Alted Date: Thu, 6 Mar 2025 17:56:13 +0100 Subject: [PATCH 019/166] Fix the name of the wheels for uploading --- .github/workflows/build.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 0761a58..b4c6956 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -76,6 +76,7 @@ jobs: - uses: actions/upload-artifact@v4 with: + name: ${{ matrix.os }}-${{ matrix.arch }}-${{ matrix.cibw_build }} path: ./wheelhouse/* - name: Upload to GitHub Release From 95cbaaacbeb64488a009419661cf59b43594b2ee Mon Sep 17 00:00:00 2001 From: Francesc Alted Date: Thu, 6 Mar 2025 18:10:45 +0100 Subject: [PATCH 020/166] Remove asterisks from wheel names and other improvements --- .github/workflows/build.yml | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index b4c6956..d2b1e93 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -31,7 +31,7 @@ jobs: matrix: os: [ubuntu-latest, windows-latest, macos-latest] arch: [x86_64, aarch64] - cibw_build: ["cp3{10,11,12,13}-*", "cp313t-*"] + cibw_build: ["cp310", "cp311", "cp312", "cp313", "cp313t"] p_ver: ["3.10-3.13+3.13t"] exclude: - os: windows-latest @@ -48,13 +48,9 @@ jobs: with: python-version: '3.x' - - name: Install cibuildwheel - run: | - python -m pip install cibuildwheel - - - uses: docker/setup-qemu-action@v2 - if: ${{ matrix.arch == 'aarch64' }} - name: Set up QEMU +# - uses: docker/setup-qemu-action@v2 +# if: ${{ matrix.arch == 'aarch64' }} +# name: Set up QEMU - name: Setup free-threading variables if: ${{ endsWith(matrix.cibw_build, 't-*') }} @@ -65,8 +61,7 @@ jobs: echo "CIBW_TEST_COMMAND=pytest 
--parallel-threads=4 --pyargs numexpr" >> "$GITHUB_ENV" - name: Build wheels - run: | - python -m cibuildwheel --output-dir wheelhouse + uses: pypa/cibuildwheel@v2.23 - name: Make sdist if: ${{ matrix.os == 'windows-latest' }} From a15f943f3030fe50df7d9928530a40073e685dca Mon Sep 17 00:00:00 2001 From: Francesc Alted Date: Thu, 6 Mar 2025 18:15:39 +0100 Subject: [PATCH 021/166] Do not remove muslinux builds for now --- .github/workflows/build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index d2b1e93..b5a60c7 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -13,7 +13,7 @@ env: CIBW_TEST_SKIP: "*macosx*arm64*" # Building for musllinux and aarch64 takes way too much time. # Moreover, NumPy is not providing musllinux for x86_64 either, so it's not worth it. - CIBW_SKIP: "*musllinux*aarch64* *musllinux*x86_64*" + # CIBW_SKIP: "*musllinux*aarch64* *musllinux*x86_64*" jobs: build_wheels: From 2f5bf50f151f679552c59972eafd2784e4b3a76a Mon Sep 17 00:00:00 2001 From: Francesc Alted Date: Thu, 6 Mar 2025 18:24:48 +0100 Subject: [PATCH 022/166] Use cibw_id to remove * from wheel names --- .github/workflows/build.yml | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index b5a60c7..0929225 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -13,7 +13,7 @@ env: CIBW_TEST_SKIP: "*macosx*arm64*" # Building for musllinux and aarch64 takes way too much time. # Moreover, NumPy is not providing musllinux for x86_64 either, so it's not worth it. 
- # CIBW_SKIP: "*musllinux*aarch64* *musllinux*x86_64*" + CIBW_SKIP: "*musllinux*aarch64* *musllinux*x86_64*" jobs: build_wheels: @@ -31,7 +31,9 @@ jobs: matrix: os: [ubuntu-latest, windows-latest, macos-latest] arch: [x86_64, aarch64] - cibw_build: ["cp310", "cp311", "cp312", "cp313", "cp313t"] + cibw_build: [ "cp310-*", "cp311-*", "cp312-*", "cp313-*", "cp313t-*" ] + # These are necessary to avoid * in wheel names, which are not allowed in GitHub Actions + cibw_id: [ "cp310", "cp311", "cp312", "cp313", "cp313t" ] p_ver: ["3.10-3.13+3.13t"] exclude: - os: windows-latest From 68642a1f33f4940cc13ac39342adbb15b799c3a3 Mon Sep 17 00:00:00 2001 From: Francesc Alted Date: Thu, 6 Mar 2025 18:31:40 +0100 Subject: [PATCH 023/166] Be explicit on the build names --- .github/workflows/build.yml | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 0929225..46f0997 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -31,10 +31,17 @@ jobs: matrix: os: [ubuntu-latest, windows-latest, macos-latest] arch: [x86_64, aarch64] - cibw_build: [ "cp310-*", "cp311-*", "cp312-*", "cp313-*", "cp313t-*" ] - # These are necessary to avoid * in wheel names, which are not allowed in GitHub Actions - cibw_id: [ "cp310", "cp311", "cp312", "cp313", "cp313t" ] - p_ver: ["3.10-3.13+3.13t"] + include: + - cibw_build: "cp310-*" + cibw_id: "cp310" + - cibw_build: "cp311-*" + cibw_id: "cp311" + - cibw_build: "cp312-*" + cibw_id: "cp312" + - cibw_build: "cp313-*" + cibw_id: "cp313" + - cibw_build: "cp313t-*" + cibw_id: "cp313t" exclude: - os: windows-latest arch: aarch64 @@ -73,7 +80,7 @@ jobs: - uses: actions/upload-artifact@v4 with: - name: ${{ matrix.os }}-${{ matrix.arch }}-${{ matrix.cibw_build }} + name: ${{ matrix.os }}-${{ matrix.arch }}-${{ matrix.cibw_id }} path: ./wheelhouse/* - name: Upload to GitHub Release From de54ba26d84d082579687bb97c3b04558ce44d4d Mon Sep 17 00:00:00 
2001 From: Francesc Alted Date: Thu, 6 Mar 2025 18:41:49 +0100 Subject: [PATCH 024/166] include -> python --- .github/workflows/build.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 46f0997..31ce15b 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -22,7 +22,7 @@ jobs: permissions: contents: write env: - CIBW_BUILD: ${{ matrix.cibw_build }} + CIBW_BUILD: ${{ matrix.python.cibw_build }} CIBW_ARCHS_LINUX: ${{ matrix.arch }} CIBW_ARCHS_MACOS: "x86_64 arm64" CIBW_ENABLE: cpython-freethreading @@ -31,7 +31,7 @@ jobs: matrix: os: [ubuntu-latest, windows-latest, macos-latest] arch: [x86_64, aarch64] - include: + python: - cibw_build: "cp310-*" cibw_id: "cp310" - cibw_build: "cp311-*" @@ -80,7 +80,7 @@ jobs: - uses: actions/upload-artifact@v4 with: - name: ${{ matrix.os }}-${{ matrix.arch }}-${{ matrix.cibw_id }} + name: ${{ matrix.os }}-${{ matrix.arch }}-${{ matrix.python.cibw_id }} path: ./wheelhouse/* - name: Upload to GitHub Release From 16ab7d5f577e56f49d8e07a4ed5fd085f631fdbf Mon Sep 17 00:00:00 2001 From: Francesc Alted Date: Thu, 6 Mar 2025 19:02:38 +0100 Subject: [PATCH 025/166] Yet another attempt for wheels --- .github/workflows/build.yml | 48 ++++++++++++++++++------------------- 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 31ce15b..9282d90 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -22,33 +22,37 @@ jobs: permissions: contents: write env: - CIBW_BUILD: ${{ matrix.python.cibw_build }} + CIBW_BUILD: ${{ matrix.cibw_pattern }} CIBW_ARCHS_LINUX: ${{ matrix.arch }} CIBW_ARCHS_MACOS: "x86_64 arm64" CIBW_ENABLE: cpython-freethreading strategy: fail-fast: false matrix: - os: [ubuntu-latest, windows-latest, macos-latest] - arch: [x86_64, aarch64] - python: - - cibw_build: "cp310-*" - cibw_id: "cp310" - - cibw_build: "cp311-*" - cibw_id: 
"cp311" - - cibw_build: "cp312-*" - cibw_id: "cp312" - - cibw_build: "cp313-*" - cibw_id: "cp313" - - cibw_build: "cp313t-*" - cibw_id: "cp313t" - exclude: - - os: windows-latest - arch: aarch64 - # cibuild is already in charge to build aarch64 (see CIBW_ARCHS_MACOS) - - os: macos-latest + include: + # Linux x86_64 builds + - os: ubuntu-latest + arch: x86_64 + cibw_pattern: "cp3{10,11,12,13,13t}-manylinux*" + artifact_name: "linux-x86_64" + + # Linux ARM64 builds (native runners) + - os: ubuntu-latest-arm64 arch: aarch64 + cibw_pattern: "cp3{10,11,12,13,13t}-manylinux*" + artifact_name: "linux-aarch64" + # Windows builds + - os: windows-latest + arch: x86_64 + cibw_pattern: "cp3{10,11,12,13,13t}-win*" + artifact_name: "windows-x86_64" + + # macOS builds (universal2) + - os: macos-latest + arch: x86_64 + cibw_pattern: "cp3{10,11,12,13,13t}-macosx*" + artifact_name: "macos-universal2" steps: - uses: actions/checkout@v3 @@ -57,10 +61,6 @@ jobs: with: python-version: '3.x' -# - uses: docker/setup-qemu-action@v2 -# if: ${{ matrix.arch == 'aarch64' }} -# name: Set up QEMU - - name: Setup free-threading variables if: ${{ endsWith(matrix.cibw_build, 't-*') }} shell: bash -l {0} @@ -80,7 +80,7 @@ jobs: - uses: actions/upload-artifact@v4 with: - name: ${{ matrix.os }}-${{ matrix.arch }}-${{ matrix.python.cibw_id }} + name: ${{ matrix.artifact_name }} path: ./wheelhouse/* - name: Upload to GitHub Release From 18e9b893432ff2ca22e011c15a446732fd1804bf Mon Sep 17 00:00:00 2001 From: Francesc Alted Date: Thu, 6 Mar 2025 19:11:17 +0100 Subject: [PATCH 026/166] Fixing arm64 arch --- .github/workflows/build.yml | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 9282d90..1042ad5 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -17,8 +17,8 @@ env: jobs: build_wheels: - name: Build wheels on ${{ matrix.os }} for ${{ matrix.arch }} - ${{ matrix.cibw_build }} - runs-on: ${{ 
matrix.os }} + name: Build wheels on ${{ matrix.os }} for ${{ matrix.arch }} + runs-on: ${{ matrix.runs-on || matrix.os }} permissions: contents: write env: @@ -37,10 +37,11 @@ jobs: artifact_name: "linux-x86_64" # Linux ARM64 builds (native runners) - - os: ubuntu-latest-arm64 + - os: ubuntu-latest arch: aarch64 cibw_pattern: "cp3{10,11,12,13,13t}-manylinux*" artifact_name: "linux-aarch64" + runs-on: ubuntu-latest-arm64 # Windows builds - os: windows-latest From 22ac3f157eedf2125d62a2e506fbf19a7f7bb307 Mon Sep 17 00:00:00 2001 From: Francesc Alted Date: Thu, 6 Mar 2025 19:13:36 +0100 Subject: [PATCH 027/166] Fixing arm64 arch --- .github/workflows/build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 1042ad5..50ddae0 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -41,7 +41,7 @@ jobs: arch: aarch64 cibw_pattern: "cp3{10,11,12,13,13t}-manylinux*" artifact_name: "linux-aarch64" - runs-on: ubuntu-latest-arm64 + runs-on: ["ubuntu-latest", "arm64"] # Windows builds - os: windows-latest From 3ea863bc27ec58d2d0f5e45a6085b5677377343e Mon Sep 17 00:00:00 2001 From: Francesc Alted Date: Thu, 6 Mar 2025 19:23:20 +0100 Subject: [PATCH 028/166] Don't use native arm64 builders for now --- .github/workflows/build.yml | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 50ddae0..60588c4 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -41,7 +41,8 @@ jobs: arch: aarch64 cibw_pattern: "cp3{10,11,12,13,13t}-manylinux*" artifact_name: "linux-aarch64" - runs-on: ["ubuntu-latest", "arm64"] + # Don't use native runners for now (looks like wait times are too long) + #runs-on: ["ubuntu-latest", "arm64"] # Windows builds - os: windows-latest @@ -70,6 +71,12 @@ jobs: echo "CIBW_BEFORE_TEST=pip install pytest pytest-run-parallel" >> "$GITHUB_ENV" echo 
"CIBW_TEST_COMMAND=pytest --parallel-threads=4 --pyargs numexpr" >> "$GITHUB_ENV" + - name: Set up QEMU + if: matrix.arch == 'aarch64' + uses: docker/setup-qemu-action@v3 + with: + platforms: arm64 + - name: Build wheels uses: pypa/cibuildwheel@v2.23 From bdbfd943a55574936d4f1b82ea0bec41c0e980b8 Mon Sep 17 00:00:00 2001 From: Francesc Alted Date: Thu, 6 Mar 2025 19:34:42 +0100 Subject: [PATCH 029/166] Skip tests on linux aarch64, not macosx arm64 --- .github/workflows/build.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 60588c4..b0077f8 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -10,7 +10,8 @@ env: CIBW_BEFORE_TEST: pip install pytest CIBW_BUILD_VERBOSITY: 1 CIBW_TEST_COMMAND: pytest --pyargs numexpr - CIBW_TEST_SKIP: "*macosx*arm64*" + # Testing on aarch64 takes too long, as it is currently emulated on GitHub Actions + CIBW_TEST_SKIP: "*linux*aarch64*" # Building for musllinux and aarch64 takes way too much time. # Moreover, NumPy is not providing musllinux for x86_64 either, so it's not worth it. 
CIBW_SKIP: "*musllinux*aarch64* *musllinux*x86_64*" From 48e7b9ae6c368d60abcfbd34d8fab4cf7ed6783b Mon Sep 17 00:00:00 2001 From: Francesc Alted Date: Thu, 6 Mar 2025 19:48:21 +0100 Subject: [PATCH 030/166] Add a pre-commit config file --- .pre-commit-config.yaml | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) create mode 100644 .pre-commit-config.yaml diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..69e8dd5 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,25 @@ +repos: +- repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.5.0 + hooks: + - id: trailing-whitespace + - id: end-of-file-fixer + - id: check-yaml + - id: debug-statements + +- repo: https://github.com/pycqa/flake8 + rev: 7.0.0 + hooks: + - id: flake8 + +- repo: https://github.com/pycqa/isort + rev: 5.13.2 + hooks: + - id: isort + +- repo: https://github.com/pre-commit/mirrors-mypy + rev: v1.8.0 + hooks: + - id: mypy + additional_dependencies: [types-all] + exclude: ^(docs/|setup.py) From 04dfbebc1ba106ac7a54997230d5c1345ca808ec Mon Sep 17 00:00:00 2001 From: Francesc Alted Date: Thu, 6 Mar 2025 19:54:43 +0100 Subject: [PATCH 031/166] Remve types-all --- .pre-commit-config.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 69e8dd5..548b964 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -21,5 +21,4 @@ repos: rev: v1.8.0 hooks: - id: mypy - additional_dependencies: [types-all] exclude: ^(docs/|setup.py) From 9dad87bd2c93e0a55bdbf556740eca52aef35f5e Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 6 Mar 2025 18:54:55 +0000 Subject: [PATCH 032/166] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .readthedocs.yaml | 2 +- AUTHORS.txt | 2 +- bench/boolean_timing.py | 2 + bench/issue-36.py | 9 ++- bench/issue-47.py | 1 + 
bench/large_array_vs_numpy.py | 6 +- bench/multidim.py | 7 +- bench/poly.py | 4 +- bench/timing.py | 5 +- bench/unaligned-simple.py | 3 + bench/varying-expr.py | 3 + bench/vml_timing.py | 3 + bench/vml_timing2.py | 5 +- bench/vml_timing3.py | 4 +- doc/api.rst | 12 +-- doc/index.rst | 1 - doc/intro.rst | 58 +++++++-------- doc/mkl.rst | 84 ++++++++++----------- doc/release_notes.rst | 2 +- doc/user_guide.rst | 134 +++++++++++++++++----------------- doc/vm2.rst | 42 +++++------ issues/issue418.py | 6 +- numexpr/__init__.py | 20 ++--- numexpr/cpuinfo.py | 8 +- numexpr/expressions.py | 5 +- numexpr/interp_body.cpp | 8 +- numexpr/interpreter.cpp | 12 +-- numexpr/interpreter.hpp | 2 +- numexpr/module.cpp | 2 +- numexpr/module.hpp | 4 +- numexpr/necompiler.py | 91 +++++++++++------------ numexpr/numexpr_config.hpp | 2 +- numexpr/numexpr_object.cpp | 1 - numexpr/tests/__init__.py | 2 +- numexpr/tests/test_numexpr.py | 38 +++++----- numexpr/utils.py | 24 +++--- numexpr/win32/stdint.h | 12 +-- setup.py | 11 +-- 38 files changed, 336 insertions(+), 301 deletions(-) diff --git a/.readthedocs.yaml b/.readthedocs.yaml index d2c3d13..3803a41 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -19,4 +19,4 @@ sphinx: # https://docs.readthedocs.io/en/stable/guides/reproducible-builds.html python: install: - - requirements: doc/requirements.txt \ No newline at end of file + - requirements: doc/requirements.txt diff --git a/AUTHORS.txt b/AUTHORS.txt index 88b9047..57410db 100644 --- a/AUTHORS.txt +++ b/AUTHORS.txt @@ -23,7 +23,7 @@ Google Inc. contributed bug fixes. David Cox improved readability of the Readme. -Robert A. McLeod contributed bug fixes and ported the documentation to +Robert A. McLeod contributed bug fixes and ported the documentation to numexpr.readthedocs.io. He has served as the maintainer of the package since 2016 to 2023. 
diff --git a/bench/boolean_timing.py b/bench/boolean_timing.py index fe07b31..0be0bf7 100644 --- a/bench/boolean_timing.py +++ b/bench/boolean_timing.py @@ -9,8 +9,10 @@ #################################################################### from __future__ import print_function + import sys import timeit + import numpy array_size = 5_000_000 diff --git a/bench/issue-36.py b/bench/issue-36.py index 9c356cf..611bddb 100644 --- a/bench/issue-36.py +++ b/bench/issue-36.py @@ -2,10 +2,14 @@ # performs better than the serial code. See issue #36 for details. from __future__ import print_function + +from time import time + import numpy as np -import numexpr as ne from numpy.testing import assert_array_equal -from time import time + +import numexpr as ne + def bench(N): print("*** array length:", N) @@ -31,4 +35,3 @@ def bench(N): ne.set_num_threads(2) for N in range(10, 20): bench(2**N) - diff --git a/bench/issue-47.py b/bench/issue-47.py index 31c68a6..a48fbe2 100644 --- a/bench/issue-47.py +++ b/bench/issue-47.py @@ -1,4 +1,5 @@ import numpy + import numexpr numexpr.set_num_threads(8) diff --git a/bench/large_array_vs_numpy.py b/bench/large_array_vs_numpy.py index 72219a1..b480261 100644 --- a/bench/large_array_vs_numpy.py +++ b/bench/large_array_vs_numpy.py @@ -31,10 +31,12 @@ import os os.environ["NUMEXPR_NUM_THREADS"] = "16" +import threading +import timeit + import numpy as np + import numexpr as ne -import timeit -import threading array_size = 10**8 num_runs = 10 diff --git a/bench/multidim.py b/bench/multidim.py index 587f100..eeccd0b 100644 --- a/bench/multidim.py +++ b/bench/multidim.py @@ -12,9 +12,12 @@ # Based on a script provided by Andrew Collette. 
from __future__ import print_function + +import time + import numpy as np + import numexpr as nx -import time test_shapes = [ (100*100*100), @@ -90,5 +93,3 @@ def test_func(a, b, c): print("Simple: ", (stop1-start1)/nruns) print("Numexpr: ", (stop2-start2)/nruns) print("Chunked: ", (stop3-start3)/nruns) - - diff --git a/bench/poly.py b/bench/poly.py index 0f50290..3eb12b1 100644 --- a/bench/poly.py +++ b/bench/poly.py @@ -17,11 +17,13 @@ ####################################################################### from __future__ import print_function + import sys from time import time + import numpy as np -import numexpr as ne +import numexpr as ne #expr = ".25*x**3 + .75*x**2 - 1.5*x - 2" # the polynomial to compute expr = "((.25*x + .75)*x - 1.5)*x - 2" # a computer-friendly polynomial diff --git a/bench/timing.py b/bench/timing.py index c84a6f4..9c70610 100644 --- a/bench/timing.py +++ b/bench/timing.py @@ -9,7 +9,10 @@ #################################################################### from __future__ import print_function -import timeit, numpy + +import timeit + +import numpy array_size = 5e6 iterations = 2 diff --git a/bench/unaligned-simple.py b/bench/unaligned-simple.py index e168c78..b653c7a 100644 --- a/bench/unaligned-simple.py +++ b/bench/unaligned-simple.py @@ -13,8 +13,11 @@ """ from __future__ import print_function + from timeit import Timer + import numpy as np + import numexpr as ne niter = 10 diff --git a/bench/varying-expr.py b/bench/varying-expr.py index d04ab35..df7419c 100644 --- a/bench/varying-expr.py +++ b/bench/varying-expr.py @@ -13,9 +13,12 @@ # the latency of numexpr when working with small arrays. 
from __future__ import print_function + import sys from time import time + import numpy as np + import numexpr as ne N = 100 diff --git a/bench/vml_timing.py b/bench/vml_timing.py index 52f5003..57dd4d2 100644 --- a/bench/vml_timing.py +++ b/bench/vml_timing.py @@ -9,9 +9,12 @@ #################################################################### from __future__ import print_function + import sys import timeit + import numpy + import numexpr array_size = 5_000_000 diff --git a/bench/vml_timing2.py b/bench/vml_timing2.py index 32fdc62..1c460d0 100644 --- a/bench/vml_timing2.py +++ b/bench/vml_timing2.py @@ -4,11 +4,14 @@ # https://github.com/pydata/numexpr/wiki/NumexprMKL from __future__ import print_function + import datetime import sys +from time import time + import numpy as np + import numexpr as ne -from time import time N = int(2**26) diff --git a/bench/vml_timing3.py b/bench/vml_timing3.py index 04997ff..0086421 100644 --- a/bench/vml_timing3.py +++ b/bench/vml_timing3.py @@ -1,7 +1,9 @@ # -*- coding: utf-8 -*- +from timeit import default_timer as timer + import numpy as np + import numexpr as ne -from timeit import default_timer as timer x = np.ones(100000) scaler = -1J diff --git a/doc/api.rst b/doc/api.rst index 7d750e3..5d1bb0f 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -3,11 +3,11 @@ NumExpr API .. automodule:: numexpr :members: evaluate, re_evaluate, disassemble, NumExpr, get_vml_version, set_vml_accuracy_mode, set_vml_num_threads, set_num_threads, detect_number_of_cores, detect_number_of_threads - + .. py:attribute:: ncores The number of (virtual) cores detected. - + .. py:attribute:: nthreads The number of threads currently in-use. @@ -18,11 +18,11 @@ NumExpr API .. py:attribute:: version - The version of NumExpr. - - + The version of NumExpr. + + Tests submodule --------------- .. 
automodule:: numexpr.tests - :members: test, print_versions \ No newline at end of file + :members: test, print_versions diff --git a/doc/index.rst b/doc/index.rst index 02922c3..d517391 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -25,4 +25,3 @@ Indices and tables * :ref:`genindex` * :ref:`modindex` * :ref:`search` - diff --git a/doc/intro.rst b/doc/intro.rst index 11dbaaf..0d31925 100644 --- a/doc/intro.rst +++ b/doc/intro.rst @@ -1,25 +1,25 @@ How it works ============ -The string passed to :code:`evaluate` is compiled into an object representing the +The string passed to :code:`evaluate` is compiled into an object representing the expression and types of the arrays used by the function :code:`numexpr`. -The expression is first compiled using Python's :code:`compile` function (this means -that the expressions have to be valid Python expressions). From this, the -variable names can be taken. The expression is then evaluated using instances -of a special object that keep track of what is being done to them, and which +The expression is first compiled using Python's :code:`compile` function (this means +that the expressions have to be valid Python expressions). From this, the +variable names can be taken. The expression is then evaluated using instances +of a special object that keep track of what is being done to them, and which builds up the parse tree of the expression. -This parse tree is then compiled to a bytecode program, which describes how to -perform the operation element-wise. The virtual machine uses "vector registers": -each register is many elements wide (by default 4096 elements). The key to +This parse tree is then compiled to a bytecode program, which describes how to +perform the operation element-wise. The virtual machine uses "vector registers": +each register is many elements wide (by default 4096 elements). The key to NumExpr's speed is handling chunks of elements at a time. -There are two extremes to evaluating an expression elementwise. 
You can do each -operation as arrays, returning temporary arrays. This is what you do when you -use NumPy: :code:`2*a+3*b` uses three temporary arrays as large as :code:`a` or -:code:`b`. This strategy wastes memory (a problem if your arrays are large), -and also is not a good use of cache memory: for large arrays, the results of +There are two extremes to evaluating an expression elementwise. You can do each +operation as arrays, returning temporary arrays. This is what you do when you +use NumPy: :code:`2*a+3*b` uses three temporary arrays as large as :code:`a` or +:code:`b`. This strategy wastes memory (a problem if your arrays are large), +and also is not a good use of cache memory: for large arrays, the results of :code:`2*a` and :code:`3*b` won't be in cache when you do the add. The other extreme is to loop over each element, as in:: @@ -27,13 +27,13 @@ The other extreme is to loop over each element, as in:: for i in xrange(len(a)): c[i] = 2*a[i] + 3*b[i] -This doesn't consume extra memory, and is good for the cache, but, if the -expression is not compiled to machine code, you will have a big case statement -(or a bunch of if's) inside the loop, which adds a large overhead for each +This doesn't consume extra memory, and is good for the cache, but, if the +expression is not compiled to machine code, you will have a big case statement +(or a bunch of if's) inside the loop, which adds a large overhead for each element, and will hurt the branch-prediction used on the CPU. -:code:`numexpr` uses a in-between approach. Arrays are handled as chunks (of -4096 elements) at a time, using a register machine. As Python code, +:code:`numexpr` uses a in-between approach. Arrays are handled as chunks (of +4096 elements) at a time, using a register machine. 
As Python code, it looks something like this:: for i in xrange(0, len(a), 256): @@ -44,11 +44,11 @@ it looks something like this:: add(r2, r3, r2) c[i:i+128] = r2 -(remember that the 3-arg form stores the result in the third argument, -instead of allocating a new array). This achieves a good balance between -cache and branch-prediction. And the virtual machine is written entirely in -C, which makes it faster than the Python above. Furthermore the virtual machine -is also multi-threaded, which allows for efficient parallelization of NumPy +(remember that the 3-arg form stores the result in the third argument, +instead of allocating a new array). This achieves a good balance between +cache and branch-prediction. And the virtual machine is written entirely in +C, which makes it faster than the Python above. Furthermore the virtual machine +is also multi-threaded, which allows for efficient parallelization of NumPy operations. There is some more information and history at: @@ -58,12 +58,12 @@ http://www.bitsofbits.com/2014/09/21/numpy-micro-optimization-and-numexpr/ Expected performance ==================== -The range of speed-ups for NumExpr respect to NumPy can vary from 0.95x and 20x, -being 2x, 3x or 4x typical values, depending on the complexity of the -expression and the internal optimization of the operators used. The strided and -unaligned case has been optimized too, so if the expression contains such -arrays, the speed-up can increase significantly. Of course, you will need to -operate with large arrays (typically larger than the cache size of your CPU) +The range of speed-ups for NumExpr respect to NumPy can vary from 0.95x and 20x, +being 2x, 3x or 4x typical values, depending on the complexity of the +expression and the internal optimization of the operators used. The strided and +unaligned case has been optimized too, so if the expression contains such +arrays, the speed-up can increase significantly. 
Of course, you will need to +operate with large arrays (typically larger than the cache size of your CPU) to see these improvements in performance. Here there are some real timings. For the contiguous case:: diff --git a/doc/mkl.rst b/doc/mkl.rst index 6951655..0c706bb 100644 --- a/doc/mkl.rst +++ b/doc/mkl.rst @@ -1,19 +1,19 @@ NumExpr with Intel MKL ====================== -Numexpr has support for Intel's VML (included in Intel's MKL) in order to -accelerate the evaluation of transcendental functions on Intel CPUs. Here it +Numexpr has support for Intel's VML (included in Intel's MKL) in order to +accelerate the evaluation of transcendental functions on Intel CPUs. Here it is a small example on the kind of improvement you may get by using it. A first benchmark ----------------- -Firstly, we are going to exercise how MKL performs when computing a couple of -simple expressions. One is a pure algebraic one: :code:`2*y + 4*x` and the other +Firstly, we are going to exercise how MKL performs when computing a couple of +simple expressions. One is a pure algebraic one: :code:`2*y + 4*x` and the other contains transcendental functions: :code:`sin(x)**2 + cos(y)**2`. -For this, we are going to use this worksheet_. I (Francesc Alted) ran this -benchmark on a Intel Xeon E3-1245 v5 @ 3.50GHz. Here are the results when +For this, we are going to use this worksheet_. I (Francesc Alted) ran this +benchmark on a Intel Xeon E3-1245 v5 @ 3.50GHz. Here are the results when not using MKL:: NumPy version: 1.11.1 @@ -22,7 +22,7 @@ not using MKL:: Numexpr version: 2.6.1. Using MKL: False Time for an algebraic expression: 0.058 s / 19.116 GB/s Time for a transcendental expression: 0.283 s / 3.950 GB/s - + And now, using MKL:: @@ -34,14 +34,14 @@ And now, using MKL:: Time for a transcendental expression: 0.075 s / 14.975 GB/s -As you can see, numexpr using MKL can be up to 3.8x faster for the case of the -transcendental expression. 
Also, you can notice that the pure algebraic -expression is not accelerated at all. This is completely expected, as the -MKL is offering accelerations for CPU bounded functions (sin, cos, tan, exp, +As you can see, numexpr using MKL can be up to 3.8x faster for the case of the +transcendental expression. Also, you can notice that the pure algebraic +expression is not accelerated at all. This is completely expected, as the +MKL is offering accelerations for CPU bounded functions (sin, cos, tan, exp, log, sinh...) and not pure multiplications or adds. -Finally, note how numexpr+MKL can be up to 26x faster than using a pure NumPy -solution. And this was using a processor with just four physical cores; you +Finally, note how numexpr+MKL can be up to 26x faster than using a pure NumPy +solution. And this was using a processor with just four physical cores; you should expect more speedup as you throw more cores at that. .. _worksheet: https://github.com/pydata/numexpr/blob/master/bench/vml_timing2.py @@ -49,28 +49,28 @@ should expect more speedup as you throw more cores at that. More benchmarks (older) ----------------------- -Numexpr & VML can both use several threads for doing computations. Let's see -how performance improves by using 1 or 2 threads on a 2-core Intel CPU (Core2 +Numexpr & VML can both use several threads for doing computations. Let's see +how performance improves by using 1 or 2 threads on a 2-core Intel CPU (Core2 E8400 @ 3.00GHz). Using 1 thread ^^^^^^^^^^^^^^ -Here we have some benchmarks on the improvement of speed that Intel's VML can -achieve. First, look at times by some easy expression containing sine and +Here we have some benchmarks on the improvement of speed that Intel's VML can +achieve. 
First, look at times by some easy expression containing sine and cosine operations *without* using VML:: In [17]: ne.use_vml Out[17]: False - + In [18]: x = np.linspace(-1, 1, 1e6) - + In [19]: timeit np.sin(x)**2+np.cos(x)**2 10 loops, best of 3: 43.1 ms per loop - + In [20]: ne.set_num_threads(1) Out[20]: 2 - + In [21]: timeit ne.evaluate('sin(x)**2+cos(x)**2') 10 loops, best of 3: 29.5 ms per loop @@ -79,15 +79,15 @@ and now using VML:: In [37]: ne.use_vml Out[37]: True - + In [38]: x = np.linspace(-1, 1, 1e6) - + In [39]: timeit np.sin(x)**2+np.cos(x)**2 10 loops, best of 3: 42.8 ms per loop - + In [40]: ne.set_num_threads(1) Out[40]: 2 - + In [41]: timeit ne.evaluate('sin(x)**2+cos(x)**2') 100 loops, best of 3: 19.8 ms per loop @@ -96,37 +96,37 @@ Hey, VML can accelerate computations by a 50% using a single CPU. That's great! Using 2 threads ^^^^^^^^^^^^^^^ -First, look at the time of the non-VML numexpr when using 2 threads:: +First, look at the time of the non-VML numexpr when using 2 threads:: In [22]: ne.set_num_threads(2) Out[22]: 1 - + In [23]: timeit ne.evaluate('sin(x)**2+cos(x)**2') 100 loops, best of 3: 15.3 ms per loop -OK. We've got an almost perfect 2x improvement in speed with regard to the 1 +OK. We've got an almost perfect 2x improvement in speed with regard to the 1 thread case. Let's see about the VML-powered numexpr version:: In [43]: ne.set_num_threads(2) Out[43]: 1 - + In [44]: timeit ne.evaluate('sin(x)**2+cos(x)**2') 100 loops, best of 3: 12.2 ms per loop -Ok, that's about 1.6x improvement over the 1 thread VML computation, and -still a 25% of improvement over the non-VML version. Good, native numexpr +Ok, that's about 1.6x improvement over the 1 thread VML computation, and +still a 25% of improvement over the non-VML version. Good, native numexpr multithreading code really looks very efficient! 
Numexpr native threading code vs VML's one ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -You may already know that both numexpr and Intel's VML do have support for -multithreaded computations, but you might be curious about which one is more -efficient, so here it goes a hint. First, using the VML multithreaded +You may already know that both numexpr and Intel's VML do have support for +multithreaded computations, but you might be curious about which one is more +efficient, so here it goes a hint. First, using the VML multithreaded implementation:: In [49]: ne.set_vml_num_threads(2) - + In [50]: ne.set_num_threads(1) Out[50]: 1 @@ -146,14 +146,14 @@ and now, using the native numexpr threading code:: 100 loops, best of 3: 12 ms per loop -This means that numexpr's native multithreaded code is about 40% faster than -VML's for this case. So, in general, you should use the former with numexpr +This means that numexpr's native multithreaded code is about 40% faster than +VML's for this case. So, in general, you should use the former with numexpr (and this is the default actually). Mixing numexpr's and VML multithreading capabilities ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -Finally, you might be tempted to use both multithreading codes at the same +Finally, you might be tempted to use both multithreading codes at the same time, but you will be deceived about the improvement in performance:: In [57]: ne.set_vml_num_threads(2) @@ -161,7 +161,7 @@ time, but you will be deceived about the improvement in performance:: In [58]: timeit ne.evaluate('sin(x)**2+cos(x)**2') 100 loops, best of 3: 17.7 ms per loop -Your code actually performs much worse. That's normal too because you are -trying to run 4 threads on a 2-core CPU. For CPUs with many cores, you may -want to try with different threading configurations, but as a rule of thumb, -numexpr's one will generally win. \ No newline at end of file +Your code actually performs much worse. 
That's normal too because you are +trying to run 4 threads on a 2-core CPU. For CPUs with many cores, you may +want to try with different threading configurations, but as a rule of thumb, +numexpr's one will generally win. diff --git a/doc/release_notes.rst b/doc/release_notes.rst index 081e7f4..51d3212 100644 --- a/doc/release_notes.rst +++ b/doc/release_notes.rst @@ -1,4 +1,4 @@ Release Notes ============= -.. include:: ../RELEASE_NOTES.rst \ No newline at end of file +.. include:: ../RELEASE_NOTES.rst diff --git a/doc/user_guide.rst b/doc/user_guide.rst index 3a3cf63..ce2ff9d 100644 --- a/doc/user_guide.rst +++ b/doc/user_guide.rst @@ -30,7 +30,7 @@ and it can also re_evaluate an expression:: Building -------- -*NumExpr* requires Python_ 3.7 or greater, and NumPy_ 1.13 or greater. It is +*NumExpr* requires Python_ 3.7 or greater, and NumPy_ 1.13 or greater. It is built in the standard Python way: .. code-block:: bash @@ -39,7 +39,7 @@ built in the standard Python way: You must have a C-compiler (i.e. MSVC Build tools on Windows and GCC on Linux) installed. -Then change to a directory that is not the repository directory (e.g. `/tmp`) and +Then change to a directory that is not the repository directory (e.g. `/tmp`) and test :code:`numexpr` with: .. code-block:: bash @@ -73,23 +73,23 @@ affect performance). Threadpool Configuration ------------------------ -Threads are spawned at import-time, with the number being set by the environment -variable ``NUMEXPR_MAX_THREADS``. The default maximum thread count is **64**. +Threads are spawned at import-time, with the number being set by the environment +variable ``NUMEXPR_MAX_THREADS``. The default maximum thread count is **64**. There is no advantage to spawning more threads than the number of virtual cores -available on the computing node. Practically NumExpr scales at large thread -count (`> 8`) only on very large matrices (`> 2**22`). 
Spawning large numbers -of threads is not free, and can increase import times for NumExpr or packages +available on the computing node. Practically NumExpr scales at large thread +count (`> 8`) only on very large matrices (`> 2**22`). Spawning large numbers +of threads is not free, and can increase import times for NumExpr or packages that import it such as Pandas or PyTables. -If desired, the number of threads in the pool used can be adjusted via an -environment variable, ``NUMEXPR_NUM_THREADS`` (preferred) or ``OMP_NUM_THREADS``. -Typically only setting ``NUMEXPR_MAX_THREADS`` is sufficient; the number of -threads used can be adjusted dynamically via ``numexpr.set_num_threads(int)``. +If desired, the number of threads in the pool used can be adjusted via an +environment variable, ``NUMEXPR_NUM_THREADS`` (preferred) or ``OMP_NUM_THREADS``. +Typically only setting ``NUMEXPR_MAX_THREADS`` is sufficient; the number of +threads used can be adjusted dynamically via ``numexpr.set_num_threads(int)``. The number of threads can never exceed that set by ``NUMEXPR_MAX_THREADS``. -If the user has not configured the environment prior to importing NumExpr, info -logs will be generated, and the initial number of threads *that are used*_ will -be set to the number of cores detected in the system or 8, whichever is *less*. +If the user has not configured the environment prior to importing NumExpr, info +logs will be generated, and the initial number of threads *that are used*_ will +be set to the number of cores detected in the system or 8, whichever is *less*. Usage:: @@ -111,16 +111,16 @@ function's frame (through the use of :code:`sys._getframe()`). Alternatively, they can be specified using the :code:`local_dict` or :code:`global_dict` arguments, or passed as keyword arguments. -The :code:`optimization` parameter can take the values :code:`'moderate'` -or :code:`'aggressive'`. :code:`'moderate'` means that no optimization is made -that can affect precision at all. 
:code:`'aggressive'` (the default) means that -the expression can be rewritten in a way that precision *could* be affected, but -normally very little. For example, in :code:`'aggressive'` mode, the -transformation :code:`x~**3` -> :code:`x*x*x` is made, but not in +The :code:`optimization` parameter can take the values :code:`'moderate'` +or :code:`'aggressive'`. :code:`'moderate'` means that no optimization is made +that can affect precision at all. :code:`'aggressive'` (the default) means that +the expression can be rewritten in a way that precision *could* be affected, but +normally very little. For example, in :code:`'aggressive'` mode, the +transformation :code:`x~**3` -> :code:`x*x*x` is made, but not in :code:`'moderate'` mode. -The `truediv` parameter specifies whether the division is a 'floor division' -(False) or a 'true division' (True). The default is the value of +The `truediv` parameter specifies whether the division is a 'floor division' +(False) or a 'true division' (True). The default is the value of `__future__.division` in the interpreter. See PEP 238 for details. Expressions are cached, so reuse is fast. Arrays or scalars are @@ -164,22 +164,22 @@ Casting rules in NumExpr follow closely those of *NumPy*. However, for implementation reasons, there are some known exceptions to this rule, namely: - * When an array with type :code:`int8`, :code:`uint8`, :code:`int16` or - :code:`uint16` is used inside NumExpr, it is internally upcasted to an - :code:`int` (or :code:`int32` in NumPy notation). - * When an array with type :code:`uint32` is used inside NumExpr, it is - internally upcasted to a :code:`long` (or :code:`int64` in NumPy notation). - * A floating point function (e.g. :code:`sin`) acting on :code:`int8` or - :code:`int16` types returns a :code:`float64` type, instead of the - :code:`float32` that is returned by NumPy functions. 
This is mainly due + * When an array with type :code:`int8`, :code:`uint8`, :code:`int16` or + :code:`uint16` is used inside NumExpr, it is internally upcasted to an + :code:`int` (or :code:`int32` in NumPy notation). + * When an array with type :code:`uint32` is used inside NumExpr, it is + internally upcasted to a :code:`long` (or :code:`int64` in NumPy notation). + * A floating point function (e.g. :code:`sin`) acting on :code:`int8` or + :code:`int16` types returns a :code:`float64` type, instead of the + :code:`float32` that is returned by NumPy functions. This is mainly due to the absence of native :code:`int8` or :code:`int16` types in NumExpr. - * In operations implying a scalar and an array, the normal rules of casting - are used in NumExpr, in contrast with NumPy, where array types takes - priority. For example, if :code:`a` is an array of type :code:`float32` - and :code:`b` is an scalar of type :code:`float64` (or Python :code:`float` - type, which is equivalent), then :code:`a*b` returns a :code:`float64` in - NumExpr, but a :code:`float32` in NumPy (i.e. array operands take priority - in determining the result type). If you need to keep the result a + * In operations implying a scalar and an array, the normal rules of casting + are used in NumExpr, in contrast with NumPy, where array types takes + priority. For example, if :code:`a` is an array of type :code:`float32` + and :code:`b` is an scalar of type :code:`float64` (or Python :code:`float` + type, which is equivalent), then :code:`a*b` returns a :code:`float64` in + NumExpr, but a :code:`float32` in NumPy (i.e. array operands take priority + in determining the result type). If you need to keep the result a :code:`float32`, be sure you use a :code:`float32` scalar too. 
@@ -199,42 +199,42 @@ Supported functions The next are the current supported set: - * :code:`where(bool, number1, number2): number` -- number1 if the bool condition + * :code:`where(bool, number1, number2): number` -- number1 if the bool condition is true, number2 otherwise. - * :code:`{sin,cos,tan}(float|complex): float|complex` -- trigonometric sine, + * :code:`{sin,cos,tan}(float|complex): float|complex` -- trigonometric sine, cosine or tangent. - * :code:`{arcsin,arccos,arctan}(float|complex): float|complex` -- trigonometric + * :code:`{arcsin,arccos,arctan}(float|complex): float|complex` -- trigonometric inverse sine, cosine or tangent. - * :code:`arctan2(float1, float2): float` -- trigonometric inverse tangent of + * :code:`arctan2(float1, float2): float` -- trigonometric inverse tangent of float1/float2. - * :code:`{sinh,cosh,tanh}(float|complex): float|complex` -- hyperbolic sine, + * :code:`{sinh,cosh,tanh}(float|complex): float|complex` -- hyperbolic sine, cosine or tangent. - * :code:`{arcsinh,arccosh,arctanh}(float|complex): float|complex` -- hyperbolic + * :code:`{arcsinh,arccosh,arctanh}(float|complex): float|complex` -- hyperbolic inverse sine, cosine or tangent. - * :code:`{log,log10,log1p}(float|complex): float|complex` -- natural, base-10 and + * :code:`{log,log10,log1p}(float|complex): float|complex` -- natural, base-10 and log(1+x) logarithms. - * :code:`{exp,expm1}(float|complex): float|complex` -- exponential and exponential + * :code:`{exp,expm1}(float|complex): float|complex` -- exponential and exponential minus one. * :code:`sqrt(float|complex): float|complex` -- square root. * :code:`abs(float|complex): float|complex` -- absolute value. * :code:`conj(complex): complex` -- conjugate value. * :code:`{real,imag}(complex): float` -- real or imaginary part of complex. - * :code:`complex(float, float): complex` -- complex from real and imaginary + * :code:`complex(float, float): complex` -- complex from real and imaginary parts. 
- * :code:`contains(np.str, np.str): bool` -- returns True for every string in :code:`op1` that + * :code:`contains(np.str, np.str): bool` -- returns True for every string in :code:`op1` that contains :code:`op2`. Notes ----- * :code:`abs()` for complex inputs returns a :code:`complex` output too. This is a - departure from NumPy where a :code:`float` is returned instead. However, - NumExpr is not flexible enough yet so as to allow this to happen. - Meanwhile, if you want to mimic NumPy behaviour, you may want to select the - real part via the :code:`real` function (e.g. :code:`real(abs(cplx))`) or via the + departure from NumPy where a :code:`float` is returned instead. However, + NumExpr is not flexible enough yet so as to allow this to happen. + Meanwhile, if you want to mimic NumPy behaviour, you may want to select the + real part via the :code:`real` function (e.g. :code:`real(abs(cplx))`) or via the :code:`real` selector (e.g. :code:`abs(cplx).real`). -More functions can be added if you need them. Note however that NumExpr 2.6 is +More functions can be added if you need them. Note however that NumExpr 2.6 is in maintenance mode and a new major revision is under development. Supported reduction operations @@ -242,12 +242,12 @@ Supported reduction operations The next are the current supported set: - * :code:`sum(number, axis=None)`: Sum of array elements over a given axis. + * :code:`sum(number, axis=None)`: Sum of array elements over a given axis. Negative axis are not supported. - * :code:`prod(number, axis=None)`: Product of array elements over a given axis. + * :code:`prod(number, axis=None)`: Product of array elements over a given axis. Negative axis are not supported. -*Note:* because of internal limitations, reduction operations must appear the +*Note:* because of internal limitations, reduction operations must appear the last in the stack. If not, it will be issued an error like:: >>> ne.evaluate('sum(1)*(-1)') @@ -256,23 +256,23 @@ last in the stack. 
If not, it will be issued an error like:: General routines ---------------- - * :code:`evaluate(expression, local_dict=None, global_dict=None, - optimization='aggressive', truediv='auto')`: Evaluate a simple array + * :code:`evaluate(expression, local_dict=None, global_dict=None, + optimization='aggressive', truediv='auto')`: Evaluate a simple array expression element-wise. See examples above. - * :code:`re_evaluate(local_dict=None)`: Re-evaluate the last array expression - without any check. This is meant for accelerating loops that are re-evaluating - the same expression repeatedly without changing anything else than the operands. + * :code:`re_evaluate(local_dict=None)`: Re-evaluate the last array expression + without any check. This is meant for accelerating loops that are re-evaluating + the same expression repeatedly without changing anything else than the operands. If unsure, use evaluate() which is safer. * :code:`test()`: Run all the tests in the test suite. * :code:`print_versions()`: Print the versions of software that numexpr relies on. - * :code:`set_num_threads(nthreads)`: Sets a number of threads to be used in operations. - Returns the previous setting for the number of threads. See note below to see + * :code:`set_num_threads(nthreads)`: Sets a number of threads to be used in operations. + Returns the previous setting for the number of threads. See note below to see how the number of threads is set via environment variables. - If you are using VML, you may want to use *set_vml_num_threads(nthreads)* to - perform the parallel job with VML instead. However, you should get very - similar performance with VML-optimized functions, and VML's parallelizer - cannot deal with common expressions like `(x+1)*(x-2)`, while NumExpr's + If you are using VML, you may want to use *set_vml_num_threads(nthreads)* to + perform the parallel job with VML instead. 
However, you should get very + similar performance with VML-optimized functions, and VML's parallelizer + cannot deal with common expressions like `(x+1)*(x-2)`, while NumExpr's one can. * :code:`detect_number_of_cores()`: Detects the number of cores on a system. @@ -324,4 +324,4 @@ License NumExpr is distributed under the MIT_ license. -.. _MIT: http://www.opensource.org/licenses/mit-license.php \ No newline at end of file +.. _MIT: http://www.opensource.org/licenses/mit-license.php diff --git a/doc/vm2.rst b/doc/vm2.rst index 45e9fc9..01c9826 100644 --- a/doc/vm2.rst +++ b/doc/vm2.rst @@ -1,32 +1,32 @@ Performance of the Virtual Machine in NumExpr2.0 ================================================ -Numexpr 2.0 leverages a new virtual machine completely based on the new ndarray -iterator introduced in NumPy 1.6. This represents a nice combination of the -advantages of using the new iterator, while retaining the ability to avoid -copies in memory as well as the multi-threading capabilities of the previous +Numexpr 2.0 leverages a new virtual machine completely based on the new ndarray +iterator introduced in NumPy 1.6. This represents a nice combination of the +advantages of using the new iterator, while retaining the ability to avoid +copies in memory as well as the multi-threading capabilities of the previous virtual machine (1.x series). -The increased performance of the new virtual machine can be seen in several +The increased performance of the new virtual machine can be seen in several scenarios, like: - * *Broadcasting*. Expressions containing arrays that needs to be broadcasted, + * *Broadcasting*. Expressions containing arrays that needs to be broadcasted, will not need additional memory (i.e. they will be broadcasted on-the-fly). - * *Non-native dtypes*. These will be translated to native dtypes on-the-fly, + * *Non-native dtypes*. These will be translated to native dtypes on-the-fly, so there is not need to convert the whole arrays first. 
- * *Fortran-ordered arrays*. The new iterator will find the best path to + * *Fortran-ordered arrays*. The new iterator will find the best path to optimize operations on such arrays, without the need to transpose them first. -There is a drawback though: performance with small arrays suffers a bit because -of higher set-up times for the new virtual machine. See below for detailed +There is a drawback though: performance with small arrays suffers a bit because +of higher set-up times for the new virtual machine. See below for detailed benchmarks. Some benchmarks for best-case scenarios --------------------------------------- -Here you have some benchmarks of some scenarios where the new virtual machine -actually represents an advantage in terms of speed (also memory, but this is -not shown here). As you will see, the improvement is notable in many areas, +Here you have some benchmarks of some scenarios where the new virtual machine +actually represents an advantage in terms of speed (also memory, but this is +not shown here). As you will see, the improvement is notable in many areas, ranging from 3x to 6x faster operations. Broadcasting @@ -85,7 +85,7 @@ Mix of 'non-native' arrays, Fortran-ordered, and using broadcasting Longer setup-time ^^^^^^^^^^^^^^^^^ -The only drawback of the new virtual machine is during the computation of +The only drawback of the new virtual machine is during the computation of small arrays:: >>> a = np.arange(10) @@ -98,8 +98,8 @@ small arrays:: 10000 loops, best of 3: 30.6 µs per loop -i.e. the new virtual machine takes a bit more time to set-up (around 8 µs in -this machine). However, this should be not too important because for such a +i.e. the new virtual machine takes a bit more time to set-up (around 8 µs in +this machine). 
However, this should be not too important because for such a small arrays NumPy is always a better option:: >>> timeit c = a*(b+1) @@ -121,8 +121,8 @@ And for arrays large enough the difference is negligible:: Conclusion ---------- -The new virtual machine introduced in numexpr 2.0 brings more performance in -many different scenarios (broadcast, non-native dtypes, Fortran-orderd arrays), -while it shows slightly worse performance for small arrays. However, as -numexpr is more geared to compute large arrays, the new virtual machine should -be good news for numexpr users in general. \ No newline at end of file +The new virtual machine introduced in numexpr 2.0 brings more performance in +many different scenarios (broadcast, non-native dtypes, Fortran-orderd arrays), +while it shows slightly worse performance for small arrays. However, as +numexpr is more geared to compute large arrays, the new virtual machine should +be good news for numexpr users in general. diff --git a/issues/issue418.py b/issues/issue418.py index b871c65..31ca2fc 100644 --- a/issues/issue418.py +++ b/issues/issue418.py @@ -1,7 +1,9 @@ +from time import perf_counter as pc + +import matplotlib.pyplot as plt import numpy as np + import numexpr as ne -import matplotlib.pyplot as plt -from time import perf_counter as pc # geomspace seems to be very slow, just a warning about setting `n` too high. # n = 2**24 diff --git a/numexpr/__init__.py b/numexpr/__init__.py index 648b869..63bb9e9 100644 --- a/numexpr/__init__.py +++ b/numexpr/__init__.py @@ -21,21 +21,20 @@ """ -from numexpr.interpreter import MAX_THREADS, use_vml, __BLOCK_SIZE1__ +from numexpr.interpreter import __BLOCK_SIZE1__, MAX_THREADS, use_vml is_cpu_amd_intel = False # DEPRECATION WARNING: WILL BE REMOVED IN FUTURE RELEASE -# cpuinfo imports were moved into the test submodule function that calls them +# cpuinfo imports were moved into the test submodule function that calls them # to improve import times. 
from numexpr.expressions import E -from numexpr.necompiler import (NumExpr, disassemble, evaluate, re_evaluate, - validate) - -from numexpr.utils import (_init_num_threads, - get_vml_version, set_vml_accuracy_mode, set_vml_num_threads, - set_num_threads, get_num_threads, - detect_number_of_cores, detect_number_of_threads) +from numexpr.necompiler import (NumExpr, disassemble, evaluate, re_evaluate, + validate) +from numexpr.utils import (_init_num_threads, detect_number_of_cores, + detect_number_of_threads, get_num_threads, + get_vml_version, set_num_threads, + set_vml_accuracy_mode, set_vml_num_threads) # Detect the number of cores ncores = detect_number_of_cores() @@ -45,6 +44,7 @@ # set_vml_num_threads(1) from . import version + __version__ = version.version def print_versions(): @@ -63,4 +63,4 @@ def test(verbosity=1): return numexpr.tests.test(verbosity=verbosity) except ImportError: # To maintain Python 2.6 compatibility we have simple error handling - raise ImportError('`numexpr.tests` could not be imported, likely it was excluded from the distribution.') \ No newline at end of file + raise ImportError('`numexpr.tests` could not be imported, likely it was excluded from the distribution.') diff --git a/numexpr/cpuinfo.py b/numexpr/cpuinfo.py index 4a57d3c..897a4ca 100755 --- a/numexpr/cpuinfo.py +++ b/numexpr/cpuinfo.py @@ -23,12 +23,14 @@ __all__ = ['cpu'] -import sys, re, types +import inspect import os +import platform +import re import subprocess +import sys +import types import warnings -import platform -import inspect is_cpu_amd_intel = False # DEPRECATION WARNING: WILL BE REMOVED IN FUTURE RELEASE diff --git a/numexpr/expressions.py b/numexpr/expressions.py index 419d7dc..5924c5f 100644 --- a/numexpr/expressions.py +++ b/numexpr/expressions.py @@ -35,6 +35,7 @@ from numexpr import interpreter + class Expression(): def __getattr__(self, name): @@ -269,10 +270,10 @@ def rtruediv_op(a, b): @ophelper def pow_op(a, b): - + if isinstance(b, ConstantNode): x 
= b.value - if ( a.astKind in ('int', 'long') and + if ( a.astKind in ('int', 'long') and b.astKind in ('int', 'long') and x < 0) : raise ValueError( 'Integers to negative integer powers are not allowed.') diff --git a/numexpr/interp_body.cpp b/numexpr/interp_body.cpp index 09b9da9..573ce8c 100644 --- a/numexpr/interp_body.cpp +++ b/numexpr/interp_body.cpp @@ -7,13 +7,13 @@ See LICENSE.txt for details about copyright and rights to use. **********************************************************************/ -// WARNING: This file is included multiple times in `interpreter.cpp`. It is -// essentially a very macro-heavy jump table. Interpretation is best done by +// WARNING: This file is included multiple times in `interpreter.cpp`. It is +// essentially a very macro-heavy jump table. Interpretation is best done by // the developer by expanding all macros (e.g. adding `'-E'` to the `extra_cflags` // argument in `setup.py` and looking at the resulting `interpreter.cpp`. // -// Changes made to this file will not be recognized by the compile, so the developer -// must make a trivial change is made to `interpreter.cpp` or delete the `build/` +// Changes made to this file will not be recognized by the compile, so the developer +// must make a trivial change is made to `interpreter.cpp` or delete the `build/` // directory in-between each build. 
{ #define VEC_LOOP(expr) for(j = 0; j < BLOCK_SIZE; j++) { \ diff --git a/numexpr/interpreter.cpp b/numexpr/interpreter.cpp index 32f6c37..dbfcca1 100644 --- a/numexpr/interpreter.cpp +++ b/numexpr/interpreter.cpp @@ -25,7 +25,7 @@ #define fmin min #define NE_INFINITY (DBL_MAX+DBL_MAX) #define NE_NAN (INFINITY-INFINITY) -#else +#else #define NE_INFINITY INFINITY #define NE_NAN NAN #endif @@ -1262,7 +1262,7 @@ NumExpr_run(NumExprObject *self, PyObject *args, PyObject *kwds) PyArrayObject *singleton; bool writeback; // NOTE: cannot assign on declaration due to `goto` statements - singleton = NULL; + singleton = NULL; writeback = false; if (n_inputs == 0) { char retsig = get_return_sig(self->program); @@ -1321,10 +1321,10 @@ NumExpr_run(NumExprObject *self, PyObject *args, PyObject *kwds) /* Allocate the iterator or nested iterators */ if (reduction_size < 0 || full_reduction) { /* When there's no reduction, reduction_size is 1 as well */ - // RAM: in issue #277 this was also the case for reductions on arrays - // with axis=0 having singleton dimension, i.e. such ops were interpreted - // as full_reductions when they weren't in Numpy. As such, the default - // reduction_size is now -1 and we add the flag for full_reduction, + // RAM: in issue #277 this was also the case for reductions on arrays + // with axis=0 having singleton dimension, i.e. such ops were interpreted + // as full_reductions when they weren't in Numpy. As such, the default + // reduction_size is now -1 and we add the flag for full_reduction, // e.g. 
ne.evaluate("sum(a)")" iter = NpyIter_AdvancedNew(n_inputs+1, operands, NPY_ITER_BUFFERED| diff --git a/numexpr/interpreter.hpp b/numexpr/interpreter.hpp index f9ac1c7..93c6e49 100644 --- a/numexpr/interpreter.hpp +++ b/numexpr/interpreter.hpp @@ -75,7 +75,7 @@ struct thread_data { int ret_code; int *pc_error; char **errmsg; - // NOTE: memsteps, iter, and reduce_iter are arrays, they MUST be allocated + // NOTE: memsteps, iter, and reduce_iter are arrays, they MUST be allocated // to length `global_max_threads` before module load. // One memsteps array per thread // npy_intp *memsteps[MAX_THREADS]; diff --git a/numexpr/module.cpp b/numexpr/module.cpp index 66b5b77..345add4 100644 --- a/numexpr/module.cpp +++ b/numexpr/module.cpp @@ -380,7 +380,7 @@ Py_set_num_threads(PyObject *self, PyObject *args) } static PyObject* -Py_get_num_threads(PyObject *self, PyObject *args) +Py_get_num_threads(PyObject *self, PyObject *args) { int n_thread; n_thread = gs.nthreads; diff --git a/numexpr/module.hpp b/numexpr/module.hpp index cf7b571..079a17f 100644 --- a/numexpr/module.hpp +++ b/numexpr/module.hpp @@ -23,7 +23,7 @@ struct global_state { int end_threads; /* should exisiting threads end? */ // pthread_t threads[MAX_THREADS]; /* opaque structure for threads */ // int tids[MAX_THREADS]; /* ID per each thread */ - /* NOTE: threads and tids are arrays, they MUST be allocated to length + /* NOTE: threads and tids are arrays, they MUST be allocated to length `global_max_threads` before module load. 
*/ pthread_t *threads; /* opaque structure for threads */ int *tids; /* ID per each thread */ @@ -36,7 +36,7 @@ struct global_state { /* Synchronization variables for threadpool state */ pthread_mutex_t count_mutex; int count_threads; - int barrier_passed; /* indicates if the thread pool's thread barrier + int barrier_passed; /* indicates if the thread pool's thread barrier is unlocked and ready for the VM to process.*/ pthread_mutex_t count_threads_mutex; pthread_cond_t count_threads_cv; diff --git a/numexpr/necompiler.py b/numexpr/necompiler.py index 296c41b..4ada878 100644 --- a/numexpr/necompiler.py +++ b/numexpr/necompiler.py @@ -8,17 +8,18 @@ # rights to use. #################################################################### -from typing import Optional, Dict import __future__ -import sys + import os -import threading import re +import sys +import threading +from typing import Dict, Optional import numpy is_cpu_amd_intel = False # DEPRECATION WARNING: WILL BE REMOVED IN FUTURE RELEASE -from numexpr import interpreter, expressions, use_vml +from numexpr import expressions, interpreter, use_vml from numexpr.utils import CacheDict, ContextDict # Declare a double type that does not exist in Python space @@ -28,7 +29,7 @@ int_ = numpy.int32 long_ = numpy.int64 -typecode_to_kind = {'b': 'bool', 'i': 'int', 'l': 'long', 'f': 'float', 'd': 'double', +typecode_to_kind = {'b': 'bool', 'i': 'int', 'l': 'long', 'f': 'float', 'd': 'double', 'c': 'complex', 'n': 'none', 's': 'str'} kind_to_typecode = {'bool': 'b', 'int': 'i', 'long': 'l', 'float': 'f', 'double': 'd', 'complex': 'c', 'bytes': 's', 'str': 's', 'none': 'n'} @@ -104,11 +105,11 @@ def __eq__(self, other): if getattr(self, name) != getattr(other, name): return False return True - + def __lt__(self,other): - # RAM: this is a fix for issue #88 whereby sorting on constants + # RAM: this is a fix for issue #88 whereby sorting on constants # that may be of astKind == 'complex' but type(self.value) == int or float - 
# Here we let NumPy sort as it will cast data properly for comparison + # Here we let NumPy sort as it will cast data properly for comparison # when the Python built-ins will raise an error. if self.astType == 'constant': if self.astKind == other.astKind: @@ -271,7 +272,7 @@ def __str__(self): def stringToExpression(s, types, context, sanitize: bool=True): """Given a string, convert it to a tree of ExpressionNode's. """ - # sanitize the string for obvious attack vectors that NumExpr cannot + # sanitize the string for obvious attack vectors that NumExpr cannot # parse into its homebrew AST. This is to protect the call to `eval` below. # We forbid `;`, `:`. `[` and `__`, and attribute access via '.'. # We cannot ban `.real` or `.imag` however... @@ -281,7 +282,7 @@ def stringToExpression(s, types, context, sanitize: bool=True): skip_quotes = re.sub(r'(\'[^\']*\')', '', no_whitespace) if _blacklist_re.search(skip_quotes) is not None: raise ValueError(f'Expression {s} has forbidden control characters.') - + old_ctx = expressions._context.get_current_context() try: expressions._context.set_new_context(context) @@ -307,7 +308,7 @@ def stringToExpression(s, types, context, sanitize: bool=True): # now build the expression ex = eval(c, names) - + if expressions.isConstant(ex): ex = expressions.ConstantNode(ex, expressions.getKind(ex)) elif not isinstance(ex, expressions.ExpressionNode): @@ -363,7 +364,7 @@ def getConstants(ast): a = 1 + 3j; b = 5.0 ne.evaluate('a*2 + 15j - b') """ - constant_registers = set([node.reg for node in ast.allOf("constant")]) + constant_registers = set([node.reg for node in ast.allOf("constant")]) constants_order = sorted([r.node for r in constant_registers]) constants = [convertConstantToKind(a.value, a.astKind) for a in constants_order] @@ -557,7 +558,7 @@ def getContext(kwargs, _frame_depth=1): context[name] = value else: raise ValueError("'%s' must be one of %s" % (name, allowed)) - + if d: raise ValueError("Unknown keyword argument '%s'" % 
d.popitem()[0]) if context['truediv'] == 'auto': @@ -657,7 +658,7 @@ def disassemble(nex): def parseOp(op): name, sig = [*op.rsplit(b'_', 1), ''][:2] - return name, sig + return name, sig def getArg(pc, offset): arg = nex.program[pc + (offset if offset < 4 else offset+1)] @@ -752,7 +753,7 @@ def getArguments(names, local_dict=None, global_dict=None, _frame_depth: int=2): if global_dict is None: global_dict = frame_globals - # If `call_frame` is the top frame of the interpreter we can't clear its + # If `call_frame` is the top frame of the interpreter we can't clear its # `local_dict`, because it is actually the `global_dict`. clear_local_dict = clear_local_dict and not frame_globals is local_dict @@ -782,18 +783,18 @@ def getArguments(names, local_dict=None, global_dict=None, _frame_depth: int=2): _numexpr_last = threading.local() evaluate_lock = threading.Lock() -def validate(ex: str, - local_dict: Optional[Dict] = None, +def validate(ex: str, + local_dict: Optional[Dict] = None, global_dict: Optional[Dict] = None, - out: numpy.ndarray = None, - order: str = 'K', - casting: str = 'safe', + out: numpy.ndarray = None, + order: str = 'K', + casting: str = 'safe', _frame_depth: int = 2, sanitize: Optional[bool] = None, **kwargs) -> Optional[Exception]: r""" Validate a NumExpr expression with the given `local_dict` or `locals()`. - Returns `None` on success and the Exception object if one occurs. Note that + Returns `None` on success and the Exception object if one occurs. Note that you can proceed directly to call `re_evaluate()` if you use `validate()` to sanitize your expressions and variables in advance. @@ -838,22 +839,22 @@ def validate(ex: str, * 'unsafe' means any data conversions may be done. sanitize: Optional[bool] - Both `validate` and by extension `evaluate` call `eval(ex)`, which is - potentially dangerous on unsanitized inputs. 
As such, NumExpr by default - performs simple sanitization, banning the character ':;[', the + Both `validate` and by extension `evaluate` call `eval(ex)`, which is + potentially dangerous on unsanitized inputs. As such, NumExpr by default + performs simple sanitization, banning the character ':;[', the dunder '__[\w+]__', and attribute access to all but '.real' and '.imag'. - - Using `None` defaults to `True` unless the environment variable - `NUMEXPR_SANITIZE=0` is set, in which case the default is `False`. + + Using `None` defaults to `True` unless the environment variable + `NUMEXPR_SANITIZE=0` is set, in which case the default is `False`. Nominally this can be set via `os.environ` before `import numexpr`. _frame_depth: int - The calling frame depth. Unless you are a NumExpr developer you should + The calling frame depth. Unless you are a NumExpr developer you should not set this value. Note ---- - + """ global _numexpr_last if not hasattr(_numexpr_last, 'l'): @@ -866,10 +867,10 @@ def validate(ex: str, _numexpr_cache.c = CacheDict(256) try: - + if not isinstance(ex, str): raise ValueError("must specify expression as a string") - + if sanitize is None: if 'NUMEXPR_SANITIZE' in os.environ: sanitize = bool(int(os.environ['NUMEXPR_SANITIZE'])) @@ -901,12 +902,12 @@ def validate(ex: str, return e return None -def evaluate(ex: str, - local_dict: Optional[Dict] = None, +def evaluate(ex: str, + local_dict: Optional[Dict] = None, global_dict: Optional[Dict] = None, - out: numpy.ndarray = None, - order: str = 'K', - casting: str = 'safe', + out: numpy.ndarray = None, + order: str = 'K', + casting: str = 'safe', sanitize: Optional[bool] = None, _frame_depth: int = 3, **kwargs) -> numpy.ndarray: @@ -959,27 +960,27 @@ def evaluate(ex: str, performs simple sanitization, banning the characters ':;[', the dunder '__[\w+]__', and attribute access to all but '.real' and '.imag'. 
- Using `None` defaults to `True` unless the environment variable - `NUMEXPR_SANITIZE=0` is set, in which case the default is `False`. + Using `None` defaults to `True` unless the environment variable + `NUMEXPR_SANITIZE=0` is set, in which case the default is `False`. Nominally this can be set via `os.environ` before `import numexpr`. _frame_depth: int - The calling frame depth. Unless you are a NumExpr developer you should + The calling frame depth. Unless you are a NumExpr developer you should not set this value. """ - # We could avoid code duplication if we called validate and then re_evaluate + # We could avoid code duplication if we called validate and then re_evaluate # here, but we have difficulties with the `sys.getframe(2)` call in # `getArguments` - e = validate(ex, local_dict=local_dict, global_dict=global_dict, - out=out, order=order, casting=casting, + e = validate(ex, local_dict=local_dict, global_dict=global_dict, + out=out, order=order, casting=casting, _frame_depth=_frame_depth, sanitize=sanitize, **kwargs) if e is None: return re_evaluate(local_dict=local_dict, global_dict=global_dict, _frame_depth=_frame_depth) else: raise e - -def re_evaluate(local_dict: Optional[Dict] = None, + +def re_evaluate(local_dict: Optional[Dict] = None, global_dict: Optional[Dict] = None, _frame_depth: int=2) -> numpy.ndarray: """ @@ -994,7 +995,7 @@ def re_evaluate(local_dict: Optional[Dict] = None, local_dict: dictionary, optional A dictionary that replaces the local operands in current frame. _frame_depth: int - The calling frame depth. Unless you are a NumExpr developer you should + The calling frame depth. Unless you are a NumExpr developer you should not set this value. """ global _numexpr_last diff --git a/numexpr/numexpr_config.hpp b/numexpr/numexpr_config.hpp index 0663c6d..2bf0091 100644 --- a/numexpr/numexpr_config.hpp +++ b/numexpr/numexpr_config.hpp @@ -19,7 +19,7 @@ #define BLOCK_SIZE1 1024 #endif -// The default threadpool size. 
It's prefer that the user set this via an +// The default threadpool size. It's prefer that the user set this via an // environment variable, "NUMEXPR_MAX_THREADS" #define DEFAULT_MAX_THREADS 64 diff --git a/numexpr/numexpr_object.cpp b/numexpr/numexpr_object.cpp index e788d1c..b20aef0 100644 --- a/numexpr/numexpr_object.cpp +++ b/numexpr/numexpr_object.cpp @@ -405,4 +405,3 @@ PyTypeObject NumExprType = { 0, /* tp_alloc */ NumExpr_new, /* tp_new */ }; - diff --git a/numexpr/tests/__init__.py b/numexpr/tests/__init__.py index 3fff411..f47c8cc 100644 --- a/numexpr/tests/__init__.py +++ b/numexpr/tests/__init__.py @@ -8,7 +8,7 @@ # rights to use. #################################################################### -from numexpr.tests.test_numexpr import test, print_versions +from numexpr.tests.test_numexpr import print_versions, test if __name__ == '__main__': test() diff --git a/numexpr/tests/test_numexpr.py b/numexpr/tests/test_numexpr.py index 98ae459..2731b32 100644 --- a/numexpr/tests/test_numexpr.py +++ b/numexpr/tests/test_numexpr.py @@ -11,33 +11,32 @@ import os -import sys import platform +import subprocess +import sys +import unittest import warnings from contextlib import contextmanager -import subprocess +from unittest.mock import MagicMock -import numpy as np -from numpy import ( - array, arange, empty, zeros, int32, int64, uint16, cdouble, float64, rec, - copy, ones_like, where, all as alltrue, linspace, - sum, prod, sqrt, fmod, floor, ceil, - sin, cos, tan, arcsin, arccos, arctan, arctan2, - sinh, cosh, tanh, arcsinh, arccosh, arctanh, - log, log1p, log10, exp, expm1, conj) import numpy -from numpy.testing import (assert_equal, assert_array_equal, - assert_array_almost_equal, assert_allclose) -from numpy import shape, allclose, array_equal, ravel, isnan, isinf +import numpy as np +from numpy import all as alltrue +from numpy import (allclose, arange, arccos, arccosh, arcsin, arcsinh, arctan, + arctan2, arctanh, array, array_equal, cdouble, ceil, conj, + 
copy, cos, cosh, empty, exp, expm1, float64, floor, fmod, + int32, int64, isinf, isnan, linspace, log, log1p, log10, + ones_like, prod, ravel, rec, shape, sin, sinh, sqrt, sum, + tan, tanh, uint16, where, zeros) +from numpy.testing import (assert_allclose, assert_array_almost_equal, + assert_array_equal, assert_equal) import numexpr -from numexpr import E, NumExpr, evaluate, re_evaluate, validate, disassemble, use_vml +from numexpr import (E, NumExpr, disassemble, evaluate, re_evaluate, use_vml, + validate) from numexpr.expressions import ConstantNode from numexpr.utils import detect_number_of_cores -import unittest -from unittest.mock import MagicMock - try: import pytest pytest_available = True @@ -1351,9 +1350,10 @@ def test_multiprocess(self): def print_versions(): """Print the versions of software that numexpr relies on.""" # from pkg_resources import parse_version - from numexpr.cpuinfo import cpu import platform + from numexpr.cpuinfo import cpu + print('-=' * 38) print('Numexpr version: %s' % numexpr.__version__) print('NumPy version: %s' % np.__version__) @@ -1394,8 +1394,8 @@ def test(verbosity=1): def suite(): - import unittest import platform as pl + import unittest theSuite = unittest.TestSuite() niter = 1 diff --git a/numexpr/utils.py b/numexpr/utils.py index cc61833..9e45fbe 100644 --- a/numexpr/utils.py +++ b/numexpr/utils.py @@ -9,20 +9,22 @@ #################################################################### import logging + log = logging.getLogger(__name__) +import contextvars import os import subprocess -import contextvars -from numexpr.interpreter import _set_num_threads, _get_num_threads, MAX_THREADS from numexpr import use_vml +from numexpr.interpreter import MAX_THREADS, _get_num_threads, _set_num_threads + from . 
import version if use_vml: - from numexpr.interpreter import ( - _get_vml_version, _set_vml_accuracy_mode, _set_vml_num_threads, - _get_vml_num_threads) + from numexpr.interpreter import (_get_vml_num_threads, _get_vml_version, + _set_vml_accuracy_mode, + _set_vml_num_threads) def get_vml_version(): @@ -118,9 +120,9 @@ def get_num_threads(): def _init_num_threads(): """ - Detects the environment variable 'NUMEXPR_MAX_THREADS' to set the threadpool - size, and if necessary the slightly redundant 'NUMEXPR_NUM_THREADS' or - 'OMP_NUM_THREADS' env vars to set the initial number of threads used by + Detects the environment variable 'NUMEXPR_MAX_THREADS' to set the threadpool + size, and if necessary the slightly redundant 'NUMEXPR_NUM_THREADS' or + 'OMP_NUM_THREADS' env vars to set the initial number of threads used by the virtual machine. """ # Any platform-specific short-circuits @@ -140,7 +142,7 @@ def _init_num_threads(): env_configured = True n_cores = MAX_THREADS else: - # The use has not set 'NUMEXPR_MAX_THREADS', so likely they have not + # The use has not set 'NUMEXPR_MAX_THREADS', so likely they have not # configured NumExpr as desired, so we emit info logs. if n_cores > MAX_THREADS: log.info('Note: detected %d virtual cores but NumExpr set to maximum of %d, check "NUMEXPR_MAX_THREADS" environment variable.'%(n_cores, MAX_THREADS)) @@ -149,7 +151,7 @@ def _init_num_threads(): log.info('Note: NumExpr detected %d cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 16.'%n_cores) n_cores = 16 - # Now we check for 'NUMEXPR_NUM_THREADS' or 'OMP_NUM_THREADS' to set the + # Now we check for 'NUMEXPR_NUM_THREADS' or 'OMP_NUM_THREADS' to set the # actual number of threads used. 
if 'NUMEXPR_NUM_THREADS' in os.environ and os.environ['NUMEXPR_NUM_THREADS'] != '': requested_threads = int(os.environ['NUMEXPR_NUM_THREADS']) @@ -165,7 +167,7 @@ def _init_num_threads(): set_num_threads(requested_threads) return requested_threads - + def detect_number_of_cores(): """ Detects the number of cores on a system. Cribbed from pp. diff --git a/numexpr/win32/stdint.h b/numexpr/win32/stdint.h index b7e7112..c66267a 100644 --- a/numexpr/win32/stdint.h +++ b/numexpr/win32/stdint.h @@ -17,7 +17,7 @@ * * mwb: This was modified in the following ways: * - * - make it compatible with Visual C++ 6 (which uses + * - make it compatible with Visual C++ 6 (which uses * non-standard keywords and suffixes for 64-bit types) * - some environments need stddef.h included (for wchar stuff?) * - handle the fact that Microsoft's limits.h header defines @@ -70,9 +70,9 @@ typedef unsigned uint_least32_t; typedef __STDINT_LONGLONG int_least64_t; typedef unsigned __STDINT_LONGLONG uint_least64_t; -/* 7.18.1.3 Fastest minimum-width integer types +/* 7.18.1.3 Fastest minimum-width integer types * Not actually guaranteed to be fastest for all purposes - * Here we use the exact-width types for 8 and 16-bit ints. + * Here we use the exact-width types for 8 and 16-bit ints. 
*/ typedef char int_fast8_t; typedef unsigned char uint_fast8_t; @@ -110,7 +110,7 @@ typedef unsigned __STDINT_LONGLONG uintmax_t; #if !defined ( __cplusplus) || defined (__STDC_LIMIT_MACROS) /* 7.18.2.1 Limits of exact-width integer types */ -#define INT8_MIN (-128) +#define INT8_MIN (-128) #define INT16_MIN (-32768) #define INT32_MIN (-2147483647 - 1) #define INT64_MIN (PASTE( -9223372036854775807, __STDINT_LONGLONG_SUFFIX) - 1) @@ -158,7 +158,7 @@ typedef unsigned __STDINT_LONGLONG uintmax_t; #define UINT_FAST64_MAX UINT64_MAX /* 7.18.2.4 Limits of integer types capable of holding - object pointers */ + object pointers */ #ifdef _WIN64 #define INTPTR_MIN INT64_MIN #define INTPTR_MAX INT64_MAX @@ -186,7 +186,7 @@ typedef unsigned __STDINT_LONGLONG uintmax_t; #define SIZE_MAX UINTPTR_MAX #endif -#ifndef WCHAR_MIN /* also in wchar.h */ +#ifndef WCHAR_MIN /* also in wchar.h */ #define WCHAR_MIN 0 #define WCHAR_MAX ((wchar_t)-1) /* UINT16_MAX */ #endif diff --git a/setup.py b/setup.py index 82f3651..64d9f20 100644 --- a/setup.py +++ b/setup.py @@ -9,12 +9,13 @@ # rights to use. 
#################################################################### -import os, os.path as op -import platform import configparser -import numpy as np -from setuptools import setup, Extension +import os +import os.path as op +import platform +import numpy as np +from setuptools import Extension, setup with open('requirements.txt') as f: requirements = f.read().splitlines() @@ -40,7 +41,7 @@ libs = [] # Pre-built libraries ONLY, like python36.so clibs = [] def_macros = [ - # keep in sync with minimal runtime requirement (requirements.txt) + # keep in sync with minimal runtime requirement (requirements.txt) ('NPY_TARGET_VERSION', 'NPY_1_23_API_VERSION') ] sources = ['numexpr/interpreter.cpp', From ccade0b6228a715e6e5a017912109b39aafa36f1 Mon Sep 17 00:00:00 2001 From: Francesc Alted Date: Thu, 6 Mar 2025 19:56:10 +0100 Subject: [PATCH 033/166] Remove flake8 for now --- .pre-commit-config.yaml | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 548b964..f0b8438 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -7,11 +7,12 @@ repos: - id: check-yaml - id: debug-statements -- repo: https://github.com/pycqa/flake8 - rev: 7.0.0 - hooks: - - id: flake8 - +# Too many things to fix, let's just ignore it for now +#- repo: https://github.com/pycqa/flake8 +# rev: 7.0.0 +# hooks: +# - id: flake8 +# - repo: https://github.com/pycqa/isort rev: 5.13.2 hooks: From 52b3799d7362f7ddaa347e2f5128ce016c1a2811 Mon Sep 17 00:00:00 2001 From: Francesc Alted Date: Thu, 6 Mar 2025 19:58:44 +0100 Subject: [PATCH 034/166] Remove mypy checks for now --- .pre-commit-config.yaml | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index f0b8438..cb4e829 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -18,8 +18,9 @@ repos: hooks: - id: isort -- repo: https://github.com/pre-commit/mirrors-mypy - rev: 
v1.8.0 - hooks: - - id: mypy - exclude: ^(docs/|setup.py) +# Too many things to fix, let's just ignore it for now +#- repo: https://github.com/pre-commit/mirrors-mypy +# rev: v1.8.0 +# hooks: +# - id: mypy +# exclude: ^(docs/|setup.py) From bb2cffb1754060aee494f6e18ae8c5485cc861a5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edgar=20Andr=C3=A9s=20Margffoy=20Tuay?= Date: Fri, 7 Mar 2025 12:22:39 -0500 Subject: [PATCH 035/166] Mark numexpr interpreter as free-threaded safe --- numexpr/module.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/numexpr/module.cpp b/numexpr/module.cpp index 345add4..442bcd0 100644 --- a/numexpr/module.cpp +++ b/numexpr/module.cpp @@ -477,6 +477,10 @@ PyInit_interpreter(void) { if (m == NULL) INITERROR; + #ifdef Py_GIL_DISABLED + PyUnstable_Module_SetGIL(m, Py_MOD_GIL_NOT_USED); + #endif + Py_INCREF(&NumExprType); PyModule_AddObject(m, "NumExpr", (PyObject *)&NumExprType); From da8a9df9d878e56cba60346a7583d220ba0ee64e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edgar=20Andr=C3=A9s=20Margffoy=20Tuay?= Date: Wed, 12 Mar 2025 13:49:40 -0500 Subject: [PATCH 036/166] Ensure single thread write to gs.init_sentinels_done --- numexpr/module.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/numexpr/module.cpp b/numexpr/module.cpp index 442bcd0..0a012e2 100644 --- a/numexpr/module.cpp +++ b/numexpr/module.cpp @@ -51,7 +51,9 @@ void *th_worker(void *tidptr) while (1) { /* Sentinels have to be initialised yet */ - gs.init_sentinels_done = 0; + if(tid == 0) { + gs.init_sentinels_done = 0; + } /* Meeting point for all threads (wait for initialization) */ pthread_mutex_lock(&gs.count_threads_mutex); From 9aab353f7acf16a533aadb7c54a1e24327e46e2d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edgar=20Andr=C3=A9s=20Margffoy=20Tuay?= Date: Mon, 17 Mar 2025 11:43:31 -0500 Subject: [PATCH 037/166] Address review comments --- numexpr/module.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/numexpr/module.cpp 
b/numexpr/module.cpp index 0a012e2..a42042b 100644 --- a/numexpr/module.cpp +++ b/numexpr/module.cpp @@ -47,12 +47,13 @@ void *th_worker(void *tidptr) char **errmsg; // For output buffering if needed vector out_buffer; + int init_sentinels_done = 0; while (1) { /* Sentinels have to be initialised yet */ - if(tid == 0) { - gs.init_sentinels_done = 0; + if (tid == 0) { + init_sentinels_done = 0; } /* Meeting point for all threads (wait for initialization) */ From f4439c8023378df0226480d838da53235813ae58 Mon Sep 17 00:00:00 2001 From: Francesc Alted Date: Tue, 18 Mar 2025 12:35:00 +0100 Subject: [PATCH 038/166] Register the thread_unsefe mark --- numexpr/tests/test_numexpr.py | 5 +++-- pytest.ini | 3 +++ 2 files changed, 6 insertions(+), 2 deletions(-) create mode 100644 pytest.ini diff --git a/numexpr/tests/test_numexpr.py b/numexpr/tests/test_numexpr.py index 2731b32..9cf1105 100644 --- a/numexpr/tests/test_numexpr.py +++ b/numexpr/tests/test_numexpr.py @@ -14,10 +14,8 @@ import platform import subprocess import sys -import unittest import warnings from contextlib import contextmanager -from unittest.mock import MagicMock import numpy import numpy as np @@ -37,6 +35,9 @@ from numexpr.expressions import ConstantNode from numexpr.utils import detect_number_of_cores +import unittest +from unittest.mock import MagicMock + try: import pytest pytest_available = True diff --git a/pytest.ini b/pytest.ini new file mode 100644 index 0000000..4fec170 --- /dev/null +++ b/pytest.ini @@ -0,0 +1,3 @@ +[pytest] +markers = + thread_unsafe: mark a test as thread unsafe From 42baf828473fef167e72d61d4b9028d1702c3eb6 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 18 Mar 2025 11:35:55 +0000 Subject: [PATCH 039/166] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- numexpr/tests/test_numexpr.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git 
a/numexpr/tests/test_numexpr.py b/numexpr/tests/test_numexpr.py index 9cf1105..2731b32 100644 --- a/numexpr/tests/test_numexpr.py +++ b/numexpr/tests/test_numexpr.py @@ -14,8 +14,10 @@ import platform import subprocess import sys +import unittest import warnings from contextlib import contextmanager +from unittest.mock import MagicMock import numpy import numpy as np @@ -35,9 +37,6 @@ from numexpr.expressions import ConstantNode from numexpr.utils import detect_number_of_cores -import unittest -from unittest.mock import MagicMock - try: import pytest pytest_available = True From 0c42b7d652829257525c46a8cc6e163c5bfec44f Mon Sep 17 00:00:00 2001 From: Francesc Alted Date: Tue, 18 Mar 2025 13:00:59 +0100 Subject: [PATCH 040/166] Revert commit 9aab353, as it makes some tests to crash --- numexpr/module.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/numexpr/module.cpp b/numexpr/module.cpp index a42042b..e7d6ded 100644 --- a/numexpr/module.cpp +++ b/numexpr/module.cpp @@ -47,13 +47,12 @@ void *th_worker(void *tidptr) char **errmsg; // For output buffering if needed vector out_buffer; - int init_sentinels_done = 0; while (1) { /* Sentinels have to be initialised yet */ if (tid == 0) { - init_sentinels_done = 0; + gs.init_sentinels_done = 0; } /* Meeting point for all threads (wait for initialization) */ From d10c61c87acb26604f96618cdf66574913f8fe01 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 24 Mar 2025 17:12:17 +0000 Subject: [PATCH 041/166] [pre-commit.ci] pre-commit autoupdate MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit updates: - [github.com/pre-commit/pre-commit-hooks: v4.5.0 → v5.0.0](https://github.com/pre-commit/pre-commit-hooks/compare/v4.5.0...v5.0.0) - [github.com/pycqa/isort: 5.13.2 → 6.0.1](https://github.com/pycqa/isort/compare/5.13.2...6.0.1) --- .pre-commit-config.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 
deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index cb4e829..5cda47c 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,6 +1,6 @@ repos: - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.5.0 + rev: v5.0.0 hooks: - id: trailing-whitespace - id: end-of-file-fixer @@ -14,7 +14,7 @@ repos: # - id: flake8 # - repo: https://github.com/pycqa/isort - rev: 5.13.2 + rev: 6.0.1 hooks: - id: isort From b3622f9e583157327ca37462d896bc04b4999905 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edgar=20Andr=C3=A9s=20Margffoy=20Tuay?= Date: Tue, 25 Mar 2025 12:08:24 -0500 Subject: [PATCH 042/166] Use native aarch64 Ci builders --- .github/workflows/build.yml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index b0077f8..4d7b58d 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -38,7 +38,7 @@ jobs: artifact_name: "linux-x86_64" # Linux ARM64 builds (native runners) - - os: ubuntu-latest + - os: ubuntu-24.04-arm arch: aarch64 cibw_pattern: "cp3{10,11,12,13,13t}-manylinux*" artifact_name: "linux-aarch64" @@ -72,11 +72,11 @@ jobs: echo "CIBW_BEFORE_TEST=pip install pytest pytest-run-parallel" >> "$GITHUB_ENV" echo "CIBW_TEST_COMMAND=pytest --parallel-threads=4 --pyargs numexpr" >> "$GITHUB_ENV" - - name: Set up QEMU - if: matrix.arch == 'aarch64' - uses: docker/setup-qemu-action@v3 - with: - platforms: arm64 + # - name: Set up QEMU + # if: matrix.arch == 'aarch64' + # uses: docker/setup-qemu-action@v3 + # with: + # platforms: arm64 - name: Build wheels uses: pypa/cibuildwheel@v2.23 From 256330f83ba8cc9a49d8ce3fa7f46c01bf0df967 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edgar=20Andr=C3=A9s=20Margffoy=20Tuay?= Date: Tue, 25 Mar 2025 12:21:10 -0500 Subject: [PATCH 043/166] Do not skip tests on aarch64 --- .github/workflows/build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/.github/workflows/build.yml b/.github/workflows/build.yml index 4d7b58d..1186ee0 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -11,7 +11,7 @@ env: CIBW_BUILD_VERBOSITY: 1 CIBW_TEST_COMMAND: pytest --pyargs numexpr # Testing on aarch64 takes too long, as it is currently emulated on GitHub Actions - CIBW_TEST_SKIP: "*linux*aarch64*" + # CIBW_TEST_SKIP: "*linux*aarch64*" # Building for musllinux and aarch64 takes way too much time. # Moreover, NumPy is not providing musllinux for x86_64 either, so it's not worth it. CIBW_SKIP: "*musllinux*aarch64* *musllinux*x86_64*" From 9c3e9d575c04abb50833c3be38a897e22d68a3f0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edgar=20Andr=C3=A9s=20Margffoy=20Tuay?= Date: Tue, 25 Mar 2025 18:07:54 -0500 Subject: [PATCH 044/166] Remove commented sections --- .github/workflows/build.yml | 7 ------- 1 file changed, 7 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 1186ee0..151efda 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -11,7 +11,6 @@ env: CIBW_BUILD_VERBOSITY: 1 CIBW_TEST_COMMAND: pytest --pyargs numexpr # Testing on aarch64 takes too long, as it is currently emulated on GitHub Actions - # CIBW_TEST_SKIP: "*linux*aarch64*" # Building for musllinux and aarch64 takes way too much time. # Moreover, NumPy is not providing musllinux for x86_64 either, so it's not worth it. 
CIBW_SKIP: "*musllinux*aarch64* *musllinux*x86_64*" @@ -72,12 +71,6 @@ jobs: echo "CIBW_BEFORE_TEST=pip install pytest pytest-run-parallel" >> "$GITHUB_ENV" echo "CIBW_TEST_COMMAND=pytest --parallel-threads=4 --pyargs numexpr" >> "$GITHUB_ENV" - # - name: Set up QEMU - # if: matrix.arch == 'aarch64' - # uses: docker/setup-qemu-action@v3 - # with: - # platforms: arm64 - - name: Build wheels uses: pypa/cibuildwheel@v2.23 From d296d274b894c613d0422123cef51c87d94a0813 Mon Sep 17 00:00:00 2001 From: Francesc Alted Date: Wed, 26 Mar 2025 13:25:25 +0100 Subject: [PATCH 045/166] Add a dedicated license field --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index e95cd82..fd4d826 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -15,12 +15,12 @@ description = "Fast numerical expression evaluator for NumPy" readme = "README.rst" authors = [{name = "David M. Cooke, Francesc Alted, and others", email = "blosc@blosc.org"}] maintainers = [{ name = "Blosc Development Team", email = "blosc@blosc.org"}] +license = "MIT" classifiers = [ "Development Status :: 6 - Mature", "Intended Audience :: Developers", "Intended Audience :: Information Technology", "Intended Audience :: Science/Research", - "License :: OSI Approved :: MIT License", "Programming Language :: Python", "Topic :: Software Development :: Libraries :: Python Modules", "Operating System :: Microsoft :: Windows", From b8634be04d6fb70e8707d7d2ed0abd58a7511b10 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edgar=20Andr=C3=A9s=20Margffoy=20Tuay?= Date: Thu, 27 Mar 2025 12:04:39 -0500 Subject: [PATCH 046/166] Add a benchmark/example for numexpr usage under free-threading conditions --- bench/free_threading.py | 154 ++++++++++++++++++++++++++++++++++++++++ numexpr/necompiler.py | 10 +-- 2 files changed, 157 insertions(+), 7 deletions(-) create mode 100644 bench/free_threading.py diff --git a/bench/free_threading.py b/bench/free_threading.py new file mode 100644 
index 0000000..f070c09 --- /dev/null +++ b/bench/free_threading.py @@ -0,0 +1,154 @@ +################################################################################# +# To mimic the scenario that computation is i/o bound and constrained by memory +# +# It's a much simplified version that the chunk is computed in a loop, +# and expression is evaluated in a sequence, which is not true in reality. +# Neverthless, numexpr outperforms numpy. +################################################################################# +""" +Benchmarking Expression 1: +NumPy time (threaded over 32 chunks with 2 threads): 4.612313 seconds +numexpr time (threaded with re_evaluate over 32 chunks with 2 threads): 0.951172 seconds +numexpr speedup: 4.85x +---------------------------------------- +Benchmarking Expression 2: +NumPy time (threaded over 32 chunks with 2 threads): 23.862752 seconds +numexpr time (threaded with re_evaluate over 32 chunks with 2 threads): 2.182058 seconds +numexpr speedup: 10.94x +---------------------------------------- +Benchmarking Expression 3: +NumPy time (threaded over 32 chunks with 2 threads): 20.594895 seconds +numexpr time (threaded with re_evaluate over 32 chunks with 2 threads): 2.927881 seconds +numexpr speedup: 7.03x +---------------------------------------- +Benchmarking Expression 4: +NumPy time (threaded over 32 chunks with 2 threads): 12.834101 seconds +numexpr time (threaded with re_evaluate over 32 chunks with 2 threads): 5.392480 seconds +numexpr speedup: 2.38x +---------------------------------------- +""" + +import os + +os.environ["NUMEXPR_NUM_THREADS"] = "1" +import threading +import timeit + +import numpy as np + +import numexpr as ne + +array_size = 10**8 +num_runs = 10 +num_chunks = 32 # Number of chunks +num_threads = 16 # Number of threads constrained by how many chunks memory can hold + +a = np.random.rand(array_size).reshape(10**4, -1) +b = np.random.rand(array_size).reshape(10**4, -1) +c = 
np.random.rand(array_size).reshape(10**4, -1) + +chunk_size = array_size // num_chunks + +expressions_numpy = [ + lambda a, b, c: a + b * c, + lambda a, b, c: a**2 + b**2 - 2 * a * b * np.cos(c), + lambda a, b, c: np.sin(a) + np.log(b) * np.sqrt(c), + lambda a, b, c: np.exp(a) + np.tan(b) - np.sinh(c), +] + +expressions_numexpr = [ + "a + b * c", + "a**2 + b**2 - 2 * a * b * cos(c)", + "sin(a) + log(b) * sqrt(c)", + "exp(a) + tan(b) - sinh(c)", +] + + +def benchmark_numpy_chunk(func, a, b, c, results, indices): + for index in indices: + start = index * chunk_size + end = (index + 1) * chunk_size + time_taken = timeit.timeit( + lambda: func(a[start:end], b[start:end], c[start:end]), number=num_runs + ) + results.append(time_taken) + + +def benchmark_numexpr_re_evaluate(expr, a, b, c, results, indices): + for index in indices: + start = index * chunk_size + end = (index + 1) * chunk_size + # if index == 0: + # Evaluate the first chunk with evaluate + time_taken = timeit.timeit( + lambda: ne.evaluate( + expr, + local_dict={ + "a": a[start:end], + "b": b[start:end], + "c": c[start:end], + }, + ), + number=num_runs, + ) + # else: + # Re-evaluate subsequent chunks with re_evaluate + # time_taken = timeit.timeit( + # lambda: ne.re_evaluate( + # local_dict={"a": a[start:end], "b": b[start:end], "c": c[start:end]} + # ), + # number=num_runs, + # ) + results.append(time_taken) + + +def run_benchmark_threaded(): + chunk_indices = list(range(num_chunks)) + + for i in range(len(expressions_numpy)): + print(f"Benchmarking Expression {i+1}:") + + results_numpy = [] + results_numexpr = [] + + threads_numpy = [] + for j in range(num_threads): + indices = chunk_indices[j::num_threads] # Distribute chunks across threads + thread = threading.Thread( + target=benchmark_numpy_chunk, + args=(expressions_numpy[i], a, b, c, results_numpy, indices), + ) + threads_numpy.append(thread) + thread.start() + + for thread in threads_numpy: + thread.join() + + numpy_time = sum(results_numpy) + 
print( + f"NumPy time (threaded over {num_chunks} chunks with {num_threads} threads): {numpy_time:.6f} seconds" + ) + + threads_numexpr = [] + for j in range(num_threads): + indices = chunk_indices[j::num_threads] # Distribute chunks across threads + thread = threading.Thread( + target=benchmark_numexpr_re_evaluate, + args=(expressions_numexpr[i], a, b, c, results_numexpr, indices), + ) + threads_numexpr.append(thread) + thread.start() + + for thread in threads_numexpr: + thread.join() + + numexpr_time = sum(results_numexpr) + print( + f"numexpr time (threaded with re_evaluate over {num_chunks} chunks with {num_threads} threads): {numexpr_time:.6f} seconds" + ) + print(f"numexpr speedup: {numpy_time / numexpr_time:.2f}x") + print("-" * 40) + + +if __name__ == "__main__": + run_benchmark_threaded() diff --git a/numexpr/necompiler.py b/numexpr/necompiler.py index 4ada878..537f816 100644 --- a/numexpr/necompiler.py +++ b/numexpr/necompiler.py @@ -775,14 +775,12 @@ def getArguments(names, local_dict=None, global_dict=None, _frame_depth: int=2): # Dictionaries for caching variable names and compiled expressions -# _names_cache = CacheDict(256) _names_cache = threading.local() -# _numexpr_cache = CacheDict(256) _numexpr_cache = threading.local() -# _numexpr_last = ContextDict() _numexpr_last = threading.local() evaluate_lock = threading.Lock() + def validate(ex: str, local_dict: Optional[Dict] = None, global_dict: Optional[Dict] = None, @@ -856,7 +854,6 @@ def validate(ex: str, ---- """ - global _numexpr_last if not hasattr(_numexpr_last, 'l'): _numexpr_last.l = ContextDict() @@ -998,7 +995,6 @@ def re_evaluate(local_dict: Optional[Dict] = None, The calling frame depth. Unless you are a NumExpr developer you should not set this value. 
""" - global _numexpr_last if not hasattr(_numexpr_last, 'l'): _numexpr_last.l = ContextDict() @@ -1009,5 +1005,5 @@ def re_evaluate(local_dict: Optional[Dict] = None, argnames = _numexpr_last.l['argnames'] args = getArguments(argnames, local_dict, global_dict, _frame_depth=_frame_depth) kwargs = _numexpr_last.l['kwargs'] - with evaluate_lock: - return compiled_ex(*args, **kwargs) + # with evaluate_lock: + return compiled_ex(*args, **kwargs) From 462dd17b54cf4122941120d26f1f3d76f4db9b42 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edgar=20Andr=C3=A9s=20Margffoy=20Tuay?= Date: Mon, 7 Apr 2025 12:56:23 -0500 Subject: [PATCH 047/166] Add benchmark results --- bench/free_threading.py | 67 ++++++++++++++++++++++++++--------------- 1 file changed, 42 insertions(+), 25 deletions(-) diff --git a/bench/free_threading.py b/bench/free_threading.py index f070c09..cd00e78 100644 --- a/bench/free_threading.py +++ b/bench/free_threading.py @@ -1,36 +1,61 @@ ################################################################################# -# To mimic the scenario that computation is i/o bound and constrained by memory +# To compare the performance of numexpr when free-threading CPython is used. # -# It's a much simplified version that the chunk is computed in a loop, -# and expression is evaluated in a sequence, which is not true in reality. -# Neverthless, numexpr outperforms numpy. +# This example makes use of Python threads, as opposed to C native ones +# in order to highlight the improvement introduced by free-threading CPython, +# which now disables the GIL altogether. 
################################################################################# """ +Results with GIL-enabled CPython: + +Benchmarking Expression 1: +NumPy time (threaded over 32 chunks with 16 threads): 1.173090 seconds +numexpr time (threaded with re_evaluate over 32 chunks with 16 threads): 0.951071 seconds +numexpr speedup: 1.23x +---------------------------------------- +Benchmarking Expression 2: +NumPy time (threaded over 32 chunks with 16 threads): 10.410874 seconds +numexpr time (threaded with re_evaluate over 32 chunks with 16 threads): 8.248753 seconds +numexpr speedup: 1.26x +---------------------------------------- +Benchmarking Expression 3: +NumPy time (threaded over 32 chunks with 16 threads): 9.605909 seconds +numexpr time (threaded with re_evaluate over 32 chunks with 16 threads): 11.087108 seconds +numexpr speedup: 0.87x +---------------------------------------- +Benchmarking Expression 4: +NumPy time (threaded over 32 chunks with 16 threads): 3.836962 seconds +numexpr time (threaded with re_evaluate over 32 chunks with 16 threads): 18.054531 seconds +numexpr speedup: 0.21x +---------------------------------------- + +Results with free-threading CPython: + Benchmarking Expression 1: -NumPy time (threaded over 32 chunks with 2 threads): 4.612313 seconds -numexpr time (threaded with re_evaluate over 32 chunks with 2 threads): 0.951172 seconds -numexpr speedup: 4.85x +NumPy time (threaded over 32 chunks with 16 threads): 3.415349 seconds +numexpr time (threaded with re_evaluate over 32 chunks with 16 threads): 2.618876 seconds +numexpr speedup: 1.30x ---------------------------------------- Benchmarking Expression 2: -NumPy time (threaded over 32 chunks with 2 threads): 23.862752 seconds -numexpr time (threaded with re_evaluate over 32 chunks with 2 threads): 2.182058 seconds -numexpr speedup: 10.94x +NumPy time (threaded over 32 chunks with 16 threads): 19.005238 seconds +numexpr time (threaded with re_evaluate over 32 chunks with 16 threads): 
12.611407 seconds +numexpr speedup: 1.51x ---------------------------------------- Benchmarking Expression 3: -NumPy time (threaded over 32 chunks with 2 threads): 20.594895 seconds -numexpr time (threaded with re_evaluate over 32 chunks with 2 threads): 2.927881 seconds -numexpr speedup: 7.03x +NumPy time (threaded over 32 chunks with 16 threads): 20.555149 seconds +numexpr time (threaded with re_evaluate over 32 chunks with 16 threads): 17.690749 seconds +numexpr speedup: 1.16x ---------------------------------------- Benchmarking Expression 4: -NumPy time (threaded over 32 chunks with 2 threads): 12.834101 seconds -numexpr time (threaded with re_evaluate over 32 chunks with 2 threads): 5.392480 seconds -numexpr speedup: 2.38x +NumPy time (threaded over 32 chunks with 16 threads): 38.338372 seconds +numexpr time (threaded with re_evaluate over 32 chunks with 16 threads): 35.074684 seconds +numexpr speedup: 1.09x ---------------------------------------- """ import os -os.environ["NUMEXPR_NUM_THREADS"] = "1" +os.environ["NUMEXPR_NUM_THREADS"] = "2" import threading import timeit @@ -91,14 +116,6 @@ def benchmark_numexpr_re_evaluate(expr, a, b, c, results, indices): ), number=num_runs, ) - # else: - # Re-evaluate subsequent chunks with re_evaluate - # time_taken = timeit.timeit( - # lambda: ne.re_evaluate( - # local_dict={"a": a[start:end], "b": b[start:end], "c": c[start:end]} - # ), - # number=num_runs, - # ) results.append(time_taken) From 61e44f0cb2eb750b2aa97dc7c90d666e50eef566 Mon Sep 17 00:00:00 2001 From: Francesc Alted Date: Mon, 9 Jun 2025 10:35:07 +0200 Subject: [PATCH 048/166] Comment test_refcount out. Fixes #511. 
--- numexpr/tests/test_numexpr.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/numexpr/tests/test_numexpr.py b/numexpr/tests/test_numexpr.py index 2731b32..91f4306 100644 --- a/numexpr/tests/test_numexpr.py +++ b/numexpr/tests/test_numexpr.py @@ -324,9 +324,11 @@ def test_where_scalar_bool(self): res = evaluate('where(a, b, c)') assert_array_equal(res, c) + # Comment out this test completely, as modern Python optimizes handling refcounts. + # See #511 for more info. @unittest.skipIf(hasattr(sys, "pypy_version_info"), "PyPy does not have sys.getrefcount()") - def test_refcount(self): + def _test_refcount(self): # Regression test for issue #310 a = array([1]) assert sys.getrefcount(a) == 2 From 73a459bb09262d83dc2cb520a281ddcfc5f0729b Mon Sep 17 00:00:00 2001 From: Francesc Alted Date: Mon, 9 Jun 2025 11:39:45 +0200 Subject: [PATCH 049/166] Modernized test suite to use latest version of pytest --- numexpr/tests/test_numexpr.py | 226 +++++++++++++++++----------------- 1 file changed, 114 insertions(+), 112 deletions(-) diff --git a/numexpr/tests/test_numexpr.py b/numexpr/tests/test_numexpr.py index 91f4306..74db5bb 100644 --- a/numexpr/tests/test_numexpr.py +++ b/numexpr/tests/test_numexpr.py @@ -808,108 +808,89 @@ def equal(a, b, exact): class Skip(Exception): pass -def test_expressions(): - test_no = [0] +@pytest.mark.parametrize( + "expr,test_scalar,dtype,optimization,exact,section_name", + [ + (expr, test_scalar, dtype, optimization, exact, section_name) + for test_scalar in (0, 1, 2) + for dtype in (int, int, np.float32, double, complex) + for optimization, exact in [ + ("none", False), + ("moderate", False), + ("aggressive", False), + ] + for section_name, section_tests in tests + for expr in section_tests + if not ( + dtype == complex + and ( + "<" in expr + or ">" in expr + or "%" in expr + or "arctan2" in expr + or "fmod" in expr + or "floor" in expr + or "ceil" in expr + ) + ) + if not (dtype in (int, int) and test_scalar and expr == 
"(a+1) ** -1") + ], +) +def test_expressions( + expr, test_scalar, dtype, optimization, exact, section_name +): + array_size = 100 + a = arange(2 * array_size, dtype=dtype)[::2] + a2 = zeros([array_size, array_size], dtype=dtype) + b = arange(array_size, dtype=dtype) / array_size + c = arange(array_size, dtype=dtype) + d = arange(array_size, dtype=dtype) + e = arange(array_size, dtype=dtype) + x = None - def make_test_method(a, a2, b, c, d, e, x, expr, - test_scalar, dtype, optimization, exact, section): - this_locals = locals() + if dtype == complex: + a = a.real + for var in [a2, b, c, d, e]: + var += 1j + var *= 1 + 1j - def method(): - try: - # We don't want to listen at RuntimeWarnings like - # "overflows" or "divide by zero" in plain eval(). - warnings.simplefilter("ignore") - npval = eval(expr, globals(), this_locals) - warnings.simplefilter("always") - npval = eval(expr, globals(), this_locals) - except Exception as ex: - # just store the exception in a variable - # compatibility with numpy v1.12 - # see also https://github.com/pydata/numexpr/issues/239 - np_exception = ex - npval = None - else: - np_exception = None + if test_scalar == 1: + a = a[array_size // 2] + if test_scalar == 2: + b = b[array_size // 2] - try: - neval = evaluate(expr, local_dict=this_locals, - optimization=optimization) - except AssertionError: - raise - except NotImplementedError: - print('%r not implemented for %s (scalar=%d, opt=%s)' - % (expr, dtype.__name__, test_scalar, optimization)) - except Exception as ne_exception: - same_exc_type = issubclass(type(ne_exception), - type(np_exception)) - if np_exception is None or not same_exc_type: - print('numexpr error for expression %r' % (expr,)) - raise - except: - print('numexpr error for expression %r' % (expr,)) - raise - else: - msg = ('expected numexpr error not raised for expression ' - '%r' % (expr,)) - assert np_exception is None, msg - - assert equal(npval, neval, exact), """%r -(test_scalar=%r, dtype=%r, optimization=%r, 
exact=%r, - npval=%r (%r - %r)\n neval=%r (%r - %r))""" % (expr, test_scalar, dtype.__name__, - optimization, exact, - npval, type(npval), shape(npval), - neval, type(neval), shape(neval)) - - method.description = ('test_expressions(%s, test_scalar=%r, ' - 'dtype=%r, optimization=%r, exact=%r)') % (expr, test_scalar, dtype.__name__, optimization, exact) - test_no[0] += 1 - method.__name__ = 'test_scalar%d_%s_%s_%s_%04d' % (test_scalar, - dtype.__name__, - optimization.encode('ascii'), - section.encode('ascii'), - test_no[0]) - return method + # We don't want to listen at RuntimeWarnings like + # "overflows" or "divide by zero" in plain eval(). + warnings.simplefilter("ignore") + try: + npval = eval(expr, globals(), locals()) + except Exception as ex: + np_exception = ex + npval = None + else: + np_exception = None + warnings.simplefilter("always") - x = None - for test_scalar in (0, 1, 2): - for dtype in (int, int, np.float32, double, complex): - array_size = 100 - a = arange(2 * array_size, dtype=dtype)[::2] - a2 = zeros([array_size, array_size], dtype=dtype) - b = arange(array_size, dtype=dtype) / array_size - c = arange(array_size, dtype=dtype) - d = arange(array_size, dtype=dtype) - e = arange(array_size, dtype=dtype) - if dtype == complex: - a = a.real - for x in [a2, b, c, d, e]: - x += 1j - x *= 1 + 1j - if test_scalar == 1: - a = a[array_size // 2] - if test_scalar == 2: - b = b[array_size // 2] - for optimization, exact in [ - ('none', False), ('moderate', False), ('aggressive', False)]: - for section_name, section_tests in tests: - for expr in section_tests: - if (dtype == complex and - ('<' in expr or '>' in expr or '%' in expr - or "arctan2" in expr or "fmod" in expr - or "floor" in expr or "ceil" in expr)): - # skip complex comparisons or functions not - # defined in complex domain. 
- continue - if (dtype in (int, int) and test_scalar and - expr == '(a+1) ** -1'): - continue - - m = make_test_method(a, a2, b, c, d, e, x, - expr, test_scalar, dtype, - optimization, exact, - section_name) - yield m + try: + neval = evaluate(expr, local_dict=locals(), optimization=optimization) + except AssertionError: + raise + except NotImplementedError: + pytest.skip( + f"{expr!r} not implemented for {dtype.__name__} (scalar={test_scalar}, opt={optimization})" + ) + except Exception as ne_exception: + same_exc_type = issubclass(type(ne_exception), type(np_exception)) + if np_exception is None or not same_exc_type: + pytest.fail(f"numexpr error for expression {expr!r}") + else: + if np_exception is not None: + pytest.fail(f"expected numexpr error not raised for expression {expr!r}") + + assert equal(npval, neval, exact), f"""{expr!r} + (test_scalar={test_scalar!r}, dtype={dtype.__name__!r}, optimization={optimization!r}, exact={exact!r}, + npval={npval!r} ({type(npval)!r} - {shape(npval)!r}) + neval={neval!r} ({type(neval)!r} - {shape(neval)!r}))""" class test_int64(TestCase): @@ -1402,25 +1383,11 @@ def suite(): theSuite = unittest.TestSuite() niter = 1 - class TestExpressions(TestCase): - pass - - def add_method(func): - def method(self): - return func() - - setattr(TestExpressions, func.__name__, - method.__get__(None, TestExpressions)) - - for func in test_expressions(): - add_method(func) - for n in range(niter): theSuite.addTest(unittest.defaultTestLoader.loadTestsFromTestCase(test_numexpr)) if 'sparc' not in platform.machine(): theSuite.addTest(unittest.defaultTestLoader.loadTestsFromTestCase(test_numexpr2)) theSuite.addTest(unittest.defaultTestLoader.loadTestsFromTestCase(test_evaluate)) - theSuite.addTest(unittest.defaultTestLoader.loadTestsFromTestCase(TestExpressions)) theSuite.addTest(unittest.defaultTestLoader.loadTestsFromTestCase(test_int32_int64)) theSuite.addTest(unittest.defaultTestLoader.loadTestsFromTestCase(test_uint32_int64)) 
theSuite.addTest(unittest.defaultTestLoader.loadTestsFromTestCase(test_strings)) @@ -1439,6 +1406,41 @@ def method(self): # interaction with threads and subprocess :-/ theSuite.addTest(unittest.defaultTestLoader.loadTestsFromTestCase(test_threading)) + # Add the pytest parametrized tests only if pytest is available + if pytest_available: + # Create a class that will run the test_expressions function with different parameters + class TestExpressions(unittest.TestCase): + pass + + # Get the parameters from the pytest.mark.parametrize decorator + # This is safer than accessing internal pytest modules + marker = getattr(test_expressions, "pytestmark", None) + if marker and hasattr(marker[0], "args") and len(marker[0].args) >= 2: + param_list = marker[0].args[1] + + # Create test methods dynamically + for i, params in enumerate(param_list): + expr, test_scalar, dtype, optimization, exact, section_name = params + + def create_test_method(params=params): + def test_method(self): + expr, test_scalar, dtype, optimization, exact, section_name = ( + params + ) + test_expressions( + expr, test_scalar, dtype, optimization, exact, section_name + ) + + return test_method + + method_name = f"test_expr_{i}" + setattr(TestExpressions, method_name, create_test_method()) + + # Add our dynamically created TestExpressions to the suite + theSuite.addTest( + unittest.defaultTestLoader.loadTestsFromTestCase(TestExpressions) + ) + return theSuite From 16acf2646beb77b6ff32e724d4a6e443f7e1c854 Mon Sep 17 00:00:00 2001 From: Francesc Alted Date: Mon, 9 Jun 2025 12:22:21 +0200 Subject: [PATCH 050/166] Register thread_unsafe as a custom mark --- numexpr/tests/conftest.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) create mode 100644 numexpr/tests/conftest.py diff --git a/numexpr/tests/conftest.py b/numexpr/tests/conftest.py new file mode 100644 index 0000000..4ffa831 --- /dev/null +++ b/numexpr/tests/conftest.py @@ -0,0 +1,16 @@ 
+################################################################### +# Numexpr - Fast numerical array expression evaluator for NumPy. +# +# License: MIT +# Author: See AUTHORS.txt +# +# See LICENSE.txt and LICENSES/*.txt for details about copyright and +# rights to use. +#################################################################### + +import pytest + +def pytest_configure(config): + config.addinivalue_line( + "markers", "thread_unsafe: mark test as unsafe for parallel execution" + ) From 11bab7ae4dbda1e139948d01294a10d412784125 Mon Sep 17 00:00:00 2001 From: Francesc Alted Date: Mon, 9 Jun 2025 12:31:11 +0200 Subject: [PATCH 051/166] Move TestExpressions class above the main niter loop --- numexpr/tests/test_numexpr.py | 51 +++++++++++++++++------------------ 1 file changed, 25 insertions(+), 26 deletions(-) diff --git a/numexpr/tests/test_numexpr.py b/numexpr/tests/test_numexpr.py index 74db5bb..d409165 100644 --- a/numexpr/tests/test_numexpr.py +++ b/numexpr/tests/test_numexpr.py @@ -1383,29 +1383,6 @@ def suite(): theSuite = unittest.TestSuite() niter = 1 - for n in range(niter): - theSuite.addTest(unittest.defaultTestLoader.loadTestsFromTestCase(test_numexpr)) - if 'sparc' not in platform.machine(): - theSuite.addTest(unittest.defaultTestLoader.loadTestsFromTestCase(test_numexpr2)) - theSuite.addTest(unittest.defaultTestLoader.loadTestsFromTestCase(test_evaluate)) - theSuite.addTest(unittest.defaultTestLoader.loadTestsFromTestCase(test_int32_int64)) - theSuite.addTest(unittest.defaultTestLoader.loadTestsFromTestCase(test_uint32_int64)) - theSuite.addTest(unittest.defaultTestLoader.loadTestsFromTestCase(test_strings)) - theSuite.addTest( - unittest.defaultTestLoader.loadTestsFromTestCase(test_irregular_stride)) - theSuite.addTest(unittest.defaultTestLoader.loadTestsFromTestCase(test_zerodim)) - theSuite.addTest(unittest.defaultTestLoader.loadTestsFromTestCase(test_threading_config)) - - # multiprocessing module is not supported on Hurd/kFreeBSD - if
(pl.system().lower() not in ('gnu', 'gnu/kfreebsd')): - theSuite.addTest(unittest.defaultTestLoader.loadTestsFromTestCase(test_subprocess)) - - # I need to put this test after test_subprocess because - # if not, the test suite locks immediately before test_subproces. - # This only happens with Windows, so I suspect of a subtle bad - # interaction with threads and subprocess :-/ - theSuite.addTest(unittest.defaultTestLoader.loadTestsFromTestCase(test_threading)) - # Add the pytest parametrized tests only if pytest is available if pytest_available: # Create a class that will run the test_expressions function with different parameters @@ -1436,10 +1413,32 @@ def test_method(self): method_name = f"test_expr_{i}" setattr(TestExpressions, method_name, create_test_method()) - # Add our dynamically created TestExpressions to the suite + for n in range(niter): + theSuite.addTest(unittest.defaultTestLoader.loadTestsFromTestCase(test_numexpr)) + if 'sparc' not in platform.machine(): + theSuite.addTest(unittest.defaultTestLoader.loadTestsFromTestCase(test_numexpr2)) + theSuite.addTest(unittest.defaultTestLoader.loadTestsFromTestCase(test_evaluate)) + # Add the dynamically created TestExpressions to the suite + if pytest_available: + theSuite.addTest(unittest.defaultTestLoader.loadTestsFromTestCase(TestExpressions)) + theSuite.addTest(unittest.defaultTestLoader.loadTestsFromTestCase(test_int32_int64)) + theSuite.addTest(unittest.defaultTestLoader.loadTestsFromTestCase(test_uint32_int64)) + theSuite.addTest(unittest.defaultTestLoader.loadTestsFromTestCase(test_strings)) theSuite.addTest( - unittest.defaultTestLoader.loadTestsFromTestCase(TestExpressions) - ) + unittest.defaultTestLoader.loadTestsFromTestCase(test_irregular_stride)) + theSuite.addTest(unittest.defaultTestLoader.loadTestsFromTestCase(test_zerodim)) + theSuite.addTest(unittest.defaultTestLoader.loadTestsFromTestCase(test_threading_config)) + + # multiprocessing module is not supported on Hurd/kFreeBSD + if 
(pl.system().lower() not in ('gnu', 'gnu/kfreebsd')): + theSuite.addTest(unittest.defaultTestLoader.loadTestsFromTestCase(test_subprocess)) + + # I need to put this test after test_subprocess because + # if not, the test suite locks immediately before test_subproces. + # This only happens with Windows, so I suspect of a subtle bad + # interaction with threads and subprocess :-/ + theSuite.addTest(unittest.defaultTestLoader.loadTestsFromTestCase(test_threading)) + return theSuite From 0b9d2e94b11ced65bd3ad717b430d230aa407474 Mon Sep 17 00:00:00 2001 From: Francesc Alted Date: Mon, 9 Jun 2025 12:47:22 +0200 Subject: [PATCH 052/166] Getting ready for release 2.11.0 --- ANNOUNCE.rst | 27 ++++++++++++++++----------- RELEASE_NOTES.rst | 17 ++++++++++++++--- VERSION | 2 +- 3 files changed, 31 insertions(+), 15 deletions(-) diff --git a/ANNOUNCE.rst b/ANNOUNCE.rst index 12edc8d..e3d7285 100644 --- a/ANNOUNCE.rst +++ b/ANNOUNCE.rst @@ -1,28 +1,33 @@ ========================= -Announcing NumExpr 2.10.2 +Announcing NumExpr 2.11.0 ========================= Hi everyone, -NumExpr 2.10.2 provides wheels for Python 2.13 for first time. -Also, there is better support for CPUs that do not have a power -of 2 number of cores. Finally, numexpr is allowed to run with -the multithreading package in Python. +NumExpr 2.11.0 adds initial support for free-threaded Python 3.13t. +This is still experimental, so please report any issues you find. +Finally, Python 3.10 is now the minimum supported version. Project documentation is available at: http://numexpr.readthedocs.io/ -Changes from 2.10.1 to 2.10.2 +Changes from 2.10.2 to 2.11.0 ----------------------------- -* Better support for CPUs that do not have a power of 2 number of - cores. See #479 and #490. Thanks to @avalentino. +* Initial support for free-threaded Python 3.13t has been added. + This is still experimental, so please report any issues you find. + For more info, see discussions PRs #504, #505 and #508.
+ Thanks to @andfoy, @rgommers and @FrancescAlted for the work. -* Allow numexpr to run with the multithreading package in Python. - See PR #496. Thanks to @emmaai +* Fix imaginary evaluation in the form of `1.1e1j`. This was + previously not supported and would raise an error. Thanks to @27rabbitlt + for the fix. -* Wheels for Python 3.13 are now provided. +* The test suite has been modernized to use `pytest` instead of `unittest`. + This should make it easier to run the tests and contribute to the project. + +* Python 3.10 is now the minimum supported version. What's Numexpr? --------------- diff --git a/RELEASE_NOTES.rst b/RELEASE_NOTES.rst index f919b11..3800223 100644 --- a/RELEASE_NOTES.rst +++ b/RELEASE_NOTES.rst @@ -1,12 +1,23 @@ ===================================== -Release notes for NumExpr 2.10 series +Release notes for NumExpr 2.11 series ===================================== -Changes from 2.10.2 to 2.10.3 +Changes from 2.10.2 to 2.11.0 ----------------------------- -* Python 3.10 is now the minimum supported version. +* Initial support for free-threaded Python 3.13t has been added. + This is still experimental, so please report any issues you find. + For more info, see discussions PRs #504, #505 and #508. + Thanks to @andfoy, @rgommers and @FrancescAlted for the work. + +* Fix imaginary evaluation in the form of `1.1e1j`. This was + previously not supported and would raise an error. Thanks to @27rabbitlt + for the fix. +* The test suite has been modernized to use `pytest` instead of `unittest`. + This should make it easier to run the tests and contribute to the project. + +* Python 3.10 is now the minimum supported version. 
Changes from 2.10.1 to 2.10.2 ----------------------------- diff --git a/VERSION b/VERSION index c23e78a..46b81d8 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -2.10.3.dev0 +2.11.0 From bbf46723251f0830f1e4d518efd9d57ca8d1a58f Mon Sep 17 00:00:00 2001 From: Francesc Alted Date: Mon, 9 Jun 2025 13:37:37 +0200 Subject: [PATCH 053/166] Post 2.11.0 release actions done --- RELEASE_NOTES.rst | 7 +++++++ VERSION | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/RELEASE_NOTES.rst b/RELEASE_NOTES.rst index 3800223..9b133d9 100644 --- a/RELEASE_NOTES.rst +++ b/RELEASE_NOTES.rst @@ -2,6 +2,12 @@ Release notes for NumExpr 2.11 series ===================================== +Changes from 2.11.0 to 2.11.1 +----------------------------- + +* **Under development.** + + Changes from 2.10.2 to 2.11.0 ----------------------------- @@ -19,6 +25,7 @@ Changes from 2.10.2 to 2.11.0 * Python 3.10 is now the minimum supported version. + Changes from 2.10.1 to 2.10.2 ----------------------------- diff --git a/VERSION b/VERSION index 46b81d8..2cc9678 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -2.11.0 +2.11.1.dev0 From 4abc3508235367c13df2ba28993276094aa90286 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Robert?= Date: Mon, 4 Aug 2025 14:32:10 +0200 Subject: [PATCH 054/166] WHL: upgrade cibuildwheel, add `cp314` wheels --- .github/workflows/build.yml | 10 +++++----- numexpr/tests/conftest.py | 1 + pyproject.toml | 3 ++- 3 files changed, 8 insertions(+), 6 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 151efda..1529221 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -33,13 +33,13 @@ jobs: # Linux x86_64 builds - os: ubuntu-latest arch: x86_64 - cibw_pattern: "cp3{10,11,12,13,13t}-manylinux*" + cibw_pattern: "*manylinux*" artifact_name: "linux-x86_64" # Linux ARM64 builds (native runners) - os: ubuntu-24.04-arm arch: aarch64 - cibw_pattern: "cp3{10,11,12,13,13t}-manylinux*" + cibw_pattern: 
"*manylinux*" artifact_name: "linux-aarch64" # Don't use native runners for now (looks like wait times are too long) #runs-on: ["ubuntu-latest", "arm64"] @@ -47,13 +47,13 @@ jobs: # Windows builds - os: windows-latest arch: x86_64 - cibw_pattern: "cp3{10,11,12,13,13t}-win*" + cibw_pattern: "*" artifact_name: "windows-x86_64" # macOS builds (universal2) - os: macos-latest arch: x86_64 - cibw_pattern: "cp3{10,11,12,13,13t}-macosx*" + cibw_pattern: "*" artifact_name: "macos-universal2" steps: - uses: actions/checkout@v3 @@ -72,7 +72,7 @@ jobs: echo "CIBW_TEST_COMMAND=pytest --parallel-threads=4 --pyargs numexpr" >> "$GITHUB_ENV" - name: Build wheels - uses: pypa/cibuildwheel@v2.23 + uses: pypa/cibuildwheel@v3.1.3 - name: Make sdist if: ${{ matrix.os == 'windows-latest' }} diff --git a/numexpr/tests/conftest.py b/numexpr/tests/conftest.py index 4ffa831..3d32260 100644 --- a/numexpr/tests/conftest.py +++ b/numexpr/tests/conftest.py @@ -10,6 +10,7 @@ import pytest + def pytest_configure(config): config.addinivalue_line( "markers", "thread_unsafe: mark test as unsafe for parallel execution" diff --git a/pyproject.toml b/pyproject.toml index fd4d826..890cdb9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -30,6 +30,7 @@ classifiers = [ "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", "Programming Language :: Python :: 3.13", + "Programming Language :: Python :: 3.14", ] requires-python = ">=3.10" # Follow guidelines from https://scientific-python.org/specs/spec-0000/ @@ -44,7 +45,7 @@ documentation = "https://numexpr.readthedocs.io" repository = "https://github.com/pydata/numexpr" [tool.cibuildwheel] -skip = "cp36-* cp37-* pp37-* cp38-* pp* *-manylinux_i686 *_ppc64le *_s390x" +skip = ["*-manylinux_i686", "*_ppc64le", "*_s390x"] # Let's use a more recent version of the manylinux image for more modern compilers manylinux-x86_64-image = "manylinux_2_28" manylinux-aarch64-image = "manylinux_2_28" From 
aa02baf8be494193ac9c07ff0e3c7f638ca27954 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Robert?= Date: Mon, 4 Aug 2025 14:48:05 +0200 Subject: [PATCH 055/166] MNT: move as much as possible from cibuildwheel conf to static metadata --- .github/workflows/build.yml | 11 ----------- pyproject.toml | 12 ++++++++++++ 2 files changed, 12 insertions(+), 11 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 1529221..745ee30 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -6,10 +6,7 @@ permissions: contents: read env: - CIBW_BEFORE_BUILD: pip install setuptools oldest-supported-numpy pytest - CIBW_BEFORE_TEST: pip install pytest CIBW_BUILD_VERBOSITY: 1 - CIBW_TEST_COMMAND: pytest --pyargs numexpr # Testing on aarch64 takes too long, as it is currently emulated on GitHub Actions # Building for musllinux and aarch64 takes way too much time. # Moreover, NumPy is not providing musllinux for x86_64 either, so it's not worth it. 
@@ -63,14 +60,6 @@ jobs: with: python-version: '3.x' - - name: Setup free-threading variables - if: ${{ endsWith(matrix.cibw_build, 't-*') }} - shell: bash -l {0} - run: | - echo "CIBW_BEFORE_BUILD=pip install setuptools numpy" >> "$GITHUB_ENV" - echo "CIBW_BEFORE_TEST=pip install pytest pytest-run-parallel" >> "$GITHUB_ENV" - echo "CIBW_TEST_COMMAND=pytest --parallel-threads=4 --pyargs numexpr" >> "$GITHUB_ENV" - - name: Build wheels uses: pypa/cibuildwheel@v3.1.3 diff --git a/pyproject.toml b/pyproject.toml index 890cdb9..9d5149c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -44,8 +44,20 @@ homepage = "https://github.com/pydata/numexpr" documentation = "https://numexpr.readthedocs.io" repository = "https://github.com/pydata/numexpr" +[dependency-groups] +test = [ + "pytest>=7.0.0", + "pytest-run-parallel>=0.6.0", +] + [tool.cibuildwheel] skip = ["*-manylinux_i686", "*_ppc64le", "*_s390x"] # Let's use a more recent version of the manylinux image for more modern compilers manylinux-x86_64-image = "manylinux_2_28" manylinux-aarch64-image = "manylinux_2_28" +test-groups = ["test"] +test-command = ["python -m pytest --pyargs numexpr"] + +[[tool.cibuildwheel.overrides]] +select = "cp31*t-*" +test-command = ["python -m pytest --parallel-threads=4 --pyargs numexpr"] From 45b6d6643074b8792a43db98afbd015cbb1cc637 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Robert?= Date: Mon, 4 Aug 2025 14:56:50 +0200 Subject: [PATCH 056/166] MNT: specify license-files following PEP 639 --- pyproject.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index fd4d826..e39b372 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,7 +1,6 @@ [build-system] requires = [ - "setuptools", - "wheel", + "setuptools>=77.0.0", "numpy>=2.0.0", ] build-backend = "setuptools.build_meta" @@ -16,6 +15,7 @@ readme = "README.rst" authors = [{name = "David M. 
Cooke, Francesc Alted, and others", email = "blosc@blosc.org"}] maintainers = [{ name = "Blosc Development Team", email = "blosc@blosc.org"}] license = "MIT" +license-files = ["LICENSE.txt", "LICENSES/*"] classifiers = [ "Development Status :: 6 - Mature", "Intended Audience :: Developers", From c59ee636e66461804640a571565706ef4cfb6f4e Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 4 Aug 2025 12:59:38 +0000 Subject: [PATCH 057/166] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- numexpr/tests/conftest.py | 1 + 1 file changed, 1 insertion(+) diff --git a/numexpr/tests/conftest.py b/numexpr/tests/conftest.py index 4ffa831..3d32260 100644 --- a/numexpr/tests/conftest.py +++ b/numexpr/tests/conftest.py @@ -10,6 +10,7 @@ import pytest + def pytest_configure(config): config.addinivalue_line( "markers", "thread_unsafe: mark test as unsafe for parallel execution" From 659107cbeca6ce0a93603e1771b9de93fd1cef76 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Robert?= Date: Mon, 4 Aug 2025 16:23:04 +0200 Subject: [PATCH 058/166] WHL: enable musllinux wheels --- .github/workflows/build.yml | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 745ee30..e61e021 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -7,10 +7,6 @@ permissions: env: CIBW_BUILD_VERBOSITY: 1 - # Testing on aarch64 takes too long, as it is currently emulated on GitHub Actions - # Building for musllinux and aarch64 takes way too much time. - # Moreover, NumPy is not providing musllinux for x86_64 either, so it's not worth it. 
- CIBW_SKIP: "*musllinux*aarch64* *musllinux*x86_64*" jobs: build_wheels: @@ -30,16 +26,14 @@ jobs: # Linux x86_64 builds - os: ubuntu-latest arch: x86_64 - cibw_pattern: "*manylinux*" + cibw_pattern: "*" artifact_name: "linux-x86_64" # Linux ARM64 builds (native runners) - os: ubuntu-24.04-arm arch: aarch64 - cibw_pattern: "*manylinux*" + cibw_pattern: "*" artifact_name: "linux-aarch64" - # Don't use native runners for now (looks like wait times are too long) - #runs-on: ["ubuntu-latest", "arm64"] # Windows builds - os: windows-latest From 845ba6ffad6e139ecbf7ada1ba160bb7ac8de2fc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Robert?= Date: Mon, 4 Aug 2025 18:43:25 +0200 Subject: [PATCH 059/166] CLN: cleanup unused option in wheel jobs --- .github/workflows/build.yml | 5 ----- 1 file changed, 5 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index e61e021..c4aee43 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -15,7 +15,6 @@ jobs: permissions: contents: write env: - CIBW_BUILD: ${{ matrix.cibw_pattern }} CIBW_ARCHS_LINUX: ${{ matrix.arch }} CIBW_ARCHS_MACOS: "x86_64 arm64" CIBW_ENABLE: cpython-freethreading @@ -26,25 +25,21 @@ jobs: # Linux x86_64 builds - os: ubuntu-latest arch: x86_64 - cibw_pattern: "*" artifact_name: "linux-x86_64" # Linux ARM64 builds (native runners) - os: ubuntu-24.04-arm arch: aarch64 - cibw_pattern: "*" artifact_name: "linux-aarch64" # Windows builds - os: windows-latest arch: x86_64 - cibw_pattern: "*" artifact_name: "windows-x86_64" # macOS builds (universal2) - os: macos-latest arch: x86_64 - cibw_pattern: "*" artifact_name: "macos-universal2" steps: - uses: actions/checkout@v3 From 47550280e2e1741e7984c4a643ad3e464207227c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Robert?= Date: Tue, 5 Aug 2025 09:28:35 +0200 Subject: [PATCH 060/166] MNT: move build-verbosity option to pyproject.toml --- .github/workflows/build.yml | 3 --- pyproject.toml | 1 + 2 
files changed, 1 insertion(+), 3 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index c4aee43..bfdd27d 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -5,9 +5,6 @@ on: [push, pull_request] permissions: contents: read -env: - CIBW_BUILD_VERBOSITY: 1 - jobs: build_wheels: name: Build wheels on ${{ matrix.os }} for ${{ matrix.arch }} diff --git a/pyproject.toml b/pyproject.toml index 4d3afd1..264a999 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -51,6 +51,7 @@ test = [ ] [tool.cibuildwheel] +build-verbosity = 1 skip = ["*-manylinux_i686", "*_ppc64le", "*_s390x"] # Let's use a more recent version of the manylinux image for more modern compilers manylinux-x86_64-image = "manylinux_2_28" From 4282cd439970179dbfc1480f82c02a64cb3e3930 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 11 Aug 2025 19:22:01 +0000 Subject: [PATCH 061/166] [pre-commit.ci] pre-commit autoupdate MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit updates: - [github.com/pre-commit/pre-commit-hooks: v5.0.0 → v6.0.0](https://github.com/pre-commit/pre-commit-hooks/compare/v5.0.0...v6.0.0) --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 5cda47c..97f6d37 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,6 +1,6 @@ repos: - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v5.0.0 + rev: v6.0.0 hooks: - id: trailing-whitespace - id: end-of-file-fixer From 54187d7fec914cb8892c9c279991c0a782539775 Mon Sep 17 00:00:00 2001 From: Luke Shaw Date: Thu, 28 Aug 2025 13:56:40 +0200 Subject: [PATCH 062/166] Adding isnan/isfinite functions etc. 
--- numexpr/expressions.py | 5 +++ numexpr/functions.hpp | 26 +++++++++++++ numexpr/interp_body.cpp | 18 +++++++++ numexpr/interpreter.cpp | 59 ++++++++++++++++++++++++++++-- numexpr/interpreter.hpp | 16 ++++++++ numexpr/module.cpp | 5 ++- numexpr/msvc_function_stubs.hpp | 13 +++++++ numexpr/necompiler.py | 6 ++- numexpr/opcodes.hpp | 65 ++++++++++++++++++++------------- numexpr/tests/test_numexpr.py | 17 ++++++++- 10 files changed, 196 insertions(+), 34 deletions(-) diff --git a/numexpr/expressions.py b/numexpr/expressions.py index 5924c5f..9540fe4 100644 --- a/numexpr/expressions.py +++ b/numexpr/expressions.py @@ -366,6 +366,9 @@ def multiply(x, y): 'complex': func(complex, 'complex'), 'conj': func(numpy.conj, 'complex'), + 'isnan': func(numpy.isnan, 'bool'), + 'isfinite': func(numpy.isfinite, 'bool'), + 'sum': gen_reduce_axis_func('sum'), 'prod': gen_reduce_axis_func('prod'), 'min': gen_reduce_axis_func('min'), @@ -521,4 +524,6 @@ class FuncNode(OpNode): def __init__(self, opcode=None, args=None, kind=None): if (kind is None) and (args is not None): kind = commonKind(args) + if opcode in ("isnan", "isfinite"): # bodge for boolean return functions + kind = 'bool' OpNode.__init__(self, opcode, args, kind) diff --git a/numexpr/functions.hpp b/numexpr/functions.hpp index 78e03f4..6715406 100644 --- a/numexpr/functions.hpp +++ b/numexpr/functions.hpp @@ -86,6 +86,32 @@ FUNC_DD(FUNC_DD_LAST, NULL, NULL, NULL) #undef FUNC_DD #endif +// double -> boolean functions +#ifndef FUNC_BD +#define ELIDE_FUNC_BD +#define FUNC_BD(...) +#endif +FUNC_BD(FUNC_ISNAN_BD, "isnan_bd", isnan, vdIsnan) +FUNC_BD(FUNC_ISFINITE_BD, "isfinite_bd", isfinite, vdIsfinite) +FUNC_BD(FUNC_BD_LAST, NULL, NULL, NULL) +#ifdef ELIDE_FUNC_BD +#undef ELIDE_FUNC_BD +#undef FUNC_BD +#endif + +// float -> boolean functions (C99 defines the same function for all types) +#ifndef FUNC_BF +#define ELIDE_FUNC_BF +#define FUNC_BF(...) 
+#endif // use wrappers as there is name collision with isnanf in std +FUNC_BF(FUNC_ISNAN_BF, "isnan_bf", isnanf_wrapper, isnanf2, vfIsnan) +FUNC_BF(FUNC_ISFINITE_BF, "isfinite_bf", isfinitef_wrapper, isfinitef2, vfIsfinite) +FUNC_BF(FUNC_BF_LAST, NULL, NULL, NULL) +#ifdef ELIDE_FUNC_BF +#undef ELIDE_FUNC_BF +#undef FUNC_BF +#endif + #ifndef FUNC_DDD #define ELIDE_FUNC_DDD #define FUNC_DDD(...) diff --git a/numexpr/interp_body.cpp b/numexpr/interp_body.cpp index 573ce8c..740dc34 100644 --- a/numexpr/interp_body.cpp +++ b/numexpr/interp_body.cpp @@ -451,6 +451,24 @@ case OP_COMPLEX_CDD: VEC_ARG2(cr_dest = d1; ci_dest = d2); + // Boolean return types + case OP_FUNC_BFN: +#ifdef USE_VML + VEC_ARG1_VML(functions_bf_vml[arg2](BLOCK_SIZE, + (float*)x1, (float*)dest)); +#else + VEC_ARG1(b_dest = functions_bf[arg2](f1)); +#endif + + + case OP_FUNC_BDN: +#ifdef USE_VML + VEC_ARG1_VML(functions_bd_vml[arg2](BLOCK_SIZE, + (double*)x1, (bool*)dest)); +#else + VEC_ARG1(b_dest = functions_bd[arg2](d1)); +#endif + /* Reductions */ case OP_SUM_IIN: VEC_ARG1(i_reduce += i1); case OP_SUM_LLN: VEC_ARG1(l_reduce += l1); diff --git a/numexpr/interpreter.cpp b/numexpr/interpreter.cpp index dbfcca1..e8f5797 100644 --- a/numexpr/interpreter.cpp +++ b/numexpr/interpreter.cpp @@ -204,6 +204,47 @@ FuncDDPtr functions_dd[] = { #undef FUNC_DD }; +// Boolean output functions - need no except due to std definition of isnan/isfinite as int(float) +typedef bool (*FuncBFPtr)(float) noexcept; +#ifdef _WIN32 +FuncBFPtr functions_bf[] = { +#define FUNC_BF(fop, s, f, f_win32, ...) f_win32, +#include "functions.hpp" +#undef FUNC_BF +}; +#else +FuncBFPtr functions_bf[] = { +#define FUNC_BF(fop, s, f, ...) 
f, +#include "functions.hpp" +#undef FUNC_BF +}; +#endif + +#ifdef USE_VML +typedef void (*FuncBFPtr_vml)(MKL_INT, const float*, bool*); +FuncBFPtr_vml functions_bf_vml[] = { +#define FUNC_BF(fop, s, f, f_win32, f_vml) f_vml, +#include "functions.hpp" +#undef FUNC_BF +}; +#endif + +typedef bool (*FuncBDPtr)(double); +FuncBDPtr functions_bd[] = { +#define FUNC_BD(fop, s, f, ...) f, +#include "functions.hpp" +#undef FUNC_BD +}; + +#ifdef USE_VML +typedef void (*FuncBDPtr_vml)(MKL_INT, const double*, bool*); +FuncBDPtr_vml functions_bd_vml[] = { +#define FUNC_BD(fop, s, f, f_vml) f_vml, +#include "functions.hpp" +#undef FUNC_BD +}; +#endif + #ifdef USE_VML /* Fake vdConj function just for casting purposes inside numexpr */ static void vdConj(MKL_INT n, const double* x1, double* dest) @@ -312,11 +353,11 @@ FuncCCCPtr functions_ccc[] = { char get_return_sig(PyObject* program) -{ +{ // use unsigned chars to match OPCODE table and allow OPCODE > 127 int sig; - char last_opcode; + unsigned char last_opcode; Py_ssize_t end = PyBytes_Size(program); - char *program_str = PyBytes_AS_STRING(program); + unsigned char *program_str = (unsigned char *)PyBytes_AS_STRING(program); do { end -= 4; @@ -464,6 +505,18 @@ check_program(NumExprObject *self) PyErr_Format(PyExc_RuntimeError, "invalid program: funccode out of range (%i) at %i", arg, argloc); return -1; } + } + else if (op == OP_FUNC_BDN) { + if (arg < 0 || arg >= FUNC_BD_LAST) { + PyErr_Format(PyExc_RuntimeError, "invalid program: funccode out of range (%i) at %i", arg, argloc); + return -1; + } + } + else if (op == OP_FUNC_BFN) { + if (arg < 0 || arg >= FUNC_BF_LAST) { + PyErr_Format(PyExc_RuntimeError, "invalid program: funccode out of range (%i) at %i", arg, argloc); + return -1; + } } else if (op >= OP_REDUCTION) { ; } else { diff --git a/numexpr/interpreter.hpp b/numexpr/interpreter.hpp index 93c6e49..f9c6a2f 100644 --- a/numexpr/interpreter.hpp +++ b/numexpr/interpreter.hpp @@ -3,6 +3,10 @@ #include "numexpr_config.hpp" 
+// Wrapper functions for float -> bool (since not defined in std) +inline bool isfinitef_wrapper(float x) noexcept { return std::isfinite(static_cast(x)); } +inline bool isnanf_wrapper(float x) noexcept { return std::isnan(static_cast(x)); } + // Forward declaration struct NumExprObject; @@ -18,6 +22,12 @@ enum FuncFFCodes { #undef FUNC_FF }; +enum FuncBFCodes { +#define FUNC_BF(fop, ...) fop, +#include "functions.hpp" +#undef FUNC_BF +}; + enum FuncFFFCodes { #define FUNC_FFF(fop, ...) fop, #include "functions.hpp" @@ -30,6 +40,12 @@ enum FuncDDCodes { #undef FUNC_DD }; +enum FuncBDCodes { +#define FUNC_BD(fop, ...) fop, +#include "functions.hpp" +#undef FUNC_BD +}; + enum FuncDDDCodes { #define FUNC_DDD(fop, ...) fop, #include "functions.hpp" diff --git a/numexpr/module.cpp b/numexpr/module.cpp index e7d6ded..95f810c 100644 --- a/numexpr/module.cpp +++ b/numexpr/module.cpp @@ -506,6 +506,8 @@ PyInit_interpreter(void) { #define FUNC_FF(name, sname, ...) add_func(name, sname); #define FUNC_FFF(name, sname, ...) add_func(name, sname); #define FUNC_DD(name, sname, ...) add_func(name, sname); +#define FUNC_BF(name, sname, ...) add_func(name, sname); +#define FUNC_BD(name, sname, ...) add_func(name, sname); #define FUNC_DDD(name, sname, ...) add_func(name, sname); #define FUNC_CC(name, sname, ...) add_func(name, sname); #define FUNC_CCC(name, sname, ...) 
add_func(name, sname); @@ -513,7 +515,8 @@ PyInit_interpreter(void) { #undef FUNC_CCC #undef FUNC_CC #undef FUNC_DDD -#undef FUNC_DD +#undef FUNC_BD +#undef FUNC_BF #undef FUNC_DD #undef FUNC_FFF #undef FUNC_FF diff --git a/numexpr/msvc_function_stubs.hpp b/numexpr/msvc_function_stubs.hpp index 0c28f22..92edb6f 100644 --- a/numexpr/msvc_function_stubs.hpp +++ b/numexpr/msvc_function_stubs.hpp @@ -40,6 +40,10 @@ #define atan2f(x, y) ((float)atan2((double)(x), (double)(y))) #define ceilf(x) ((float)ceil((double)(x))) +// Boolean output functions +#define isnanf(x) (isnan((double)(x))) +#define isfinitef(x) (isfinite((double)(x))) + /* The next are directly called from interp_body.cpp */ #define powf(x, y) ((float)pow((double)(x), (double)(y))) #define floorf(x) ((float)floor((double)(x))) @@ -133,6 +137,15 @@ inline float atan2f2(float x, float y) { return atan2f(x, y); } +// Boolean output functions +inline bool isnanf2(float x) { + return isnanf(x); +} + +inline bool isfinitef2(float x) { + return isfinitef(x); +} + // Needed for allowing the internal casting in numexpr machinery for // conjugate operations inline float fconjf2(float x) { diff --git a/numexpr/necompiler.py b/numexpr/necompiler.py index 537f816..036008b 100644 --- a/numexpr/necompiler.py +++ b/numexpr/necompiler.py @@ -69,7 +69,9 @@ "arctan2", "fmod", "ceil", - "floor" + "floor", + "isnan", + "isfinite" ] @@ -520,7 +522,7 @@ def nToChr(reg): return bytes([reg.n]) def quadrupleToString(opcode, store, a1=None, a2=None): - cop = chr(interpreter.opcodes[opcode]).encode('ascii') + cop = chr(interpreter.opcodes[opcode]).encode('latin_1') cs = nToChr(store) ca1 = nToChr(a1) ca2 = nToChr(a2) diff --git a/numexpr/opcodes.hpp b/numexpr/opcodes.hpp index 086c98e..bfd8487 100644 --- a/numexpr/opcodes.hpp +++ b/numexpr/opcodes.hpp @@ -15,6 +15,9 @@ OPCODE(n, enum_name, exported, return_type, arg1_type, arg2_type, arg3_type) Types are Tb, Ti, Tl, Tf, Td, Tc, Ts, Tn, and T0; these symbols should be #defined to 
whatever is needed. (T0 is the no-such-arg type.) +When adding new OPCODES, one has to respect the order of the numeration, as +there are parts of the code (iterations) which assume that the OPCODES are ordered. + */ OPCODE(0, OP_NOOP, "noop", T0, T0, T0, T0) @@ -144,36 +147,46 @@ OPCODE(103, OP_COPY_SS, "copy_ss", Ts, Ts, T0, T0) OPCODE(104, OP_WHERE_BBBB, "where_bbbb", Tb, Tb, Tb, Tb) OPCODE(105, OP_CONTAINS_BSS, "contains_bss", Tb, Ts, Ts, T0) +//Boolean outputs +OPCODE(106, OP_FUNC_BDN, "func_bdn", Tb, Td, Tn, T0) +OPCODE(107, OP_FUNC_BFN, "func_bfn", Tb, Tf, Tn, T0) -OPCODE(106, OP_REDUCTION, NULL, T0, T0, T0, T0) +// Reductions always have to be at the end - parts of the code +// use > OP_REDUCTION to decide whether operation is a reduction +OPCODE(108, OP_REDUCTION, NULL, T0, T0, T0, T0) /* Last argument in a reduction is the axis of the array the reduction should be applied along. */ -OPCODE(107, OP_SUM_IIN, "sum_iin", Ti, Ti, Tn, T0) -OPCODE(108, OP_SUM_LLN, "sum_lln", Tl, Tl, Tn, T0) -OPCODE(109, OP_SUM_FFN, "sum_ffn", Tf, Tf, Tn, T0) -OPCODE(110, OP_SUM_DDN, "sum_ddn", Td, Td, Tn, T0) -OPCODE(111, OP_SUM_CCN, "sum_ccn", Tc, Tc, Tn, T0) - -OPCODE(112, OP_PROD, NULL, T0, T0, T0, T0) -OPCODE(113, OP_PROD_IIN, "prod_iin", Ti, Ti, Tn, T0) -OPCODE(114, OP_PROD_LLN, "prod_lln", Tl, Tl, Tn, T0) -OPCODE(115, OP_PROD_FFN, "prod_ffn", Tf, Tf, Tn, T0) -OPCODE(116, OP_PROD_DDN, "prod_ddn", Td, Td, Tn, T0) -OPCODE(117, OP_PROD_CCN, "prod_ccn", Tc, Tc, Tn, T0) - -OPCODE(118, OP_MIN, NULL, T0, T0, T0, T0) -OPCODE(119, OP_MIN_IIN, "min_iin", Ti, Ti, Tn, T0) -OPCODE(120, OP_MIN_LLN, "min_lln", Tl, Tl, Tn, T0) -OPCODE(121, OP_MIN_FFN, "min_ffn", Tf, Tf, Tn, T0) -OPCODE(122, OP_MIN_DDN, "min_ddn", Td, Td, Tn, T0) - -OPCODE(123, OP_MAX, NULL, T0, T0, T0, T0) -OPCODE(124, OP_MAX_IIN, "max_iin", Ti, Ti, Tn, T0) -OPCODE(125, OP_MAX_LLN, "max_lln", Tl, Tl, Tn, T0) -OPCODE(126, OP_MAX_FFN, "max_ffn", Tf, Tf, Tn, T0) -OPCODE(127, OP_MAX_DDN, "max_ddn", Td, Td, Tn, T0) +OPCODE(109, 
OP_SUM_IIN, "sum_iin", Ti, Ti, Tn, T0) +OPCODE(110, OP_SUM_LLN, "sum_lln", Tl, Tl, Tn, T0) +OPCODE(111, OP_SUM_FFN, "sum_ffn", Tf, Tf, Tn, T0) +OPCODE(112, OP_SUM_DDN, "sum_ddn", Td, Td, Tn, T0) +OPCODE(113, OP_SUM_CCN, "sum_ccn", Tc, Tc, Tn, T0) + +OPCODE(114, OP_PROD, NULL, T0, T0, T0, T0) +OPCODE(115, OP_PROD_IIN, "prod_iin", Ti, Ti, Tn, T0) +OPCODE(116, OP_PROD_LLN, "prod_lln", Tl, Tl, Tn, T0) +OPCODE(117, OP_PROD_FFN, "prod_ffn", Tf, Tf, Tn, T0) +OPCODE(118, OP_PROD_DDN, "prod_ddn", Td, Td, Tn, T0) +OPCODE(119, OP_PROD_CCN, "prod_ccn", Tc, Tc, Tn, T0) + +OPCODE(120, OP_MIN, NULL, T0, T0, T0, T0) +OPCODE(121, OP_MIN_IIN, "min_iin", Ti, Ti, Tn, T0) +OPCODE(122, OP_MIN_LLN, "min_lln", Tl, Tl, Tn, T0) +OPCODE(123, OP_MIN_FFN, "min_ffn", Tf, Tf, Tn, T0) +OPCODE(124, OP_MIN_DDN, "min_ddn", Td, Td, Tn, T0) + +OPCODE(125, OP_MAX, NULL, T0, T0, T0, T0) +OPCODE(126, OP_MAX_IIN, "max_iin", Ti, Ti, Tn, T0) +OPCODE(127, OP_MAX_LLN, "max_lln", Tl, Tl, Tn, T0) +OPCODE(128, OP_MAX_FFN, "max_ffn", Tf, Tf, Tn, T0) +OPCODE(129, OP_MAX_DDN, "max_ddn", Td, Td, Tn, T0) +/* +When we get to 255, will maybe have to change code again +(change latin_1 encoding in necompiler.py, use something +other than unsigned char for OPCODE table) +*/ /* Should be the last opcode */ -OPCODE(128, OP_END, NULL, T0, T0, T0, T0) +OPCODE(130, OP_END, NULL, T0, T0, T0, T0) diff --git a/numexpr/tests/test_numexpr.py b/numexpr/tests/test_numexpr.py index d409165..df294ea 100644 --- a/numexpr/tests/test_numexpr.py +++ b/numexpr/tests/test_numexpr.py @@ -683,7 +683,6 @@ def test_negative_power_scalar(self): out_l = evaluate('base ** -1.0') assert_equal(out_l, np.power(base, -1.0)) - def test_ex_uses_vml(self): vml_funcs = [ "sin", "cos", "tan", "arcsin", "arccos", "arctan", "sinh", "cosh", "tanh", "arcsinh", "arccosh", "arctanh", @@ -694,6 +693,21 @@ def test_ex_uses_vml(self): _, ex_uses_vml = numexpr.necompiler.getExprNames(strexpr, {}) assert_equal(ex_uses_vml, use_vml, strexpr) + def 
test_bool_funcs(self): + # Test functions with boolean outputs + array_size = 100 + dtype = np.float32 + a = np.arange(2 * array_size, dtype=dtype) + a[array_size//2] = np.nan + a[array_size//3] = np.inf + + assert np.all(evaluate("isnan(a)") == np.isnan(a)) + assert np.all(evaluate("isfinite(a)") == np.isfinite(a)) + a = a.astype(np.float64) + assert a.dtype == np.float64 + assert np.all(evaluate("isnan(a)") == np.isnan(a)) + assert np.all(evaluate("isfinite(a)") == np.isfinite(a)) + if 'sparc' not in platform.machine(): # Execution order set here so as to not use too many threads # during the rest of the execution. See #33 for details. @@ -892,7 +906,6 @@ def test_expressions( npval={npval!r} ({type(npval)!r} - {shape(npval)!r}) neval={neval!r} ({type(neval)!r} - {shape(neval)!r}))""" - class test_int64(TestCase): def test_neg(self): a = array([2 ** 31 - 1, 2 ** 31, 2 ** 32, 2 ** 63 - 1], dtype=int64) From 03eba331378e01362b46d31e9987f00a2a484e1d Mon Sep 17 00:00:00 2001 From: Francesc Alted Date: Thu, 28 Aug 2025 14:33:20 +0200 Subject: [PATCH 063/166] Add CXXFLAGS='-std=c++17' to allow noexcept in function pointer typedefs --- .github/workflows/build.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index bfdd27d..3ec137f 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -15,6 +15,7 @@ jobs: CIBW_ARCHS_LINUX: ${{ matrix.arch }} CIBW_ARCHS_MACOS: "x86_64 arm64" CIBW_ENABLE: cpython-freethreading + CXXFLAGS: "-std=c++17" # allow noexcept in function pointer typedefs strategy: fail-fast: false matrix: From 01c254993efc10f3839f7e9e0a0150bd20c7919d Mon Sep 17 00:00:00 2001 From: Luke Shaw Date: Thu, 28 Aug 2025 17:13:07 +0200 Subject: [PATCH 064/166] Fixes for Windows --- numexpr/functions.hpp | 8 ++++---- numexpr/interpreter.cpp | 4 ++-- numexpr/interpreter.hpp | 4 ---- numexpr/msvc_function_stubs.hpp | 19 ++++++++++++------- numexpr/numexpr_config.hpp | 9 +++++++++ 5 files 
changed, 27 insertions(+), 17 deletions(-) diff --git a/numexpr/functions.hpp b/numexpr/functions.hpp index 6715406..ca5ed75 100644 --- a/numexpr/functions.hpp +++ b/numexpr/functions.hpp @@ -91,8 +91,8 @@ FUNC_DD(FUNC_DD_LAST, NULL, NULL, NULL) #define ELIDE_FUNC_BD #define FUNC_BD(...) #endif -FUNC_BD(FUNC_ISNAN_BD, "isnan_bd", isnan, vdIsnan) -FUNC_BD(FUNC_ISFINITE_BD, "isfinite_bd", isfinite, vdIsfinite) +FUNC_BD(FUNC_ISNAN_BD, "isnan_bd", isnand, vdIsnan) +FUNC_BD(FUNC_ISFINITE_BD, "isfinite_bd", isfinited, vdIsfinite) FUNC_BD(FUNC_BD_LAST, NULL, NULL, NULL) #ifdef ELIDE_FUNC_BD #undef ELIDE_FUNC_BD @@ -104,8 +104,8 @@ FUNC_BD(FUNC_BD_LAST, NULL, NULL, NULL) #define ELIDE_FUNC_BF #define FUNC_BF(...) #endif // use wrappers as there is name collision with isnanf in std -FUNC_BF(FUNC_ISNAN_BF, "isnan_bf", isnanf_wrapper, isnanf2, vfIsnan) -FUNC_BF(FUNC_ISFINITE_BF, "isfinite_bf", isfinitef_wrapper, isfinitef2, vfIsfinite) +FUNC_BF(FUNC_ISNAN_BF, "isnan_bf", isnanf_, isnanf2, vfIsnan) +FUNC_BF(FUNC_ISFINITE_BF, "isfinite_bf", isfinitef_, isfinitef2, vfIsfinite) FUNC_BF(FUNC_BF_LAST, NULL, NULL, NULL) #ifdef ELIDE_FUNC_BF #undef ELIDE_FUNC_BF diff --git a/numexpr/interpreter.cpp b/numexpr/interpreter.cpp index e8f5797..d3c811b 100644 --- a/numexpr/interpreter.cpp +++ b/numexpr/interpreter.cpp @@ -204,8 +204,8 @@ FuncDDPtr functions_dd[] = { #undef FUNC_DD }; -// Boolean output functions - need no except due to std definition of isnan/isfinite as int(float) -typedef bool (*FuncBFPtr)(float) noexcept; +// Boolean output functions +typedef bool (*FuncBFPtr)(float); #ifdef _WIN32 FuncBFPtr functions_bf[] = { #define FUNC_BF(fop, s, f, f_win32, ...) 
f_win32, diff --git a/numexpr/interpreter.hpp b/numexpr/interpreter.hpp index f9c6a2f..6a3448a 100644 --- a/numexpr/interpreter.hpp +++ b/numexpr/interpreter.hpp @@ -3,10 +3,6 @@ #include "numexpr_config.hpp" -// Wrapper functions for float -> bool (since not defined in std) -inline bool isfinitef_wrapper(float x) noexcept { return std::isfinite(static_cast(x)); } -inline bool isnanf_wrapper(float x) noexcept { return std::isnan(static_cast(x)); } - // Forward declaration struct NumExprObject; diff --git a/numexpr/msvc_function_stubs.hpp b/numexpr/msvc_function_stubs.hpp index 92edb6f..1b95cb4 100644 --- a/numexpr/msvc_function_stubs.hpp +++ b/numexpr/msvc_function_stubs.hpp @@ -1,3 +1,5 @@ +#include // for _finite, _isnan on MSVC + #ifndef NUMEXPR_MSVC_FUNCTION_STUBS_HPP #define NUMEXPR_MSVC_FUNCTION_STUBS_HPP @@ -40,15 +42,18 @@ #define atan2f(x, y) ((float)atan2((double)(x), (double)(y))) #define ceilf(x) ((float)ceil((double)(x))) -// Boolean output functions -#define isnanf(x) (isnan((double)(x))) -#define isfinitef(x) (isfinite((double)(x))) - /* The next are directly called from interp_body.cpp */ #define powf(x, y) ((float)pow((double)(x), (double)(y))) #define floorf(x) ((float)floor((double)(x))) +#endif // _MSC_VER < 1400 -#endif // _MSC_VER < 1400 +/* Due to casting problems (normally return ints not bools, easiest to define +non-overloaded wrappers for these functions) */ +// MSVC version: use global ::isfinite / ::isnan +inline bool isfinitef_(float x) { return !!::_finite(x); } // MSVC has _finite +inline bool isnanf_(float x) { return !!::_isnan(x); } // MSVC has _isnan +inline bool isfinited(double x) { return !!::_finite(x); } +inline bool isnand(double x) { return !!::_isnan(x); } /* Now the actual stubs */ @@ -139,11 +144,11 @@ inline float atan2f2(float x, float y) { // Boolean output functions inline bool isnanf2(float x) { - return isnanf(x); + return isnanf_(x); } inline bool isfinitef2(float x) { - return isfinitef(x); + return 
isfinitef_(x); } // Needed for allowing the internal casting in numexpr machinery for diff --git a/numexpr/numexpr_config.hpp b/numexpr/numexpr_config.hpp index 2bf0091..335d96d 100644 --- a/numexpr/numexpr_config.hpp +++ b/numexpr/numexpr_config.hpp @@ -46,6 +46,15 @@ #include "missing_posix_functions.hpp" #endif #include "msvc_function_stubs.hpp" +#else +/* GCC/Clang version: use std:: (can't use it for windows) +msvc_function_stubs contains windows alternatives +/* Due to casting problems (normally return ints not bools, easiest to define +non-overloaded wrappers for these functions) */ +inline bool isfinitef_(float x) { return std::isfinite(x); } +inline bool isnanf_(float x) { return std::isnan(x); } +inline bool isfinited(double x) { return std::isfinite(x); } +inline bool isnand(double x) { return std::isnan(x); } #endif #endif // NUMEXPR_CONFIG_HPP From 69f657630c64f5f4240446a4eed30ad96fce646f Mon Sep 17 00:00:00 2001 From: Luke Shaw Date: Thu, 28 Aug 2025 17:39:28 +0200 Subject: [PATCH 065/166] Fix for macos --- .github/workflows/build.yml | 1 - numexpr/numexpr_config.hpp | 8 ++++---- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 3ec137f..bfdd27d 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -15,7 +15,6 @@ jobs: CIBW_ARCHS_LINUX: ${{ matrix.arch }} CIBW_ARCHS_MACOS: "x86_64 arm64" CIBW_ENABLE: cpython-freethreading - CXXFLAGS: "-std=c++17" # allow noexcept in function pointer typedefs strategy: fail-fast: false matrix: diff --git a/numexpr/numexpr_config.hpp b/numexpr/numexpr_config.hpp index 335d96d..4c2b612 100644 --- a/numexpr/numexpr_config.hpp +++ b/numexpr/numexpr_config.hpp @@ -51,10 +51,10 @@ msvc_function_stubs contains windows alternatives /* Due to casting problems (normally return ints not bools, easiest to define non-overloaded wrappers for these functions) */ -inline bool isfinitef_(float x) { return std::isfinite(x); } -inline bool 
isnanf_(float x) { return std::isnan(x); } -inline bool isfinited(double x) { return std::isfinite(x); } -inline bool isnand(double x) { return std::isnan(x); } +inline bool isfinitef_(float x) { return !!::finite(x); } +inline bool isnanf_(float x) { return !!::isnan(x); } +inline bool isfinited(double x) { return !!::finite(x); } +inline bool isnand(double x) { return !!::isnan(x); } #endif #endif // NUMEXPR_CONFIG_HPP From 98306874999bae947861104530c598029962c163 Mon Sep 17 00:00:00 2001 From: Luke Shaw Date: Thu, 28 Aug 2025 17:51:30 +0200 Subject: [PATCH 066/166] Small typo --- numexpr/numexpr_config.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/numexpr/numexpr_config.hpp b/numexpr/numexpr_config.hpp index 4c2b612..3269a06 100644 --- a/numexpr/numexpr_config.hpp +++ b/numexpr/numexpr_config.hpp @@ -51,9 +51,9 @@ msvc_function_stubs contains windows alternatives /* Due to casting problems (normally return ints not bools, easiest to define non-overloaded wrappers for these functions) */ -inline bool isfinitef_(float x) { return !!::finite(x); } +inline bool isfinitef_(float x) { return !!::isfinite(x); } inline bool isnanf_(float x) { return !!::isnan(x); } -inline bool isfinited(double x) { return !!::finite(x); } +inline bool isfinited(double x) { return !!::isfinite(x); } inline bool isnand(double x) { return !!::isnan(x); } #endif From 074b16229e7d63cc28ae5be7acf3361e09d9afc2 Mon Sep 17 00:00:00 2001 From: Luke Shaw Date: Fri, 29 Aug 2025 09:42:18 +0200 Subject: [PATCH 067/166] Fixes for VML, document adding functions --- ADDFUNCS.rst | 212 ++++++++++++++++++++++++++++++++++++++++ numexpr/functions.hpp | 2 +- numexpr/interp_body.cpp | 2 +- 3 files changed, 214 insertions(+), 2 deletions(-) create mode 100644 ADDFUNCS.rst diff --git a/ADDFUNCS.rst b/ADDFUNCS.rst new file mode 100644 index 0000000..aab1a8e --- /dev/null +++ b/ADDFUNCS.rst @@ -0,0 +1,212 @@ +Functions and Function signatures +================================= + 
+Adding functions +---------------- + +In order to add new functions to ``numexpr``, currently it is necessary to edit several files. Consider adding a function +``out_type myfunc(arg_type)``. + +* ``numexpr/expressions.py`` +Add ``'myfunc': func(numpy.myfunc, out_dtype),`` to the dict of functions, ``functions = {...``. If the return type of the function is ``bool``, add +the function to the list ``if opcode in ("isnan", "isfinite"):`` in the ``__init__`` function of the ``FuncNode`` class. +In the future it might be nice to refactor this function since it sets the output type based on the type of the inputs in general. + +* ``numexpr/necompiler.py`` +Add ``"myfunc"`` to the list of functions: +``` +"floor", +"isnan", +"isfinite", +"myfunc" +] +``` + +* ``numexpr/functions.hpp`` +Find the correct function signature ``FUNC_OA`` where ``O`` is the return type, and ``A`` the argument type(s). For example, if the function +is ``double myfunc(double)``, one should edit within the ``FUNC_DD`` clause. If you cannot find your function signature you will have to add it, +following the template of the other functions. +Most likely, you will want to add support for several function signatures (e.g. double -> bool and float -> bool) and so you will have to add the +function in two clauses. If your function has a float input, you will see that there are 5 arguments in the +``FUNC_OA`` macro, and you will have to add ``myfuncf2`` here in order to compile on MSVC machines (i.e. Windows, see following). +Example: +``` +#ifndef FUNC_DD +#define ELIDE_FUNC_DD +#define FUNC_DD(...) +#endif +... 
+FUNC_FF(FUNC_MYFUNC_FF, "myfunc_ff", myfuncf, myfuncf2, vfMyfunc) <--------------------- insert your function +FUNC_FF(FUNC_FF_LAST, NULL, NULL, NULL, NULL) +#ifdef ELIDE_FUNC_FF +#undef ELIDE_FUNC_FF +#undef FUNC_FF +#endif +``` + +* ``numexpr/msvc_function_stubs.hpp`` +In order to support float arguments, due to oddities of MSVC, you have to provide explicit support for your function in this file. +Add ``#define myfuncf(x) ((float)floor((double)(x)))`` (if your function is float -> float) to the ``#if`` clause at the top of the file +which is for old versions of MSVC which did not have support for single precision fucntions. Then in the body, add an inline function +``` +inline float myfuncf2(float x) { + return myfuncf(x); +} +``` +This is the function that appears as the ``f_win32`` parameter in ``functions.hpp``. + +* ``numexpr/tests/test_numexpr.py`` +Don't forget to add a test for your function! + +Adding function signatures +-------------------------- +It may so happen that you cannot find your desired function signature in ``functions.hpp``. This means you will have to add it yourself! +This involves editing a few more files. In addition, there may be certain bespoke changes, specific to the function signature +that you may have to make (see Notes, below) + +* ``numexpr/functions.hpp`` +Firstly, add clause(s) for your function signature. For example, if the function signature is ``bool(double)`` and ``bool(float)``, add +``FUNC_BD`` and ``FUNC_BF`` clauses (in the latter case you will need the macro to take 5 arguments for MSVC-compatibility.) +``` +#ifndef FUNC_BD +#define ELIDE_FUNC_BD +#define FUNC_BD(...) +#endif +... +FUNC_BD(FUNC_BD_LAST, NULL, NULL, NULL) +#ifdef ELIDE_FUNC_BD +#undef ELIDE_FUNC_BD +#undef FUNC_BD +#endif + +#ifndef FUNC_BF +#define ELIDE_FUNC_BF +#define FUNC_BF(...) +#endif +... 
+FUNC_BF(FUNC_BF_LAST, NULL, NULL, NULL, NULL) +#ifdef ELIDE_FUNC_BF +#undef ELIDE_FUNC_BF +#undef FUNC_BF +#endif +``` +The ultimate source of the functions in the macro ``FUNC_BF(...)`` are the headers included in ``numexpr/interpreter.cpp`` (in particular +``numexpr/numexpr_config.hpp``, which can be used to overwrite ```` functions), so the functions should be available from there. + +* ``numexpr/interp_body.cpp`` +Add case support for OPCODES associated to your new function signatures via e.g. ``case OP_FUNC_BFN`` and ``case OP_FUNC_BDN``, following +the framework suggested by the other functions: +``` +case OP_FUNC_BFN: +#ifdef USE_VML + VEC_ARG1_VML(functions_bf_vml[arg2](BLOCK_SIZE, + (float*)x1, (bool*)dest)); +#else + VEC_ARG1(b_dest = functions_bf[arg2](f1)); +#endif +``` +Note that it is important that the out variable matches the output type of the function (i.e. ``b_dest`` for bool, ``f_dest`` for float etc.) + +* ``numexpr/interpreter.hpp`` +Add clauses to read the ``functions.hpp`` macros correctly +``` +enum FuncBFCodes { +#define FUNC_BF(fop, ...) fop, +#include "functions.hpp" +#undef FUNC_BF +}; +``` + +* ``numexpr/interpreter.cpp`` +Add clauses to generate the FUNC_CODES from the ``functions.hpp`` header, making sure to include clauses for ``_WIN32`` and +``VML`` as necessary accoridng to the framework suggested by the other functions. +``` +typedef bool (*FuncBFPtr)(float); +#ifdef _WIN32 +FuncBFPtr functions_bf[] = { +#define FUNC_BF(fop, s, f, f_win32, ...) f_win32, +#include "functions.hpp" +#undef FUNC_BF +}; +#else +FuncBFPtr functions_bf[] = { +#define FUNC_BF(fop, s, f, ...) 
f, +#include "functions.hpp" +#undef FUNC_BF +}; +#endif + +#ifdef USE_VML +typedef void (*FuncBFPtr_vml)(MKL_INT, const float*, bool*); +FuncBFPtr_vml functions_bf_vml[] = { +#define FUNC_BF(fop, s, f, f_win32, f_vml) f_vml, +#include "functions.hpp" +#undef FUNC_BF +}; +#endif +``` + +Add case handling to the ``check_program`` function +``` +else if (op == OP_FUNC_BDN) { + if (arg < 0 || arg >= FUNC_BD_LAST) { + PyErr_Format(PyExc_RuntimeError, "invalid program: funccode out of range (%i) at %i", arg, argloc); + return -1; + } +} +else if (op == OP_FUNC_BFN) { + if (arg < 0 || arg >= FUNC_BF_LAST) { + PyErr_Format(PyExc_RuntimeError, "invalid program: funccode out of range (%i) at %i", arg, argloc); + return -1; + } +} +``` + +* ``numexpr/module.cpp`` +Add code here to define the ``FUNC_OA`` macros you require +``` +#define FUNC_BF(name, sname, ...) add_func(name, sname); +#define FUNC_BD(name, sname, ...) add_func(name, sname); +... +#include "functions.hpp" +... +#undef FUNC_BD +#undef FUNC_BF +``` + +* ``numexpr/opcodes.hpp`` +Finally, add the ``OP_FUNC_BDN`` etc. codes here. It is necessary for the OPCODES in the file to be in (ascending order) with +``NOOP`` as 0 and ``OP_LAST`` as the largest number. Secondly, all reduction OPCODES must appear last. Hence, after adding your +function signatures (just before the reduction OPCODES) it is necessary to increment all succeeding OPCODES. +``` +OPCODE(106, OP_FUNC_BDN, "func_bdn", Tb, Td, Tn, T0) +OPCODE(107, OP_FUNC_BFN, "func_bfn", Tb, Tf, Tn, T0) +``` + +Notes +----- +In many cases this process will not be very smooth since one relies on the internal C/C++ standard functions (which can be fussy, +to varying degrees on different platforms). Some common gotchas are then: +* OPCODES are currently only supported up to 255 - if it becomes necessary to increment further, one will have to change the ``latin_1`` +encoding used in ``quadrupleToString`` in ``necompiler.py``. 
In addition, since the OPCDE table is assumed to be of type ``unsigned char`` +the ``get_return_sig`` function in ``numexpr/interpreter.cpp`` may have to be changed (possibly other changes too). +* Depending on the new function signature (above all if the out type is different to the input types), one may have to edit the ``__init__`` +function in the ``FuncNode`` class in ``expressions.py``. +* Depending on MSVC support, namespace clashes, casting problems, it may be necessary to make various changes to ``numexpr/numexpr_config.hpp`` +and ``numexpr/msvc_function_stubs.hpp``. For example, in PR #523, non-clashing wrappers were introduced for ``isnan`` and ``isfinite`` since +the float versions ``isnanf, isfinitef`` were inconsistently defined (and output ints) - depending on how strict the platform interpreter is, the implicit cast +from int to bool was acceptable or not for example. In addition, the base functions were in different namespaces or had different names across platforms. \ No newline at end of file diff --git a/numexpr/functions.hpp b/numexpr/functions.hpp index ca5ed75..fa766d5 100644 --- a/numexpr/functions.hpp +++ b/numexpr/functions.hpp @@ -106,7 +106,7 @@ FUNC_BD(FUNC_BD_LAST, NULL, NULL, NULL) #endif // use wrappers as there is name collision with isnanf in std FUNC_BF(FUNC_ISNAN_BF, "isnan_bf", isnanf_, isnanf2, vfIsnan) FUNC_BF(FUNC_ISFINITE_BF, "isfinite_bf", isfinitef_, isfinitef2, vfIsfinite) -FUNC_BF(FUNC_BF_LAST, NULL, NULL, NULL) +FUNC_BF(FUNC_BF_LAST, NULL, NULL, NULL, NULL) #ifdef ELIDE_FUNC_BF #undef ELIDE_FUNC_BF #undef FUNC_BF diff --git a/numexpr/interp_body.cpp b/numexpr/interp_body.cpp index 740dc34..297d48d 100644 --- a/numexpr/interp_body.cpp +++ b/numexpr/interp_body.cpp @@ -455,7 +455,7 @@ case OP_FUNC_BFN: #ifdef USE_VML VEC_ARG1_VML(functions_bf_vml[arg2](BLOCK_SIZE, - (float*)x1, (float*)dest)); + (float*)x1, (bool*)dest)); #else VEC_ARG1(b_dest = functions_bf[arg2](f1)); #endif From be2ea71ec9a79734c577a91675f325cf94f10c5d 
Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 29 Aug 2025 07:43:11 +0000 Subject: [PATCH 068/166] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- ADDFUNCS.rst | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/ADDFUNCS.rst b/ADDFUNCS.rst index aab1a8e..586c866 100644 --- a/ADDFUNCS.rst +++ b/ADDFUNCS.rst @@ -4,12 +4,12 @@ Functions and Function signatures Adding functions ---------------- -In order to add new functions to ``numexpr``, currently it is necessary to edit several files. Consider adding a function +In order to add new functions to ``numexpr``, currently it is necessary to edit several files. Consider adding a function ``out_type myfunc(arg_type)``. * ``numexpr/expressions.py`` -Add ``'myfunc': func(numpy.myfunc, out_dtype),`` to the dict of functions, ``functions = {...``. If the return type of the function is ``bool``, add -the function to the list ``if opcode in ("isnan", "isfinite"):`` in the ``__init__`` function of the ``FuncNode`` class. +Add ``'myfunc': func(numpy.myfunc, out_dtype),`` to the dict of functions, ``functions = {...``. If the return type of the function is ``bool``, add +the function to the list ``if opcode in ("isnan", "isfinite"):`` in the ``__init__`` function of the ``FuncNode`` class. In the future it might be nice to refactor this function since it sets the output type based on the type of the inputs in general. * ``numexpr/necompiler.py`` @@ -25,11 +25,11 @@ Add ``"myfunc"`` to the list of functions: * ``numexpr/functions.hpp`` Find the correct function signature ``FUNC_OA`` where ``O`` is the return type, and ``A`` the argument type(s). For example, if the function is ``double myfunc(double)``, one should edit within the ``FUNC_DD`` clause. If you cannot find your function signature you will have to add it, -following the template of the other functions. 
-Most likely, you will want to add support for several function signatures (e.g. double -> bool and float -> bool) and so you will have to add the +following the template of the other functions. +Most likely, you will want to add support for several function signatures (e.g. double -> bool and float -> bool) and so you will have to add the function in two clauses. If your function has a float input, you will see that there are 5 arguments in the ``FUNC_OA`` macro, and you will have to add ``myfunc2`` here is order to compile on MSVC machines (i.e. Windows, see following). -Example: +Example: ``` #ifndef FUNC_DD #define ELIDE_FUNC_DD @@ -75,7 +75,7 @@ Don't forget to add a test for your function! Adding function signatures -------------------------- It may so happen that you cannot find your desired function signature in ``functions.hpp``. This means you will have to add it yourself! -This involves editing a few more files. In addition, there may be certain bespoke changes, specific to the function signature +This involves editing a few more files. In addition, there may be certain bespoke changes, specific to the function signature that you may have to make (see Notes, below) * ``numexpr/functions.hpp`` @@ -132,7 +132,7 @@ enum FuncBFCodes { ``` * ``numexpr/interpreter.cpp`` -Add clauses to generate the FUNC_CODES from the ``functions.hpp`` header, making sure to include clauses for ``_WIN32`` and +Add clauses to generate the FUNC_CODES from the ``functions.hpp`` header, making sure to include clauses for ``_WIN32`` and ``VML`` as necessary accoridng to the framework suggested by the other functions. ``` typedef bool (*FuncBFPtr)(float); @@ -189,7 +189,7 @@ Add code here to define the ``FUNC_OA`` macros you require ``` * ``numexpr/opcodes.hpp`` -Finally, add the ``OP_FUNC_BDN`` etc. codes here. It is necessary for the OPCODES in the file to be in (ascending order) with +Finally, add the ``OP_FUNC_BDN`` etc. codes here. 
It is necessary for the OPCODES in the file to be in (ascending order) with ``NOOP`` as 0 and ``OP_LAST`` as the largest number. Secondly, all reduction OPCODES must appear last. Hence, after adding your function signatures (just before the reduction OPCODES) it is necessary to increment all succeeding OPCODES. ``` @@ -199,14 +199,14 @@ OPCODE(107, OP_FUNC_BFN, "func_bfn", Tb, Tf, Tn, T0) Notes ----- -In many cases this process will not be very smooth since one relies on the internal C/C++ standard functions (which can be fussy, +In many cases this process will not be very smooth since one relies on the internal C/C++ standard functions (which can be fussy, to varying degrees on different platforms). Some common gotchas are then: -* OPCODES are currently only supported up to 255 - if it becomes necessary to increment further, one will have to change the ``latin_1`` +* OPCODES are currently only supported up to 255 - if it becomes necessary to increment further, one will have to change the ``latin_1`` encoding used in ``quadrupleToString`` in ``necompiler.py``. In addition, since the OPCDE table is assumed to be of type ``unsigned char`` the ``get_return_sig`` function in ``numexpr/interpreter.cpp`` may have to be changed (possibly other changes too). -* Depending on the new function signature (above all if the out type is different to the input types), one may have to edit the ``__init__`` +* Depending on the new function signature (above all if the out type is different to the input types), one may have to edit the ``__init__`` function in the ``FuncNode`` class in ``expressions.py``. * Depending on MSVC support, namespace clashes, casting problems, it may be necessary to make various changes to ``numexpr/numexpr_config.hpp`` and ``numexpr/msvc_function_stubs.hpp``. 
For example, in PR #523, non-clashing wrappers were introduced for ``isnan`` and ``isfinite`` since the float versions ``isnanf, isfinitef`` were inconsistently defined (and output ints) - depending on how strict the platform interpreter is, the implicit cast -from int to bool was acceptable or not for example. In addition, the base functions were in different namespaces or had different names across platforms. \ No newline at end of file +from int to bool was acceptable or not for example. In addition, the base functions were in different namespaces or had different names across platforms. From 721d4e3f378093b82b1302b9680e0c1753e98b8a Mon Sep 17 00:00:00 2001 From: Luke Shaw Date: Fri, 29 Aug 2025 10:03:06 +0200 Subject: [PATCH 069/166] Fix ADDFUNCS.rst formatting --- ADDFUNCS.rst | 250 +++++++++++++++++++++++++-------------------------- 1 file changed, 122 insertions(+), 128 deletions(-) diff --git a/ADDFUNCS.rst b/ADDFUNCS.rst index 586c866..45e63cd 100644 --- a/ADDFUNCS.rst +++ b/ADDFUNCS.rst @@ -14,13 +14,12 @@ In the future it might be nice to refactor this function since it sets the outpu * ``numexpr/necompiler.py`` Add ``"myfunc"`` to the list of functions: -``` -"floor", -"isnan", -"isfinite", -"myfunc" -] -``` +.. code-block:: python3 + "floor", + "isnan", + "isfinite", + "myfunc" + ] * ``numexpr/functions.hpp`` Find the correct function signature ``FUNC_OA`` where ``O`` is the return type, and ``A`` the argument type(s). For example, if the function @@ -30,43 +29,43 @@ Most likely, you will want to add support for several function signatures (e.g. function in two clauses. If your function has a float input, you will see that there are 5 arguments in the ``FUNC_OA`` macro, and you will have to add ``myfunc2`` here is order to compile on MSVC machines (i.e. Windows, see following). Example: -``` -#ifndef FUNC_DD -#define ELIDE_FUNC_DD -#define FUNC_DD(...) -#endif -... 
-FUNC_DD(FUNC_MYFUNC_DD, "myfunc_dd", myfunc, vdMyfunc) <--------------------- insert your function -FUNC_DD(FUNC_DD_LAST, NULL, NULL, NULL) -#ifdef ELIDE_FUNC_DD -#undef ELIDE_FUNC_DD -#undef FUNC_DD -#endif - -... - -#ifndef FUNC_FF -#define ELIDE_FUNC_FF -#define FUNC_FF(...) -#endif -... -FUNC_FF(FUNC_MYFUNC_FF, "myfunc_ff", myfuncf, myfuncf2, vfMyfunc) <--------------------- insert your function -FUNC_FF(FUNC_FF_LAST, NULL, NULL, NULL, NULL) -#ifdef ELIDE_FUNC_FF -#undef ELIDE_FUNC_FF -#undef FUNC_FF -#endif -``` +.. code-block:: cpp + :emphasize-lines: 6, 20 + #ifndef FUNC_DD + #define ELIDE_FUNC_DD + #define FUNC_DD(...) + #endif + ... + FUNC_DD(FUNC_MYFUNC_DD, "myfunc_dd", myfunc, vdMyfunc) + FUNC_DD(FUNC_DD_LAST, NULL, NULL, NULL) + #ifdef ELIDE_FUNC_DD + #undef ELIDE_FUNC_DD + #undef FUNC_DD + #endif + + ... + + #ifndef FUNC_FF + #define ELIDE_FUNC_FF + #define FUNC_FF(...) + #endif + ... + FUNC_FF(FUNC_MYFUNC_FF, "myfunc_ff", myfuncf, myfuncf2, vfMyfunc) + FUNC_FF(FUNC_FF_LAST, NULL, NULL, NULL, NULL) + #ifdef ELIDE_FUNC_FF + #undef ELIDE_FUNC_FF + #undef FUNC_FF + #endif * ``numexpr/msvc_function_stubs.hpp`` In order to support float arguments, due to oddities of MSVC, you have to provide explicit support for your function in this file. Add ``#define myfuncf(x) ((float)floor((double)(x)))`` (if your function is float -> float) to the ``#if`` clause at the top of the file which is for old versions of MSVC which did not have support for single precision fucntions. Then in the body, add an inline function -``` -inline float myfuncf2(float x) { - return myfuncf(x); -} -``` +.. code-block:: cpp + inline float myfuncf2(float x) { + return myfuncf(x); + } + This is the function that appears as the ``f_win32`` parameter in ``functions.hpp``. * ``numexpr/tests/test_numexpr.py`` @@ -81,121 +80,116 @@ that you may have to make (see Notes, below) * ``numexpr/functions.hpp`` Firstly, add clause(s) for your function signature. 
For example, if the function signature is ``bool(double)`` and ``bool(float)``, add ``FUNC_BD`` and ``FUNC_BF`` clauses (in the latter case you will need the macro to take 5 arguments for MSVC-compatibility.) -``` -#ifndef FUNC_BD -#define ELIDE_FUNC_BD -#define FUNC_BD(...) -#endif -... -FUNC_BD(FUNC_BD_LAST, NULL, NULL, NULL) -#ifdef ELIDE_FUNC_BD -#undef ELIDE_FUNC_BD -#undef FUNC_BD -#endif - -#ifndef FUNC_BF -#define ELIDE_FUNC_BF -#define FUNC_BF(...) -#endif -... -FUNC_BF(FUNC_BF_LAST, NULL, NULL, NULL, NULL) -#ifdef ELIDE_FUNC_BF -#undef ELIDE_FUNC_BF -#undef FUNC_BF -#endif -``` +.. code-block:: cpp + #ifndef FUNC_BD + #define ELIDE_FUNC_BD + #define FUNC_BD(...) + #endif + ... + FUNC_BD(FUNC_BD_LAST, NULL, NULL, NULL) + #ifdef ELIDE_FUNC_BD + #undef ELIDE_FUNC_BD + #undef FUNC_BD + #endif + + #ifndef FUNC_BF + #define ELIDE_FUNC_BF + #define FUNC_BF(...) + #endif + ... + FUNC_BF(FUNC_BF_LAST, NULL, NULL, NULL, NULL) + #ifdef ELIDE_FUNC_BF + #undef ELIDE_FUNC_BF + #undef FUNC_BF + #endif + The ultimate source of the functions in the macro ``FUNC_BF(...)`` are the headers included in ``numexpr/interpreter.cpp`` (in particular ``numexpr/numexpr_config.hpp``, which can be used to overwrite ```` functions), so the functions should be available from there. * ``numexpr/interp_body.cpp`` Add case support for OPCODES associated to your new function signatures via e.g. ``case OP_FUNC_BFN`` and ``case OP_FUNC_BDN``, following the framework suggested by the other functions: -``` -case OP_FUNC_BFN: -#ifdef USE_VML - VEC_ARG1_VML(functions_bf_vml[arg2](BLOCK_SIZE, - (float*)x1, (bool*)dest)); -#else - VEC_ARG1(b_dest = functions_bf[arg2](f1)); -#endif -``` +.. code-block:: cpp + case OP_FUNC_BFN: + #ifdef USE_VML + VEC_ARG1_VML(functions_bf_vml[arg2](BLOCK_SIZE, + (float*)x1, (bool*)dest)); + #else + VEC_ARG1(b_dest = functions_bf[arg2](f1)); + #endif + Note that it is important that the out variable matches the output type of the function (i.e. 
``b_dest`` for bool, ``f_dest`` for float etc.) * ``numexpr/interpreter.hpp`` Add clauses to read the ``functions.hpp`` macros correctly -``` -enum FuncBFCodes { -#define FUNC_BF(fop, ...) fop, -#include "functions.hpp" -#undef FUNC_BF -}; -``` +.. code-block:: cpp + enum FuncBFCodes { + #define FUNC_BF(fop, ...) fop, + #include "functions.hpp" + #undef FUNC_BF + }; * ``numexpr/interpreter.cpp`` Add clauses to generate the FUNC_CODES from the ``functions.hpp`` header, making sure to include clauses for ``_WIN32`` and ``VML`` as necessary accoridng to the framework suggested by the other functions. -``` -typedef bool (*FuncBFPtr)(float); -#ifdef _WIN32 -FuncBFPtr functions_bf[] = { -#define FUNC_BF(fop, s, f, f_win32, ...) f_win32, -#include "functions.hpp" -#undef FUNC_BF -}; -#else -FuncBFPtr functions_bf[] = { -#define FUNC_BF(fop, s, f, ...) f, -#include "functions.hpp" -#undef FUNC_BF -}; -#endif - -#ifdef USE_VML -typedef void (*FuncBFPtr_vml)(MKL_INT, const float*, bool*); -FuncBFPtr_vml functions_bf_vml[] = { -#define FUNC_BF(fop, s, f, f_win32, f_vml) f_vml, -#include "functions.hpp" -#undef FUNC_BF -}; -#endif -``` +.. code-block:: cpp + typedef bool (*FuncBFPtr)(float); + #ifdef _WIN32 + FuncBFPtr functions_bf[] = { + #define FUNC_BF(fop, s, f, f_win32, ...) f_win32, + #include "functions.hpp" + #undef FUNC_BF + }; + #else + FuncBFPtr functions_bf[] = { + #define FUNC_BF(fop, s, f, ...) f, + #include "functions.hpp" + #undef FUNC_BF + }; + #endif + + #ifdef USE_VML + typedef void (*FuncBFPtr_vml)(MKL_INT, const float*, bool*); + FuncBFPtr_vml functions_bf_vml[] = { + #define FUNC_BF(fop, s, f, f_win32, f_vml) f_vml, + #include "functions.hpp" + #undef FUNC_BF + }; + #endif Add case handling to the ``check_program`` function -``` -else if (op == OP_FUNC_BDN) { - if (arg < 0 || arg >= FUNC_BD_LAST) { - PyErr_Format(PyExc_RuntimeError, "invalid program: funccode out of range (%i) at %i", arg, argloc); - return -1; +.. 
code-block:: cpp + else if (op == OP_FUNC_BDN) { + if (arg < 0 || arg >= FUNC_BD_LAST) { + PyErr_Format(PyExc_RuntimeError, "invalid program: funccode out of range (%i) at %i", arg, argloc); + return -1; + } } -} -else if (op == OP_FUNC_BFN) { - if (arg < 0 || arg >= FUNC_BF_LAST) { - PyErr_Format(PyExc_RuntimeError, "invalid program: funccode out of range (%i) at %i", arg, argloc); - return -1; + else if (op == OP_FUNC_BFN) { + if (arg < 0 || arg >= FUNC_BF_LAST) { + PyErr_Format(PyExc_RuntimeError, "invalid program: funccode out of range (%i) at %i", arg, argloc); + return -1; + } } -} -``` * ``numexpr/module.cpp`` Add code here to define the ``FUNC_OA`` macros you require -``` -#define FUNC_BF(name, sname, ...) add_func(name, sname); -#define FUNC_BD(name, sname, ...) add_func(name, sname); -... -#include "functions.hpp" -... -#undef FUNC_BD -#undef FUNC_BF -``` +.. code-block:: cpp + #define FUNC_BF(name, sname, ...) add_func(name, sname); + #define FUNC_BD(name, sname, ...) add_func(name, sname); + ... + #include "functions.hpp" + ... + #undef FUNC_BD + #undef FUNC_BF * ``numexpr/opcodes.hpp`` Finally, add the ``OP_FUNC_BDN`` etc. codes here. It is necessary for the OPCODES in the file to be in (ascending order) with ``NOOP`` as 0 and ``OP_LAST`` as the largest number. Secondly, all reduction OPCODES must appear last. Hence, after adding your function signatures (just before the reduction OPCODES) it is necessary to increment all succeeding OPCODES. -``` -OPCODE(106, OP_FUNC_BDN, "func_bdn", Tb, Td, Tn, T0) -OPCODE(107, OP_FUNC_BFN, "func_bfn", Tb, Tf, Tn, T0) -``` +.. 
code-block:: cpp + OPCODE(106, OP_FUNC_BDN, "func_bdn", Tb, Td, Tn, T0) + OPCODE(107, OP_FUNC_BFN, "func_bfn", Tb, Tf, Tn, T0) Notes ----- From a7161c4ea42df0eda47f9dc64b91b119f2a940cc Mon Sep 17 00:00:00 2001 From: Luke Shaw Date: Fri, 29 Aug 2025 10:04:59 +0200 Subject: [PATCH 070/166] More fixes to ADDFUNCS.rst formatting --- ADDFUNCS.rst | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/ADDFUNCS.rst b/ADDFUNCS.rst index 45e63cd..bb651b4 100644 --- a/ADDFUNCS.rst +++ b/ADDFUNCS.rst @@ -14,7 +14,9 @@ In the future it might be nice to refactor this function since it sets the outpu * ``numexpr/necompiler.py`` Add ``"myfunc"`` to the list of functions: + .. code-block:: python3 + "floor", "isnan", "isfinite", @@ -29,8 +31,10 @@ Most likely, you will want to add support for several function signatures (e.g. function in two clauses. If your function has a float input, you will see that there are 5 arguments in the ``FUNC_OA`` macro, and you will have to add ``myfunc2`` here is order to compile on MSVC machines (i.e. Windows, see following). Example: + .. code-block:: cpp :emphasize-lines: 6, 20 + #ifndef FUNC_DD #define ELIDE_FUNC_DD #define FUNC_DD(...) @@ -61,7 +65,9 @@ Example: In order to support float arguments, due to oddities of MSVC, you have to provide explicit support for your function in this file. Add ``#define myfuncf(x) ((float)floor((double)(x)))`` (if your function is float -> float) to the ``#if`` clause at the top of the file which is for old versions of MSVC which did not have support for single precision fucntions. Then in the body, add an inline function + .. code-block:: cpp + inline float myfuncf2(float x) { return myfuncf(x); } @@ -80,7 +86,9 @@ that you may have to make (see Notes, below) * ``numexpr/functions.hpp`` Firstly, add clause(s) for your function signature. 
For example, if the function signature is ``bool(double)`` and ``bool(float)``, add ``FUNC_BD`` and ``FUNC_BF`` clauses (in the latter case you will need the macro to take 5 arguments for MSVC-compatibility.) + .. code-block:: cpp + #ifndef FUNC_BD #define ELIDE_FUNC_BD #define FUNC_BD(...) @@ -109,7 +117,9 @@ The ultimate source of the functions in the macro ``FUNC_BF(...)`` are the heade * ``numexpr/interp_body.cpp`` Add case support for OPCODES associated to your new function signatures via e.g. ``case OP_FUNC_BFN`` and ``case OP_FUNC_BDN``, following the framework suggested by the other functions: + .. code-block:: cpp + case OP_FUNC_BFN: #ifdef USE_VML VEC_ARG1_VML(functions_bf_vml[arg2](BLOCK_SIZE, @@ -122,7 +132,9 @@ Note that it is important that the out variable matches the output type of the f * ``numexpr/interpreter.hpp`` Add clauses to read the ``functions.hpp`` macros correctly + .. code-block:: cpp + enum FuncBFCodes { #define FUNC_BF(fop, ...) fop, #include "functions.hpp" @@ -132,7 +144,9 @@ Add clauses to read the ``functions.hpp`` macros correctly * ``numexpr/interpreter.cpp`` Add clauses to generate the FUNC_CODES from the ``functions.hpp`` header, making sure to include clauses for ``_WIN32`` and ``VML`` as necessary accoridng to the framework suggested by the other functions. + .. code-block:: cpp + typedef bool (*FuncBFPtr)(float); #ifdef _WIN32 FuncBFPtr functions_bf[] = { @@ -158,7 +172,9 @@ Add clauses to generate the FUNC_CODES from the ``functions.hpp`` header, making #endif Add case handling to the ``check_program`` function + .. code-block:: cpp + else if (op == OP_FUNC_BDN) { if (arg < 0 || arg >= FUNC_BD_LAST) { PyErr_Format(PyExc_RuntimeError, "invalid program: funccode out of range (%i) at %i", arg, argloc); @@ -174,7 +190,9 @@ Add case handling to the ``check_program`` function * ``numexpr/module.cpp`` Add code here to define the ``FUNC_OA`` macros you require + .. code-block:: cpp + #define FUNC_BF(name, sname, ...) 
add_func(name, sname); #define FUNC_BD(name, sname, ...) add_func(name, sname); ... @@ -187,7 +205,9 @@ Add code here to define the ``FUNC_OA`` macros you require Finally, add the ``OP_FUNC_BDN`` etc. codes here. It is necessary for the OPCODES in the file to be in (ascending order) with ``NOOP`` as 0 and ``OP_LAST`` as the largest number. Secondly, all reduction OPCODES must appear last. Hence, after adding your function signatures (just before the reduction OPCODES) it is necessary to increment all succeeding OPCODES. + .. code-block:: cpp + OPCODE(106, OP_FUNC_BDN, "func_bdn", Tb, Td, Tn, T0) OPCODE(107, OP_FUNC_BFN, "func_bfn", Tb, Tf, Tn, T0) From e7d0787693587b9ac480e6883b878ff3eb1fa413 Mon Sep 17 00:00:00 2001 From: Luke Shaw Date: Fri, 29 Aug 2025 10:06:09 +0200 Subject: [PATCH 071/166] Final fix to ADDFUNCS.rst formatting --- ADDFUNCS.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/ADDFUNCS.rst b/ADDFUNCS.rst index bb651b4..8cce037 100644 --- a/ADDFUNCS.rst +++ b/ADDFUNCS.rst @@ -215,11 +215,14 @@ Notes ----- In many cases this process will not be very smooth since one relies on the internal C/C++ standard functions (which can be fussy, to varying degrees on different platforms). Some common gotchas are then: + * OPCODES are currently only supported up to 255 - if it becomes necessary to increment further, one will have to change the ``latin_1`` encoding used in ``quadrupleToString`` in ``necompiler.py``. In addition, since the OPCDE table is assumed to be of type ``unsigned char`` the ``get_return_sig`` function in ``numexpr/interpreter.cpp`` may have to be changed (possibly other changes too). + * Depending on the new function signature (above all if the out type is different to the input types), one may have to edit the ``__init__`` function in the ``FuncNode`` class in ``expressions.py``. 
+ * Depending on MSVC support, namespace clashes, casting problems, it may be necessary to make various changes to ``numexpr/numexpr_config.hpp`` and ``numexpr/msvc_function_stubs.hpp``. For example, in PR #523, non-clashing wrappers were introduced for ``isnan`` and ``isfinite`` since the float versions ``isnanf, isfinitef`` were inconsistently defined (and output ints) - depending on how strict the platform interpreter is, the implicit cast From 0026b68638fa2f79788c20049e4530cc3fa1b724 Mon Sep 17 00:00:00 2001 From: Luke Shaw Date: Fri, 29 Aug 2025 10:07:51 +0200 Subject: [PATCH 072/166] Final fix 2 to ADDFUNCS.rst formatting --- ADDFUNCS.rst | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/ADDFUNCS.rst b/ADDFUNCS.rst index 8cce037..afd348b 100644 --- a/ADDFUNCS.rst +++ b/ADDFUNCS.rst @@ -213,17 +213,10 @@ function signatures (just before the reduction OPCODES) it is necessary to incre Notes ----- -In many cases this process will not be very smooth since one relies on the internal C/C++ standard functions (which can be fussy, -to varying degrees on different platforms). Some common gotchas are then: +In many cases this process will not be very smooth since one relies on the internal C/C++ standard functions (which can be fussy, to varying degrees on different platforms). Some common gotchas are then: -* OPCODES are currently only supported up to 255 - if it becomes necessary to increment further, one will have to change the ``latin_1`` -encoding used in ``quadrupleToString`` in ``necompiler.py``. In addition, since the OPCDE table is assumed to be of type ``unsigned char`` -the ``get_return_sig`` function in ``numexpr/interpreter.cpp`` may have to be changed (possibly other changes too). +* OPCODES are currently only supported up to 255 - if it becomes necessary to increment further, one will have to change the ``latin_1`` encoding used in ``quadrupleToString`` in ``necompiler.py``. 
In addition, since the OPCODE table is assumed to be of type ``unsigned char`` the ``get_return_sig`` function in ``numexpr/interpreter.cpp`` may have to be changed (possibly other changes too). -* Depending on the new function signature (above all if the out type is different to the input types), one may have to edit the ``__init__`` -function in the ``FuncNode`` class in ``expressions.py``. +* Depending on the new function signature (above all if the out type is different to the input types), one may have to edit the ``__init__`` function in the ``FuncNode`` class in ``expressions.py``. -* Depending on MSVC support, namespace clashes, casting problems, it may be necessary to make various changes to ``numexpr/numexpr_config.hpp`` -and ``numexpr/msvc_function_stubs.hpp``. For example, in PR #523, non-clashing wrappers were introduced for ``isnan`` and ``isfinite`` since -the float versions ``isnanf, isfinitef`` were inconsistently defined (and output ints) - depending on how strict the platform interpreter is, the implicit cast -from int to bool was acceptable or not for example. In addition, the base functions were in different namespaces or had different names across platforms. +* Depending on MSVC support, namespace clashes, casting problems, it may be necessary to make various changes to ``numexpr/numexpr_config.hpp`` and ``numexpr/msvc_function_stubs.hpp``. For example, in PR #523, non-clashing wrappers were introduced for ``isnan`` and ``isfinite`` since the float versions ``isnanf, isfinitef`` were inconsistently defined (and output ints) - depending on how strict the platform interpreter is, the implicit cast from int to bool was acceptable or not for example. In addition, the base functions were in different namespaces or had different names across platforms.
From 9e901d43eea7cc354f0662f7c68a0152ebb753d3 Mon Sep 17 00:00:00 2001 From: Luke Shaw Date: Fri, 29 Aug 2025 13:58:41 +0200 Subject: [PATCH 073/166] Change default casting from safe to same_kind --- numexpr/necompiler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/numexpr/necompiler.py b/numexpr/necompiler.py index 036008b..15b4571 100644 --- a/numexpr/necompiler.py +++ b/numexpr/necompiler.py @@ -906,7 +906,7 @@ def evaluate(ex: str, global_dict: Optional[Dict] = None, out: numpy.ndarray = None, order: str = 'K', - casting: str = 'safe', + casting: str = 'same_kind', sanitize: Optional[bool] = None, _frame_depth: int = 3, **kwargs) -> numpy.ndarray: From 80b133618090b0c0014ba23b11eaa573876b859e Mon Sep 17 00:00:00 2001 From: Luke Shaw Date: Tue, 9 Sep 2025 12:44:02 +0200 Subject: [PATCH 074/166] Add isinf function --- numexpr/expressions.py | 3 ++- numexpr/functions.hpp | 2 ++ numexpr/msvc_function_stubs.hpp | 8 +++++++- numexpr/necompiler.py | 3 ++- numexpr/numexpr_config.hpp | 2 ++ numexpr/tests/test_numexpr.py | 2 ++ 6 files changed, 17 insertions(+), 3 deletions(-) diff --git a/numexpr/expressions.py b/numexpr/expressions.py index 9540fe4..db5490d 100644 --- a/numexpr/expressions.py +++ b/numexpr/expressions.py @@ -368,6 +368,7 @@ def multiply(x, y): 'isnan': func(numpy.isnan, 'bool'), 'isfinite': func(numpy.isfinite, 'bool'), + 'isinf': func(numpy.isinf, 'bool'), 'sum': gen_reduce_axis_func('sum'), 'prod': gen_reduce_axis_func('prod'), @@ -524,6 +525,6 @@ class FuncNode(OpNode): def __init__(self, opcode=None, args=None, kind=None): if (kind is None) and (args is not None): kind = commonKind(args) - if opcode in ("isnan", "isfinite"): # bodge for boolean return functions + if opcode in ("isnan", "isfinite", "isinf"): # bodge for boolean return functions kind = 'bool' OpNode.__init__(self, opcode, args, kind) diff --git a/numexpr/functions.hpp b/numexpr/functions.hpp index fa766d5..a3344b6 100644 --- a/numexpr/functions.hpp +++ 
b/numexpr/functions.hpp @@ -93,6 +93,7 @@ FUNC_DD(FUNC_DD_LAST, NULL, NULL, NULL) #endif FUNC_BD(FUNC_ISNAN_BD, "isnan_bd", isnand, vdIsnan) FUNC_BD(FUNC_ISFINITE_BD, "isfinite_bd", isfinited, vdIsfinite) +FUNC_BD(FUNC_ISINF_BD, "isinf_bd", isinfd, vdIsinf) FUNC_BD(FUNC_BD_LAST, NULL, NULL, NULL) #ifdef ELIDE_FUNC_BD #undef ELIDE_FUNC_BD @@ -106,6 +107,7 @@ FUNC_BD(FUNC_BD_LAST, NULL, NULL, NULL) #endif // use wrappers as there is name collision with isnanf in std FUNC_BF(FUNC_ISNAN_BF, "isnan_bf", isnanf_, isnanf2, vfIsnan) FUNC_BF(FUNC_ISFINITE_BF, "isfinite_bf", isfinitef_, isfinitef2, vfIsfinite) +FUNC_BF(FUNC_ISINF_BF, "isinf_bf", isinff_, isinff2, vfIsinf) FUNC_BF(FUNC_BF_LAST, NULL, NULL, NULL, NULL) #ifdef ELIDE_FUNC_BF #undef ELIDE_FUNC_BF diff --git a/numexpr/msvc_function_stubs.hpp b/numexpr/msvc_function_stubs.hpp index 1b95cb4..6b77736 100644 --- a/numexpr/msvc_function_stubs.hpp +++ b/numexpr/msvc_function_stubs.hpp @@ -54,7 +54,8 @@ inline bool isfinitef_(float x) { return !!::_finite(x); } // MSVC has _finite inline bool isnanf_(float x) { return !!::_isnan(x); } // MSVC has _isnan inline bool isfinited(double x) { return !!::_finite(x); } inline bool isnand(double x) { return !!::_isnan(x); } - +inline bool isinfd(double x) { return !!::_isinf(x); } +inline bool isinff_(float x) { return !!::_isinf(x); } /* Now the actual stubs */ @@ -151,6 +152,11 @@ inline bool isfinitef2(float x) { return isfinitef_(x); } +inline bool isinff2(float x) { + return isinff_(x); +} + + // Needed for allowing the internal casting in numexpr machinery for // conjugate operations inline float fconjf2(float x) { diff --git a/numexpr/necompiler.py b/numexpr/necompiler.py index 15b4571..aea9dfc 100644 --- a/numexpr/necompiler.py +++ b/numexpr/necompiler.py @@ -71,7 +71,8 @@ "ceil", "floor", "isnan", - "isfinite" + "isfinite", + "isinf", ] diff --git a/numexpr/numexpr_config.hpp b/numexpr/numexpr_config.hpp index 3269a06..abb5539 100644 --- a/numexpr/numexpr_config.hpp +++ 
b/numexpr/numexpr_config.hpp @@ -55,6 +55,8 @@ inline bool isfinitef_(float x) { return !!::isfinite(x); } inline bool isnanf_(float x) { return !!::isnan(x); } inline bool isfinited(double x) { return !!::isfinite(x); } inline bool isnand(double x) { return !!::isnan(x); } +inline bool isinff_(float x) { return !!::isinf(x); } +inline bool isinfd(double x) { return !!::isinf(x); } #endif #endif // NUMEXPR_CONFIG_HPP diff --git a/numexpr/tests/test_numexpr.py b/numexpr/tests/test_numexpr.py index df294ea..10ce851 100644 --- a/numexpr/tests/test_numexpr.py +++ b/numexpr/tests/test_numexpr.py @@ -703,10 +703,12 @@ def test_bool_funcs(self): assert np.all(evaluate("isnan(a)") == np.isnan(a)) assert np.all(evaluate("isfinite(a)") == np.isfinite(a)) + assert np.all(evaluate("isinf(a)") == np.isinf(a)) a = a.astype(np.float64) assert a.dtype == np.float64 assert np.all(evaluate("isnan(a)") == np.isnan(a)) assert np.all(evaluate("isfinite(a)") == np.isfinite(a)) + assert np.all(evaluate("isinf(a)") == np.isinf(a)) if 'sparc' not in platform.machine(): # Execution order set here so as to not use too many threads From a19783c14f383b55461fb28421a85f95d609dc75 Mon Sep 17 00:00:00 2001 From: Luke Shaw Date: Tue, 9 Sep 2025 13:04:24 +0200 Subject: [PATCH 075/166] Fixes for windows --- numexpr/msvc_function_stubs.hpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/numexpr/msvc_function_stubs.hpp b/numexpr/msvc_function_stubs.hpp index 6b77736..f6aaf88 100644 --- a/numexpr/msvc_function_stubs.hpp +++ b/numexpr/msvc_function_stubs.hpp @@ -54,8 +54,9 @@ inline bool isfinitef_(float x) { return !!::_finite(x); } // MSVC has _finite inline bool isnanf_(float x) { return !!::_isnan(x); } // MSVC has _isnan inline bool isfinited(double x) { return !!::_finite(x); } inline bool isnand(double x) { return !!::_isnan(x); } -inline bool isinfd(double x) { return !!::_isinf(x); } -inline bool isinff_(float x) { return !!::_isinf(x); } + +inline bool isinfd(double x) { 
return !!::isinf(x)(x); } +inline bool isinff_(float x) { return !!::isinf(x)(x); } /* Now the actual stubs */ From d8a42ff1c8ab507c0690b31152ddb457cd9aa268 Mon Sep 17 00:00:00 2001 From: Luke Shaw Date: Tue, 9 Sep 2025 13:10:48 +0200 Subject: [PATCH 076/166] Fixes for windows --- numexpr/msvc_function_stubs.hpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/numexpr/msvc_function_stubs.hpp b/numexpr/msvc_function_stubs.hpp index f6aaf88..27ea54b 100644 --- a/numexpr/msvc_function_stubs.hpp +++ b/numexpr/msvc_function_stubs.hpp @@ -54,9 +54,8 @@ inline bool isfinitef_(float x) { return !!::_finite(x); } // MSVC has _finite inline bool isnanf_(float x) { return !!::_isnan(x); } // MSVC has _isnan inline bool isfinited(double x) { return !!::_finite(x); } inline bool isnand(double x) { return !!::_isnan(x); } - -inline bool isinfd(double x) { return !!::isinf(x)(x); } -inline bool isinff_(float x) { return !!::isinf(x)(x); } +inline bool isinfd(double x) { return !!::isinf(x); } +inline bool isinff_(float x) { return !!::isinf(x); } /* Now the actual stubs */ From c7f6e81b1277611e027ac3e68bcc1b97f0d12f19 Mon Sep 17 00:00:00 2001 From: Francesc Alted Date: Wed, 10 Sep 2025 18:18:34 +0200 Subject: [PATCH 077/166] Getting ready for release 2.12.0 --- ANNOUNCE.rst | 26 +++++++++++--------------- RELEASE_NOTES.rst | 13 ++++++++++--- VERSION | 2 +- 3 files changed, 22 insertions(+), 19 deletions(-) diff --git a/ANNOUNCE.rst b/ANNOUNCE.rst index e3d7285..885b2b3 100644 --- a/ANNOUNCE.rst +++ b/ANNOUNCE.rst @@ -1,33 +1,29 @@ ========================= -Announcing NumExpr 2.11.0 +Announcing NumExpr 2.12.0 ========================= Hi everyone, -NumExpr 2.11.0 Initial support for free-threaded Python 3.13t has been added. -This is still experimental, so please report any issues you find. -Finally, Python 3.10 is now the minimum supported version. +NumExpr 2.12.0 comes with new isnan/isfinite/isinf functions. 
+Most importantly, we have added instructions for adding new functions +to the virtual machine. See ADDFUNCS.rst for more details. Thanks to +Luke Shaw for these contributions. Project documentation is available at: http://numexpr.readthedocs.io/ -Changes from 2.10.2 to 2.11.0 +Changes from 2.11.0 to 2.12.0 ----------------------------- -* Initial support for free-threaded Python 3.13t has been added. - This is still experimental, so please report any issues you find. - For more info, see discussions PRs #504, #505 and #508. - Thanks to @andfoy, @rgommers and @FrancescAlted for the work. +* Added isnan/isfinite/isinf functions. Thanks to Luke Shaw. -* Fix imaginary evaluation in the form of `1.1e1j`. This was - previously not supported and would raise an error. Thanks to @27rabbitlt - for the fix. +* New instructions for adding new functions to the virtual machine. + They are available at ADDFUNCS.rst. Thanks to Luke Shaw. -* The test suite has been modernized to use `pytest` instead of `unittest`. - This should make it easier to run the tests and contribute to the project. +* We are distributing binary wheels for Python 3.14 and 3.14t now. -* Python 3.10 is now the minimum supported version. +* We are distributing musllinux wheels too! Thanks to Clément Robert. What's Numexpr? --------------- diff --git a/RELEASE_NOTES.rst b/RELEASE_NOTES.rst index 9b133d9..1b75a06 100644 --- a/RELEASE_NOTES.rst +++ b/RELEASE_NOTES.rst @@ -1,11 +1,18 @@ ===================================== -Release notes for NumExpr 2.11 series +Release notes for NumExpr 2.12 series ===================================== -Changes from 2.11.0 to 2.11.1 +Changes from 2.11.0 to 2.12.0 ----------------------------- -* **Under development.** +* Added isnan/isfinite/isinf functions. Thanks to Luke Shaw. + +* New instructions for adding new functions to the virtual machine. + They are available at ADDFUNCS.rst. Thanks to Luke Shaw. + +* We are distributing binary wheels for Python 3.14 and 3.14t now. 
+ +* We are distributing musllinux wheels too! Thanks to Clément Robert. Changes from 2.10.2 to 2.11.0 diff --git a/VERSION b/VERSION index 2cc9678..d8b6989 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -2.11.1.dev0 +2.12.0 From 8dd6ab3d15cabdc6a1b64f0d191942bbd4f38e8d Mon Sep 17 00:00:00 2001 From: Francesc Alted Date: Wed, 10 Sep 2025 19:03:27 +0200 Subject: [PATCH 078/166] Post 2.12.0 release actions done --- README.rst | 2 +- RELEASE_NOTES.rst | 5 +++++ VERSION | 2 +- 3 files changed, 7 insertions(+), 2 deletions(-) diff --git a/README.rst b/README.rst index 264fd2b..98069ce 100644 --- a/README.rst +++ b/README.rst @@ -106,7 +106,7 @@ See `requirements.txt` for the required version of NumPy. NumExpr is built in the standard Python way:: - python setup.py build install + pip install [-e] . You can test `numexpr` with:: diff --git a/RELEASE_NOTES.rst b/RELEASE_NOTES.rst index 1b75a06..3f2da48 100644 --- a/RELEASE_NOTES.rst +++ b/RELEASE_NOTES.rst @@ -2,6 +2,11 @@ Release notes for NumExpr 2.12 series ===================================== +Changes from 2.12.0 to 2.12.1 +----------------------------- + +* **Under development.** + Changes from 2.11.0 to 2.12.0 ----------------------------- diff --git a/VERSION b/VERSION index d8b6989..a791a25 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -2.12.0 +2.12.1.dev0 From 11b19519658b094ab61ce83bc52df4b75b5e96de Mon Sep 17 00:00:00 2001 From: Francesc Alted Date: Thu, 11 Sep 2025 11:03:23 +0200 Subject: [PATCH 079/166] Update instructions for compiling with MKL --- site.cfg.example | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/site.cfg.example b/site.cfg.example index c8e2dfb..1c0de0b 100644 --- a/site.cfg.example +++ b/site.cfg.example @@ -2,10 +2,12 @@ # file to "site.cfg" and edit the paths according to your installation of the # Intel MKL. -# Example for Intel(R) MKL 2018 on Linux +# Example for Intel(R) OneAPI MKL 2025 on Linux +# When compiling (with e.g. `pip install -e. 
-v`), first do a: +# $ source /opt/intel/oneapi/setvars.sh >/dev/null 2>&1 || true # [mkl] -# library_dirs = /opt/intel/compilers_and_libraries_2018/linux/mkl/lib/intel64 -# include_dirs = /opt/intel/compilers_and_libraries_2018/linux/mkl/include +# include_dirs = /opt/intel/oneapi/mkl/latest/include +# library_dirs = /opt/intel/oneapi/mkl/latest/lib/intel64 # libraries = mkl_rt From 9c328137a5a991c6d1c5e93d03b6cd4d377cf83b Mon Sep 17 00:00:00 2001 From: Luke Shaw Date: Thu, 11 Sep 2025 11:03:47 +0200 Subject: [PATCH 080/166] Added complex->bool funcs and try to fix vml for boolean output funcs --- numexpr/complex_functions.hpp | 29 +++++++++++ numexpr/functions.hpp | 14 ++++++ numexpr/interp_body.cpp | 11 +++++ numexpr/interpreter.cpp | 91 ++++++++++++++++++++++++++++++++++- numexpr/interpreter.hpp | 6 +++ numexpr/module.cpp | 2 + numexpr/opcodes.hpp | 53 ++++++++++---------- numexpr/tests/test_numexpr.py | 5 ++ 8 files changed, 184 insertions(+), 27 deletions(-) diff --git a/numexpr/complex_functions.hpp b/numexpr/complex_functions.hpp index ae89167..fd92068 100644 --- a/numexpr/complex_functions.hpp +++ b/numexpr/complex_functions.hpp @@ -424,4 +424,33 @@ nc_abs(std::complex *x, std::complex *r) r->imag(0); } +static bool +nc_isinf(std::complex *x) +{ + double xr=x->real(), xi=x->imag(); + bool bi,br; + bi = isinfd(xi); + br = isinfd(xr); + return bi || br; +} + +static bool +nc_isnan(std::complex *x) +{ + double xr=x->real(), xi=x->imag(); + bool bi,br; + bi = isnand(xi); + br = isnand(xr); + return bi || br; +} + +static bool +nc_isfinite(std::complex *x) +{ + double xr=x->real(), xi=x->imag(); + bool bi,br; + bi = isfinited(xi); + br = isfinited(xr); + return bi && br; +} #endif // NUMEXPR_COMPLEX_FUNCTIONS_HPP diff --git a/numexpr/functions.hpp b/numexpr/functions.hpp index a3344b6..17310d6 100644 --- a/numexpr/functions.hpp +++ b/numexpr/functions.hpp @@ -166,3 +166,17 @@ FUNC_CCC(FUNC_CCC_LAST, NULL, NULL) #undef ELIDE_FUNC_CCC #undef FUNC_CCC #endif + +// 
complex -> boolean functions +#ifndef FUNC_BC +#define ELIDE_FUNC_BC +#define FUNC_BC(...) +#endif // use wrappers as there is name collision with isnanf in std +FUNC_BC(FUNC_ISNAN_BC, "isnan_bc", nc_isnan, vzIsnan) +FUNC_BC(FUNC_ISFINITE_BC, "isfinite_bc", nc_isfinite, vzIsfinite) +FUNC_BC(FUNC_ISINF_BC, "isinf_bc", nc_isinf, vzIsinf) +FUNC_BC(FUNC_BC_LAST, NULL, NULL, NULL) +#ifdef ELIDE_FUNC_BC +#undef ELIDE_FUNC_BC +#undef FUNC_BC +#endif diff --git a/numexpr/interp_body.cpp b/numexpr/interp_body.cpp index 297d48d..b6b7da2 100644 --- a/numexpr/interp_body.cpp +++ b/numexpr/interp_body.cpp @@ -469,6 +469,17 @@ VEC_ARG1(b_dest = functions_bd[arg2](d1)); #endif + case OP_FUNC_BCN: +#ifdef USE_VML + VEC_ARG1_VML(functions_bc_vml[arg2](BLOCK_SIZE, + (const MKL_Complex16*)x1, (bool*)dest)); +#else + VEC_ARG1(ca.real(c1r); + ca.imag(c1i); + b_dest = functions_bc[arg2](&ca)); +#endif + + /* Reductions */ case OP_SUM_IIN: VEC_ARG1(i_reduce += i1); case OP_SUM_LLN: VEC_ARG1(l_reduce += l1); diff --git a/numexpr/interpreter.cpp b/numexpr/interpreter.cpp index d3c811b..bebb82f 100644 --- a/numexpr/interpreter.cpp +++ b/numexpr/interpreter.cpp @@ -220,6 +220,31 @@ FuncBFPtr functions_bf[] = { }; #endif +#ifdef USE_VML +/* no isnan, isfinite or isinf in VML */ +static void vfIsfinite(MKL_INT n, const float* x1, bool* dest) +{ + MKL_INT j; + for (j=0; j*); +FuncBCPtr functions_bc[] = { +#define FUNC_BC(fop, s, f, ...) 
f, +#include "functions.hpp" +#undef FUNC_BC +}; + +#ifdef USE_VML +/* no isnan, isfinite or isinf in VML */ +static void vzIsfinite(MKL_INT n, const MKL_Complex16* x1, bool* dest) +{ + MKL_INT j; + for (j=0; j= OP_REDUCTION) { + } + else if (op == OP_FUNC_BCN) { + if (arg < 0 || arg >= FUNC_BC_LAST) { + PyErr_Format(PyExc_RuntimeError, "invalid program: funccode out of range (%i) at %i", arg, argloc); + return -1; + } + } + else if (op >= OP_REDUCTION) { ; } else { PyErr_Format(PyExc_RuntimeError, "invalid program: internal checker error processing %i", argloc); diff --git a/numexpr/interpreter.hpp b/numexpr/interpreter.hpp index 6a3448a..55f210c 100644 --- a/numexpr/interpreter.hpp +++ b/numexpr/interpreter.hpp @@ -42,6 +42,12 @@ enum FuncBDCodes { #undef FUNC_BD }; +enum FuncBCCodes { +#define FUNC_BC(fop, ...) fop, +#include "functions.hpp" +#undef FUNC_BC +}; + enum FuncDDDCodes { #define FUNC_DDD(fop, ...) fop, #include "functions.hpp" diff --git a/numexpr/module.cpp b/numexpr/module.cpp index 95f810c..649aa17 100644 --- a/numexpr/module.cpp +++ b/numexpr/module.cpp @@ -508,6 +508,7 @@ PyInit_interpreter(void) { #define FUNC_DD(name, sname, ...) add_func(name, sname); #define FUNC_BF(name, sname, ...) add_func(name, sname); #define FUNC_BD(name, sname, ...) add_func(name, sname); +#define FUNC_BC(name, sname, ...) add_func(name, sname); #define FUNC_DDD(name, sname, ...) add_func(name, sname); #define FUNC_CC(name, sname, ...) add_func(name, sname); #define FUNC_CCC(name, sname, ...) 
add_func(name, sname); @@ -515,6 +516,7 @@ PyInit_interpreter(void) { #undef FUNC_CCC #undef FUNC_CC #undef FUNC_DDD +#undef FUNC_BC #undef FUNC_BD #undef FUNC_BF #undef FUNC_DD diff --git a/numexpr/opcodes.hpp b/numexpr/opcodes.hpp index bfd8487..935753e 100644 --- a/numexpr/opcodes.hpp +++ b/numexpr/opcodes.hpp @@ -150,38 +150,39 @@ OPCODE(105, OP_CONTAINS_BSS, "contains_bss", Tb, Ts, Ts, T0) //Boolean outputs OPCODE(106, OP_FUNC_BDN, "func_bdn", Tb, Td, Tn, T0) OPCODE(107, OP_FUNC_BFN, "func_bfn", Tb, Tf, Tn, T0) +OPCODE(108, OP_FUNC_BCN, "func_bcn", Tb, Tc, Tn, T0) // Reductions always have to be at the end - parts of the code // use > OP_REDUCTION to decide whether operation is a reduction -OPCODE(108, OP_REDUCTION, NULL, T0, T0, T0, T0) +OPCODE(109, OP_REDUCTION, NULL, T0, T0, T0, T0) /* Last argument in a reduction is the axis of the array the reduction should be applied along. */ -OPCODE(109, OP_SUM_IIN, "sum_iin", Ti, Ti, Tn, T0) -OPCODE(110, OP_SUM_LLN, "sum_lln", Tl, Tl, Tn, T0) -OPCODE(111, OP_SUM_FFN, "sum_ffn", Tf, Tf, Tn, T0) -OPCODE(112, OP_SUM_DDN, "sum_ddn", Td, Td, Tn, T0) -OPCODE(113, OP_SUM_CCN, "sum_ccn", Tc, Tc, Tn, T0) - -OPCODE(114, OP_PROD, NULL, T0, T0, T0, T0) -OPCODE(115, OP_PROD_IIN, "prod_iin", Ti, Ti, Tn, T0) -OPCODE(116, OP_PROD_LLN, "prod_lln", Tl, Tl, Tn, T0) -OPCODE(117, OP_PROD_FFN, "prod_ffn", Tf, Tf, Tn, T0) -OPCODE(118, OP_PROD_DDN, "prod_ddn", Td, Td, Tn, T0) -OPCODE(119, OP_PROD_CCN, "prod_ccn", Tc, Tc, Tn, T0) - -OPCODE(120, OP_MIN, NULL, T0, T0, T0, T0) -OPCODE(121, OP_MIN_IIN, "min_iin", Ti, Ti, Tn, T0) -OPCODE(122, OP_MIN_LLN, "min_lln", Tl, Tl, Tn, T0) -OPCODE(123, OP_MIN_FFN, "min_ffn", Tf, Tf, Tn, T0) -OPCODE(124, OP_MIN_DDN, "min_ddn", Td, Td, Tn, T0) - -OPCODE(125, OP_MAX, NULL, T0, T0, T0, T0) -OPCODE(126, OP_MAX_IIN, "max_iin", Ti, Ti, Tn, T0) -OPCODE(127, OP_MAX_LLN, "max_lln", Tl, Tl, Tn, T0) -OPCODE(128, OP_MAX_FFN, "max_ffn", Tf, Tf, Tn, T0) -OPCODE(129, OP_MAX_DDN, "max_ddn", Td, Td, Tn, T0) +OPCODE(110, 
OP_SUM_IIN, "sum_iin", Ti, Ti, Tn, T0) +OPCODE(111, OP_SUM_LLN, "sum_lln", Tl, Tl, Tn, T0) +OPCODE(112, OP_SUM_FFN, "sum_ffn", Tf, Tf, Tn, T0) +OPCODE(113, OP_SUM_DDN, "sum_ddn", Td, Td, Tn, T0) +OPCODE(114, OP_SUM_CCN, "sum_ccn", Tc, Tc, Tn, T0) + +OPCODE(115, OP_PROD, NULL, T0, T0, T0, T0) +OPCODE(116, OP_PROD_IIN, "prod_iin", Ti, Ti, Tn, T0) +OPCODE(117, OP_PROD_LLN, "prod_lln", Tl, Tl, Tn, T0) +OPCODE(118, OP_PROD_FFN, "prod_ffn", Tf, Tf, Tn, T0) +OPCODE(119, OP_PROD_DDN, "prod_ddn", Td, Td, Tn, T0) +OPCODE(120, OP_PROD_CCN, "prod_ccn", Tc, Tc, Tn, T0) + +OPCODE(121, OP_MIN, NULL, T0, T0, T0, T0) +OPCODE(122, OP_MIN_IIN, "min_iin", Ti, Ti, Tn, T0) +OPCODE(123, OP_MIN_LLN, "min_lln", Tl, Tl, Tn, T0) +OPCODE(124, OP_MIN_FFN, "min_ffn", Tf, Tf, Tn, T0) +OPCODE(125, OP_MIN_DDN, "min_ddn", Td, Td, Tn, T0) + +OPCODE(126, OP_MAX, NULL, T0, T0, T0, T0) +OPCODE(127, OP_MAX_IIN, "max_iin", Ti, Ti, Tn, T0) +OPCODE(128, OP_MAX_LLN, "max_lln", Tl, Tl, Tn, T0) +OPCODE(129, OP_MAX_FFN, "max_ffn", Tf, Tf, Tn, T0) +OPCODE(130, OP_MAX_DDN, "max_ddn", Td, Td, Tn, T0) /* When we get to 255, will maybe have to change code again @@ -189,4 +190,4 @@ When we get to 255, will maybe have to change code again other than unsigned char for OPCODE table) */ /* Should be the last opcode */ -OPCODE(130, OP_END, NULL, T0, T0, T0, T0) +OPCODE(131, OP_END, NULL, T0, T0, T0, T0) diff --git a/numexpr/tests/test_numexpr.py b/numexpr/tests/test_numexpr.py index 10ce851..512f2ea 100644 --- a/numexpr/tests/test_numexpr.py +++ b/numexpr/tests/test_numexpr.py @@ -709,6 +709,11 @@ def test_bool_funcs(self): assert np.all(evaluate("isnan(a)") == np.isnan(a)) assert np.all(evaluate("isfinite(a)") == np.isfinite(a)) assert np.all(evaluate("isinf(a)") == np.isinf(a)) + a = a.astype(np.complex128) + assert a.dtype == np.complex128 + assert np.all(evaluate("isnan(a)") == np.isnan(a)) + assert np.all(evaluate("isfinite(a)") == np.isfinite(a)) + assert np.all(evaluate("isinf(a)") == np.isinf(a)) if 'sparc' not 
in platform.machine(): # Execution order set here so as to not use too many threads From 91526e17ff7fc475ea9d47b3b8e8196b069cb0e0 Mon Sep 17 00:00:00 2001 From: Luke Shaw Date: Thu, 11 Sep 2025 11:15:27 +0200 Subject: [PATCH 081/166] Now compiles on MKL --- numexpr/interpreter.cpp | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/numexpr/interpreter.cpp b/numexpr/interpreter.cpp index bebb82f..b724295 100644 --- a/numexpr/interpreter.cpp +++ b/numexpr/interpreter.cpp @@ -327,6 +327,15 @@ static void vzIsnan(MKL_INT n, const MKL_Complex16* x1, bool* dest) }; #endif +#ifdef USE_VML +typedef void (*FuncBCPtr_vml)(MKL_INT, const MKL_Complex16[], bool*); +FuncBCPtr_vml functions_bc_vml[] = { +#define FUNC_BC(fop, s, f, f_vml) f_vml, +#include "functions.hpp" +#undef FUNC_BC +}; +#endif + #ifdef USE_VML /* Fake vdConj function just for casting purposes inside numexpr */ static void vdConj(MKL_INT n, const double* x1, double* dest) From e2943dd29a3e28705267b0167464d55954785418 Mon Sep 17 00:00:00 2001 From: Luke Shaw Date: Thu, 11 Sep 2025 11:32:49 +0200 Subject: [PATCH 082/166] Update ADDFUNCS documentation --- ADDFUNCS.rst | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/ADDFUNCS.rst b/ADDFUNCS.rst index afd348b..2a8d17f 100644 --- a/ADDFUNCS.rst +++ b/ADDFUNCS.rst @@ -171,6 +171,23 @@ Add clauses to generate the FUNC_CODES from the ``functions.hpp`` header, making }; #endif +Some functions (e.g. ``fmod``, ``isnan``) are not available in MKL, and so must be hard-coded here as well: + +.. 
code-block:: cpp + + #ifdef USE_VML + /* no isnan, isfinite or isinf in VML */ + static void vdIsfinite(MKL_INT n, const double* x1, bool* dest) + { + MKL_INT j; + for (j=0; j Date: Thu, 11 Sep 2025 11:40:59 +0200 Subject: [PATCH 083/166] Update user guide to new version --- doc/user_guide.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/user_guide.rst b/doc/user_guide.rst index ce2ff9d..39bbecb 100644 --- a/doc/user_guide.rst +++ b/doc/user_guide.rst @@ -1,4 +1,4 @@ -NumExpr 2.8 User Guide +NumExpr 2.12 User Guide ====================== The NumExpr package supplies routines for the fast evaluation of From b93a4bbfef7e3e74db1ceed5de186545aa8144f6 Mon Sep 17 00:00:00 2001 From: Luke Shaw Date: Thu, 11 Sep 2025 11:41:10 +0200 Subject: [PATCH 084/166] Update user guide to new version --- doc/user_guide.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/doc/user_guide.rst b/doc/user_guide.rst index 39bbecb..ade4e46 100644 --- a/doc/user_guide.rst +++ b/doc/user_guide.rst @@ -201,6 +201,8 @@ The next are the current supported set: * :code:`where(bool, number1, number2): number` -- number1 if the bool condition is true, number2 otherwise. + * :code:`{isinf, isnan, isfinite}(float|complex): bool` -- returns element-wise True + for ``inf`` or ``NaN``, ``NaN``, not ``inf`` respectively. * :code:`{sin,cos,tan}(float|complex): float|complex` -- trigonometric sine, cosine or tangent. 
* :code:`{arcsin,arccos,arctan}(float|complex): float|complex` -- trigonometric From d57d82c4593185d1d1821c1fd8cc960c1e456317 Mon Sep 17 00:00:00 2001 From: Francesc Alted Date: Thu, 11 Sep 2025 12:25:57 +0200 Subject: [PATCH 085/166] Modernize and make MKL benchmark more demanding --- bench/vml_timing2.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/bench/vml_timing2.py b/bench/vml_timing2.py index 1c460d0..4491162 100644 --- a/bench/vml_timing2.py +++ b/bench/vml_timing2.py @@ -13,7 +13,7 @@ import numexpr as ne -N = int(2**26) +N = int(2**28) x = np.linspace(0, 1, N) y = np.linspace(0, 1, N) @@ -31,17 +31,17 @@ print("Time for an algebraic expression: %.3f s / %.3f GB/s" % (t1-t0, gbs)) t0 = time() -z = np.sin(x)**2 + np.cos(y)**2 +z = np.sin(x)**3.2 + np.cos(y)**3.2 t1 = time() gbs = working_set_GB / (t1-t0) print("Time for a transcendental expression: %.3f s / %.3f GB/s" % (t1-t0, gbs)) if ne.use_vml: ne.set_vml_num_threads(1) - ne.set_num_threads(8) + ne.set_num_threads(16) print("NumExpr version: %s, Using MKL ver. 
%s, Num threads: %s" % (ne.__version__, ne.get_vml_version(), ne.nthreads)) else: - ne.set_num_threads(8) + ne.set_num_threads(16) print("NumExpr version: %s, Not Using MKL, Num threads: %s" % (ne.__version__, ne.nthreads)) t0 = time() @@ -51,7 +51,7 @@ print("Time for an algebraic expression: %.3f s / %.3f GB/s" % (t1-t0, gbs)) t0 = time() -ne.evaluate('sin(x)**2 + cos(y)**2', out = z) +ne.evaluate('sin(x)**3.2 + cos(y)**3.2', out = z) t1 = time() gbs = working_set_GB / (t1-t0) print("Time for a transcendental expression: %.3f s / %.3f GB/s" % (t1-t0, gbs)) From 6002a823fe267c51b312f9955ea89d3bc31108ea Mon Sep 17 00:00:00 2001 From: Francesc Alted Date: Thu, 11 Sep 2025 12:36:42 +0200 Subject: [PATCH 086/166] Getting ready for release 2.12.1 --- ANNOUNCE.rst | 22 ++++++++++------------ RELEASE_NOTES.rst | 10 +++++++++- VERSION | 2 +- 3 files changed, 20 insertions(+), 14 deletions(-) diff --git a/ANNOUNCE.rst b/ANNOUNCE.rst index 885b2b3..03fc73f 100644 --- a/ANNOUNCE.rst +++ b/ANNOUNCE.rst @@ -1,29 +1,27 @@ ========================= -Announcing NumExpr 2.12.0 +Announcing NumExpr 2.12.1 ========================= Hi everyone, -NumExpr 2.12.0 comes with new isnan/isfinite/isinf functions. -Most importantly, we have added instructions for adding new functions -to the virtual machine. See ADDFUNCS.rst for more details. Thanks to -Luke Shaw for these contributions. +NumExpr 2.12.1 allows isnan/isfinite/isinf functions to be used with complex. +Also, OneAPI MKL has been fixed. Thanks to Luke Shaw for these contributions. Project documentation is available at: http://numexpr.readthedocs.io/ -Changes from 2.11.0 to 2.12.0 +Changes from 2.12.0 to 2.12.1 ----------------------------- -* Added isnan/isfinite/isinf functions. Thanks to Luke Shaw. +* Added complex counterparts for isnan/isfinite/isinf functions. + Thanks to Luke Shaw. -* New instructions for adding new functions to the virtual machine. - They are available at ADDFUNCS.rst. Thanks to Luke Shaw. 
+* Updated documentation for the new functions and instructions + for adding new functions to the virtual machine. Thanks to Luke Shaw. -* We are distributing binary wheels for Python 3.14 and 3.14t now. - -* We are distributing musllinux wheels too! Thanks to Clément Robert. +* Fixed MKL support; it was broken in 2.12.0. Thanks to + Christoph Gohlke for reporting the issue. What's Numexpr? --------------- diff --git a/RELEASE_NOTES.rst b/RELEASE_NOTES.rst index 3f2da48..88afe31 100644 --- a/RELEASE_NOTES.rst +++ b/RELEASE_NOTES.rst @@ -5,7 +5,15 @@ Release notes for NumExpr 2.12 series Changes from 2.12.0 to 2.12.1 ----------------------------- -* **Under development.** +* Added complex counterparts for isnan/isfinite/isinf functions. + Thanks to Luke Shaw. + +* Updated documentation for the new functions and instructions + for adding new functions to the virtual machine. Thanks to Luke Shaw. + +* Fixed MKL support; it was broken in 2.12.0. Thanks to + Christoph Gohlke for reporting the issue. 
+ Changes from 2.11.0 to 2.12.0 ----------------------------- diff --git a/VERSION b/VERSION index a791a25..3cf561c 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -2.12.1.dev0 +2.12.1 From 6a964ee6e97eff36ad1ed6a3f382adeb0d4d1bc6 Mon Sep 17 00:00:00 2001 From: Francesc Alted Date: Thu, 11 Sep 2025 13:08:46 +0200 Subject: [PATCH 087/166] Post 2.12.1 release actions done --- RELEASE_NOTES.rst | 7 +++++++ VERSION | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/RELEASE_NOTES.rst b/RELEASE_NOTES.rst index 88afe31..fded329 100644 --- a/RELEASE_NOTES.rst +++ b/RELEASE_NOTES.rst @@ -2,6 +2,13 @@ Release notes for NumExpr 2.12 series ===================================== + +Changes from 2.12.1 to 2.12.2 +----------------------------- + +* **Under development.** + + Changes from 2.12.0 to 2.12.1 ----------------------------- diff --git a/VERSION b/VERSION index 3cf561c..8b62883 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -2.12.1 +2.12.2.dev0 From e594325b6af26dba1995b8acebfb85953c2815b0 Mon Sep 17 00:00:00 2001 From: Francesc Alted Date: Fri, 12 Sep 2025 12:32:19 +0200 Subject: [PATCH 088/166] Add Luke to the list of authors --- AUTHORS.txt | 3 +++ 1 file changed, 3 insertions(+) diff --git a/AUTHORS.txt b/AUTHORS.txt index 57410db..592fcd1 100644 --- a/AUTHORS.txt +++ b/AUTHORS.txt @@ -29,3 +29,6 @@ since 2016 to 2023. Teng Liu fixed many bugs, and in particular, contributed valuable fixes to the new regex sanitizer for expressions. + +Luke Shaw contributed new isnan/isinf/isfinite functions, and expanded +the amount of opcodes from 128 to 256. 
From 0d7b21eeb572c1969479524c5744754dba2f40b2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zbigniew=20J=C4=99drzejewski-Szmek?= Date: Fri, 12 Sep 2025 14:25:40 +0200 Subject: [PATCH 089/166] Close comment to avoid compiler warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This appears over and over in the build log: In file included from numexpr/module.hpp:17, from numexpr/module.cpp:12: numexpr/numexpr_config.hpp:52:1: warning: ‘/*’ within comment [-Wcomment] 52 | /* Due to casting problems (normally return ints not bools, easiest to define --- numexpr/numexpr_config.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/numexpr/numexpr_config.hpp b/numexpr/numexpr_config.hpp index abb5539..6c708cd 100644 --- a/numexpr/numexpr_config.hpp +++ b/numexpr/numexpr_config.hpp @@ -48,9 +48,9 @@ #include "msvc_function_stubs.hpp" #else /* GCC/Clang version: use std:: (can't use it for windows) -msvc_function_stubs contains windows alternatives + msvc_function_stubs contains windows alternatives */ /* Due to casting problems (normally return ints not bools, easiest to define -non-overloaded wrappers for these functions) */ + non-overloaded wrappers for these functions) */ inline bool isfinitef_(float x) { return !!::isfinite(x); } inline bool isnanf_(float x) { return !!::isnan(x); } inline bool isfinited(double x) { return !!::isfinite(x); } From 52d96786f521c1bf559f8ea34fabf8eff8ccb605 Mon Sep 17 00:00:00 2001 From: Thomas Klausner Date: Mon, 15 Sep 2025 12:37:46 +0200 Subject: [PATCH 090/166] Fix build on NetBSD with gcc 12. Closes #528. 
--- numexpr/numexpr_config.hpp | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/numexpr/numexpr_config.hpp b/numexpr/numexpr_config.hpp index 6c708cd..98f2701 100644 --- a/numexpr/numexpr_config.hpp +++ b/numexpr/numexpr_config.hpp @@ -40,6 +40,7 @@ #include "mkl_vml.h" #include "mkl_service.h" #endif +#include #ifdef _WIN32 #ifndef __MINGW32__ @@ -51,12 +52,12 @@ msvc_function_stubs contains windows alternatives */ /* Due to casting problems (normally return ints not bools, easiest to define non-overloaded wrappers for these functions) */ -inline bool isfinitef_(float x) { return !!::isfinite(x); } -inline bool isnanf_(float x) { return !!::isnan(x); } -inline bool isfinited(double x) { return !!::isfinite(x); } -inline bool isnand(double x) { return !!::isnan(x); } -inline bool isinff_(float x) { return !!::isinf(x); } -inline bool isinfd(double x) { return !!::isinf(x); } +inline bool isfinitef_(float x) { return !!std::isfinite(x); } +inline bool isnanf_(float x) { return !!std::isnan(x); } +inline bool isfinited(double x) { return !!std::isfinite(x); } +inline bool isnand(double x) { return !!std::isnan(x); } +inline bool isinff_(float x) { return !!std::isinf(x); } +inline bool isinfd(double x) { return !!std::isinf(x); } #endif #endif // NUMEXPR_CONFIG_HPP From 2b10af01c836b66ad8caa6b3a07244356684c1bd Mon Sep 17 00:00:00 2001 From: Francesc Alted Date: Mon, 22 Sep 2025 09:57:13 +0200 Subject: [PATCH 091/166] Use version for forthcoming release --- doc/conf.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/conf.py b/doc/conf.py index 6edbd64..60cf6c5 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -70,9 +70,9 @@ # built documents. # # The short X.Y version. -version = '2.8' +version = '2.13' # The full version, including alpha/beta/rc tags. -release = '2.8.5.dev1' +release = '2.13.dev1' # The language for content autogenerated by Sphinx. 
Refer to documentation From a39c806f979c638aebd85293d68461f1eda8dc42 Mon Sep 17 00:00:00 2001 From: Luke Shaw Date: Mon, 22 Sep 2025 12:58:13 +0200 Subject: [PATCH 092/166] Add (true) bitwise ops, bitwise and logical xor --- numexpr/expressions.py | 8 +- numexpr/interp_body.cpp | 11 ++ numexpr/opcodes.hpp | 311 ++++++++++++++++++---------------- numexpr/tests/test_numexpr.py | 23 +++ 4 files changed, 199 insertions(+), 154 deletions(-) diff --git a/numexpr/expressions.py b/numexpr/expressions.py index db5490d..ce724bf 100644 --- a/numexpr/expressions.py +++ b/numexpr/expressions.py @@ -451,10 +451,10 @@ def __bool__(self): __rshift__ = binop('rshift') __rrshift__ = binop('rshift', reversed=True) - # boolean operations - - __and__ = binop('and', kind='bool') - __or__ = binop('or', kind='bool') + # bitwise or logical operations + __and__ = binop('and') + __or__ = binop('or') + __xor__ = binop('xor') __gt__ = binop('gt', kind='bool') __ge__ = binop('ge', kind='bool') diff --git a/numexpr/interp_body.cpp b/numexpr/interp_body.cpp index b6b7da2..e207e1a 100644 --- a/numexpr/interp_body.cpp +++ b/numexpr/interp_body.cpp @@ -220,6 +220,7 @@ case OP_INVERT_BB: VEC_ARG1(b_dest = !b1); case OP_AND_BBB: VEC_ARG2(b_dest = (b1 && b2)); case OP_OR_BBB: VEC_ARG2(b_dest = (b1 || b2)); + case OP_XOR_BBB: VEC_ARG2(b_dest = (b1 || b2) && !(b1 && b2) ); case OP_EQ_BBB: VEC_ARG2(b_dest = (b1 == b2)); case OP_NE_BBB: VEC_ARG2(b_dest = (b1 != b2)); @@ -268,6 +269,11 @@ case OP_RSHIFT_III: VEC_ARG2(i_dest = i1 >> i2); case OP_WHERE_IBII: VEC_ARG3(i_dest = b1 ? i2 : i3); + //Bitwise ops + case OP_INVERT_II: VEC_ARG1(i_dest = ~i1); + case OP_AND_III: VEC_ARG2(i_dest = (i1 & i2)); + case OP_OR_III: VEC_ARG2(i_dest = (i1 | i2)); + case OP_XOR_III: VEC_ARG2(i_dest = (i1 ^ i2)); /* Long */ case OP_CAST_LI: VEC_ARG1(l_dest = (long long)(i1)); @@ -288,6 +294,11 @@ case OP_RSHIFT_LLL: VEC_ARG2(l_dest = l1 >> l2); case OP_WHERE_LBLL: VEC_ARG3(l_dest = b1 ? 
l2 : l3); + //Bitwise ops + case OP_INVERT_LL: VEC_ARG1(l_dest = ~l1); + case OP_AND_LLL: VEC_ARG2(l_dest = (l1 & l2)); + case OP_OR_LLL: VEC_ARG2(l_dest = (l1 | l2)); + case OP_XOR_LLL: VEC_ARG2(l_dest = (l1 ^ l2)); /* Float */ case OP_CAST_FI: VEC_ARG1(f_dest = (float)(i1)); diff --git a/numexpr/opcodes.hpp b/numexpr/opcodes.hpp index 935753e..dafd89c 100644 --- a/numexpr/opcodes.hpp +++ b/numexpr/opcodes.hpp @@ -26,163 +26,174 @@ OPCODE(1, OP_COPY_BB, "copy_bb", Tb, Tb, T0, T0) OPCODE(2, OP_INVERT_BB, "invert_bb", Tb, Tb, T0, T0) OPCODE(3, OP_AND_BBB, "and_bbb", Tb, Tb, Tb, T0) OPCODE(4, OP_OR_BBB, "or_bbb", Tb, Tb, Tb, T0) - -OPCODE(5, OP_EQ_BBB, "eq_bbb", Tb, Tb, Tb, T0) -OPCODE(6, OP_NE_BBB, "ne_bbb", Tb, Tb, Tb, T0) - -OPCODE(7, OP_GT_BII, "gt_bii", Tb, Ti, Ti, T0) -OPCODE(8, OP_GE_BII, "ge_bii", Tb, Ti, Ti, T0) -OPCODE(9, OP_EQ_BII, "eq_bii", Tb, Ti, Ti, T0) -OPCODE(10, OP_NE_BII, "ne_bii", Tb, Ti, Ti, T0) - -OPCODE(11, OP_GT_BLL, "gt_bll", Tb, Tl, Tl, T0) -OPCODE(12, OP_GE_BLL, "ge_bll", Tb, Tl, Tl, T0) -OPCODE(13, OP_EQ_BLL, "eq_bll", Tb, Tl, Tl, T0) -OPCODE(14, OP_NE_BLL, "ne_bll", Tb, Tl, Tl, T0) - -OPCODE(15, OP_GT_BFF, "gt_bff", Tb, Tf, Tf, T0) -OPCODE(16, OP_GE_BFF, "ge_bff", Tb, Tf, Tf, T0) -OPCODE(17, OP_EQ_BFF, "eq_bff", Tb, Tf, Tf, T0) -OPCODE(18, OP_NE_BFF, "ne_bff", Tb, Tf, Tf, T0) - -OPCODE(19, OP_GT_BDD, "gt_bdd", Tb, Td, Td, T0) -OPCODE(20, OP_GE_BDD, "ge_bdd", Tb, Td, Td, T0) -OPCODE(21, OP_EQ_BDD, "eq_bdd", Tb, Td, Td, T0) -OPCODE(22, OP_NE_BDD, "ne_bdd", Tb, Td, Td, T0) - -OPCODE(23, OP_GT_BSS, "gt_bss", Tb, Ts, Ts, T0) -OPCODE(24, OP_GE_BSS, "ge_bss", Tb, Ts, Ts, T0) -OPCODE(25, OP_EQ_BSS, "eq_bss", Tb, Ts, Ts, T0) -OPCODE(26, OP_NE_BSS, "ne_bss", Tb, Ts, Ts, T0) - -OPCODE(27, OP_CAST_IB, "cast_ib", Ti, Tb, T0, T0) -OPCODE(28, OP_COPY_II, "copy_ii", Ti, Ti, T0, T0) -OPCODE(29, OP_ONES_LIKE_II, "ones_like_ii", Ti, T0, T0, T0) -OPCODE(30, OP_NEG_II, "neg_ii", Ti, Ti, T0, T0) -OPCODE(31, OP_ADD_III, "add_iii", Ti, Ti, Ti, T0) -OPCODE(32, 
OP_SUB_III, "sub_iii", Ti, Ti, Ti, T0) -OPCODE(33, OP_MUL_III, "mul_iii", Ti, Ti, Ti, T0) -OPCODE(34, OP_DIV_III, "div_iii", Ti, Ti, Ti, T0) -OPCODE(35, OP_POW_III, "pow_iii", Ti, Ti, Ti, T0) -OPCODE(36, OP_MOD_III, "mod_iii", Ti, Ti, Ti, T0) - -OPCODE(37, OP_LSHIFT_III, "lshift_iii", Ti, Ti, Ti, T0) -OPCODE(38, OP_RSHIFT_III, "rshift_iii", Ti, Ti, Ti, T0) - -OPCODE(39, OP_WHERE_IBII, "where_ibii", Ti, Tb, Ti, Ti) - -OPCODE(40, OP_CAST_LI, "cast_li", Tl, Ti, T0, T0) -OPCODE(41, OP_COPY_LL, "copy_ll", Tl, Tl, T0, T0) -OPCODE(42, OP_ONES_LIKE_LL, "ones_like_ll", Tl, T0, T0, T0) -OPCODE(43, OP_NEG_LL, "neg_ll", Tl, Tl, T0, T0) -OPCODE(44, OP_ADD_LLL, "add_lll", Tl, Tl, Tl, T0) -OPCODE(45, OP_SUB_LLL, "sub_lll", Tl, Tl, Tl, T0) -OPCODE(46, OP_MUL_LLL, "mul_lll", Tl, Tl, Tl, T0) -OPCODE(47, OP_DIV_LLL, "div_lll", Tl, Tl, Tl, T0) -OPCODE(48, OP_POW_LLL, "pow_lll", Tl, Tl, Tl, T0) -OPCODE(49, OP_MOD_LLL, "mod_lll", Tl, Tl, Tl, T0) - -OPCODE(50, OP_LSHIFT_LLL, "lshift_lll", Tl, Tl, Tl, T0) -OPCODE(51, OP_RSHIFT_LLL, "rshift_lll", Tl, Tl, Tl, T0) - -OPCODE(52, OP_WHERE_LBLL, "where_lbll", Tl, Tb, Tl, Tl) - -OPCODE(53, OP_CAST_FI, "cast_fi", Tf, Ti, T0, T0) -OPCODE(54, OP_CAST_FL, "cast_fl", Tf, Tl, T0, T0) -OPCODE(55, OP_COPY_FF, "copy_ff", Tf, Tf, T0, T0) -OPCODE(56, OP_ONES_LIKE_FF, "ones_like_ff", Tf, T0, T0, T0) -OPCODE(57, OP_NEG_FF, "neg_ff", Tf, Tf, T0, T0) -OPCODE(58, OP_ADD_FFF, "add_fff", Tf, Tf, Tf, T0) -OPCODE(59, OP_SUB_FFF, "sub_fff", Tf, Tf, Tf, T0) -OPCODE(60, OP_MUL_FFF, "mul_fff", Tf, Tf, Tf, T0) -OPCODE(61, OP_DIV_FFF, "div_fff", Tf, Tf, Tf, T0) -OPCODE(62, OP_POW_FFF, "pow_fff", Tf, Tf, Tf, T0) -OPCODE(63, OP_MOD_FFF, "mod_fff", Tf, Tf, Tf, T0) -OPCODE(64, OP_SQRT_FF, "sqrt_ff", Tf, Tf, T0, T0) -OPCODE(65, OP_WHERE_FBFF, "where_fbff", Tf, Tb, Tf, Tf) -OPCODE(66, OP_FUNC_FFN, "func_ffn", Tf, Tf, Tn, T0) -OPCODE(67, OP_FUNC_FFFN, "func_fffn", Tf, Tf, Tf, Tn) - -OPCODE(68, OP_CAST_DI, "cast_di", Td, Ti, T0, T0) -OPCODE(69, OP_CAST_DL, "cast_dl", Td, Tl, T0, 
T0) -OPCODE(70, OP_CAST_DF, "cast_df", Td, Tf, T0, T0) -OPCODE(71, OP_COPY_DD, "copy_dd", Td, Td, T0, T0) -OPCODE(72, OP_ONES_LIKE_DD, "ones_like_dd", Td, T0, T0, T0) -OPCODE(73, OP_NEG_DD, "neg_dd", Td, Td, T0, T0) -OPCODE(74, OP_ADD_DDD, "add_ddd", Td, Td, Td, T0) -OPCODE(75, OP_SUB_DDD, "sub_ddd", Td, Td, Td, T0) -OPCODE(76, OP_MUL_DDD, "mul_ddd", Td, Td, Td, T0) -OPCODE(77, OP_DIV_DDD, "div_ddd", Td, Td, Td, T0) -OPCODE(78, OP_POW_DDD, "pow_ddd", Td, Td, Td, T0) -OPCODE(79, OP_MOD_DDD, "mod_ddd", Td, Td, Td, T0) -OPCODE(80, OP_SQRT_DD, "sqrt_dd", Td, Td, T0, T0) -OPCODE(81, OP_WHERE_DBDD, "where_dbdd", Td, Tb, Td, Td) -OPCODE(82, OP_FUNC_DDN, "func_ddn", Td, Td, Tn, T0) -OPCODE(83, OP_FUNC_DDDN, "func_dddn", Td, Td, Td, Tn) - -OPCODE(84, OP_EQ_BCC, "eq_bcc", Tb, Tc, Tc, T0) -OPCODE(85, OP_NE_BCC, "ne_bcc", Tb, Tc, Tc, T0) - -OPCODE(86, OP_CAST_CI, "cast_ci", Tc, Ti, T0, T0) -OPCODE(87, OP_CAST_CL, "cast_cl", Tc, Tl, T0, T0) -OPCODE(88, OP_CAST_CF, "cast_cf", Tc, Tf, T0, T0) -OPCODE(89, OP_CAST_CD, "cast_cd", Tc, Td, T0, T0) -OPCODE(90, OP_ONES_LIKE_CC, "ones_like_cc", Tc, T0, T0, T0) -OPCODE(91, OP_COPY_CC, "copy_cc", Tc, Tc, T0, T0) -OPCODE(92, OP_NEG_CC, "neg_cc", Tc, Tc, T0, T0) -OPCODE(93, OP_ADD_CCC, "add_ccc", Tc, Tc, Tc, T0) -OPCODE(94, OP_SUB_CCC, "sub_ccc", Tc, Tc, Tc, T0) -OPCODE(95, OP_MUL_CCC, "mul_ccc", Tc, Tc, Tc, T0) -OPCODE(96, OP_DIV_CCC, "div_ccc", Tc, Tc, Tc, T0) -OPCODE(97, OP_WHERE_CBCC, "where_cbcc", Tc, Tb, Tc, Tc) -OPCODE(98, OP_FUNC_CCN, "func_ccn", Tc, Tc, Tn, T0) -OPCODE(99, OP_FUNC_CCCN, "func_cccn", Tc, Tc, Tc, Tn) - -OPCODE(100, OP_REAL_DC, "real_dc", Td, Tc, T0, T0) -OPCODE(101, OP_IMAG_DC, "imag_dc", Td, Tc, T0, T0) -OPCODE(102, OP_COMPLEX_CDD, "complex_cdd", Tc, Td, Td, T0) - -OPCODE(103, OP_COPY_SS, "copy_ss", Ts, Ts, T0, T0) - -OPCODE(104, OP_WHERE_BBBB, "where_bbbb", Tb, Tb, Tb, Tb) - -OPCODE(105, OP_CONTAINS_BSS, "contains_bss", Tb, Ts, Ts, T0) +OPCODE(5, OP_XOR_BBB, "xor_bbb", Tb, Tb, Tb, T0) + +OPCODE(6, OP_EQ_BBB, 
"eq_bbb", Tb, Tb, Tb, T0) +OPCODE(7, OP_NE_BBB, "ne_bbb", Tb, Tb, Tb, T0) + +OPCODE(8, OP_GT_BII, "gt_bii", Tb, Ti, Ti, T0) +OPCODE(9, OP_GE_BII, "ge_bii", Tb, Ti, Ti, T0) +OPCODE(10, OP_EQ_BII, "eq_bii", Tb, Ti, Ti, T0) +OPCODE(11, OP_NE_BII, "ne_bii", Tb, Ti, Ti, T0) + +OPCODE(12, OP_GT_BLL, "gt_bll", Tb, Tl, Tl, T0) +OPCODE(13, OP_GE_BLL, "ge_bll", Tb, Tl, Tl, T0) +OPCODE(14, OP_EQ_BLL, "eq_bll", Tb, Tl, Tl, T0) +OPCODE(15, OP_NE_BLL, "ne_bll", Tb, Tl, Tl, T0) + +OPCODE(16, OP_GT_BFF, "gt_bff", Tb, Tf, Tf, T0) +OPCODE(17, OP_GE_BFF, "ge_bff", Tb, Tf, Tf, T0) +OPCODE(18, OP_EQ_BFF, "eq_bff", Tb, Tf, Tf, T0) +OPCODE(19, OP_NE_BFF, "ne_bff", Tb, Tf, Tf, T0) + +OPCODE(20, OP_GT_BDD, "gt_bdd", Tb, Td, Td, T0) +OPCODE(21, OP_GE_BDD, "ge_bdd", Tb, Td, Td, T0) +OPCODE(22, OP_EQ_BDD, "eq_bdd", Tb, Td, Td, T0) +OPCODE(23, OP_NE_BDD, "ne_bdd", Tb, Td, Td, T0) + +OPCODE(24, OP_GT_BSS, "gt_bss", Tb, Ts, Ts, T0) +OPCODE(25, OP_GE_BSS, "ge_bss", Tb, Ts, Ts, T0) +OPCODE(26, OP_EQ_BSS, "eq_bss", Tb, Ts, Ts, T0) +OPCODE(27, OP_NE_BSS, "ne_bss", Tb, Ts, Ts, T0) + +OPCODE(28, OP_CAST_IB, "cast_ib", Ti, Tb, T0, T0) +OPCODE(29, OP_COPY_II, "copy_ii", Ti, Ti, T0, T0) +OPCODE(30, OP_ONES_LIKE_II, "ones_like_ii", Ti, T0, T0, T0) +OPCODE(31, OP_NEG_II, "neg_ii", Ti, Ti, T0, T0) +OPCODE(32, OP_ADD_III, "add_iii", Ti, Ti, Ti, T0) +OPCODE(33, OP_SUB_III, "sub_iii", Ti, Ti, Ti, T0) +OPCODE(34, OP_MUL_III, "mul_iii", Ti, Ti, Ti, T0) +OPCODE(35, OP_DIV_III, "div_iii", Ti, Ti, Ti, T0) +OPCODE(36, OP_POW_III, "pow_iii", Ti, Ti, Ti, T0) +OPCODE(37, OP_MOD_III, "mod_iii", Ti, Ti, Ti, T0) + +OPCODE(38, OP_LSHIFT_III, "lshift_iii", Ti, Ti, Ti, T0) +OPCODE(39, OP_RSHIFT_III, "rshift_iii", Ti, Ti, Ti, T0) + +OPCODE(40, OP_WHERE_IBII, "where_ibii", Ti, Tb, Ti, Ti) +// Bitwise ops +OPCODE(41, OP_INVERT_II, "invert_ii", Ti, Ti, T0, T0) +OPCODE(42, OP_AND_III, "and_iii", Ti, Ti, Ti, T0) +OPCODE(43, OP_OR_III, "or_iii", Ti, Ti, Ti, T0) +OPCODE(44, OP_XOR_III, "xor_iii", Ti, Ti, Ti, T0) + +OPCODE(45, 
OP_CAST_LI, "cast_li", Tl, Ti, T0, T0) +OPCODE(46, OP_COPY_LL, "copy_ll", Tl, Tl, T0, T0) +OPCODE(47, OP_ONES_LIKE_LL, "ones_like_ll", Tl, T0, T0, T0) +OPCODE(48, OP_NEG_LL, "neg_ll", Tl, Tl, T0, T0) +OPCODE(49, OP_ADD_LLL, "add_lll", Tl, Tl, Tl, T0) +OPCODE(50, OP_SUB_LLL, "sub_lll", Tl, Tl, Tl, T0) +OPCODE(51, OP_MUL_LLL, "mul_lll", Tl, Tl, Tl, T0) +OPCODE(52, OP_DIV_LLL, "div_lll", Tl, Tl, Tl, T0) +OPCODE(53, OP_POW_LLL, "pow_lll", Tl, Tl, Tl, T0) +OPCODE(54, OP_MOD_LLL, "mod_lll", Tl, Tl, Tl, T0) + +OPCODE(55, OP_LSHIFT_LLL, "lshift_lll", Tl, Tl, Tl, T0) +OPCODE(56, OP_RSHIFT_LLL, "rshift_lll", Tl, Tl, Tl, T0) + +OPCODE(57, OP_WHERE_LBLL, "where_lbll", Tl, Tb, Tl, Tl) +// Bitwise ops +OPCODE(58, OP_INVERT_LL, "invert_ll", Tl, Tl, T0, T0) +OPCODE(59, OP_AND_LLL, "and_lll", Tl, Tl, Tl, T0) +OPCODE(60, OP_OR_LLL, "or_lll", Tl, Tl, Tl, T0) +OPCODE(61, OP_XOR_LLL, "xor_lll", Tl, Tl, Tl, T0) + +OPCODE(62, OP_CAST_FI, "cast_fi", Tf, Ti, T0, T0) +OPCODE(63, OP_CAST_FL, "cast_fl", Tf, Tl, T0, T0) +OPCODE(64, OP_COPY_FF, "copy_ff", Tf, Tf, T0, T0) +OPCODE(65, OP_ONES_LIKE_FF, "ones_like_ff", Tf, T0, T0, T0) +OPCODE(66, OP_NEG_FF, "neg_ff", Tf, Tf, T0, T0) +OPCODE(67, OP_ADD_FFF, "add_fff", Tf, Tf, Tf, T0) +OPCODE(68, OP_SUB_FFF, "sub_fff", Tf, Tf, Tf, T0) +OPCODE(69, OP_MUL_FFF, "mul_fff", Tf, Tf, Tf, T0) +OPCODE(70, OP_DIV_FFF, "div_fff", Tf, Tf, Tf, T0) +OPCODE(71, OP_POW_FFF, "pow_fff", Tf, Tf, Tf, T0) +OPCODE(72, OP_MOD_FFF, "mod_fff", Tf, Tf, Tf, T0) +OPCODE(73, OP_SQRT_FF, "sqrt_ff", Tf, Tf, T0, T0) +OPCODE(74, OP_WHERE_FBFF, "where_fbff", Tf, Tb, Tf, Tf) +OPCODE(75, OP_FUNC_FFN, "func_ffn", Tf, Tf, Tn, T0) +OPCODE(76, OP_FUNC_FFFN, "func_fffn", Tf, Tf, Tf, Tn) + +OPCODE(77, OP_CAST_DI, "cast_di", Td, Ti, T0, T0) +OPCODE(78, OP_CAST_DL, "cast_dl", Td, Tl, T0, T0) +OPCODE(79, OP_CAST_DF, "cast_df", Td, Tf, T0, T0) +OPCODE(80, OP_COPY_DD, "copy_dd", Td, Td, T0, T0) +OPCODE(81, OP_ONES_LIKE_DD, "ones_like_dd", Td, T0, T0, T0) +OPCODE(82, OP_NEG_DD, "neg_dd", Td, Td, 
T0, T0) +OPCODE(83, OP_ADD_DDD, "add_ddd", Td, Td, Td, T0) +OPCODE(84, OP_SUB_DDD, "sub_ddd", Td, Td, Td, T0) +OPCODE(85, OP_MUL_DDD, "mul_ddd", Td, Td, Td, T0) +OPCODE(86, OP_DIV_DDD, "div_ddd", Td, Td, Td, T0) +OPCODE(87, OP_POW_DDD, "pow_ddd", Td, Td, Td, T0) +OPCODE(88, OP_MOD_DDD, "mod_ddd", Td, Td, Td, T0) +OPCODE(89, OP_SQRT_DD, "sqrt_dd", Td, Td, T0, T0) +OPCODE(90, OP_WHERE_DBDD, "where_dbdd", Td, Tb, Td, Td) +OPCODE(91, OP_FUNC_DDN, "func_ddn", Td, Td, Tn, T0) +OPCODE(92, OP_FUNC_DDDN, "func_dddn", Td, Td, Td, Tn) + +OPCODE(93, OP_EQ_BCC, "eq_bcc", Tb, Tc, Tc, T0) +OPCODE(94, OP_NE_BCC, "ne_bcc", Tb, Tc, Tc, T0) + +OPCODE(95, OP_CAST_CI, "cast_ci", Tc, Ti, T0, T0) +OPCODE(96, OP_CAST_CL, "cast_cl", Tc, Tl, T0, T0) +OPCODE(97, OP_CAST_CF, "cast_cf", Tc, Tf, T0, T0) +OPCODE(98, OP_CAST_CD, "cast_cd", Tc, Td, T0, T0) +OPCODE(99, OP_ONES_LIKE_CC, "ones_like_cc", Tc, T0, T0, T0) +OPCODE(100, OP_COPY_CC, "copy_cc", Tc, Tc, T0, T0) +OPCODE(101, OP_NEG_CC, "neg_cc", Tc, Tc, T0, T0) +OPCODE(102, OP_ADD_CCC, "add_ccc", Tc, Tc, Tc, T0) +OPCODE(103, OP_SUB_CCC, "sub_ccc", Tc, Tc, Tc, T0) +OPCODE(104, OP_MUL_CCC, "mul_ccc", Tc, Tc, Tc, T0) +OPCODE(105, OP_DIV_CCC, "div_ccc", Tc, Tc, Tc, T0) +OPCODE(106, OP_WHERE_CBCC, "where_cbcc", Tc, Tb, Tc, Tc) +OPCODE(107, OP_FUNC_CCN, "func_ccn", Tc, Tc, Tn, T0) +OPCODE(108, OP_FUNC_CCCN, "func_cccn", Tc, Tc, Tc, Tn) + +OPCODE(109, OP_REAL_DC, "real_dc", Td, Tc, T0, T0) +OPCODE(110, OP_IMAG_DC, "imag_dc", Td, Tc, T0, T0) +OPCODE(111, OP_COMPLEX_CDD, "complex_cdd", Tc, Td, Td, T0) + +OPCODE(112, OP_COPY_SS, "copy_ss", Ts, Ts, T0, T0) + +OPCODE(113, OP_WHERE_BBBB, "where_bbbb", Tb, Tb, Tb, Tb) + +OPCODE(114, OP_CONTAINS_BSS, "contains_bss", Tb, Ts, Ts, T0) //Boolean outputs -OPCODE(106, OP_FUNC_BDN, "func_bdn", Tb, Td, Tn, T0) -OPCODE(107, OP_FUNC_BFN, "func_bfn", Tb, Tf, Tn, T0) -OPCODE(108, OP_FUNC_BCN, "func_bcn", Tb, Tc, Tn, T0) +OPCODE(115, OP_FUNC_BDN, "func_bdn", Tb, Td, Tn, T0) +OPCODE(116, OP_FUNC_BFN, "func_bfn", Tb, Tf, 
Tn, T0) +OPCODE(117, OP_FUNC_BCN, "func_bcn", Tb, Tc, Tn, T0) // Reductions always have to be at the end - parts of the code // use > OP_REDUCTION to decide whether operation is a reduction -OPCODE(109, OP_REDUCTION, NULL, T0, T0, T0, T0) +OPCODE(118, OP_REDUCTION, NULL, T0, T0, T0, T0) /* Last argument in a reduction is the axis of the array the reduction should be applied along. */ -OPCODE(110, OP_SUM_IIN, "sum_iin", Ti, Ti, Tn, T0) -OPCODE(111, OP_SUM_LLN, "sum_lln", Tl, Tl, Tn, T0) -OPCODE(112, OP_SUM_FFN, "sum_ffn", Tf, Tf, Tn, T0) -OPCODE(113, OP_SUM_DDN, "sum_ddn", Td, Td, Tn, T0) -OPCODE(114, OP_SUM_CCN, "sum_ccn", Tc, Tc, Tn, T0) - -OPCODE(115, OP_PROD, NULL, T0, T0, T0, T0) -OPCODE(116, OP_PROD_IIN, "prod_iin", Ti, Ti, Tn, T0) -OPCODE(117, OP_PROD_LLN, "prod_lln", Tl, Tl, Tn, T0) -OPCODE(118, OP_PROD_FFN, "prod_ffn", Tf, Tf, Tn, T0) -OPCODE(119, OP_PROD_DDN, "prod_ddn", Td, Td, Tn, T0) -OPCODE(120, OP_PROD_CCN, "prod_ccn", Tc, Tc, Tn, T0) - -OPCODE(121, OP_MIN, NULL, T0, T0, T0, T0) -OPCODE(122, OP_MIN_IIN, "min_iin", Ti, Ti, Tn, T0) -OPCODE(123, OP_MIN_LLN, "min_lln", Tl, Tl, Tn, T0) -OPCODE(124, OP_MIN_FFN, "min_ffn", Tf, Tf, Tn, T0) -OPCODE(125, OP_MIN_DDN, "min_ddn", Td, Td, Tn, T0) - -OPCODE(126, OP_MAX, NULL, T0, T0, T0, T0) -OPCODE(127, OP_MAX_IIN, "max_iin", Ti, Ti, Tn, T0) -OPCODE(128, OP_MAX_LLN, "max_lln", Tl, Tl, Tn, T0) -OPCODE(129, OP_MAX_FFN, "max_ffn", Tf, Tf, Tn, T0) -OPCODE(130, OP_MAX_DDN, "max_ddn", Td, Td, Tn, T0) +OPCODE(119, OP_SUM_IIN, "sum_iin", Ti, Ti, Tn, T0) +OPCODE(120, OP_SUM_LLN, "sum_lln", Tl, Tl, Tn, T0) +OPCODE(121, OP_SUM_FFN, "sum_ffn", Tf, Tf, Tn, T0) +OPCODE(122, OP_SUM_DDN, "sum_ddn", Td, Td, Tn, T0) +OPCODE(123, OP_SUM_CCN, "sum_ccn", Tc, Tc, Tn, T0) + +OPCODE(124, OP_PROD, NULL, T0, T0, T0, T0) +OPCODE(125, OP_PROD_IIN, "prod_iin", Ti, Ti, Tn, T0) +OPCODE(126, OP_PROD_LLN, "prod_lln", Tl, Tl, Tn, T0) +OPCODE(127, OP_PROD_FFN, "prod_ffn", Tf, Tf, Tn, T0) +OPCODE(128, OP_PROD_DDN, "prod_ddn", Td, Td, Tn, T0) 
+OPCODE(129, OP_PROD_CCN, "prod_ccn", Tc, Tc, Tn, T0) + +OPCODE(130, OP_MIN, NULL, T0, T0, T0, T0) +OPCODE(131, OP_MIN_IIN, "min_iin", Ti, Ti, Tn, T0) +OPCODE(132, OP_MIN_LLN, "min_lln", Tl, Tl, Tn, T0) +OPCODE(133, OP_MIN_FFN, "min_ffn", Tf, Tf, Tn, T0) +OPCODE(134, OP_MIN_DDN, "min_ddn", Td, Td, Tn, T0) + +OPCODE(135, OP_MAX, NULL, T0, T0, T0, T0) +OPCODE(136, OP_MAX_IIN, "max_iin", Ti, Ti, Tn, T0) +OPCODE(137, OP_MAX_LLN, "max_lln", Tl, Tl, Tn, T0) +OPCODE(138, OP_MAX_FFN, "max_ffn", Tf, Tf, Tn, T0) +OPCODE(139, OP_MAX_DDN, "max_ddn", Td, Td, Tn, T0) /* When we get to 255, will maybe have to change code again @@ -190,4 +201,4 @@ When we get to 255, will maybe have to change code again other than unsigned char for OPCODE table) */ /* Should be the last opcode */ -OPCODE(131, OP_END, NULL, T0, T0, T0, T0) +OPCODE(140, OP_END, NULL, T0, T0, T0, T0) diff --git a/numexpr/tests/test_numexpr.py b/numexpr/tests/test_numexpr.py index 512f2ea..488f383 100644 --- a/numexpr/tests/test_numexpr.py +++ b/numexpr/tests/test_numexpr.py @@ -456,6 +456,29 @@ def test_boolean_operator(self): else: raise ValueError("should raise exception!") + x = np.ones(10, dtype='bool') + y = np.zeros(10, dtype='bool') + assert_array_equal(evaluate("x & y"), x & y) # and + assert_array_equal(evaluate("x ^ y"), x ^ y) # xor + assert_array_equal(evaluate("x | y"), x | y) # or + assert_array_equal(evaluate("~x"), ~x) # invert + + def test_bitwise_operators(self): + x = arange(10, dtype='i4') + y = arange(10, dtype='i4') + assert_array_equal(evaluate("x & y"), x & y) # and + assert_array_equal(evaluate("x ^ y"), x ^ y) # xor + assert_array_equal(evaluate("x | y"), x | y) # or + assert_array_equal(evaluate("~x"), ~x) # invert + + x = arange(10, dtype='i8') + y = arange(10, dtype='i8') + assert_array_equal(evaluate("x & y"), x & y) # and + assert_array_equal(evaluate("x ^ y"), x ^ y) # xor + assert_array_equal(evaluate("x | y"), x | y) # or + assert_array_equal(evaluate("~x"), ~x) # invert + + def 
test_rational_expr(self): a = arange(1e6) b = arange(1e6) * 0.1 From d02cc8d3702105921a7c44953bc3bfe8f9d6a3bb Mon Sep 17 00:00:00 2001 From: Luke Shaw Date: Mon, 22 Sep 2025 17:17:10 +0200 Subject: [PATCH 093/166] Added floor division and log2 --- numexpr/complex_functions.hpp | 11 ++ numexpr/expressions.py | 2 + numexpr/functions.hpp | 3 + numexpr/interp_body.cpp | 4 + numexpr/msvc_function_stubs.hpp | 5 + numexpr/necompiler.py | 1 + numexpr/opcodes.hpp | 241 ++++++++++++++++---------------- numexpr/tests/test_numexpr.py | 14 +- 8 files changed, 159 insertions(+), 122 deletions(-) diff --git a/numexpr/complex_functions.hpp b/numexpr/complex_functions.hpp index fd92068..2c275b6 100644 --- a/numexpr/complex_functions.hpp +++ b/numexpr/complex_functions.hpp @@ -347,6 +347,8 @@ nc_cosh(std::complex<double> *x, std::complex<double> *r) #define M_LOG10_E 0.434294481903251827651128918916605082294397 +#define M_LOG2_E 1.44269504088896340735992468100189213742664 + static void nc_log10(std::complex<double> *x, std::complex<double> *r) @@ -357,6 +359,15 @@ nc_log10(std::complex<double> *x, std::complex<double> *r) return; } +static void +nc_log2(std::complex<double> *x, std::complex<double> *r) +{ + nc_log(x, r); + r->real(r->real() * M_LOG2_E); + r->imag(r->imag() * M_LOG2_E); + return; +} + static void nc_sin(std::complex<double> *x, std::complex<double> *r) { diff --git a/numexpr/expressions.py b/numexpr/expressions.py index ce724bf..ea2ab85 100644 --- a/numexpr/expressions.py +++ b/numexpr/expressions.py @@ -352,6 +352,7 @@ def multiply(x, y): 'log': func(numpy.log, 'float'), 'log1p': func(numpy.log1p, 'float'), 'log10': func(numpy.log10, 'float'), + 'log2': func(numpy.log2, 'float'), 'exp': func(numpy.exp, 'float'), 'expm1': func(numpy.expm1, 'float'), @@ -441,6 +442,7 @@ def __bool__(self): __mul__ = __rmul__ = binop('mul') __truediv__ = truediv_op __rtruediv__ = rtruediv_op + __floordiv__ = binop("floordiv") __pow__ = pow_op __rpow__ = binop('pow', reversed=True) __mod__ = binop('mod') diff --git a/numexpr/functions.hpp b/numexpr/functions.hpp
index 17310d6..d666e34 100644 --- a/numexpr/functions.hpp +++ b/numexpr/functions.hpp @@ -30,6 +30,7 @@ FUNC_FF(FUNC_ARCTANH_FF, "arctanh_ff", atanhf, atanhf2, vsAtanh) FUNC_FF(FUNC_LOG_FF, "log_ff", logf, logf2, vsLn) FUNC_FF(FUNC_LOG1P_FF, "log1p_ff", log1pf, log1pf2, vsLog1p) FUNC_FF(FUNC_LOG10_FF, "log10_ff", log10f, log10f2, vsLog10) +FUNC_FF(FUNC_LOG2_FF, "log2_ff", log2f, log2f2, vsLog2) FUNC_FF(FUNC_EXP_FF, "exp_ff", expf, expf2, vsExp) FUNC_FF(FUNC_EXPM1_FF, "expm1_ff", expm1f, expm1f2, vsExpm1) FUNC_FF(FUNC_ABS_FF, "absolute_ff", fabsf, fabsf2, vsAbs) @@ -74,6 +75,7 @@ FUNC_DD(FUNC_ARCTANH_DD, "arctanh_dd", atanh, vdAtanh) FUNC_DD(FUNC_LOG_DD, "log_dd", log, vdLn) FUNC_DD(FUNC_LOG1P_DD, "log1p_dd", log1p, vdLog1p) FUNC_DD(FUNC_LOG10_DD, "log10_dd", log10, vdLog10) +FUNC_DD(FUNC_LOG2_DD, "log2_dd", log2, vdLog2) FUNC_DD(FUNC_EXP_DD, "exp_dd", exp, vdExp) FUNC_DD(FUNC_EXPM1_DD, "expm1_dd", expm1, vdExpm1) FUNC_DD(FUNC_ABS_DD, "absolute_dd", fabs, vdAbs) @@ -146,6 +148,7 @@ FUNC_CC(FUNC_ARCTANH_CC, "arctanh_cc", nc_atanh, vzAtanh) FUNC_CC(FUNC_LOG_CC, "log_cc", nc_log, vzLn) FUNC_CC(FUNC_LOG1P_CC, "log1p_cc", nc_log1p, vzLog1p) FUNC_CC(FUNC_LOG10_CC, "log10_cc", nc_log10, vzLog10) +FUNC_CC(FUNC_LOG2_CC, "log2_cc", nc_log2, vzLog2) FUNC_CC(FUNC_EXP_CC, "exp_cc", nc_exp, vzExp) FUNC_CC(FUNC_EXPM1_CC, "expm1_cc", nc_expm1, vzExpm1) FUNC_CC(FUNC_ABS_CC, "absolute_cc", nc_abs, vzAbs_) diff --git a/numexpr/interp_body.cpp b/numexpr/interp_body.cpp index e207e1a..e82e47b 100644 --- a/numexpr/interp_body.cpp +++ b/numexpr/interp_body.cpp @@ -265,6 +265,7 @@ case OP_DIV_III: VEC_ARG2(i_dest = i2 ? (i1 / i2) : 0); case OP_POW_III: VEC_ARG2(i_dest = (i2 < 0) ? (1 / i1) : (int)pow((double)i1, i2)); case OP_MOD_III: VEC_ARG2(i_dest = i2 == 0 ? 0 :((i1 % i2) + i2) % i2); + case OP_FLOORDIV_III: VEC_ARG2(i_dest = i2 ? 
(i1 / i2) - ((i1 % i2 != 0) && (i1 < 0 != i2 < 0)) : 0); case OP_LSHIFT_III: VEC_ARG2(i_dest = i1 << i2); case OP_RSHIFT_III: VEC_ARG2(i_dest = i1 >> i2); @@ -290,6 +291,7 @@ case OP_POW_LLL: VEC_ARG2(l_dest = (l2 < 0) ? (1 / l1) : (long long)llround(pow((long double)l1, (long double)l2))); #endif case OP_MOD_LLL: VEC_ARG2(l_dest = l2 == 0 ? 0 :((l1 % l2) + l2) % l2); + case OP_FLOORDIV_LLL: VEC_ARG2(l_dest = l2 ? (l1 / l2) - ((l1 % l2 != 0) && (l1 < 0 != l2 < 0)): 0); case OP_LSHIFT_LLL: VEC_ARG2(l_dest = l1 << l2); case OP_RSHIFT_LLL: VEC_ARG2(l_dest = l1 >> l2); @@ -324,6 +326,7 @@ VEC_ARG2(f_dest = powf(f1, f2)); #endif case OP_MOD_FFF: VEC_ARG2(f_dest = f1 - floorf(f1/f2) * f2); + case OP_FLOORDIV_FFF: VEC_ARG2(f_dest = floorf(f1/f2)); case OP_SQRT_FF: #ifdef USE_VML @@ -375,6 +378,7 @@ VEC_ARG2(d_dest = pow(d1, d2)); #endif case OP_MOD_DDD: VEC_ARG2(d_dest = d1 - floor(d1/d2) * d2); + case OP_FLOORDIV_DDD: VEC_ARG2(d_dest = floor(d1/d2)); case OP_SQRT_DD: #ifdef USE_VML diff --git a/numexpr/msvc_function_stubs.hpp b/numexpr/msvc_function_stubs.hpp index 27ea54b..02eef6f 100644 --- a/numexpr/msvc_function_stubs.hpp +++ b/numexpr/msvc_function_stubs.hpp @@ -35,6 +35,7 @@ #define logf(x) ((float)log((double)(x))) #define log1pf(x) ((float)log1p((double)(x))) #define log10f(x) ((float)log10((double)(x))) +#define log2f(x) ((float)log2((double)(x))) #define expf(x) ((float)exp((double)(x))) #define expm1f(x) ((float)expm1((double)(x))) #define fabsf(x) ((float)fabs((double)(x))) @@ -123,6 +124,10 @@ inline float log10f2(float x) { return log10f(x); } +inline float log2f2(float x) { + return log2f(x); +} + inline float expf2(float x) { return expf(x); } diff --git a/numexpr/necompiler.py b/numexpr/necompiler.py index aea9dfc..1417029 100644 --- a/numexpr/necompiler.py +++ b/numexpr/necompiler.py @@ -62,6 +62,7 @@ "log", "log1p", "log10", + "log2", "exp", "expm1", "absolute", diff --git a/numexpr/opcodes.hpp b/numexpr/opcodes.hpp index dafd89c..703525b 100644 --- 
a/numexpr/opcodes.hpp +++ b/numexpr/opcodes.hpp @@ -66,134 +66,141 @@ OPCODE(34, OP_MUL_III, "mul_iii", Ti, Ti, Ti, T0) OPCODE(35, OP_DIV_III, "div_iii", Ti, Ti, Ti, T0) OPCODE(36, OP_POW_III, "pow_iii", Ti, Ti, Ti, T0) OPCODE(37, OP_MOD_III, "mod_iii", Ti, Ti, Ti, T0) +OPCODE(38, OP_FLOORDIV_III, "floordiv_iii", Ti, Ti, Ti, T0) -OPCODE(38, OP_LSHIFT_III, "lshift_iii", Ti, Ti, Ti, T0) -OPCODE(39, OP_RSHIFT_III, "rshift_iii", Ti, Ti, Ti, T0) -OPCODE(40, OP_WHERE_IBII, "where_ibii", Ti, Tb, Ti, Ti) +OPCODE(39, OP_LSHIFT_III, "lshift_iii", Ti, Ti, Ti, T0) +OPCODE(40, OP_RSHIFT_III, "rshift_iii", Ti, Ti, Ti, T0) + +OPCODE(41, OP_WHERE_IBII, "where_ibii", Ti, Tb, Ti, Ti) // Bitwise ops -OPCODE(41, OP_INVERT_II, "invert_ii", Ti, Ti, T0, T0) -OPCODE(42, OP_AND_III, "and_iii", Ti, Ti, Ti, T0) -OPCODE(43, OP_OR_III, "or_iii", Ti, Ti, Ti, T0) -OPCODE(44, OP_XOR_III, "xor_iii", Ti, Ti, Ti, T0) - -OPCODE(45, OP_CAST_LI, "cast_li", Tl, Ti, T0, T0) -OPCODE(46, OP_COPY_LL, "copy_ll", Tl, Tl, T0, T0) -OPCODE(47, OP_ONES_LIKE_LL, "ones_like_ll", Tl, T0, T0, T0) -OPCODE(48, OP_NEG_LL, "neg_ll", Tl, Tl, T0, T0) -OPCODE(49, OP_ADD_LLL, "add_lll", Tl, Tl, Tl, T0) -OPCODE(50, OP_SUB_LLL, "sub_lll", Tl, Tl, Tl, T0) -OPCODE(51, OP_MUL_LLL, "mul_lll", Tl, Tl, Tl, T0) -OPCODE(52, OP_DIV_LLL, "div_lll", Tl, Tl, Tl, T0) -OPCODE(53, OP_POW_LLL, "pow_lll", Tl, Tl, Tl, T0) -OPCODE(54, OP_MOD_LLL, "mod_lll", Tl, Tl, Tl, T0) - -OPCODE(55, OP_LSHIFT_LLL, "lshift_lll", Tl, Tl, Tl, T0) -OPCODE(56, OP_RSHIFT_LLL, "rshift_lll", Tl, Tl, Tl, T0) - -OPCODE(57, OP_WHERE_LBLL, "where_lbll", Tl, Tb, Tl, Tl) +OPCODE(42, OP_INVERT_II, "invert_ii", Ti, Ti, T0, T0) +OPCODE(43, OP_AND_III, "and_iii", Ti, Ti, Ti, T0) +OPCODE(44, OP_OR_III, "or_iii", Ti, Ti, Ti, T0) +OPCODE(45, OP_XOR_III, "xor_iii", Ti, Ti, Ti, T0) + +OPCODE(46, OP_CAST_LI, "cast_li", Tl, Ti, T0, T0) +OPCODE(47, OP_COPY_LL, "copy_ll", Tl, Tl, T0, T0) +OPCODE(48, OP_ONES_LIKE_LL, "ones_like_ll", Tl, T0, T0, T0) +OPCODE(49, OP_NEG_LL, "neg_ll", Tl, 
Tl, T0, T0) +OPCODE(50, OP_ADD_LLL, "add_lll", Tl, Tl, Tl, T0) +OPCODE(51, OP_SUB_LLL, "sub_lll", Tl, Tl, Tl, T0) +OPCODE(52, OP_MUL_LLL, "mul_lll", Tl, Tl, Tl, T0) +OPCODE(53, OP_DIV_LLL, "div_lll", Tl, Tl, Tl, T0) +OPCODE(54, OP_POW_LLL, "pow_lll", Tl, Tl, Tl, T0) +OPCODE(55, OP_MOD_LLL, "mod_lll", Tl, Tl, Tl, T0) +OPCODE(56, OP_FLOORDIV_LLL, "floordiv_lll", Tl, Tl, Tl, T0) + +OPCODE(57, OP_LSHIFT_LLL, "lshift_lll", Tl, Tl, Tl, T0) +OPCODE(58, OP_RSHIFT_LLL, "rshift_lll", Tl, Tl, Tl, T0) + +OPCODE(59, OP_WHERE_LBLL, "where_lbll", Tl, Tb, Tl, Tl) // Bitwise ops -OPCODE(58, OP_INVERT_LL, "invert_ll", Tl, Tl, T0, T0) -OPCODE(59, OP_AND_LLL, "and_lll", Tl, Tl, Tl, T0) -OPCODE(60, OP_OR_LLL, "or_lll", Tl, Tl, Tl, T0) -OPCODE(61, OP_XOR_LLL, "xor_lll", Tl, Tl, Tl, T0) - -OPCODE(62, OP_CAST_FI, "cast_fi", Tf, Ti, T0, T0) -OPCODE(63, OP_CAST_FL, "cast_fl", Tf, Tl, T0, T0) -OPCODE(64, OP_COPY_FF, "copy_ff", Tf, Tf, T0, T0) -OPCODE(65, OP_ONES_LIKE_FF, "ones_like_ff", Tf, T0, T0, T0) -OPCODE(66, OP_NEG_FF, "neg_ff", Tf, Tf, T0, T0) -OPCODE(67, OP_ADD_FFF, "add_fff", Tf, Tf, Tf, T0) -OPCODE(68, OP_SUB_FFF, "sub_fff", Tf, Tf, Tf, T0) -OPCODE(69, OP_MUL_FFF, "mul_fff", Tf, Tf, Tf, T0) -OPCODE(70, OP_DIV_FFF, "div_fff", Tf, Tf, Tf, T0) -OPCODE(71, OP_POW_FFF, "pow_fff", Tf, Tf, Tf, T0) -OPCODE(72, OP_MOD_FFF, "mod_fff", Tf, Tf, Tf, T0) -OPCODE(73, OP_SQRT_FF, "sqrt_ff", Tf, Tf, T0, T0) -OPCODE(74, OP_WHERE_FBFF, "where_fbff", Tf, Tb, Tf, Tf) -OPCODE(75, OP_FUNC_FFN, "func_ffn", Tf, Tf, Tn, T0) -OPCODE(76, OP_FUNC_FFFN, "func_fffn", Tf, Tf, Tf, Tn) - -OPCODE(77, OP_CAST_DI, "cast_di", Td, Ti, T0, T0) -OPCODE(78, OP_CAST_DL, "cast_dl", Td, Tl, T0, T0) -OPCODE(79, OP_CAST_DF, "cast_df", Td, Tf, T0, T0) -OPCODE(80, OP_COPY_DD, "copy_dd", Td, Td, T0, T0) -OPCODE(81, OP_ONES_LIKE_DD, "ones_like_dd", Td, T0, T0, T0) -OPCODE(82, OP_NEG_DD, "neg_dd", Td, Td, T0, T0) -OPCODE(83, OP_ADD_DDD, "add_ddd", Td, Td, Td, T0) -OPCODE(84, OP_SUB_DDD, "sub_ddd", Td, Td, Td, T0) -OPCODE(85, 
OP_MUL_DDD, "mul_ddd", Td, Td, Td, T0) -OPCODE(86, OP_DIV_DDD, "div_ddd", Td, Td, Td, T0) -OPCODE(87, OP_POW_DDD, "pow_ddd", Td, Td, Td, T0) -OPCODE(88, OP_MOD_DDD, "mod_ddd", Td, Td, Td, T0) -OPCODE(89, OP_SQRT_DD, "sqrt_dd", Td, Td, T0, T0) -OPCODE(90, OP_WHERE_DBDD, "where_dbdd", Td, Tb, Td, Td) -OPCODE(91, OP_FUNC_DDN, "func_ddn", Td, Td, Tn, T0) -OPCODE(92, OP_FUNC_DDDN, "func_dddn", Td, Td, Td, Tn) - -OPCODE(93, OP_EQ_BCC, "eq_bcc", Tb, Tc, Tc, T0) -OPCODE(94, OP_NE_BCC, "ne_bcc", Tb, Tc, Tc, T0) - -OPCODE(95, OP_CAST_CI, "cast_ci", Tc, Ti, T0, T0) -OPCODE(96, OP_CAST_CL, "cast_cl", Tc, Tl, T0, T0) -OPCODE(97, OP_CAST_CF, "cast_cf", Tc, Tf, T0, T0) -OPCODE(98, OP_CAST_CD, "cast_cd", Tc, Td, T0, T0) -OPCODE(99, OP_ONES_LIKE_CC, "ones_like_cc", Tc, T0, T0, T0) -OPCODE(100, OP_COPY_CC, "copy_cc", Tc, Tc, T0, T0) -OPCODE(101, OP_NEG_CC, "neg_cc", Tc, Tc, T0, T0) -OPCODE(102, OP_ADD_CCC, "add_ccc", Tc, Tc, Tc, T0) -OPCODE(103, OP_SUB_CCC, "sub_ccc", Tc, Tc, Tc, T0) -OPCODE(104, OP_MUL_CCC, "mul_ccc", Tc, Tc, Tc, T0) -OPCODE(105, OP_DIV_CCC, "div_ccc", Tc, Tc, Tc, T0) -OPCODE(106, OP_WHERE_CBCC, "where_cbcc", Tc, Tb, Tc, Tc) -OPCODE(107, OP_FUNC_CCN, "func_ccn", Tc, Tc, Tn, T0) -OPCODE(108, OP_FUNC_CCCN, "func_cccn", Tc, Tc, Tc, Tn) - -OPCODE(109, OP_REAL_DC, "real_dc", Td, Tc, T0, T0) -OPCODE(110, OP_IMAG_DC, "imag_dc", Td, Tc, T0, T0) -OPCODE(111, OP_COMPLEX_CDD, "complex_cdd", Tc, Td, Td, T0) - -OPCODE(112, OP_COPY_SS, "copy_ss", Ts, Ts, T0, T0) - -OPCODE(113, OP_WHERE_BBBB, "where_bbbb", Tb, Tb, Tb, Tb) - -OPCODE(114, OP_CONTAINS_BSS, "contains_bss", Tb, Ts, Ts, T0) +OPCODE(60, OP_INVERT_LL, "invert_ll", Tl, Tl, T0, T0) +OPCODE(61, OP_AND_LLL, "and_lll", Tl, Tl, Tl, T0) +OPCODE(62, OP_OR_LLL, "or_lll", Tl, Tl, Tl, T0) +OPCODE(63, OP_XOR_LLL, "xor_lll", Tl, Tl, Tl, T0) + +OPCODE(64, OP_CAST_FI, "cast_fi", Tf, Ti, T0, T0) +OPCODE(65, OP_CAST_FL, "cast_fl", Tf, Tl, T0, T0) +OPCODE(66, OP_COPY_FF, "copy_ff", Tf, Tf, T0, T0) +OPCODE(67, OP_ONES_LIKE_FF, 
"ones_like_ff", Tf, T0, T0, T0) +OPCODE(68, OP_NEG_FF, "neg_ff", Tf, Tf, T0, T0) +OPCODE(69, OP_ADD_FFF, "add_fff", Tf, Tf, Tf, T0) +OPCODE(70, OP_SUB_FFF, "sub_fff", Tf, Tf, Tf, T0) +OPCODE(71, OP_MUL_FFF, "mul_fff", Tf, Tf, Tf, T0) +OPCODE(72, OP_DIV_FFF, "div_fff", Tf, Tf, Tf, T0) +OPCODE(73, OP_POW_FFF, "pow_fff", Tf, Tf, Tf, T0) +OPCODE(74, OP_MOD_FFF, "mod_fff", Tf, Tf, Tf, T0) +OPCODE(75, OP_FLOORDIV_FFF, "floordiv_fff", Tf, Tf, Tf, T0) +OPCODE(76, OP_SQRT_FF, "sqrt_ff", Tf, Tf, T0, T0) +OPCODE(77, OP_WHERE_FBFF, "where_fbff", Tf, Tb, Tf, Tf) + +OPCODE(78, OP_FUNC_FFN, "func_ffn", Tf, Tf, Tn, T0) +OPCODE(79, OP_FUNC_FFFN, "func_fffn", Tf, Tf, Tf, Tn) + +OPCODE(80, OP_CAST_DI, "cast_di", Td, Ti, T0, T0) +OPCODE(81, OP_CAST_DL, "cast_dl", Td, Tl, T0, T0) +OPCODE(82, OP_CAST_DF, "cast_df", Td, Tf, T0, T0) +OPCODE(83, OP_COPY_DD, "copy_dd", Td, Td, T0, T0) +OPCODE(84, OP_ONES_LIKE_DD, "ones_like_dd", Td, T0, T0, T0) +OPCODE(85, OP_NEG_DD, "neg_dd", Td, Td, T0, T0) +OPCODE(86, OP_ADD_DDD, "add_ddd", Td, Td, Td, T0) +OPCODE(87, OP_SUB_DDD, "sub_ddd", Td, Td, Td, T0) +OPCODE(88, OP_MUL_DDD, "mul_ddd", Td, Td, Td, T0) +OPCODE(89, OP_DIV_DDD, "div_ddd", Td, Td, Td, T0) +OPCODE(90, OP_POW_DDD, "pow_ddd", Td, Td, Td, T0) +OPCODE(91, OP_MOD_DDD, "mod_ddd", Td, Td, Td, T0) +OPCODE(92, OP_FLOORDIV_DDD, "floordiv_ddd", Td, Td, Td, T0) + +OPCODE(93, OP_SQRT_DD, "sqrt_dd", Td, Td, T0, T0) +OPCODE(94, OP_WHERE_DBDD, "where_dbdd", Td, Tb, Td, Td) +OPCODE(95, OP_FUNC_DDN, "func_ddn", Td, Td, Tn, T0) +OPCODE(96, OP_FUNC_DDDN, "func_dddn", Td, Td, Td, Tn) + +OPCODE(97, OP_EQ_BCC, "eq_bcc", Tb, Tc, Tc, T0) +OPCODE(98, OP_NE_BCC, "ne_bcc", Tb, Tc, Tc, T0) + +OPCODE(99, OP_CAST_CI, "cast_ci", Tc, Ti, T0, T0) +OPCODE(100, OP_CAST_CL, "cast_cl", Tc, Tl, T0, T0) +OPCODE(101, OP_CAST_CF, "cast_cf", Tc, Tf, T0, T0) +OPCODE(102, OP_CAST_CD, "cast_cd", Tc, Td, T0, T0) +OPCODE(103, OP_ONES_LIKE_CC, "ones_like_cc", Tc, T0, T0, T0) +OPCODE(104, OP_COPY_CC, "copy_cc", Tc, Tc, T0, T0) 
+OPCODE(105, OP_NEG_CC, "neg_cc", Tc, Tc, T0, T0) +OPCODE(106, OP_ADD_CCC, "add_ccc", Tc, Tc, Tc, T0) +OPCODE(107, OP_SUB_CCC, "sub_ccc", Tc, Tc, Tc, T0) +OPCODE(108, OP_MUL_CCC, "mul_ccc", Tc, Tc, Tc, T0) +OPCODE(109, OP_DIV_CCC, "div_ccc", Tc, Tc, Tc, T0) +OPCODE(110, OP_WHERE_CBCC, "where_cbcc", Tc, Tb, Tc, Tc) +OPCODE(111, OP_FUNC_CCN, "func_ccn", Tc, Tc, Tn, T0) +OPCODE(112, OP_FUNC_CCCN, "func_cccn", Tc, Tc, Tc, Tn) + +OPCODE(113, OP_REAL_DC, "real_dc", Td, Tc, T0, T0) +OPCODE(114, OP_IMAG_DC, "imag_dc", Td, Tc, T0, T0) +OPCODE(115, OP_COMPLEX_CDD, "complex_cdd", Tc, Td, Td, T0) + +OPCODE(116, OP_COPY_SS, "copy_ss", Ts, Ts, T0, T0) + +OPCODE(117, OP_WHERE_BBBB, "where_bbbb", Tb, Tb, Tb, Tb) + +OPCODE(118, OP_CONTAINS_BSS, "contains_bss", Tb, Ts, Ts, T0) //Boolean outputs -OPCODE(115, OP_FUNC_BDN, "func_bdn", Tb, Td, Tn, T0) -OPCODE(116, OP_FUNC_BFN, "func_bfn", Tb, Tf, Tn, T0) -OPCODE(117, OP_FUNC_BCN, "func_bcn", Tb, Tc, Tn, T0) +OPCODE(119, OP_FUNC_BDN, "func_bdn", Tb, Td, Tn, T0) +OPCODE(120, OP_FUNC_BFN, "func_bfn", Tb, Tf, Tn, T0) +OPCODE(121, OP_FUNC_BCN, "func_bcn", Tb, Tc, Tn, T0) // Reductions always have to be at the end - parts of the code // use > OP_REDUCTION to decide whether operation is a reduction -OPCODE(118, OP_REDUCTION, NULL, T0, T0, T0, T0) +OPCODE(122, OP_REDUCTION, NULL, T0, T0, T0, T0) /* Last argument in a reduction is the axis of the array the reduction should be applied along. 
*/ -OPCODE(119, OP_SUM_IIN, "sum_iin", Ti, Ti, Tn, T0) -OPCODE(120, OP_SUM_LLN, "sum_lln", Tl, Tl, Tn, T0) -OPCODE(121, OP_SUM_FFN, "sum_ffn", Tf, Tf, Tn, T0) -OPCODE(122, OP_SUM_DDN, "sum_ddn", Td, Td, Tn, T0) -OPCODE(123, OP_SUM_CCN, "sum_ccn", Tc, Tc, Tn, T0) - -OPCODE(124, OP_PROD, NULL, T0, T0, T0, T0) -OPCODE(125, OP_PROD_IIN, "prod_iin", Ti, Ti, Tn, T0) -OPCODE(126, OP_PROD_LLN, "prod_lln", Tl, Tl, Tn, T0) -OPCODE(127, OP_PROD_FFN, "prod_ffn", Tf, Tf, Tn, T0) -OPCODE(128, OP_PROD_DDN, "prod_ddn", Td, Td, Tn, T0) -OPCODE(129, OP_PROD_CCN, "prod_ccn", Tc, Tc, Tn, T0) - -OPCODE(130, OP_MIN, NULL, T0, T0, T0, T0) -OPCODE(131, OP_MIN_IIN, "min_iin", Ti, Ti, Tn, T0) -OPCODE(132, OP_MIN_LLN, "min_lln", Tl, Tl, Tn, T0) -OPCODE(133, OP_MIN_FFN, "min_ffn", Tf, Tf, Tn, T0) -OPCODE(134, OP_MIN_DDN, "min_ddn", Td, Td, Tn, T0) - -OPCODE(135, OP_MAX, NULL, T0, T0, T0, T0) -OPCODE(136, OP_MAX_IIN, "max_iin", Ti, Ti, Tn, T0) -OPCODE(137, OP_MAX_LLN, "max_lln", Tl, Tl, Tn, T0) -OPCODE(138, OP_MAX_FFN, "max_ffn", Tf, Tf, Tn, T0) -OPCODE(139, OP_MAX_DDN, "max_ddn", Td, Td, Tn, T0) +OPCODE(123, OP_SUM_IIN, "sum_iin", Ti, Ti, Tn, T0) +OPCODE(124, OP_SUM_LLN, "sum_lln", Tl, Tl, Tn, T0) +OPCODE(125, OP_SUM_FFN, "sum_ffn", Tf, Tf, Tn, T0) +OPCODE(126, OP_SUM_DDN, "sum_ddn", Td, Td, Tn, T0) +OPCODE(127, OP_SUM_CCN, "sum_ccn", Tc, Tc, Tn, T0) + +OPCODE(128, OP_PROD, NULL, T0, T0, T0, T0) +OPCODE(129, OP_PROD_IIN, "prod_iin", Ti, Ti, Tn, T0) +OPCODE(130, OP_PROD_LLN, "prod_lln", Tl, Tl, Tn, T0) +OPCODE(131, OP_PROD_FFN, "prod_ffn", Tf, Tf, Tn, T0) +OPCODE(132, OP_PROD_DDN, "prod_ddn", Td, Td, Tn, T0) +OPCODE(133, OP_PROD_CCN, "prod_ccn", Tc, Tc, Tn, T0) + +OPCODE(134, OP_MIN, NULL, T0, T0, T0, T0) +OPCODE(135, OP_MIN_IIN, "min_iin", Ti, Ti, Tn, T0) +OPCODE(136, OP_MIN_LLN, "min_lln", Tl, Tl, Tn, T0) +OPCODE(137, OP_MIN_FFN, "min_ffn", Tf, Tf, Tn, T0) +OPCODE(138, OP_MIN_DDN, "min_ddn", Td, Td, Tn, T0) + +OPCODE(139, OP_MAX, NULL, T0, T0, T0, T0) +OPCODE(140, OP_MAX_IIN, "max_iin", Ti, 
Ti, Tn, T0) +OPCODE(141, OP_MAX_LLN, "max_lln", Tl, Tl, Tn, T0) +OPCODE(142, OP_MAX_FFN, "max_ffn", Tf, Tf, Tn, T0) +OPCODE(143, OP_MAX_DDN, "max_ddn", Td, Td, Tn, T0) /* When we get to 255, will maybe have to change code again @@ -201,4 +208,4 @@ When we get to 255, will maybe have to change code again other than unsigned char for OPCODE table) */ /* Should be the last opcode */ -OPCODE(140, OP_END, NULL, T0, T0, T0, T0) +OPCODE(144, OP_END, NULL, T0, T0, T0, T0) diff --git a/numexpr/tests/test_numexpr.py b/numexpr/tests/test_numexpr.py index 488f383..e3d4a26 100644 --- a/numexpr/tests/test_numexpr.py +++ b/numexpr/tests/test_numexpr.py @@ -25,9 +25,9 @@ from numpy import (allclose, arange, arccos, arccosh, arcsin, arcsinh, arctan, arctan2, arctanh, array, array_equal, cdouble, ceil, conj, copy, cos, cosh, empty, exp, expm1, float64, floor, fmod, - int32, int64, isinf, isnan, linspace, log, log1p, log10, - ones_like, prod, ravel, rec, shape, sin, sinh, sqrt, sum, - tan, tanh, uint16, where, zeros) + int32, int64, isinf, isnan, linspace, log, log1p, log2, + log10, ones_like, prod, ravel, rec, shape, sin, sinh, sqrt, + sum, tan, tanh, uint16, where, zeros) from numpy.testing import (assert_allclose, assert_array_almost_equal, assert_array_equal, assert_equal) @@ -689,11 +689,15 @@ def test_negative_mod(self): n = np.array([-360, -360, -360, 360, 360, 360], dtype=np.int32) out_i = evaluate('a % n') assert_equal(out_i, np.mod(a, n)) + main_i = evaluate('a // n') + assert_equal(main_i, a // n) b = a.astype(np.int64) m = n.astype(np.int64) out_l = evaluate('b % m') assert_equal(out_l, np.mod(b, m)) + main_l = evaluate('b // m') + assert_equal(main_l, a // m) def test_negative_power_scalar(self): # Test for issue #428, where the power is negative and the base is an @@ -709,7 +713,7 @@ def test_negative_power_scalar(self): def test_ex_uses_vml(self): vml_funcs = [ "sin", "cos", "tan", "arcsin", "arccos", "arctan", "sinh", "cosh", "tanh", "arcsinh", "arccosh", "arctanh", - 
"log", "log1p","log10", "exp", "expm1", "abs", "conj", + "log", "log1p","log10", "log2", "exp", "expm1", "abs", "conj", "arctan2", "fmod"] for func in vml_funcs: strexpr = func+'(a)' @@ -803,7 +807,7 @@ def test_changing_nthreads_01_dec(self): for func in ['copy', 'ones_like', 'sqrt', 'sin', 'cos', 'tan', 'arcsin', 'arccos', 'arctan', 'sinh', 'cosh', 'tanh', 'arcsinh', 'arccosh', 'arctanh', - 'log', 'log1p', 'log10', 'exp', 'expm1', 'abs', 'conj', + 'log', 'log1p', 'log10', "log2", 'exp', 'expm1', 'abs', 'conj', 'ceil', 'floor']: func1tests.append("a + %s(b+c)" % func) tests.append(('1_ARG_FUNCS', func1tests)) From 07b54adfa87997da30caa5183dc2d2d057a5e92f Mon Sep 17 00:00:00 2001 From: Luke Shaw Date: Mon, 22 Sep 2025 20:01:25 +0200 Subject: [PATCH 094/166] Add hypot --- numexpr/expressions.py | 1 + numexpr/functions.hpp | 2 ++ numexpr/interpreter.cpp | 10 ++++++++++ numexpr/msvc_function_stubs.hpp | 6 ++++++ numexpr/necompiler.py | 1 + numexpr/tests/test_numexpr.py | 11 ++++++----- 6 files changed, 26 insertions(+), 5 deletions(-) diff --git a/numexpr/expressions.py b/numexpr/expressions.py index ea2ab85..8456089 100644 --- a/numexpr/expressions.py +++ b/numexpr/expressions.py @@ -348,6 +348,7 @@ def multiply(x, y): 'fmod': func(numpy.fmod, 'float'), 'arctan2': func(numpy.arctan2, 'float'), + 'hypot': func(numpy.hypot, 'float'), 'log': func(numpy.log, 'float'), 'log1p': func(numpy.log1p, 'float'), diff --git a/numexpr/functions.hpp b/numexpr/functions.hpp index d666e34..74ca95c 100644 --- a/numexpr/functions.hpp +++ b/numexpr/functions.hpp @@ -49,6 +49,7 @@ FUNC_FF(FUNC_FF_LAST, NULL, NULL, NULL, NULL) #endif FUNC_FFF(FUNC_FMOD_FFF, "fmod_fff", fmodf, fmodf2, vsfmod) FUNC_FFF(FUNC_ARCTAN2_FFF, "arctan2_fff", atan2f, atan2f2, vsAtan2) +FUNC_FFF(FUNC_HYPOT_FFF, "hypot_fff", hypotf, hypotf2, vsHypot) FUNC_FFF(FUNC_FFF_LAST, NULL, NULL, NULL, NULL) #ifdef ELIDE_FUNC_FFF #undef ELIDE_FUNC_FFF @@ -122,6 +123,7 @@ FUNC_BF(FUNC_BF_LAST, NULL, NULL, NULL, NULL) #endif 
FUNC_DDD(FUNC_FMOD_DDD, "fmod_ddd", fmod, vdfmod) FUNC_DDD(FUNC_ARCTAN2_DDD, "arctan2_ddd", atan2, vdAtan2) +FUNC_DDD(FUNC_HYPOT_DDD, "hypot_ddd", hypot, vdHypot) FUNC_DDD(FUNC_DDD_LAST, NULL, NULL, NULL) #ifdef ELIDE_FUNC_DDD #undef ELIDE_FUNC_DDD diff --git a/numexpr/interpreter.cpp b/numexpr/interpreter.cpp index b724295..4e4b181 100644 --- a/numexpr/interpreter.cpp +++ b/numexpr/interpreter.cpp @@ -413,6 +413,16 @@ static void vzLog1p(MKL_INT n, const MKL_Complex16* x1, MKL_Complex16* dest) vzLn(n, dest, dest); }; +static void vzLog2(MKL_INT n, const MKL_Complex16* x1, MKL_Complex16* dest) +{ + MKL_INT j; + vzLn(n, x1, dest); + for (j=0; j Date: Mon, 22 Sep 2025 20:58:40 +0200 Subject: [PATCH 095/166] Added trunc, round, nextafter --- numexpr/complex_functions.hpp | 7 +++++++ numexpr/expressions.py | 5 ++++- numexpr/functions.hpp | 14 ++++++++++++-- numexpr/interpreter.cpp | 11 ++++++++++- numexpr/msvc_function_stubs.hpp | 15 +++++++++++++++ numexpr/necompiler.py | 5 ++++- numexpr/tests/test_numexpr.py | 12 +++++++----- 7 files changed, 59 insertions(+), 10 deletions(-) diff --git a/numexpr/complex_functions.hpp b/numexpr/complex_functions.hpp index 2c275b6..11e81fa 100644 --- a/numexpr/complex_functions.hpp +++ b/numexpr/complex_functions.hpp @@ -435,6 +435,13 @@ nc_abs(std::complex *x, std::complex *r) r->imag(0); } +static void +nc_rint(std::complex *x, std::complex *r) +{ + r->real(rint(x->real())); + r->imag(rint(x->imag())); +} + static bool nc_isinf(std::complex *x) { diff --git a/numexpr/expressions.py b/numexpr/expressions.py index 8456089..1b361ee 100644 --- a/numexpr/expressions.py +++ b/numexpr/expressions.py @@ -348,7 +348,8 @@ def multiply(x, y): 'fmod': func(numpy.fmod, 'float'), 'arctan2': func(numpy.arctan2, 'float'), - 'hypot': func(numpy.hypot, 'float'), + 'hypot': func(numpy.hypot, 'double'), + 'nextafter': func(numpy.nextafter, 'double'), 'log': func(numpy.log, 'float'), 'log1p': func(numpy.log1p, 'float'), @@ -360,6 +361,8 @@ def 
multiply(x, y): 'abs': func(numpy.absolute, 'float'), 'ceil': func(numpy.ceil, 'float', 'double'), 'floor': func(numpy.floor, 'float', 'double'), + 'round': func(numpy.round, 'double'), + 'trunc': func(numpy.trunc, 'double'), 'where': where_func, diff --git a/numexpr/functions.hpp b/numexpr/functions.hpp index 74ca95c..ed54f07 100644 --- a/numexpr/functions.hpp +++ b/numexpr/functions.hpp @@ -30,13 +30,16 @@ FUNC_FF(FUNC_ARCTANH_FF, "arctanh_ff", atanhf, atanhf2, vsAtanh) FUNC_FF(FUNC_LOG_FF, "log_ff", logf, logf2, vsLn) FUNC_FF(FUNC_LOG1P_FF, "log1p_ff", log1pf, log1pf2, vsLog1p) FUNC_FF(FUNC_LOG10_FF, "log10_ff", log10f, log10f2, vsLog10) -FUNC_FF(FUNC_LOG2_FF, "log2_ff", log2f, log2f2, vsLog2) +FUNC_FF(FUNC_LOG2_FF, "log2_ff", log2f, log2f2, vsLog2) FUNC_FF(FUNC_EXP_FF, "exp_ff", expf, expf2, vsExp) FUNC_FF(FUNC_EXPM1_FF, "expm1_ff", expm1f, expm1f2, vsExpm1) FUNC_FF(FUNC_ABS_FF, "absolute_ff", fabsf, fabsf2, vsAbs) FUNC_FF(FUNC_CONJ_FF, "conjugate_ff",fconjf, fconjf2, vsConj) FUNC_FF(FUNC_CEIL_FF, "ceil_ff", ceilf, ceilf2, vsCeil) FUNC_FF(FUNC_FLOOR_FF, "floor_ff", floorf, floorf2, vsFloor) +FUNC_FF(FUNC_TRUNC_FF, "trunc_ff", truncf, truncf2, vsTrunc) +//rint rounds to nearest even integer, matching NumPy (round doesn't) +FUNC_FF(FUNC_ROUND_FF, "round_ff", rintf, rintf2, vsRint) FUNC_FF(FUNC_FF_LAST, NULL, NULL, NULL, NULL) #ifdef ELIDE_FUNC_FF #undef ELIDE_FUNC_FF @@ -50,6 +53,7 @@ FUNC_FF(FUNC_FF_LAST, NULL, NULL, NULL, NULL) FUNC_FFF(FUNC_FMOD_FFF, "fmod_fff", fmodf, fmodf2, vsfmod) FUNC_FFF(FUNC_ARCTAN2_FFF, "arctan2_fff", atan2f, atan2f2, vsAtan2) FUNC_FFF(FUNC_HYPOT_FFF, "hypot_fff", hypotf, hypotf2, vsHypot) +FUNC_FFF(FUNC_NEXTAFTER_FFF, "nextafter_fff", nextafterf, nextafterf2, vsNextAfter) FUNC_FFF(FUNC_FFF_LAST, NULL, NULL, NULL, NULL) #ifdef ELIDE_FUNC_FFF #undef ELIDE_FUNC_FFF @@ -83,6 +87,9 @@ FUNC_DD(FUNC_ABS_DD, "absolute_dd", fabs, vdAbs) FUNC_DD(FUNC_CONJ_DD, "conjugate_dd",fconj, vdConj) FUNC_DD(FUNC_CEIL_DD, "ceil_dd", ceil, vdCeil) 
FUNC_DD(FUNC_FLOOR_DD, "floor_dd", floor, vdFloor) +FUNC_DD(FUNC_TRUNC_DD, "trunc_dd", trunc, vdTrunc) + //rint rounds to nearest even integer, matching NumPy (round doesn't) +FUNC_DD(FUNC_ROUND_DD, "round_dd", rint, vdRint) FUNC_DD(FUNC_DD_LAST, NULL, NULL, NULL) #ifdef ELIDE_FUNC_DD #undef ELIDE_FUNC_DD @@ -124,6 +131,7 @@ FUNC_BF(FUNC_BF_LAST, NULL, NULL, NULL, NULL) FUNC_DDD(FUNC_FMOD_DDD, "fmod_ddd", fmod, vdfmod) FUNC_DDD(FUNC_ARCTAN2_DDD, "arctan2_ddd", atan2, vdAtan2) FUNC_DDD(FUNC_HYPOT_DDD, "hypot_ddd", hypot, vdHypot) +FUNC_DDD(FUNC_NEXTAFTER_DDD, "nextafter_ddd", nextafter, vdNextAfter) FUNC_DDD(FUNC_DDD_LAST, NULL, NULL, NULL) #ifdef ELIDE_FUNC_DDD #undef ELIDE_FUNC_DDD @@ -150,11 +158,13 @@ FUNC_CC(FUNC_ARCTANH_CC, "arctanh_cc", nc_atanh, vzAtanh) FUNC_CC(FUNC_LOG_CC, "log_cc", nc_log, vzLn) FUNC_CC(FUNC_LOG1P_CC, "log1p_cc", nc_log1p, vzLog1p) FUNC_CC(FUNC_LOG10_CC, "log10_cc", nc_log10, vzLog10) -FUNC_CC(FUNC_LOG2_CC, "log2_cc", nc_log2, vzLog2) +FUNC_CC(FUNC_LOG2_CC, "log2_cc", nc_log2, vzLog2) FUNC_CC(FUNC_EXP_CC, "exp_cc", nc_exp, vzExp) FUNC_CC(FUNC_EXPM1_CC, "expm1_cc", nc_expm1, vzExpm1) FUNC_CC(FUNC_ABS_CC, "absolute_cc", nc_abs, vzAbs_) FUNC_CC(FUNC_CONJ_CC, "conjugate_cc",nc_conj, vzConj) +// rint rounds to nearest even integer, matches NumPy behaviour (round doesn't) +FUNC_CC(FUNC_ROUND_CC, "round_cc", nc_rint, vzRint) FUNC_CC(FUNC_CC_LAST, NULL, NULL, NULL) #ifdef ELIDE_FUNC_CC #undef ELIDE_FUNC_CC diff --git a/numexpr/interpreter.cpp b/numexpr/interpreter.cpp index 4e4b181..df53528 100644 --- a/numexpr/interpreter.cpp +++ b/numexpr/interpreter.cpp @@ -393,7 +393,7 @@ FuncCCPtr functions_cc[] = { }; #ifdef USE_VML -/* complex expm1 not available in VML */ +/* various functions not available in VML */ static void vzExpm1(MKL_INT n, const MKL_Complex16* x1, MKL_Complex16* dest) { MKL_INT j; @@ -423,6 +423,15 @@ static void vzLog2(MKL_INT n, const MKL_Complex16* x1, MKL_Complex16* dest) }; }; +static void vzRint(MKL_INT n, const 
MKL_Complex16* x1, MKL_Complex16* dest) +{ + MKL_INT j; + for (j=0; j Date: Mon, 22 Sep 2025 21:42:06 +0200 Subject: [PATCH 096/166] Added copysign, signbit --- numexpr/expressions.py | 10 ++++++---- numexpr/functions.hpp | 12 ++++++++---- numexpr/interpreter.cpp | 24 +++++++++++++++++++----- numexpr/msvc_function_stubs.hpp | 10 ++++++++++ numexpr/necompiler.py | 4 +++- numexpr/numexpr_config.hpp | 3 ++- numexpr/tests/test_numexpr.py | 30 ++++++++++++++++++------------ 7 files changed, 66 insertions(+), 27 deletions(-) diff --git a/numexpr/expressions.py b/numexpr/expressions.py index 1b361ee..3d7722a 100644 --- a/numexpr/expressions.py +++ b/numexpr/expressions.py @@ -350,6 +350,7 @@ def multiply(x, y): 'arctan2': func(numpy.arctan2, 'float'), 'hypot': func(numpy.hypot, 'double'), 'nextafter': func(numpy.nextafter, 'double'), + 'copysign': func(numpy.copysign, 'double'), 'log': func(numpy.log, 'float'), 'log1p': func(numpy.log1p, 'float'), @@ -371,9 +372,10 @@ def multiply(x, y): 'complex': func(complex, 'complex'), 'conj': func(numpy.conj, 'complex'), - 'isnan': func(numpy.isnan, 'bool'), - 'isfinite': func(numpy.isfinite, 'bool'), - 'isinf': func(numpy.isinf, 'bool'), + 'isnan': func(numpy.isnan, 'double'), + 'isfinite': func(numpy.isfinite, 'double'), + 'isinf': func(numpy.isinf, 'double'), + 'signbit': func(numpy.signbit, 'double'), 'sum': gen_reduce_axis_func('sum'), 'prod': gen_reduce_axis_func('prod'), @@ -531,6 +533,6 @@ class FuncNode(OpNode): def __init__(self, opcode=None, args=None, kind=None): if (kind is None) and (args is not None): kind = commonKind(args) - if opcode in ("isnan", "isfinite", "isinf"): # bodge for boolean return functions + if opcode in ("isnan", "isfinite", "isinf", "signbit"): # bodge for boolean return functions kind = 'bool' OpNode.__init__(self, opcode, args, kind) diff --git a/numexpr/functions.hpp b/numexpr/functions.hpp index ed54f07..700e88c 100644 --- a/numexpr/functions.hpp +++ b/numexpr/functions.hpp @@ -54,6 +54,7 @@ 
FUNC_FFF(FUNC_FMOD_FFF, "fmod_fff", fmodf, fmodf2, vsfmod) FUNC_FFF(FUNC_ARCTAN2_FFF, "arctan2_fff", atan2f, atan2f2, vsAtan2) FUNC_FFF(FUNC_HYPOT_FFF, "hypot_fff", hypotf, hypotf2, vsHypot) FUNC_FFF(FUNC_NEXTAFTER_FFF, "nextafter_fff", nextafterf, nextafterf2, vsNextAfter) +FUNC_FFF(FUNC_COPYSIGN_FFF, "copysign_fff", copysignf, copysignf2, vsCopySign) FUNC_FFF(FUNC_FFF_LAST, NULL, NULL, NULL, NULL) #ifdef ELIDE_FUNC_FFF #undef ELIDE_FUNC_FFF @@ -89,7 +90,7 @@ FUNC_DD(FUNC_CEIL_DD, "ceil_dd", ceil, vdCeil) FUNC_DD(FUNC_FLOOR_DD, "floor_dd", floor, vdFloor) FUNC_DD(FUNC_TRUNC_DD, "trunc_dd", trunc, vdTrunc) //rint rounds to nearest even integer, matching NumPy (round doesn't) -FUNC_DD(FUNC_ROUND_DD, "round_dd", rint, vdRint) +FUNC_DD(FUNC_ROUND_DD, "round_dd", rint, vdRint) FUNC_DD(FUNC_DD_LAST, NULL, NULL, NULL) #ifdef ELIDE_FUNC_DD #undef ELIDE_FUNC_DD @@ -104,6 +105,7 @@ FUNC_DD(FUNC_DD_LAST, NULL, NULL, NULL) FUNC_BD(FUNC_ISNAN_BD, "isnan_bd", isnand, vdIsnan) FUNC_BD(FUNC_ISFINITE_BD, "isfinite_bd", isfinited, vdIsfinite) FUNC_BD(FUNC_ISINF_BD, "isinf_bd", isinfd, vdIsinf) +FUNC_BD(FUNC_SIGNBIT_BD, "signbit_bd", signbit, vdSignBit) FUNC_BD(FUNC_BD_LAST, NULL, NULL, NULL) #ifdef ELIDE_FUNC_BD #undef ELIDE_FUNC_BD @@ -115,9 +117,10 @@ FUNC_BD(FUNC_BD_LAST, NULL, NULL, NULL) #define ELIDE_FUNC_BF #define FUNC_BF(...) 
#endif // use wrappers as there is name collision with isnanf in std -FUNC_BF(FUNC_ISNAN_BF, "isnan_bf", isnanf_, isnanf2, vfIsnan) -FUNC_BF(FUNC_ISFINITE_BF, "isfinite_bf", isfinitef_, isfinitef2, vfIsfinite) -FUNC_BF(FUNC_ISINF_BF, "isinf_bf", isinff_, isinff2, vfIsinf) +FUNC_BF(FUNC_ISNAN_BF, "isnan_bf", isnanf_, isnanf2, vsIsnan) +FUNC_BF(FUNC_ISFINITE_BF, "isfinite_bf", isfinitef_, isfinitef2, vsIsfinite) +FUNC_BF(FUNC_ISINF_BF, "isinf_bf", isinff_, isinff2, vsIsinf) +FUNC_BF(FUNC_SIGNBIT_BF, "signbit_bf", signbitf, signbitf2, vsSignBit) FUNC_BF(FUNC_BF_LAST, NULL, NULL, NULL, NULL) #ifdef ELIDE_FUNC_BF #undef ELIDE_FUNC_BF @@ -132,6 +135,7 @@ FUNC_DDD(FUNC_FMOD_DDD, "fmod_ddd", fmod, vdfmod) FUNC_DDD(FUNC_ARCTAN2_DDD, "arctan2_ddd", atan2, vdAtan2) FUNC_DDD(FUNC_HYPOT_DDD, "hypot_ddd", hypot, vdHypot) FUNC_DDD(FUNC_NEXTAFTER_DDD, "nextafter_ddd", nextafter, vdNextAfter) +FUNC_DDD(FUNC_COPYSIGN_DDD, "copysign_ddd", copysign, vdCopySign) FUNC_DDD(FUNC_DDD_LAST, NULL, NULL, NULL) #ifdef ELIDE_FUNC_DDD #undef ELIDE_FUNC_DDD diff --git a/numexpr/interpreter.cpp b/numexpr/interpreter.cpp index df53528..5b2e1ae 100644 --- a/numexpr/interpreter.cpp +++ b/numexpr/interpreter.cpp @@ -221,28 +221,35 @@ FuncBFPtr functions_bf[] = { #endif #ifdef USE_VML -/* no isnan, isfinite or isinf in VML */ -static void vfIsfinite(MKL_INT n, const float* x1, bool* dest) +/* no isnan, isfinite, isinf or signbit in VML */ +static void vsIsfinite(MKL_INT n, const float* x1, bool* dest) { MKL_INT j; for (j=0; j - +//no single precision version of signbit in C++ standard +inline bool signbitf(float x) { return signbit((double)x); } #ifdef _WIN32 #ifndef __MINGW32__ #include "missing_posix_functions.hpp" diff --git a/numexpr/tests/test_numexpr.py b/numexpr/tests/test_numexpr.py index 99eb014..1c4ae2f 100644 --- a/numexpr/tests/test_numexpr.py +++ b/numexpr/tests/test_numexpr.py @@ -24,11 +24,11 @@ from numpy import all as alltrue from numpy import (allclose, arange, arccos, arccosh, 
arcsin, arcsinh, arctan, arctan2, arctanh, array, array_equal, cdouble, ceil, conj, - copy, cos, cosh, empty, exp, expm1, float64, floor, fmod, - hypot, int32, int64, isinf, isnan, linspace, log, log1p, - log2, log10, nextafter, ones_like, prod, ravel, rec, round, - shape, sin, sinh, sqrt, sum, tan, tanh, trunc, uint16, - where, zeros) + copy, copysign, cos, cosh, empty, exp, expm1, float64, + floor, fmod, hypot, int32, int64, isfinite, isinf, isnan, + linspace, log, log1p, log2, log10, nextafter, ones_like, + prod, ravel, rec, round, shape, signbit, sin, sinh, sqrt, + sum, tan, tanh, trunc, uint16, where, zeros) from numpy.testing import (assert_allclose, assert_array_almost_equal, assert_array_equal, assert_equal) @@ -728,20 +728,26 @@ def test_bool_funcs(self): a = np.arange(2 * array_size, dtype=dtype) a[array_size//2] = np.nan a[array_size//3] = np.inf + a[array_size//4] = -2 + + assert_equal(evaluate("isnan(a)"), isnan(a)) + assert_equal(evaluate("isfinite(a)"), isfinite(a)) + assert_equal(evaluate("isinf(a)"), isinf(a)) + assert_equal(evaluate("signbit(a)"), signbit(a)) - assert np.all(evaluate("isnan(a)") == np.isnan(a)) - assert np.all(evaluate("isfinite(a)") == np.isfinite(a)) - assert np.all(evaluate("isinf(a)") == np.isinf(a)) a = a.astype(np.float64) assert a.dtype == np.float64 - assert np.all(evaluate("isnan(a)") == np.isnan(a)) - assert np.all(evaluate("isfinite(a)") == np.isfinite(a)) - assert np.all(evaluate("isinf(a)") == np.isinf(a)) + assert_equal(evaluate("isnan(a)"), isnan(a)) + assert_equal(evaluate("isfinite(a)"), isfinite(a)) + assert_equal(evaluate("isinf(a)"), isinf(a)) + assert_equal(evaluate("signbit(a)"), signbit(a)) + a = a.astype(np.complex128) assert a.dtype == np.complex128 assert np.all(evaluate("isnan(a)") == np.isnan(a)) assert np.all(evaluate("isfinite(a)") == np.isfinite(a)) assert np.all(evaluate("isinf(a)") == np.isinf(a)) + # signbit not defined for complex numbers if 'sparc' not in platform.machine(): # Execution order 
set here so as to not use too many threads @@ -814,7 +820,7 @@ def test_changing_nthreads_01_dec(self): tests.append(('1_ARG_FUNCS', func1tests)) func2tests = [] -for func in ['arctan2', 'fmod', 'hypot', 'nextafter']: +for func in ['arctan2', 'fmod', 'hypot', 'nextafter', 'copysign']: func2tests.append("a + %s(b+c, d+1)" % func) func2tests.append("a + %s(b+c, 1)" % func) func2tests.append("a + %s(1, d+1)" % func) From 03ead57dedb8a4293ceb28babd6039332f15b1bd Mon Sep 17 00:00:00 2001 From: Luke Shaw Date: Tue, 23 Sep 2025 08:48:48 +0200 Subject: [PATCH 097/166] Correct output type of signbitf --- numexpr/msvc_function_stubs.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/numexpr/msvc_function_stubs.hpp b/numexpr/msvc_function_stubs.hpp index 7b2fb2b..a54e6b0 100644 --- a/numexpr/msvc_function_stubs.hpp +++ b/numexpr/msvc_function_stubs.hpp @@ -203,7 +203,7 @@ inline float truncf2(float x) { return truncf(x); } -inline float signbitf2(float x) { +inline bool signbitf2(float x) { return signbitf(x); } From 38907d7475ae1a22fb522fa73c429e2375070524 Mon Sep 17 00:00:00 2001 From: Luke Shaw Date: Tue, 23 Sep 2025 13:00:28 +0200 Subject: [PATCH 098/166] Add sign, maximum, minimum --- ADDFUNCS.rst | 4 +- numexpr/bespoke_functions.hpp | 244 ++++++++++++++++++++++++++++++++ numexpr/complex_functions.hpp | 21 +++ numexpr/expressions.py | 4 + numexpr/functions.hpp | 34 ++++- numexpr/interp_body.cpp | 15 ++ numexpr/interpreter.cpp | 224 ++++++----------------------- numexpr/interpreter.hpp | 12 ++ numexpr/module.cpp | 4 + numexpr/msvc_function_stubs.hpp | 17 ++- numexpr/necompiler.py | 5 +- numexpr/opcodes.hpp | 55 +++---- numexpr/tests/test_numexpr.py | 23 ++- 13 files changed, 448 insertions(+), 214 deletions(-) create mode 100644 numexpr/bespoke_functions.hpp diff --git a/ADDFUNCS.rst b/ADDFUNCS.rst index 2a8d17f..fffe45a 100644 --- a/ADDFUNCS.rst +++ b/ADDFUNCS.rst @@ -171,7 +171,7 @@ Add clauses to generate the FUNC_CODES from the ``functions.hpp`` 
header, making }; #endif -Some functions (e.g. ``fmod``, ``isnan``) are not available in MKL, and so must be hard-coded here as well: +Some functions (e.g. ``fmod``, ``isnan``) are not available in MKL, and so must be hard-coded in ``bespoke_functions.hpp`` as well: .. code-block:: cpp @@ -186,7 +186,7 @@ Some functions (e.g. ``fmod``, ``isnan``) are not available in MKL, and so must }; #endif -The complex case is slightlñy different (see other examples in the same file). +The complex case is slightly different (see other examples in the same file). Add case handling to the ``check_program`` function diff --git a/numexpr/bespoke_functions.hpp b/numexpr/bespoke_functions.hpp new file mode 100644 index 0000000..76e6d35 --- /dev/null +++ b/numexpr/bespoke_functions.hpp @@ -0,0 +1,244 @@ +#include +#include +#include +#include +#include +#include "numexpr_config.hpp" // isnan definitions + +// Generic sign function +inline int signi(int x) {return (0 < x) - (x < 0);} +inline long signl(long x) {return (0 < x) - (x < 0);} +inline double sign(double x){ + // Floats: -1.0, 0.0, +1.0, NaN stays NaN + if (isnand(x)) {return NAN;} + if (x > 0) {return 1;} + if (x < 0) {return -1;} + return 0; // handles +0.0 and -0.0 + } +inline float signf(float x){ + // Floats: -1.0, 0.0, +1.0, NaN stays NaN + if (isnanf_(x)) {return NAN;} + if (x > 0) {return 1;} + if (x < 0) {return -1;} + return 0; // handles +0.0 and -0.0 + } + + +#ifdef USE_VML +/* Fake vsConj function just for casting purposes inside numexpr */ +static void vsConj(MKL_INT n, const float* x1, float* dest) +{ + MKL_INT j; + for (j=0; j +#include // NAN #include /* constants */ @@ -471,4 +472,24 @@ nc_isfinite(std::complex *x) br = isfinited(xr); return bi && br; } + +static void +nc_sign(std::complex *x, std::complex *r) +{ + if (nc_isnan(x)){ + r->real(NAN); + r->imag(NAN); + } + std::complex mag; + nc_abs(x, &mag); + if (mag.real() == 0){ + r->real(0); + r->imag(0); + } + else{ + r->real(x->real()/mag.real()); + 
r->imag(x->imag()/mag.real()); + } +} + #endif // NUMEXPR_COMPLEX_FUNCTIONS_HPP diff --git a/numexpr/expressions.py b/numexpr/expressions.py index 3d7722a..03db3c0 100644 --- a/numexpr/expressions.py +++ b/numexpr/expressions.py @@ -351,6 +351,9 @@ def multiply(x, y): 'hypot': func(numpy.hypot, 'double'), 'nextafter': func(numpy.nextafter, 'double'), 'copysign': func(numpy.copysign, 'double'), + 'maximum': func(numpy.maximum, 'double'), + 'minimum': func(numpy.minimum, 'double'), + 'log': func(numpy.log, 'float'), 'log1p': func(numpy.log1p, 'float'), @@ -364,6 +367,7 @@ def multiply(x, y): 'floor': func(numpy.floor, 'float', 'double'), 'round': func(numpy.round, 'double'), 'trunc': func(numpy.trunc, 'double'), + 'sign': func(numpy.sign, 'double'), 'where': where_func, diff --git a/numexpr/functions.hpp b/numexpr/functions.hpp index 700e88c..448b43e 100644 --- a/numexpr/functions.hpp +++ b/numexpr/functions.hpp @@ -38,6 +38,7 @@ FUNC_FF(FUNC_CONJ_FF, "conjugate_ff",fconjf, fconjf2, vsConj) FUNC_FF(FUNC_CEIL_FF, "ceil_ff", ceilf, ceilf2, vsCeil) FUNC_FF(FUNC_FLOOR_FF, "floor_ff", floorf, floorf2, vsFloor) FUNC_FF(FUNC_TRUNC_FF, "trunc_ff", truncf, truncf2, vsTrunc) +FUNC_FF(FUNC_SIGN_FF, "sign_ff", signf, signf2, vsSign) //rint rounds to nearest even integer, matching NumPy (round doesn't) FUNC_FF(FUNC_ROUND_FF, "round_ff", rintf, rintf2, vsRint) FUNC_FF(FUNC_FF_LAST, NULL, NULL, NULL, NULL) @@ -55,6 +56,8 @@ FUNC_FFF(FUNC_ARCTAN2_FFF, "arctan2_fff", atan2f, atan2f2, vsAtan2) FUNC_FFF(FUNC_HYPOT_FFF, "hypot_fff", hypotf, hypotf2, vsHypot) FUNC_FFF(FUNC_NEXTAFTER_FFF, "nextafter_fff", nextafterf, nextafterf2, vsNextAfter) FUNC_FFF(FUNC_COPYSIGN_FFF, "copysign_fff", copysignf, copysignf2, vsCopySign) +FUNC_FFF(FUNC_MAXIMUM_FFF, "maximum_fff", fmaxf, fmaxf2, vsFmax) +FUNC_FFF(FUNC_MINIMUM_FFF, "minimum_fff", fminf, fminf2, vsFmin) FUNC_FFF(FUNC_FFF_LAST, NULL, NULL, NULL, NULL) #ifdef ELIDE_FUNC_FFF #undef ELIDE_FUNC_FFF @@ -81,7 +84,7 @@ FUNC_DD(FUNC_ARCTANH_DD, 
"arctanh_dd", atanh, vdAtanh) FUNC_DD(FUNC_LOG_DD, "log_dd", log, vdLn) FUNC_DD(FUNC_LOG1P_DD, "log1p_dd", log1p, vdLog1p) FUNC_DD(FUNC_LOG10_DD, "log10_dd", log10, vdLog10) -FUNC_DD(FUNC_LOG2_DD, "log2_dd", log2, vdLog2) +FUNC_DD(FUNC_LOG2_DD, "log2_dd", log2, vdLog2) FUNC_DD(FUNC_EXP_DD, "exp_dd", exp, vdExp) FUNC_DD(FUNC_EXPM1_DD, "expm1_dd", expm1, vdExpm1) FUNC_DD(FUNC_ABS_DD, "absolute_dd", fabs, vdAbs) @@ -89,7 +92,8 @@ FUNC_DD(FUNC_CONJ_DD, "conjugate_dd",fconj, vdConj) FUNC_DD(FUNC_CEIL_DD, "ceil_dd", ceil, vdCeil) FUNC_DD(FUNC_FLOOR_DD, "floor_dd", floor, vdFloor) FUNC_DD(FUNC_TRUNC_DD, "trunc_dd", trunc, vdTrunc) - //rint rounds to nearest even integer, matching NumPy (round doesn't) +FUNC_DD(FUNC_SIGN_DD, "sign_dd", sign, vdSign) +//rint rounds to nearest even integer, matching NumPy (round doesn't) FUNC_DD(FUNC_ROUND_DD, "round_dd", rint, vdRint) FUNC_DD(FUNC_DD_LAST, NULL, NULL, NULL) #ifdef ELIDE_FUNC_DD @@ -136,6 +140,8 @@ FUNC_DDD(FUNC_ARCTAN2_DDD, "arctan2_ddd", atan2, vdAtan2) FUNC_DDD(FUNC_HYPOT_DDD, "hypot_ddd", hypot, vdHypot) FUNC_DDD(FUNC_NEXTAFTER_DDD, "nextafter_ddd", nextafter, vdNextAfter) FUNC_DDD(FUNC_COPYSIGN_DDD, "copysign_ddd", copysign, vdCopySign) +FUNC_DDD(FUNC_MAXIMUM_DDD, "maximum_ddd", fmax, vdFmax) +FUNC_DDD(FUNC_MINIMUM_DDD, "minimum_ddd", fmin, vdFmin) FUNC_DDD(FUNC_DDD_LAST, NULL, NULL, NULL) #ifdef ELIDE_FUNC_DDD #undef ELIDE_FUNC_DDD @@ -167,6 +173,7 @@ FUNC_CC(FUNC_EXP_CC, "exp_cc", nc_exp, vzExp) FUNC_CC(FUNC_EXPM1_CC, "expm1_cc", nc_expm1, vzExpm1) FUNC_CC(FUNC_ABS_CC, "absolute_cc", nc_abs, vzAbs_) FUNC_CC(FUNC_CONJ_CC, "conjugate_cc",nc_conj, vzConj) +FUNC_CC(FUNC_SIGN_CC, "sign_cc", nc_sign, vzSign) // rint rounds to nearest even integer, matches NumPy behaviour (round doesn't) FUNC_CC(FUNC_ROUND_CC, "round_cc", nc_rint, vzRint) FUNC_CC(FUNC_CC_LAST, NULL, NULL, NULL) @@ -199,3 +206,26 @@ FUNC_BC(FUNC_BC_LAST, NULL, NULL, NULL) #undef ELIDE_FUNC_BC #undef FUNC_BC #endif + +// int -> int functions +#ifndef FUNC_II 
+#define ELIDE_FUNC_II +#define FUNC_II(...) +#endif +FUNC_II(FUNC_SIGN_II, "sign_ii", signi, viSign) +FUNC_II(FUNC_II_LAST, NULL, NULL, NULL) +#ifdef ELIDE_FUNC_II +#undef ELIDE_FUNC_II +#undef FUNC_II +#endif + +#ifndef FUNC_LL +#define ELIDE_FUNC_LL +#define FUNC_LL(...) +#endif +FUNC_LL(FUNC_SIGN_LL, "sign_LL", signl, vlSign) +FUNC_LL(FUNC_LL_LAST, NULL, NULL, NULL) +#ifdef ELIDE_FUNC_LL +#undef ELIDE_FUNC_LL +#undef FUNC_LL +#endif diff --git a/numexpr/interp_body.cpp b/numexpr/interp_body.cpp index e82e47b..743f8ab 100644 --- a/numexpr/interp_body.cpp +++ b/numexpr/interp_body.cpp @@ -494,6 +494,21 @@ b_dest = functions_bc[arg2](&ca)); #endif + /* Integer return types */ + case OP_FUNC_IIN: +#ifdef USE_VML + VEC_ARG1_VML(functions_ii_vml[arg2](BLOCK_SIZE, + (int*)x1, (int*)dest)); +#else + VEC_ARG1(i_dest = functions_ii[arg2](i1)); +#endif + case OP_FUNC_LLN: +#ifdef USE_VML + VEC_ARG1_VML(functions_ll_vml[arg2](BLOCK_SIZE, + (long*)x1, (long*)dest)); +#else + VEC_ARG1(l_dest = functions_ll[arg2](l1)); +#endif /* Reductions */ case OP_SUM_IIN: VEC_ARG1(i_reduce += i1); diff --git a/numexpr/interpreter.cpp b/numexpr/interpreter.cpp index 5b2e1ae..e31f4f6 100644 --- a/numexpr/interpreter.cpp +++ b/numexpr/interpreter.cpp @@ -18,6 +18,7 @@ #include "complex_functions.hpp" #include "interpreter.hpp" #include "numexpr_object.hpp" +#include "bespoke_functions.hpp" #ifdef _MSC_VER /* Some missing symbols and functions for Win */ @@ -142,17 +143,6 @@ FuncFFPtr functions_ff[] = { }; #endif -#ifdef USE_VML -/* Fake vsConj function just for casting purposes inside numexpr */ -static void vsConj(MKL_INT n, const float* x1, float* dest) -{ - MKL_INT j; - for (j=0; j= FUNC_II_LAST) { + PyErr_Format(PyExc_RuntimeError, "invalid program: funccode out of range (%i) at %i", arg, argloc); + return -1; + } + } + else if (op == OP_FUNC_LLN) { + if (arg < 0 || arg >= FUNC_LL_LAST) { + PyErr_Format(PyExc_RuntimeError, "invalid program: funccode out of range (%i) at %i", arg, 
argloc); + return -1; + } } else if (op >= OP_REDUCTION) { ; diff --git a/numexpr/interpreter.hpp b/numexpr/interpreter.hpp index 55f210c..3ec09bb 100644 --- a/numexpr/interpreter.hpp +++ b/numexpr/interpreter.hpp @@ -48,6 +48,18 @@ enum FuncBCCodes { #undef FUNC_BC }; +enum FuncIICodes { +#define FUNC_II(fop, ...) fop, +#include "functions.hpp" +#undef FUNC_II +}; + +enum FuncLLCodes { +#define FUNC_LL(fop, ...) fop, +#include "functions.hpp" +#undef FUNC_LL +}; + enum FuncDDDCodes { #define FUNC_DDD(fop, ...) fop, #include "functions.hpp" diff --git a/numexpr/module.cpp b/numexpr/module.cpp index 649aa17..67629bd 100644 --- a/numexpr/module.cpp +++ b/numexpr/module.cpp @@ -512,7 +512,11 @@ PyInit_interpreter(void) { #define FUNC_DDD(name, sname, ...) add_func(name, sname); #define FUNC_CC(name, sname, ...) add_func(name, sname); #define FUNC_CCC(name, sname, ...) add_func(name, sname); +#define FUNC_II(name, sname, ...) add_func(name, sname); +#define FUNC_LL(name, sname, ...) add_func(name, sname); #include "functions.hpp" +#undef FUNC_LL +#undef FUNC_II #undef FUNC_CCC #undef FUNC_CC #undef FUNC_DDD diff --git a/numexpr/msvc_function_stubs.hpp b/numexpr/msvc_function_stubs.hpp index a54e6b0..c41e874 100644 --- a/numexpr/msvc_function_stubs.hpp +++ b/numexpr/msvc_function_stubs.hpp @@ -43,11 +43,12 @@ #define atan2f(x, y) ((float)atan2((double)(x), (double)(y))) #define hypotf(x, y) ((float)hypot((double)(x), (double)(y))) #define copysignf(x, y) ((float)copysign((double)(x), (double)(y))) +#define nextafterf(x, y) ((float)nextafter((double)(x), (double)(y))) #define ceilf(x) ((float)ceil((double)(x))) #define hypotf(x) ((float)hypot((double)(x))) #define rintf(x) ((float)rint((double)(x))) #define truncf(x) ((float)trunc((double)(x))) -#define nextafterf(x) ((float)nextafter((double)(x))) +#define fmaxf(x) ((float)fmax((double)(x))) /* The next are directly called from interp_body.cpp */ #define powf(x, y) ((float)pow((double)(x), (double)(y))) @@ -166,6 
+167,14 @@ inline float copysignf2(float x, float y) { return copysignf(x, y); } +inline float fmaxf2(float x, float y) { + return fmaxf(x, y); +} + +inline float fminf2(float x, float y) { + return fminf(x, y); +} + // Boolean output functions inline bool isnanf2(float x) { @@ -207,4 +216,10 @@ inline bool signbitf2(float x) { return signbitf(x); } +inline float signf2(float x) { + return signf(x); +} + + + #endif // NUMEXPR_MSVC_FUNCTION_STUBS_HPP diff --git a/numexpr/necompiler.py b/numexpr/necompiler.py index 04ce165..8b80737 100644 --- a/numexpr/necompiler.py +++ b/numexpr/necompiler.py @@ -79,7 +79,10 @@ "trunc", "nextafter", "copysign", - "signbit" + "signbit", + "sign", + "minimum", + "maximum", ] diff --git a/numexpr/opcodes.hpp b/numexpr/opcodes.hpp index 703525b..5b1c46f 100644 --- a/numexpr/opcodes.hpp +++ b/numexpr/opcodes.hpp @@ -169,38 +169,41 @@ OPCODE(118, OP_CONTAINS_BSS, "contains_bss", Tb, Ts, Ts, T0) OPCODE(119, OP_FUNC_BDN, "func_bdn", Tb, Td, Tn, T0) OPCODE(120, OP_FUNC_BFN, "func_bfn", Tb, Tf, Tn, T0) OPCODE(121, OP_FUNC_BCN, "func_bcn", Tb, Tc, Tn, T0) +//Integer funcs +OPCODE(122, OP_FUNC_IIN, "func_iin", Ti, Ti, Tn, T0) +OPCODE(123, OP_FUNC_LLN, "func_lln", Tl, Tl, Tn, T0) // Reductions always have to be at the end - parts of the code // use > OP_REDUCTION to decide whether operation is a reduction -OPCODE(122, OP_REDUCTION, NULL, T0, T0, T0, T0) +OPCODE(124, OP_REDUCTION, NULL, T0, T0, T0, T0) /* Last argument in a reduction is the axis of the array the reduction should be applied along. 
*/ -OPCODE(123, OP_SUM_IIN, "sum_iin", Ti, Ti, Tn, T0) -OPCODE(124, OP_SUM_LLN, "sum_lln", Tl, Tl, Tn, T0) -OPCODE(125, OP_SUM_FFN, "sum_ffn", Tf, Tf, Tn, T0) -OPCODE(126, OP_SUM_DDN, "sum_ddn", Td, Td, Tn, T0) -OPCODE(127, OP_SUM_CCN, "sum_ccn", Tc, Tc, Tn, T0) - -OPCODE(128, OP_PROD, NULL, T0, T0, T0, T0) -OPCODE(129, OP_PROD_IIN, "prod_iin", Ti, Ti, Tn, T0) -OPCODE(130, OP_PROD_LLN, "prod_lln", Tl, Tl, Tn, T0) -OPCODE(131, OP_PROD_FFN, "prod_ffn", Tf, Tf, Tn, T0) -OPCODE(132, OP_PROD_DDN, "prod_ddn", Td, Td, Tn, T0) -OPCODE(133, OP_PROD_CCN, "prod_ccn", Tc, Tc, Tn, T0) - -OPCODE(134, OP_MIN, NULL, T0, T0, T0, T0) -OPCODE(135, OP_MIN_IIN, "min_iin", Ti, Ti, Tn, T0) -OPCODE(136, OP_MIN_LLN, "min_lln", Tl, Tl, Tn, T0) -OPCODE(137, OP_MIN_FFN, "min_ffn", Tf, Tf, Tn, T0) -OPCODE(138, OP_MIN_DDN, "min_ddn", Td, Td, Tn, T0) - -OPCODE(139, OP_MAX, NULL, T0, T0, T0, T0) -OPCODE(140, OP_MAX_IIN, "max_iin", Ti, Ti, Tn, T0) -OPCODE(141, OP_MAX_LLN, "max_lln", Tl, Tl, Tn, T0) -OPCODE(142, OP_MAX_FFN, "max_ffn", Tf, Tf, Tn, T0) -OPCODE(143, OP_MAX_DDN, "max_ddn", Td, Td, Tn, T0) +OPCODE(125, OP_SUM_IIN, "sum_iin", Ti, Ti, Tn, T0) +OPCODE(126, OP_SUM_LLN, "sum_lln", Tl, Tl, Tn, T0) +OPCODE(127, OP_SUM_FFN, "sum_ffn", Tf, Tf, Tn, T0) +OPCODE(128, OP_SUM_DDN, "sum_ddn", Td, Td, Tn, T0) +OPCODE(129, OP_SUM_CCN, "sum_ccn", Tc, Tc, Tn, T0) + +OPCODE(130, OP_PROD, NULL, T0, T0, T0, T0) +OPCODE(131, OP_PROD_IIN, "prod_iin", Ti, Ti, Tn, T0) +OPCODE(132, OP_PROD_LLN, "prod_lln", Tl, Tl, Tn, T0) +OPCODE(133, OP_PROD_FFN, "prod_ffn", Tf, Tf, Tn, T0) +OPCODE(134, OP_PROD_DDN, "prod_ddn", Td, Td, Tn, T0) +OPCODE(135, OP_PROD_CCN, "prod_ccn", Tc, Tc, Tn, T0) + +OPCODE(136, OP_MIN, NULL, T0, T0, T0, T0) +OPCODE(137, OP_MIN_IIN, "min_iin", Ti, Ti, Tn, T0) +OPCODE(138, OP_MIN_LLN, "min_lln", Tl, Tl, Tn, T0) +OPCODE(139, OP_MIN_FFN, "min_ffn", Tf, Tf, Tn, T0) +OPCODE(140, OP_MIN_DDN, "min_ddn", Td, Td, Tn, T0) + +OPCODE(141, OP_MAX, NULL, T0, T0, T0, T0) +OPCODE(142, OP_MAX_IIN, "max_iin", Ti, 
Ti, Tn, T0) +OPCODE(143, OP_MAX_LLN, "max_lln", Tl, Tl, Tn, T0) +OPCODE(144, OP_MAX_FFN, "max_ffn", Tf, Tf, Tn, T0) +OPCODE(145, OP_MAX_DDN, "max_ddn", Td, Td, Tn, T0) /* When we get to 255, will maybe have to change code again @@ -208,4 +211,4 @@ When we get to 255, will maybe have to change code again other than unsigned char for OPCODE table) */ /* Should be the last opcode */ -OPCODE(144, OP_END, NULL, T0, T0, T0, T0) +OPCODE(146, OP_END, NULL, T0, T0, T0, T0) diff --git a/numexpr/tests/test_numexpr.py b/numexpr/tests/test_numexpr.py index 1c4ae2f..498408f 100644 --- a/numexpr/tests/test_numexpr.py +++ b/numexpr/tests/test_numexpr.py @@ -26,9 +26,10 @@ arctan2, arctanh, array, array_equal, cdouble, ceil, conj, copy, copysign, cos, cosh, empty, exp, expm1, float64, floor, fmod, hypot, int32, int64, isfinite, isinf, isnan, - linspace, log, log1p, log2, log10, nextafter, ones_like, - prod, ravel, rec, round, shape, signbit, sin, sinh, sqrt, - sum, tan, tanh, trunc, uint16, where, zeros) + linspace, log, log1p, log2, log10, maximum, minimum, + nextafter, ones_like, prod, ravel, rec, round, shape, sign, + signbit, sin, sinh, sqrt, sum, tan, tanh, trunc, uint16, + where, zeros) from numpy.testing import (assert_allclose, assert_array_almost_equal, assert_array_equal, assert_equal) @@ -479,6 +480,20 @@ def test_bitwise_operators(self): assert_array_equal(evaluate("x | y"), x | y) # or assert_array_equal(evaluate("~x"), ~x) # invert + def test_maximum_minimum(self): + for dtype in [float, double, int, np.int64]: + x = arange(10, dtype=dtype) + y = 2 * arange(10, dtype=dtype)[::-1] + assert_array_equal(evaluate("maximum(x,y)"), maximum(x,y)) + assert_array_equal(evaluate("minimum(x,y)"), minimum(x,y)) + + def test_sign(self): + for dtype in [float, double, int, np.int64, complex]: + x = arange(10, dtype=dtype) + y = 2 * arange(10, dtype=dtype)[::-1] + r = x-y + r[-1] = np.nan if not np.issubdtype(dtype, int) else -2 + assert_array_equal(evaluate("sign(r)"), sign(r)) def 
test_rational_expr(self): a = arange(1e6) @@ -815,7 +830,7 @@ def test_changing_nthreads_01_dec(self): 'sin', 'cos', 'tan', 'arcsin', 'arccos', 'arctan', 'sinh', 'cosh', 'tanh', 'arcsinh', 'arccosh', 'arctanh', 'log', 'log1p', 'log10', "log2", 'exp', 'expm1', 'abs', 'conj', - 'ceil', 'floor', 'round', 'trunc']: + 'ceil', 'floor', 'round', 'trunc', 'sign']: func1tests.append("a + %s(b+c)" % func) tests.append(('1_ARG_FUNCS', func1tests)) From fbe2dcf01635a3b9a799f42257c169010f809415 Mon Sep 17 00:00:00 2001 From: Luke Shaw Date: Tue, 23 Sep 2025 13:08:30 +0200 Subject: [PATCH 099/166] Reorder imports for Windows --- numexpr/msvc_function_stubs.hpp | 3 --- numexpr/numexpr_config.hpp | 7 ++++++- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/numexpr/msvc_function_stubs.hpp b/numexpr/msvc_function_stubs.hpp index c41e874..1cca937 100644 --- a/numexpr/msvc_function_stubs.hpp +++ b/numexpr/msvc_function_stubs.hpp @@ -216,9 +216,6 @@ inline bool signbitf2(float x) { return signbitf(x); } -inline float signf2(float x) { - return signf(x); -} diff --git a/numexpr/numexpr_config.hpp b/numexpr/numexpr_config.hpp index 152d88c..b32d198 100644 --- a/numexpr/numexpr_config.hpp +++ b/numexpr/numexpr_config.hpp @@ -60,5 +60,10 @@ inline bool isnand(double x) { return !!std::isnan(x); } inline bool isinff_(float x) { return !!std::isinf(x); } inline bool isinfd(double x) { return !!std::isinf(x); } #endif - +#include "bespoke_functions.hpp" +#ifdef _WIN32 //need signf from bespoke_functions +inline float signf2(float x) { + return signf(x); +} + #endif #endif // NUMEXPR_CONFIG_HPP From 2c6b79627c33ee33ce98e8e95e2553b83c87181e Mon Sep 17 00:00:00 2001 From: Luke Shaw Date: Tue, 23 Sep 2025 13:14:49 +0200 Subject: [PATCH 100/166] Delay definition of signf2 --- numexpr/interpreter.cpp | 3 +++ numexpr/msvc_function_stubs.hpp | 3 --- numexpr/numexpr_config.hpp | 7 +------ 3 files changed, 4 insertions(+), 9 deletions(-) diff --git a/numexpr/interpreter.cpp 
b/numexpr/interpreter.cpp index e31f4f6..409ad3d 100644 --- a/numexpr/interpreter.cpp +++ b/numexpr/interpreter.cpp @@ -130,6 +130,9 @@ op_signature(int op, unsigned int n) { typedef float (*FuncFFPtr)(float); #ifdef _WIN32 +inline float signf2(float x) { // needed to wait for bespoke_functions to be loaded + return signf(x); +} FuncFFPtr functions_ff[] = { #define FUNC_FF(fop, s, f, f_win32, ...) f_win32, #include "functions.hpp" diff --git a/numexpr/msvc_function_stubs.hpp b/numexpr/msvc_function_stubs.hpp index 1cca937..2671edc 100644 --- a/numexpr/msvc_function_stubs.hpp +++ b/numexpr/msvc_function_stubs.hpp @@ -216,7 +216,4 @@ inline bool signbitf2(float x) { return signbitf(x); } - - - #endif // NUMEXPR_MSVC_FUNCTION_STUBS_HPP diff --git a/numexpr/numexpr_config.hpp b/numexpr/numexpr_config.hpp index b32d198..152d88c 100644 --- a/numexpr/numexpr_config.hpp +++ b/numexpr/numexpr_config.hpp @@ -60,10 +60,5 @@ inline bool isnand(double x) { return !!std::isnan(x); } inline bool isinff_(float x) { return !!std::isinf(x); } inline bool isinfd(double x) { return !!std::isinf(x); } #endif -#include "bespoke_functions.hpp" -#ifdef _WIN32 //need signf from bespoke_functions -inline float signf2(float x) { - return signf(x); -} - #endif + #endif // NUMEXPR_CONFIG_HPP From 7e1585bdfb3137ea82ecd81308f1314d42d5c2c6 Mon Sep 17 00:00:00 2001 From: Luke Shaw Date: Tue, 23 Sep 2025 14:29:20 +0200 Subject: [PATCH 101/166] Fix typo with maxf/minf funcs for Windows --- numexpr/msvc_function_stubs.hpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/numexpr/msvc_function_stubs.hpp b/numexpr/msvc_function_stubs.hpp index 2671edc..3f41666 100644 --- a/numexpr/msvc_function_stubs.hpp +++ b/numexpr/msvc_function_stubs.hpp @@ -44,11 +44,13 @@ #define hypotf(x, y) ((float)hypot((double)(x), (double)(y))) #define copysignf(x, y) ((float)copysign((double)(x), (double)(y))) #define nextafterf(x, y) ((float)nextafter((double)(x), (double)(y))) +#define fmaxf(x, y) 
((float)fmax((double)(x), (double)(y))) +#define fminf(x, y) ((float)fmax((double)(x), (double)(y))) #define ceilf(x) ((float)ceil((double)(x))) #define hypotf(x) ((float)hypot((double)(x))) #define rintf(x) ((float)rint((double)(x))) #define truncf(x) ((float)trunc((double)(x))) -#define fmaxf(x) ((float)fmax((double)(x))) + /* The next are directly called from interp_body.cpp */ #define powf(x, y) ((float)pow((double)(x), (double)(y))) From 3538e4adad5d69be017361237a75827760466fe9 Mon Sep 17 00:00:00 2001 From: Luke Shaw Date: Tue, 23 Sep 2025 14:58:17 +0200 Subject: [PATCH 102/166] Handle overloading of fmax/fmin --- ADDFUNCS.rst | 2 +- numexpr/functions.hpp | 4 ++-- numexpr/msvc_function_stubs.hpp | 4 ++-- numexpr/numexpr_config.hpp | 3 +++ 4 files changed, 8 insertions(+), 5 deletions(-) diff --git a/ADDFUNCS.rst b/ADDFUNCS.rst index fffe45a..497c6fe 100644 --- a/ADDFUNCS.rst +++ b/ADDFUNCS.rst @@ -54,7 +54,7 @@ Example: #define FUNC_FF(...) #endif ... - FUNC_FF(FUNC_MYFUNC_FF, "myfunc_ff", myfuncf, myfuncf2, vfMyfunc) + FUNC_FF(FUNC_MYFUNC_FF, "myfunc_ff", myfuncf, myfuncf2, vsMyfunc) FUNC_FF(FUNC_FF_LAST, NULL, NULL, NULL, NULL) #ifdef ELIDE_FUNC_FF #undef ELIDE_FUNC_FF diff --git a/numexpr/functions.hpp b/numexpr/functions.hpp index 448b43e..a364a44 100644 --- a/numexpr/functions.hpp +++ b/numexpr/functions.hpp @@ -140,8 +140,8 @@ FUNC_DDD(FUNC_ARCTAN2_DDD, "arctan2_ddd", atan2, vdAtan2) FUNC_DDD(FUNC_HYPOT_DDD, "hypot_ddd", hypot, vdHypot) FUNC_DDD(FUNC_NEXTAFTER_DDD, "nextafter_ddd", nextafter, vdNextAfter) FUNC_DDD(FUNC_COPYSIGN_DDD, "copysign_ddd", copysign, vdCopySign) -FUNC_DDD(FUNC_MAXIMUM_DDD, "maximum_ddd", fmax, vdFmax) -FUNC_DDD(FUNC_MINIMUM_DDD, "minimum_ddd", fmin, vdFmin) +FUNC_DDD(FUNC_MAXIMUM_DDD, "maximum_ddd", fmaxd, vdFmax) +FUNC_DDD(FUNC_MINIMUM_DDD, "minimum_ddd", fmind, vdFmin) FUNC_DDD(FUNC_DDD_LAST, NULL, NULL, NULL) #ifdef ELIDE_FUNC_DDD #undef ELIDE_FUNC_DDD diff --git a/numexpr/msvc_function_stubs.hpp 
b/numexpr/msvc_function_stubs.hpp index 3f41666..6fac288 100644 --- a/numexpr/msvc_function_stubs.hpp +++ b/numexpr/msvc_function_stubs.hpp @@ -44,8 +44,8 @@ #define hypotf(x, y) ((float)hypot((double)(x), (double)(y))) #define copysignf(x, y) ((float)copysign((double)(x), (double)(y))) #define nextafterf(x, y) ((float)nextafter((double)(x), (double)(y))) -#define fmaxf(x, y) ((float)fmax((double)(x), (double)(y))) -#define fminf(x, y) ((float)fmax((double)(x), (double)(y))) +#define fmaxf(x, y) ((float)fmaxd((double)(x), (double)(y))) +#define fminf(x, y) ((float)fmind((double)(x), (double)(y))) #define ceilf(x) ((float)ceil((double)(x))) #define hypotf(x) ((float)hypot((double)(x))) #define rintf(x) ((float)rint((double)(x))) diff --git a/numexpr/numexpr_config.hpp b/numexpr/numexpr_config.hpp index 152d88c..4ed64ab 100644 --- a/numexpr/numexpr_config.hpp +++ b/numexpr/numexpr_config.hpp @@ -43,6 +43,9 @@ #include //no single precision version of signbit in C++ standard inline bool signbitf(float x) { return signbit((double)x); } +// To handle overloading of fmax/fmin in cmath +inline double fmaxd(double x, double y) { return fmax(x, y); } +inline double fmind(double x, double y) { return fmin(x, y); } #ifdef _WIN32 #ifndef __MINGW32__ #include "missing_posix_functions.hpp" From 544c8d017c3377e3afaa64bdffc1294d2b23c345 Mon Sep 17 00:00:00 2001 From: Luke Shaw Date: Tue, 23 Sep 2025 15:41:41 +0200 Subject: [PATCH 103/166] Fix test for stricter Windows --- numexpr/tests/test_numexpr.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/numexpr/tests/test_numexpr.py b/numexpr/tests/test_numexpr.py index 498408f..8e5cd3d 100644 --- a/numexpr/tests/test_numexpr.py +++ b/numexpr/tests/test_numexpr.py @@ -492,7 +492,8 @@ def test_sign(self): x = arange(10, dtype=dtype) y = 2 * arange(10, dtype=dtype)[::-1] r = x-y - r[-1] = np.nan if not np.issubdtype(dtype, int) else -2 + if not np.issubdtype(dtype, int): + r[-1] = np.nan 
assert_array_equal(evaluate("sign(r)"), sign(r)) def test_rational_expr(self): From 4eab0e0c0acf0cb2115ad4bc43052f5c3783f817 Mon Sep 17 00:00:00 2001 From: Luke Shaw Date: Tue, 23 Sep 2025 15:50:56 +0200 Subject: [PATCH 104/166] Fix test --- numexpr/tests/test_numexpr.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/numexpr/tests/test_numexpr.py b/numexpr/tests/test_numexpr.py index 8e5cd3d..62e6215 100644 --- a/numexpr/tests/test_numexpr.py +++ b/numexpr/tests/test_numexpr.py @@ -492,7 +492,7 @@ def test_sign(self): x = arange(10, dtype=dtype) y = 2 * arange(10, dtype=dtype)[::-1] r = x-y - if not np.issubdtype(dtype, int): + if not np.issubdtype(dtype, np.integer): r[-1] = np.nan assert_array_equal(evaluate("sign(r)"), sign(r)) From c9775f2dde64c8fe3dd1b06da889e301325a3816 Mon Sep 17 00:00:00 2001 From: Luke Shaw Date: Wed, 24 Sep 2025 11:07:43 +0200 Subject: [PATCH 105/166] Improve casting for ints to match Numpy and add docs --- doc/user_guide.rst | 19 +++++++--- numexpr/bespoke_functions.hpp | 68 +++++++++++++++++++++++++++++------ numexpr/expressions.py | 9 +++-- numexpr/functions.hpp | 8 +++-- numexpr/tests/test_numexpr.py | 7 ++-- 5 files changed, 88 insertions(+), 23 deletions(-) diff --git a/doc/user_guide.rst b/doc/user_guide.rst index ade4e46..75dc4de 100644 --- a/doc/user_guide.rst +++ b/doc/user_guide.rst @@ -188,10 +188,10 @@ Supported operators *NumExpr* supports the set of operators listed below: - * Bitwise operators (and, or, not, xor): :code:`&, |, ~, ^` + * Bitwise and logical operators (and, or, not, xor): :code:`&, |, ~, ^` * Comparison operators: :code:`<, <=, ==, !=, >=, >` * Unary arithmetic operators: :code:`-` - * Binary arithmetic operators: :code:`+, -, *, /, **, %, <<, >>` + * Binary arithmetic operators: :code:`+, -, *, /, //, **, %, <<, >>` Supported functions @@ -203,22 +203,33 @@ The next are the current supported set: is true, number2 otherwise. 
* :code:`{isinf, isnan, isfinite}(float|complex): bool` -- returns element-wise True for ``inf`` or ``NaN``, ``NaN``, not ``inf`` respectively. + * :code:`signbit(float|complex): bool` -- returns element-wise True if signbit is set + False otherwise. * :code:`{sin,cos,tan}(float|complex): float|complex` -- trigonometric sine, cosine or tangent. * :code:`{arcsin,arccos,arctan}(float|complex): float|complex` -- trigonometric inverse sine, cosine or tangent. * :code:`arctan2(float1, float2): float` -- trigonometric inverse tangent of float1/float2. + * :code:`hypot(float1, float2): float` -- Euclidean distance between float1, float2 + * :code:`nextafter(float1, float2): float` -- next representable floating-point value after + float1 in direction of float2 + * :code:`copysign(float1, float2): float` -- return number with magnitude of float1 and + sign of float2 + * :code:`{maximum,minimum}(float1, float2): float` -- return max/min of float1, float2 * :code:`{sinh,cosh,tanh}(float|complex): float|complex` -- hyperbolic sine, cosine or tangent. * :code:`{arcsinh,arccosh,arctanh}(float|complex): float|complex` -- hyperbolic inverse sine, cosine or tangent. - * :code:`{log,log10,log1p}(float|complex): float|complex` -- natural, base-10 and + * :code:`{log,log10,log1p,log2}(float|complex): float|complex` -- natural, base-10 and log(1+x) logarithms. * :code:`{exp,expm1}(float|complex): float|complex` -- exponential and exponential minus one. * :code:`sqrt(float|complex): float|complex` -- square root. - * :code:`abs(float|complex): float|complex` -- absolute value. + * :code:`trunc(float): float` -- round towards zero + * :code:`round(float|complex|int): float|complex|int` -- round to nearest integer (`rint`) + * :code:`sign(float|complex|int): float|complex|int` -- return -1, 0, +1 depending on sign + * :code:`abs(float|complex|int): float|complex|int` -- absolute value. * :code:`conj(complex): complex` -- conjugate value. 
* :code:`{real,imag}(complex): float` -- real or imaginary part of complex. * :code:`complex(float, float): complex` -- complex from real and imaginary diff --git a/numexpr/bespoke_functions.hpp b/numexpr/bespoke_functions.hpp index 76e6d35..782f358 100644 --- a/numexpr/bespoke_functions.hpp +++ b/numexpr/bespoke_functions.hpp @@ -23,8 +23,43 @@ inline float signf(float x){ return 0; // handles +0.0 and -0.0 } +// round function for ints +inline int rinti(int x) {return x;} +inline long rintl(long x) {return x;} +// abs function for ints +inline int fabsi(int x) {return x<0 ? -x: x;} +inline long fabsl(long x) {return x<0 ? -x: x;} +// fmod function for ints +inline int fmodi(int x, int y) {return (int)fmodf((float)x, (float)y);} +inline long fmodl(long x, long y) {return (long)fmodf((long)x, (long)y);} #ifdef USE_VML +static void viRint(MKL_INT n, const int* x, int* dest) +{ + memcpy(dest, x, n * sizeof(int)); // just copy x1 which is already int +}; + +static void vlRint(MKL_INT n, const long* x, long* dest) +{ + memcpy(dest, x, n * sizeof(long)); // just copy x1 which is already int +}; + +static void viFabs(MKL_INT n, const int* x, int* dest) +{ + MKL_INT j; + for (j=0; j kind_rank.index(kind): diff --git a/numexpr/functions.hpp b/numexpr/functions.hpp index a364a44..109b8f3 100644 --- a/numexpr/functions.hpp +++ b/numexpr/functions.hpp @@ -212,7 +212,9 @@ FUNC_BC(FUNC_BC_LAST, NULL, NULL, NULL) #define ELIDE_FUNC_II #define FUNC_II(...) #endif -FUNC_II(FUNC_SIGN_II, "sign_ii", signi, viSign) +FUNC_II(FUNC_SIGN_II, "sign_ii", signi, viSign) +FUNC_II(FUNC_ROUND_II, "round_ii", rinti, viRint) +FUNC_II(FUNC_ABS_II, "absolute_ii", fabsi, viFabs) FUNC_II(FUNC_II_LAST, NULL, NULL, NULL) #ifdef ELIDE_FUNC_II #undef ELIDE_FUNC_II @@ -223,7 +225,9 @@ FUNC_II(FUNC_II_LAST, NULL, NULL, NULL) #define ELIDE_FUNC_LL #define FUNC_LL(...) 
#endif -FUNC_LL(FUNC_SIGN_LL, "sign_LL", signl, vlSign) +FUNC_LL(FUNC_SIGN_LL, "sign_ll", signl, vlSign) +FUNC_LL(FUNC_ROUND_LL, "round_ll", rintl, vlRint) +FUNC_LL(FUNC_ABS_LL, "absolute_ll", fabsl, vlFabs) FUNC_LL(FUNC_LL_LAST, NULL, NULL, NULL) #ifdef ELIDE_FUNC_LL #undef ELIDE_FUNC_LL diff --git a/numexpr/tests/test_numexpr.py b/numexpr/tests/test_numexpr.py index 62e6215..9d97eab 100644 --- a/numexpr/tests/test_numexpr.py +++ b/numexpr/tests/test_numexpr.py @@ -487,14 +487,17 @@ def test_maximum_minimum(self): assert_array_equal(evaluate("maximum(x,y)"), maximum(x,y)) assert_array_equal(evaluate("minimum(x,y)"), minimum(x,y)) - def test_sign(self): - for dtype in [float, double, int, np.int64, complex]: + def test_sign_round(self): + for dtype in [float, double, np.int32, np.int64, complex]: x = arange(10, dtype=dtype) y = 2 * arange(10, dtype=dtype)[::-1] r = x-y if not np.issubdtype(dtype, np.integer): r[-1] = np.nan + assert evaluate("round(r)").dtype == round(r).dtype + assert evaluate("sign(r)").dtype == sign(r).dtype assert_array_equal(evaluate("sign(r)"), sign(r)) + assert_array_equal(evaluate("round(r)"), round(r)) def test_rational_expr(self): a = arange(1e6) From e789d78f811e56a4eb91796d62b12170c493769c Mon Sep 17 00:00:00 2001 From: Francesc Alted Date: Wed, 24 Sep 2025 12:30:45 +0200 Subject: [PATCH 106/166] Getting ready for release 2.13.0 --- ANNOUNCE.rst | 8 +++++--- AUTHORS.txt | 4 ++-- RELEASE_NOTES.rst | 15 ++++++++++++--- VERSION | 2 +- 4 files changed, 20 insertions(+), 9 deletions(-) diff --git a/ANNOUNCE.rst b/ANNOUNCE.rst index 03fc73f..791ffc5 100644 --- a/ANNOUNCE.rst +++ b/ANNOUNCE.rst @@ -1,11 +1,13 @@ ========================= -Announcing NumExpr 2.12.1 +Announcing NumExpr 2.13.0 ========================= Hi everyone, -NumExpr 2.12.1 allows isnan/isfinite/isinf functions to be used with complex. -Also, OneAPI MKL has been fixed. Thanks to Luke Shaw for these contributions. 
+NumExpr 2.13.0 introduced a bunch of new features including new +bitwise operators (&, |, ^, ~), floor division (//). It also adds +many new functions (like hypot, log2, maximum, minimum, nextafter...). +Thanks to Luke Shaw for these contributions. Project documentation is available at: diff --git a/AUTHORS.txt b/AUTHORS.txt index 592fcd1..28d978c 100644 --- a/AUTHORS.txt +++ b/AUTHORS.txt @@ -30,5 +30,5 @@ since 2016 to 2023. Teng Liu fixed many bugs, and in particular, contributed valuable fixes to the new regex sanitizer for expressions. -Luke Shaw contributed new isnan/isinf/isfinite functions, and expanded -the amount of opcodes from 128 to 256. +Luke Shaw contributed a bunch of new functions, and expanded the amount +of opcodes from 128 to 256. diff --git a/RELEASE_NOTES.rst b/RELEASE_NOTES.rst index fded329..fe4e22b 100644 --- a/RELEASE_NOTES.rst +++ b/RELEASE_NOTES.rst @@ -1,13 +1,22 @@ ===================================== -Release notes for NumExpr 2.12 series +Release notes for NumExpr 2.13 series ===================================== -Changes from 2.12.1 to 2.12.2 +Changes from 2.12.1 to 2.13.0 ----------------------------- -* **Under development.** +* New functionality has been added: + * Bitwise operators (and, or, not, xor): `&, |, ~, ^` + * New binary arithmetic operator for floor division: `//` + * New functions: `signbit`, `hypot`, `copysign`, `nextafter`, `maximum`, + `minimum`, `log2`, `trunc`, `round` and `sign`. + * Also enables integer outputs for integer inputs for + `abs`, `fmod`, `copy`, `ones_like`, `sign` and `round`. + Thanks to Luke Shaw for the contributions. + +* New wheels for Python 3.14 and 3.14t are provided. 
Changes from 2.12.0 to 2.12.1 ----------------------------- diff --git a/VERSION b/VERSION index 8b62883..fb2c076 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -2.12.2.dev0 +2.13.0 From e7aac75f0334c0a602dae52990ffd19d364e69a0 Mon Sep 17 00:00:00 2001 From: Luke Shaw Date: Wed, 24 Sep 2025 12:46:13 +0200 Subject: [PATCH 107/166] Fix fmod bug --- numexpr/bespoke_functions.hpp | 34 ++++++++++++++++++---------------- numexpr/expressions.py | 4 +++- 2 files changed, 21 insertions(+), 17 deletions(-) diff --git a/numexpr/bespoke_functions.hpp b/numexpr/bespoke_functions.hpp index 782f358..cb85957 100644 --- a/numexpr/bespoke_functions.hpp +++ b/numexpr/bespoke_functions.hpp @@ -30,8 +30,9 @@ inline long rintl(long x) {return x;} inline int fabsi(int x) {return x<0 ? -x: x;} inline long fabsl(long x) {return x<0 ? -x: x;} // fmod function for ints -inline int fmodi(int x, int y) {return (int)fmodf((float)x, (float)y);} -inline long fmodl(long x, long y) {return (long)fmodf((long)x, (long)y);} +/* Have to add FUNC_III, FUNC_LLL signatures to functions.hpp*/ +// inline int fmodi(int x, int y) {return (int)fmodf((float)x, (float)y);} +// inline long fmodl(long x, long y) {return (long)fmodf((long)x, (long)y);} #ifdef USE_VML static void viRint(MKL_INT n, const int* x, int* dest) @@ -84,20 +85,21 @@ static void vdfmod(MKL_INT n, const double* x1, const double* x2, double* dest) dest[j] = fmod(x1[j], x2[j]); }; }; -static void vifmod(MKL_INT n, const int* x1, const int* x2, int* dest) -{ - MKL_INT j; - for(j=0; j < n; j++) { - dest[j] = fmodi(x1[j], x2[j]); - }; -}; -static void vlfmod(MKL_INT n, const long* x1, const long* x2, long* dest) -{ - MKL_INT j; - for(j=0; j < n; j++) { - dest[j] = fmodl(x1[j], x2[j]); - }; -}; +/* Have to add FUNC_III, FUNC_LLL signatures to functions.hpp*/ +// static void vifmod(MKL_INT n, const int* x1, const int* x2, int* dest) +// { +// MKL_INT j; +// for(j=0; j < n; j++) { +// dest[j] = fmodi(x1[j], x2[j]); +// }; +// }; +// static void 
vlfmod(MKL_INT n, const long* x1, const long* x2, long* dest) +// { +// MKL_INT j; +// for(j=0; j < n; j++) { +// dest[j] = fmodl(x1[j], x2[j]); +// }; +// }; /* no isnan, isfinite, isinf or signbit in VML */ static void vsIsfinite(MKL_INT n, const float* x1, bool* dest) diff --git a/numexpr/expressions.py b/numexpr/expressions.py index c15538e..f11dd6c 100644 --- a/numexpr/expressions.py +++ b/numexpr/expressions.py @@ -186,9 +186,11 @@ def function(*args): return ConstantNode(func(*[x.value for x in args])) kind = commonKind(args) if kind in ('int', 'long'): - if func.__name__ not in ('copy', 'abs', 'fmod', 'ones_like', 'round', 'sign'): + if func.__name__ not in ('copy', 'abs', 'ones_like', 'round', 'sign'): # except for these special functions (which return ints for int inputs in NumPy) # just do a cast to double + # FIXME: 'fmod' outputs ints for NumPy when inputs are ints, but need to + # add new function signatures FUNC_LLL FUNC_III to support this kind = 'double' else: # Apply regular casting rules From 9c1d5981accfec11666891eebb4418b8883c0057 Mon Sep 17 00:00:00 2001 From: Luke Shaw Date: Wed, 24 Sep 2025 13:10:46 +0200 Subject: [PATCH 108/166] Fix test --- numexpr/tests/test_numexpr.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/numexpr/tests/test_numexpr.py b/numexpr/tests/test_numexpr.py index 9d97eab..be0b055 100644 --- a/numexpr/tests/test_numexpr.py +++ b/numexpr/tests/test_numexpr.py @@ -903,8 +903,10 @@ class Skip(Exception): pass or "%" in expr or "arctan2" in expr or "fmod" in expr - # or "hypot" in expr - # or "nextafter" in expr + or "hypot" in expr + or "nextafter" in expr + or "copysign" in expr + or "trunc" in expr or "floor" in expr or "ceil" in expr ) From 235972629164022f74419b8ff408058ce44e2a28 Mon Sep 17 00:00:00 2001 From: Luke Shaw Date: Wed, 24 Sep 2025 13:13:30 +0200 Subject: [PATCH 109/166] Add TODO --- numexpr/bespoke_functions.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git 
a/numexpr/bespoke_functions.hpp b/numexpr/bespoke_functions.hpp index cb85957..e404edc 100644 --- a/numexpr/bespoke_functions.hpp +++ b/numexpr/bespoke_functions.hpp @@ -30,7 +30,7 @@ inline long rintl(long x) {return x;} inline int fabsi(int x) {return x<0 ? -x: x;} inline long fabsl(long x) {return x<0 ? -x: x;} // fmod function for ints -/* Have to add FUNC_III, FUNC_LLL signatures to functions.hpp*/ +// TODO: Have to add FUNC_III, FUNC_LLL signatures to functions.hpp to enable these // inline int fmodi(int x, int y) {return (int)fmodf((float)x, (float)y);} // inline long fmodl(long x, long y) {return (long)fmodf((long)x, (long)y);} @@ -85,7 +85,7 @@ static void vdfmod(MKL_INT n, const double* x1, const double* x2, double* dest) dest[j] = fmod(x1[j], x2[j]); }; }; -/* Have to add FUNC_III, FUNC_LLL signatures to functions.hpp*/ +// TODO: Have to add FUNC_III, FUNC_LLL signatures to functions.hpp // static void vifmod(MKL_INT n, const int* x1, const int* x2, int* dest) // { // MKL_INT j; From fbb4e14263612662e4e5cb71ac44bc47ba5099f6 Mon Sep 17 00:00:00 2001 From: Francesc Alted Date: Wed, 24 Sep 2025 14:26:06 +0200 Subject: [PATCH 110/166] Post 2.13.0 release actions done --- ANNOUNCE.rst | 19 +++++++++++-------- RELEASE_NOTES.rst | 6 ++++++ VERSION | 2 +- doc/user_guide.rst | 4 ++-- 4 files changed, 20 insertions(+), 11 deletions(-) diff --git a/ANNOUNCE.rst b/ANNOUNCE.rst index 791ffc5..a668477 100644 --- a/ANNOUNCE.rst +++ b/ANNOUNCE.rst @@ -11,19 +11,22 @@ Thanks to Luke Shaw for these contributions. Project documentation is available at: -http://numexpr.readthedocs.io/ +https://numexpr.readthedocs.io/ -Changes from 2.12.0 to 2.12.1 +Changes from 2.12.1 to 2.13.0 ----------------------------- -* Added complex counterparts for isnan/isfinite/isinf functions. - Thanks to Luke Shaw. 
+* New functionality has been added: + * Bitwise operators (and, or, not, xor): `&, |, ~, ^` + * New binary arithmetic operator for floor division: `//` + * New functions: `signbit`, `hypot`, `copysign`, `nextafter`, `maximum`, + `minimum`, `log2`, `trunc`, `round` and `sign`. + * Also enables integer outputs for integer inputs for + `abs`, `fmod`, `copy`, `ones_like`, `sign` and `round`. -* Updated documentation for the new functions and instructions - for adding new functions to the virtual machine. Thanks to Luke Shaw. + Thanks to Luke Shaw for the contributions. -* Fixed MKL support; it was broken in 2.12.0. Thanks to - Christoph Gohlke for reporting the issue. +* New wheels for Python 3.14 and 3.14t are provided. What's Numexpr? --------------- diff --git a/RELEASE_NOTES.rst b/RELEASE_NOTES.rst index fe4e22b..8ed7222 100644 --- a/RELEASE_NOTES.rst +++ b/RELEASE_NOTES.rst @@ -2,6 +2,11 @@ Release notes for NumExpr 2.13 series ===================================== +Changes from 2.13.0 to 2.13.1 +----------------------------- + +* **Under development.** + Changes from 2.12.1 to 2.13.0 ----------------------------- @@ -18,6 +23,7 @@ Changes from 2.12.1 to 2.13.0 * New wheels for Python 3.14 and 3.14t are provided. 
+ Changes from 2.12.0 to 2.12.1 ----------------------------- diff --git a/VERSION b/VERSION index fb2c076..6dea080 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -2.13.0 +2.13.1.dev0 diff --git a/doc/user_guide.rst b/doc/user_guide.rst index 75dc4de..84f085d 100644 --- a/doc/user_guide.rst +++ b/doc/user_guide.rst @@ -1,5 +1,5 @@ -NumExpr 2.12 User Guide -====================== +NumExpr User Guide +================== The NumExpr package supplies routines for the fast evaluation of array expressions elementwise by using a vector-based virtual From 34c8486b6ceb9b0ecb4b02c78e88e0edafcc2632 Mon Sep 17 00:00:00 2001 From: jorenham Date: Wed, 24 Sep 2025 15:22:56 +0200 Subject: [PATCH 111/166] typing stubs for `version` and `interpreter` --- MANIFEST.in | 2 +- numexpr/interpreter.pyi | 22 ++++++++++++++++++++++ numexpr/version.pyi | 6 ++++++ 3 files changed, 29 insertions(+), 1 deletion(-) create mode 100644 numexpr/interpreter.pyi create mode 100644 numexpr/version.pyi diff --git a/MANIFEST.in b/MANIFEST.in index 4ec8d9f..11f9a15 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -2,7 +2,7 @@ include MANIFEST.in VERSION include *.rst *.txt *.cfg site.cfg.example -recursive-include numexpr *.cpp *.hpp *.py +recursive-include numexpr *.cpp *.hpp *.py *.pyi recursive-include numexpr/win32 *.c *.h exclude numexpr/__config__.py RELEASING.txt site.cfg diff --git a/numexpr/interpreter.pyi b/numexpr/interpreter.pyi new file mode 100644 index 0000000..1b9b8af --- /dev/null +++ b/numexpr/interpreter.pyi @@ -0,0 +1,22 @@ +from typing import Final, Literal, TypeAlias + + +_VMLAccuracyMode: TypeAlias = Literal[0, 1, 2, 3] + +MAX_THREADS: Final[int] = ... +__BLOCK_SIZE1__: Final[int] = ... + +#ifdef USE_VML +def _get_vml_version() -> str: ... +def _set_vml_accuracy_mode(mode_in: _VMLAccuracyMode, /) -> _VMLAccuracyMode: ... +def _set_vml_num_threads(max_num_threads: int, /) -> None: ... +def _get_vml_num_threads() -> int: ... +#endif +def _get_num_threads() -> int: ... 
+def _set_num_threads(num_threads: int, /) -> int: ... + +allaxes: Final = 255 +funccodes: Final[dict[bytes, int]] = ... +maxdims: Final[int] = ... +opcodes: Final[dict[bytes, int]] = ... +use_vml: Final[bool] = ... diff --git a/numexpr/version.pyi b/numexpr/version.pyi new file mode 100644 index 0000000..2dfa994 --- /dev/null +++ b/numexpr/version.pyi @@ -0,0 +1,6 @@ +from typing import Final + +__version__: Final[str] = ... +version: Final[str] = ... +numpy_build_version: Final[str] = ... +platform_machine: Final[str] = ... From 65ad97ecb1d5c9bc63d35f505b9beba5b506c5f9 Mon Sep 17 00:00:00 2001 From: jorenham Date: Wed, 24 Sep 2025 15:56:31 +0200 Subject: [PATCH 112/166] annotate `utils` --- numexpr/utils.py | 131 ++++++++++++++++++++++++++++++++--------------- 1 file changed, 89 insertions(+), 42 deletions(-) diff --git a/numexpr/utils.py b/numexpr/utils.py index 9e45fbe..9993b43 100644 --- a/numexpr/utils.py +++ b/numexpr/utils.py @@ -15,8 +15,21 @@ import contextvars import os import subprocess - -from numexpr import use_vml +from typing import ( + Final, + Generic, + ItemsView, + Iterable, + Iterator, + KeysView, + Literal, + TypeVar, + ValuesView, + cast, + overload, +) + +from numexpr import use_vml # type: ignore[attr-defined] from numexpr.interpreter import MAX_THREADS, _get_num_threads, _set_num_threads from . import version @@ -25,19 +38,25 @@ from numexpr.interpreter import (_get_vml_num_threads, _get_vml_version, _set_vml_accuracy_mode, _set_vml_num_threads) +else: + # mypy does not understand this, whereas pyright does + _get_vml_num_threads = None # type: ignore[assignment] + _get_vml_version = None # type: ignore[assignment] + _set_vml_accuracy_mode = None # type: ignore[assignment] + _set_vml_num_threads = None # type: ignore[assignment] -def get_vml_version(): +def get_vml_version() -> str | None: """ Get the VML/MKL library version. 
""" - if use_vml: + if _get_vml_version is not None: return _get_vml_version() else: return None -def set_vml_accuracy_mode(mode): +def set_vml_accuracy_mode(mode: str | None) -> Literal['low', 'high', 'fast'] | None: """ Set the accuracy mode for VML operations. @@ -56,8 +75,10 @@ def set_vml_accuracy_mode(mode): Returns old accuracy settings. """ - if use_vml: + if _set_vml_accuracy_mode is not None: + acc_dict: dict[str | None, Literal[0, 1, 2, 3]] acc_dict = {None: 0, 'low': 1, 'high': 2, 'fast': 3} + acc_reverse_dict: dict[int, Literal['low', 'high', 'fast']] acc_reverse_dict = {1: 'low', 2: 'high', 3: 'fast'} if mode not in list(acc_dict.keys()): raise ValueError( @@ -68,7 +89,7 @@ def set_vml_accuracy_mode(mode): return None -def set_vml_num_threads(nthreads): +def set_vml_num_threads(nthreads: int) -> None: """ Suggests a maximum number of threads to be used in VML operations. @@ -80,11 +101,11 @@ def set_vml_num_threads(nthreads): for more info about it. """ - if use_vml: + if _set_vml_num_threads is not None: _set_vml_num_threads(nthreads) pass -def get_vml_num_threads(): +def get_vml_num_threads() -> int | None: """ Gets the maximum number of threads to be used in VML operations. @@ -96,11 +117,11 @@ def get_vml_num_threads(): for more info about it. """ - if use_vml: + if _get_vml_num_threads is not None: return _get_vml_num_threads() return None -def set_num_threads(nthreads): +def set_num_threads(nthreads: int) -> int: """ Sets a number of threads to be used in operations. @@ -112,13 +133,13 @@ def set_num_threads(nthreads): old_nthreads = _set_num_threads(nthreads) return old_nthreads -def get_num_threads(): +def get_num_threads() -> int: """ Gets the number of threads currently in use for operations. 
""" return _get_num_threads() -def _init_num_threads(): +def _init_num_threads() -> int: """ Detects the environment variable 'NUMEXPR_MAX_THREADS' to set the threadpool size, and if necessary the slightly redundant 'NUMEXPR_NUM_THREADS' or @@ -168,7 +189,7 @@ def _init_num_threads(): return requested_threads -def detect_number_of_cores(): +def detect_number_of_cores() -> int: """ Detects the number of cores on a system. Cribbed from pp. """ @@ -177,7 +198,7 @@ def detect_number_of_cores(): if "SC_NPROCESSORS_ONLN" in os.sysconf_names: # Linux & Unix: ncpus = os.sysconf("SC_NPROCESSORS_ONLN") - if isinstance(ncpus, int) and ncpus > 0: + if isinstance(ncpus, int) and ncpus > 0: # type: ignore[redundant-expr] return ncpus else: # OSX: return int(subprocess.check_output(["sysctl", "-n", "hw.ncpu"])) @@ -191,7 +212,7 @@ def detect_number_of_cores(): return 1 # Default -def detect_number_of_threads(): +def detect_number_of_threads() -> int: """ DEPRECATED: use `_init_num_threads` instead. If this is modified, please update the note in: https://github.com/pydata/numexpr/wiki/Numexpr-Users-Guide @@ -211,64 +232,90 @@ def detect_number_of_threads(): return nthreads -class CacheDict(dict): +_KT = TypeVar('_KT') +_VT = TypeVar('_VT') + + +class CacheDict(dict[_KT, _VT], Generic[_KT, _VT]): """ A dictionary that prevents itself from growing too much. 
""" - def __init__(self, maxentries): + maxentries: Final[int] + + def __init__(self, maxentries: int) -> None: self.maxentries = maxentries - super(CacheDict, self).__init__(self) + super().__init__(self) - def __setitem__(self, key, value): + def __setitem__(self, key: _KT, value: _VT) -> None: # Protection against growing the cache too much if len(self) > self.maxentries: # Remove a 10% of (arbitrary) elements from the cache entries_to_remove = self.maxentries // 10 for k in list(self.keys())[:entries_to_remove]: - super(CacheDict, self).__delitem__(k) - super(CacheDict, self).__setitem__(key, value) + super().__delitem__(k) + super().__setitem__(key, value) -class ContextDict: +class ContextDict(Generic[_VT]): """ A context aware version dictionary """ - def __init__(self): + _context_data: contextvars.ContextVar[dict[str, _VT]] + + def __init__(self) -> None: self._context_data = contextvars.ContextVar('context_data', default={}) - def set(self, key=None, value=None, **kwargs): + @overload + def set(self, key: None = None, value: None = None, **kwargs: _VT) -> None: ... + @overload + def set(self, key: str, value: _VT, **kwargs: _VT) -> None: ... + def set(self, key: str | None = None, value: _VT | None = None, **kwargs: _VT) -> None: data = self._context_data.get().copy() if key is not None: - data[key] = value + data[key] = cast('_VT', value) for k, v in kwargs.items(): data[k] = v self._context_data.set(data) - def get(self, key, default=None): + @overload + def get(self, key: str, default: _VT) -> _VT: ... + @overload + def get(self, key: str, default: _VT | None = None) -> _VT | None: ... 
+ def get(self, key: str, default: _VT | None = None) -> _VT | None: data = self._context_data.get() return data.get(key, default) - def delete(self, key): + def delete(self, key: str) -> None: data = self._context_data.get().copy() if key in data: del data[key] self._context_data.set(data) - def clear(self): + def clear(self) -> None: self._context_data.set({}) - def all(self): + def all(self) -> dict[str, _VT]: return self._context_data.get() - def update(self, *args, **kwargs): + @overload + def update(self, **kwargs: _VT) -> None: ... + @overload + def update(self, other: dict[str, _VT], /, **kwargs: _VT) -> None: ... + @overload + def update(self, other: Iterable[tuple[str, _VT]], /, **kwargs: _VT) -> None: ... + def update( # type: ignore[misc] # false positive mypy error + self, + *args: dict[str, _VT] | Iterable[tuple[str, _VT]], + **kwargs: _VT, + ) -> None: data = self._context_data.get().copy() if args: - if len(args) > 1: + if len(args) != 1: raise TypeError(f"update() takes at most 1 positional argument ({len(args)} given)") other = args[0] if isinstance(other, dict): @@ -280,32 +327,32 @@ def update(self, *args, **kwargs): data.update(kwargs) self._context_data.set(data) - def keys(self): + def keys(self) -> KeysView[str]: return self._context_data.get().keys() - def values(self): + def values(self) -> ValuesView[_VT]: return self._context_data.get().values() - def items(self): + def items(self) -> ItemsView[str, _VT]: return self._context_data.get().items() - def __getitem__(self, key): + def __getitem__(self, key: str) -> _VT | None: return self.get(key) - def __setitem__(self, key, value): + def __setitem__(self, key: str, value: _VT) -> None: self.set(key, value) - def __delitem__(self, key): + def __delitem__(self, key: str) -> None: self.delete(key) - def __contains__(self, key): + def __contains__(self, key: str) -> bool: return key in self._context_data.get() - def __len__(self): + def __len__(self) -> int: return len(self._context_data.get()) 
- def __iter__(self): + def __iter__(self) -> Iterator[str]: return iter(self._context_data.get()) - def __repr__(self): + def __repr__(self) -> str: return repr(self._context_data.get()) From 121f2b0dc8dd4c6599e1f212e2f54f2f51eb7487 Mon Sep 17 00:00:00 2001 From: jorenham Date: Wed, 24 Sep 2025 17:07:47 +0200 Subject: [PATCH 113/166] annotate `cpuinfo` --- numexpr/cpuinfo.py | 528 +++++++++++++++++++++++++-------------------- 1 file changed, 290 insertions(+), 238 deletions(-) diff --git a/numexpr/cpuinfo.py b/numexpr/cpuinfo.py index 897a4ca..676e56e 100755 --- a/numexpr/cpuinfo.py +++ b/numexpr/cpuinfo.py @@ -29,25 +29,44 @@ import re import subprocess import sys -import types import warnings - -is_cpu_amd_intel = False # DEPRECATION WARNING: WILL BE REMOVED IN FUTURE RELEASE - -def getoutput(cmd, successful_status=(0,), stacklevel=1): +from typing import ( + Any, + Callable, + ClassVar, + Container, + Final, + Generator, + NoReturn, + Sequence, + TypeAlias, + TypeVar, + overload, +) + +_CMD: TypeAlias = str | Sequence[str] +_Statuses: TypeAlias = Container[int] + +is_cpu_amd_intel: Final = False # DEPRECATION WARNING: WILL BE REMOVED IN FUTURE RELEASE + +def getoutput(cmd: _CMD, + successful_status: _Statuses = (0,), + stacklevel: int = 1) -> tuple[bool, bytes]: try: p = subprocess.Popen(cmd, stdout=subprocess.PIPE) output, _ = p.communicate() status = p.returncode except EnvironmentError as e: warnings.warn(str(e), UserWarning, stacklevel=stacklevel) - return False, '' + return False, b'' if os.WIFEXITED(status) and os.WEXITSTATUS(status) in successful_status: return True, output return False, output -def command_info(successful_status=(0,), stacklevel=1, **kw): +def command_info(successful_status: _Statuses = (0,), + stacklevel: int = 1, + **kw: _CMD) -> dict[str, bytes]: info = {} for key in kw: ok, output = getoutput(kw[key], successful_status=successful_status, @@ -57,7 +76,9 @@ def command_info(successful_status=(0,), stacklevel=1, **kw): return info -def 
command_by_line(cmd, successful_status=(0,), stacklevel=1): +def command_by_line(cmd: _CMD, + successful_status: _Statuses = (0,), + stacklevel: int = 1) -> Generator[str, None, None]: ok, output = getoutput(cmd, successful_status=successful_status, stacklevel=stacklevel + 1) if not ok: @@ -70,8 +91,10 @@ def command_by_line(cmd, successful_status=(0,), stacklevel=1): yield line.strip() -def key_value_from_command(cmd, sep, successful_status=(0,), - stacklevel=1): +def key_value_from_command(cmd: _CMD, + sep: str, + successful_status: _Statuses = (0,), + stacklevel: int = 1) -> dict[str, str]: d = {} for line in command_by_line(cmd, successful_status=successful_status, stacklevel=stacklevel + 1): @@ -81,18 +104,25 @@ def key_value_from_command(cmd, sep, successful_status=(0,), return d -class CPUInfoBase(object): +_T = TypeVar('_T') + + +class CPUInfoBase: """Holds CPU information and provides methods for requiring the availability of various CPU features. """ - def _try_call(self, func): + @overload + def _try_call(self, func: Callable[..., NoReturn]) -> None: ... + @overload + def _try_call(self, func: Callable[[], _T]) -> _T | None: ... 
+ def _try_call(self, func: Callable[[], _T]) -> _T | None: try: return func() except: - pass + return None - def __getattr__(self, name): + def __getattr__(self, name: str) -> Callable[..., Any]: if not name.startswith('_'): if hasattr(self, '_' + name): attr = getattr(self, '_' + name) @@ -102,25 +132,27 @@ def __getattr__(self, name): return lambda: None raise AttributeError(name) - def _getNCPUs(self): + def _getNCPUs(self) -> int: return 1 - def __get_nbits(self): + def __get_nbits(self) -> str: abits = platform.architecture()[0] - nbits = re.compile(r'(\d+)bit').search(abits).group(1) - return nbits + match = re.compile(r'(\d+)bit').search(abits) + assert match, abits + return match.group(1) - def _is_32bit(self): + def _is_32bit(self) -> bool: return self.__get_nbits() == '32' - def _is_64bit(self): + def _is_64bit(self) -> bool: return self.__get_nbits() == '64' class LinuxCPUInfo(CPUInfoBase): - info = None + # This will never be `None` on (initialized) instances + info: list[dict[str, str]] = None # type: ignore[assignment] - def __init__(self): + def __init__(self) -> None: if self.info is not None: return info = [{}] @@ -143,64 +175,64 @@ def __init__(self): fo.close() self.__class__.info = info - def _not_impl(self): + def _not_impl(self) -> None: pass # Athlon - def _is_AMD(self): + def _is_AMD(self) -> bool: return self.info[0]['vendor_id'] == 'AuthenticAMD' - def _is_AthlonK6_2(self): + def _is_AthlonK6_2(self) -> bool: return self._is_AMD() and self.info[0]['model'] == '2' - def _is_AthlonK6_3(self): + def _is_AthlonK6_3(self) -> bool: return self._is_AMD() and self.info[0]['model'] == '3' - def _is_AthlonK6(self): + def _is_AthlonK6(self) -> bool: return re.match(r'.*?AMD-K6', self.info[0]['model name']) is not None - def _is_AthlonK7(self): + def _is_AthlonK7(self) -> bool: return re.match(r'.*?AMD-K7', self.info[0]['model name']) is not None - def _is_AthlonMP(self): + def _is_AthlonMP(self) -> bool: return re.match(r'.*?Athlon\(tm\) MP\b', 
self.info[0]['model name']) is not None - def _is_AMD64(self): + def _is_AMD64(self) -> bool: return self.is_AMD() and self.info[0]['family'] == '15' - def _is_Athlon64(self): + def _is_Athlon64(self) -> bool: return re.match(r'.*?Athlon\(tm\) 64\b', self.info[0]['model name']) is not None - def _is_AthlonHX(self): + def _is_AthlonHX(self) -> bool: return re.match(r'.*?Athlon HX\b', self.info[0]['model name']) is not None - def _is_Opteron(self): + def _is_Opteron(self) -> bool: return re.match(r'.*?Opteron\b', self.info[0]['model name']) is not None - def _is_Hammer(self): + def _is_Hammer(self) -> bool: return re.match(r'.*?Hammer\b', self.info[0]['model name']) is not None # Alpha - def _is_Alpha(self): + def _is_Alpha(self) -> bool: return self.info[0]['cpu'] == 'Alpha' - def _is_EV4(self): + def _is_EV4(self) -> bool: return self.is_Alpha() and self.info[0]['cpu model'] == 'EV4' - def _is_EV5(self): + def _is_EV5(self) -> bool: return self.is_Alpha() and self.info[0]['cpu model'] == 'EV5' - def _is_EV56(self): + def _is_EV56(self) -> bool: return self.is_Alpha() and self.info[0]['cpu model'] == 'EV56' - def _is_PCA56(self): + def _is_PCA56(self) -> bool: return self.is_Alpha() and self.info[0]['cpu model'] == 'PCA56' # Intel @@ -208,356 +240,377 @@ def _is_PCA56(self): #XXX _is_i386 = _not_impl - def _is_Intel(self): + def _is_Intel(self) -> bool: return self.info[0]['vendor_id'] == 'GenuineIntel' - def _is_i486(self): + def _is_i486(self) -> bool: return self.info[0]['cpu'] == 'i486' - def _is_i586(self): + def _is_i586(self) -> bool: return self.is_Intel() and self.info[0]['cpu family'] == '5' - def _is_i686(self): + def _is_i686(self) -> bool: return self.is_Intel() and self.info[0]['cpu family'] == '6' - def _is_Celeron(self): + def _is_Celeron(self) -> bool: return re.match(r'.*?Celeron', self.info[0]['model name']) is not None - def _is_Pentium(self): + def _is_Pentium(self) -> bool: return re.match(r'.*?Pentium', self.info[0]['model name']) is not None 
- def _is_PentiumII(self): + def _is_PentiumII(self) -> bool: return re.match(r'.*?Pentium.*?II\b', self.info[0]['model name']) is not None - def _is_PentiumPro(self): + def _is_PentiumPro(self) -> bool: return re.match(r'.*?PentiumPro\b', self.info[0]['model name']) is not None - def _is_PentiumMMX(self): + def _is_PentiumMMX(self) -> bool: return re.match(r'.*?Pentium.*?MMX\b', self.info[0]['model name']) is not None - def _is_PentiumIII(self): + def _is_PentiumIII(self) -> bool: return re.match(r'.*?Pentium.*?III\b', self.info[0]['model name']) is not None - def _is_PentiumIV(self): + def _is_PentiumIV(self) -> bool: return re.match(r'.*?Pentium.*?(IV|4)\b', self.info[0]['model name']) is not None - def _is_PentiumM(self): + def _is_PentiumM(self) -> bool: return re.match(r'.*?Pentium.*?M\b', self.info[0]['model name']) is not None - def _is_Prescott(self): + def _is_Prescott(self) -> bool: return self.is_PentiumIV() and self.has_sse3() - def _is_Nocona(self): + def _is_Nocona(self) -> bool: return (self.is_Intel() and self.info[0]['cpu family'] in ('6', '15') and # two s sse3; three s ssse3 not the same thing, this is fine (self.has_sse3() and not self.has_ssse3()) and re.match(r'.*?\blm\b', self.info[0]['flags']) is not None) - def _is_Core2(self): + def _is_Core2(self) -> bool: return (self.is_64bit() and self.is_Intel() and re.match(r'.*?Core\(TM\)2\b', self.info[0]['model name']) is not None) - def _is_Itanium(self): + def _is_Itanium(self) -> bool: return re.match(r'.*?Itanium\b', self.info[0]['family']) is not None - def _is_XEON(self): + def _is_XEON(self) -> bool: return re.match(r'.*?XEON\b', self.info[0]['model name'], re.IGNORECASE) is not None _is_Xeon = _is_XEON # Power - def _is_Power(self): + def _is_Power(self) -> bool: return re.match(r'.*POWER.*', self.info[0]['cpu']) is not None - def _is_Power7(self): + def _is_Power7(self) -> bool: return re.match(r'.*POWER7.*', self.info[0]['cpu']) is not None - def _is_Power8(self): + def _is_Power8(self) 
-> bool: return re.match(r'.*POWER8.*', self.info[0]['cpu']) is not None - def _is_Power9(self): + def _is_Power9(self) -> bool: return re.match(r'.*POWER9.*', self.info[0]['cpu']) is not None - def _has_Altivec(self): + def _has_Altivec(self) -> bool: return re.match(r'.*altivec\ supported.*', self.info[0]['cpu']) is not None # Varia - def _is_singleCPU(self): + def _is_singleCPU(self) -> bool: return len(self.info) == 1 - def _getNCPUs(self): + def _getNCPUs(self) -> int: return len(self.info) - def _has_fdiv_bug(self): + def _has_fdiv_bug(self) -> bool: return self.info[0]['fdiv_bug'] == 'yes' - def _has_f00f_bug(self): + def _has_f00f_bug(self) -> bool: return self.info[0]['f00f_bug'] == 'yes' - def _has_mmx(self): + def _has_mmx(self) -> bool: return re.match(r'.*?\bmmx\b', self.info[0]['flags']) is not None - def _has_sse(self): + def _has_sse(self) -> bool: return re.match(r'.*?\bsse\b', self.info[0]['flags']) is not None - def _has_sse2(self): + def _has_sse2(self) -> bool: return re.match(r'.*?\bsse2\b', self.info[0]['flags']) is not None - def _has_sse3(self): + def _has_sse3(self) -> bool: return re.match(r'.*?\bpni\b', self.info[0]['flags']) is not None - def _has_ssse3(self): + def _has_ssse3(self) -> bool: return re.match(r'.*?\bssse3\b', self.info[0]['flags']) is not None - def _has_3dnow(self): + def _has_3dnow(self) -> bool: return re.match(r'.*?\b3dnow\b', self.info[0]['flags']) is not None - def _has_3dnowext(self): + def _has_3dnowext(self) -> bool: return re.match(r'.*?\b3dnowext\b', self.info[0]['flags']) is not None class IRIXCPUInfo(CPUInfoBase): - info = None + # The first initialized instance will set this class variable + info: ClassVar[dict[str, str]] = None # type: ignore[assignment] - def __init__(self): + def __init__(self) -> None: if self.info is not None: return info = key_value_from_command('sysconf', sep=' ', successful_status=(0, 1)) self.__class__.info = info - def _not_impl(self): + def _not_impl(self) -> None: pass - def 
_is_singleCPU(self): + def _is_singleCPU(self) -> bool: return self.info.get('NUM_PROCESSORS') == '1' - def _getNCPUs(self): + def _getNCPUs(self) -> int: return int(self.info.get('NUM_PROCESSORS', 1)) - def __cputype(self, n): - return self.info.get('PROCESSORS').split()[0].lower() == 'r%s' % (n) + def __cputype(self, n: int | str) -> bool: + return self.info['PROCESSORS'].split()[0].lower() == 'r%s' % (n) - def _is_r2000(self): + def _is_r2000(self) -> bool: return self.__cputype(2000) - def _is_r3000(self): + def _is_r3000(self) -> bool: return self.__cputype(3000) - def _is_r3900(self): + def _is_r3900(self) -> bool: return self.__cputype(3900) - def _is_r4000(self): + def _is_r4000(self) -> bool: return self.__cputype(4000) - def _is_r4100(self): + def _is_r4100(self) -> bool: return self.__cputype(4100) - def _is_r4300(self): + def _is_r4300(self) -> bool: return self.__cputype(4300) - def _is_r4400(self): + def _is_r4400(self) -> bool: return self.__cputype(4400) - def _is_r4600(self): + def _is_r4600(self) -> bool: return self.__cputype(4600) - def _is_r4650(self): + def _is_r4650(self) -> bool: return self.__cputype(4650) - def _is_r5000(self): + def _is_r5000(self) -> bool: return self.__cputype(5000) - def _is_r6000(self): + def _is_r6000(self) -> bool: return self.__cputype(6000) - def _is_r8000(self): + def _is_r8000(self) -> bool: return self.__cputype(8000) - def _is_r10000(self): + def _is_r10000(self) -> bool: return self.__cputype(10000) - def _is_r12000(self): + def _is_r12000(self) -> bool: return self.__cputype(12000) - def _is_rorion(self): + def _is_rorion(self) -> bool: return self.__cputype('orion') - def get_ip(self): + def get_ip(self) -> str | None: try: return self.info.get('MACHINE') except: - pass + return None - def __machine(self, n): - return self.info.get('MACHINE').lower() == 'ip%s' % (n) + def __machine(self, n: int) -> bool: + return self.info['MACHINE'].lower() == 'ip%s' % (n) - def _is_IP19(self): + def _is_IP19(self) -> 
bool: return self.__machine(19) - def _is_IP20(self): + def _is_IP20(self) -> bool: return self.__machine(20) - def _is_IP21(self): + def _is_IP21(self) -> bool: return self.__machine(21) - def _is_IP22(self): + def _is_IP22(self) -> bool: return self.__machine(22) - def _is_IP22_4k(self): + def _is_IP22_4k(self) -> bool: return self.__machine(22) and self._is_r4000() - def _is_IP22_5k(self): + def _is_IP22_5k(self) -> bool: return self.__machine(22) and self._is_r5000() - def _is_IP24(self): + def _is_IP24(self) -> bool: return self.__machine(24) - def _is_IP25(self): + def _is_IP25(self) -> bool: return self.__machine(25) - def _is_IP26(self): + def _is_IP26(self) -> bool: return self.__machine(26) - def _is_IP27(self): + def _is_IP27(self) -> bool: return self.__machine(27) - def _is_IP28(self): + def _is_IP28(self) -> bool: return self.__machine(28) - def _is_IP30(self): + def _is_IP30(self) -> bool: return self.__machine(30) - def _is_IP32(self): + def _is_IP32(self) -> bool: return self.__machine(32) - def _is_IP32_5k(self): + def _is_IP32_5k(self) -> bool: return self.__machine(32) and self._is_r5000() - def _is_IP32_10k(self): + def _is_IP32_10k(self) -> bool: return self.__machine(32) and self._is_r10000() class DarwinCPUInfo(CPUInfoBase): - info = None + # The first initialized instance will set this class variable + info: ClassVar[dict[str, Any]] = None # type: ignore[assignment] - def __init__(self): + def __init__(self) -> None: if self.info is not None: return - info = command_info(arch='arch', - machine='machine') + info: dict[str, bytes | Any] = command_info(arch='arch', machine='machine') info['sysctl_hw'] = key_value_from_command(['sysctl', 'hw'], sep='=') self.__class__.info = info - def _not_impl(self): pass + def _not_impl(self) -> None: + pass - def _getNCPUs(self): + def _getNCPUs(self) -> int: return int(self.info['sysctl_hw'].get('hw.ncpu', 1)) - def _is_Power_Macintosh(self): + def _is_Power_Macintosh(self) -> bool: return 
self.info['sysctl_hw']['hw.machine'] == 'Power Macintosh' - def _is_i386(self): + def _is_i386(self) -> bool: return self.info['arch'] == 'i386' - def _is_ppc(self): + def _is_ppc(self) -> bool: return self.info['arch'] == 'ppc' - def __machine(self, n): + def __machine(self, n: int | str) -> bool: return self.info['machine'] == 'ppc%s' % n - def _is_ppc601(self): return self.__machine(601) + def _is_ppc601(self) -> bool: + return self.__machine(601) - def _is_ppc602(self): return self.__machine(602) + def _is_ppc602(self) -> bool: + return self.__machine(602) - def _is_ppc603(self): return self.__machine(603) + def _is_ppc603(self) -> bool: + return self.__machine(603) - def _is_ppc603e(self): return self.__machine('603e') + def _is_ppc603e(self) -> bool: + return self.__machine('603e') - def _is_ppc604(self): return self.__machine(604) + def _is_ppc604(self) -> bool: + return self.__machine(604) - def _is_ppc604e(self): return self.__machine('604e') + def _is_ppc604e(self) -> bool: + return self.__machine('604e') - def _is_ppc620(self): return self.__machine(620) + def _is_ppc620(self) -> bool: + return self.__machine(620) - def _is_ppc630(self): return self.__machine(630) + def _is_ppc630(self) -> bool: + return self.__machine(630) - def _is_ppc740(self): return self.__machine(740) + def _is_ppc740(self) -> bool: + return self.__machine(740) - def _is_ppc7400(self): return self.__machine(7400) + def _is_ppc7400(self) -> bool: + return self.__machine(7400) - def _is_ppc7450(self): return self.__machine(7450) + def _is_ppc7450(self) -> bool: + return self.__machine(7450) - def _is_ppc750(self): return self.__machine(750) + def _is_ppc750(self) -> bool: + return self.__machine(750) - def _is_ppc403(self): return self.__machine(403) + def _is_ppc403(self) -> bool: + return self.__machine(403) - def _is_ppc505(self): return self.__machine(505) + def _is_ppc505(self) -> bool: + return self.__machine(505) - def _is_ppc801(self): return self.__machine(801) + def 
_is_ppc801(self) -> bool: + return self.__machine(801) - def _is_ppc821(self): return self.__machine(821) + def _is_ppc821(self) -> bool: + return self.__machine(821) - def _is_ppc823(self): return self.__machine(823) + def _is_ppc823(self) -> bool: + return self.__machine(823) - def _is_ppc860(self): return self.__machine(860) + def _is_ppc860(self) -> bool: + return self.__machine(860) class NetBSDCPUInfo(CPUInfoBase): - info = None - - def __init__(self): - if self.info is not None: - return - info = {} - info['sysctl_hw'] = key_value_from_command(['sysctl', 'hw'], sep='=') - info['arch'] = info['sysctl_hw'].get('hw.machine_arch', 1) - info['machine'] = info['sysctl_hw'].get('hw.machine', 1) - self.__class__.info = info + # The first initialized instance will set this class variable + info: ClassVar[dict[str, Any]] = None # type: ignore[assignment] - def _not_impl(self): pass + def __init__(self) -> None: + if self.info is not None: + return + sysctl_hw = key_value_from_command(['sysctl', 'hw'], sep='=') + self.__class__.info = { + 'sysctl_hw': sysctl_hw, + 'arch': sysctl_hw.get('hw.machine_arch', 1), + 'machine': sysctl_hw.get('hw.machine', 1), + } + + def _not_impl(self) -> None: + pass - def _getNCPUs(self): - return int(self.info['sysctl_hw'].get('hw.ncpu', 1)) + def _getNCPUs(self) -> int: + return int(self.info['sysctl_hw'].get('hw.ncpu', 1)) - def _is_Intel(self): - if self.info['sysctl_hw'].get('hw.model', "")[0:5] == 'Intel': - return True - return False + def _is_Intel(self) -> bool: + return self.info['sysctl_hw'].get('hw.model', "")[:5] == 'Intel' - def _is_AMD(self): - if self.info['sysctl_hw'].get('hw.model', "")[0:3] == 'AMD': - return True - return False + def _is_AMD(self) -> bool: + return self.info['sysctl_hw'].get('hw.model', "")[:3] == 'AMD' class SunOSCPUInfo(CPUInfoBase): - info = None + # The first initialized instance will set this class variable + info: ClassVar[dict[str, Any]] = None # type: ignore[assignment] - def __init__(self): + 
def __init__(self) -> None: if self.info is not None: return - info = command_info(arch='arch', - mach='mach', - uname_i=['uname', '-i'], - isainfo_b=['isainfo', '-b'], - isainfo_n=['isainfo', '-n'], + info: dict[str, Any] = command_info( + arch='arch', + mach='mach', + uname_i=['uname', '-i'], + isainfo_b=['isainfo', '-b'], + isainfo_n=['isainfo', '-n'], ) info['uname_X'] = key_value_from_command(['uname', '-X'], sep='=') for line in command_by_line(['psrinfo', '-v', '0']): @@ -567,116 +620,115 @@ def __init__(self): break self.__class__.info = info - def _not_impl(self): + def _not_impl(self) -> None: pass - def _is_i386(self): + def _is_i386(self) -> bool: return self.info['isainfo_n'] == 'i386' - def _is_sparc(self): + def _is_sparc(self) -> bool: return self.info['isainfo_n'] == 'sparc' - def _is_sparcv9(self): + def _is_sparcv9(self) -> bool: return self.info['isainfo_n'] == 'sparcv9' - def _getNCPUs(self): + def _getNCPUs(self) -> int: return int(self.info['uname_X'].get('NumCPU', 1)) - def _is_sun4(self): + def _is_sun4(self) -> bool: return self.info['arch'] == 'sun4' - def _is_SUNW(self): + def _is_SUNW(self) -> bool: return re.match(r'SUNW', self.info['uname_i']) is not None - def _is_sparcstation5(self): + def _is_sparcstation5(self) -> bool: return re.match(r'.*SPARCstation-5', self.info['uname_i']) is not None - def _is_ultra1(self): + def _is_ultra1(self) -> bool: return re.match(r'.*Ultra-1', self.info['uname_i']) is not None - def _is_ultra250(self): + def _is_ultra250(self) -> bool: return re.match(r'.*Ultra-250', self.info['uname_i']) is not None - def _is_ultra2(self): + def _is_ultra2(self) -> bool: return re.match(r'.*Ultra-2', self.info['uname_i']) is not None - def _is_ultra30(self): + def _is_ultra30(self) -> bool: return re.match(r'.*Ultra-30', self.info['uname_i']) is not None - def _is_ultra4(self): + def _is_ultra4(self) -> bool: return re.match(r'.*Ultra-4', self.info['uname_i']) is not None - def _is_ultra5_10(self): + def 
_is_ultra5_10(self) -> bool: return re.match(r'.*Ultra-5_10', self.info['uname_i']) is not None - def _is_ultra5(self): + def _is_ultra5(self) -> bool: return re.match(r'.*Ultra-5', self.info['uname_i']) is not None - def _is_ultra60(self): + def _is_ultra60(self) -> bool: return re.match(r'.*Ultra-60', self.info['uname_i']) is not None - def _is_ultra80(self): + def _is_ultra80(self) -> bool: return re.match(r'.*Ultra-80', self.info['uname_i']) is not None - def _is_ultraenterprice(self): + def _is_ultraenterprice(self) -> bool: return re.match(r'.*Ultra-Enterprise', self.info['uname_i']) is not None - def _is_ultraenterprice10k(self): + def _is_ultraenterprice10k(self) -> bool: return re.match(r'.*Ultra-Enterprise-10000', self.info['uname_i']) is not None - def _is_sunfire(self): + def _is_sunfire(self) -> bool: return re.match(r'.*Sun-Fire', self.info['uname_i']) is not None - def _is_ultra(self): + def _is_ultra(self) -> bool: return re.match(r'.*Ultra', self.info['uname_i']) is not None - def _is_cpusparcv7(self): + def _is_cpusparcv7(self) -> bool: return self.info['processor'] == 'sparcv7' - def _is_cpusparcv8(self): + def _is_cpusparcv8(self) -> bool: return self.info['processor'] == 'sparcv8' - def _is_cpusparcv9(self): + def _is_cpusparcv9(self) -> bool: return self.info['processor'] == 'sparcv9' class Win32CPUInfo(CPUInfoBase): - info = None - pkey = r"HARDWARE\DESCRIPTION\System\CentralProcessor" + # The first initialized instance will set this class variable + info: ClassVar[list[dict[str, Any]]] = None # type: ignore[assignment] + + pkey: ClassVar = r"HARDWARE\DESCRIPTION\System\CentralProcessor" # XXX: what does the value of # HKEY_LOCAL_MACHINE\HARDWARE\DESCRIPTION\System\CentralProcessor\0 # mean? 
- def __init__(self): - try: - import _winreg - except ImportError: # Python 3 - import winreg as _winreg + def __init__(self) -> None: + import winreg as _winreg if self.info is not None: return - info = [] + info: list[dict[str, Any]] = [] try: #XXX: Bad style to use so long `try:...except:...`. Fix it! prgx = re.compile(r"family\s+(?P<FML>\d+)\s+model\s+(?P<MDL>\d+)" r"\s+stepping\s+(?P<STP>\d+)", re.IGNORECASE) - chnd = _winreg.OpenKey(_winreg.HKEY_LOCAL_MACHINE, self.pkey) + chnd = _winreg.OpenKey(_winreg.HKEY_LOCAL_MACHINE, self.pkey) # pyright: ignore[reportAttributeAccessIssue] pnum = 0 while 1: try: - proc = _winreg.EnumKey(chnd, pnum) - except _winreg.error: + proc = _winreg.EnumKey(chnd, pnum) # pyright: ignore[reportAttributeAccessIssue] + except _winreg.error: # pyright: ignore[reportAttributeAccessIssue] break else: pnum += 1 info.append({"Processor": proc}) - phnd = _winreg.OpenKey(chnd, proc) + phnd = _winreg.OpenKey(chnd, proc) # pyright: ignore[reportAttributeAccessIssue] pidx = 0 while True: try: - name, value, vtpe = _winreg.EnumValue(phnd, pidx) - except _winreg.error: + name, value, vtpe = _winreg.EnumValue(phnd, pidx) # pyright: ignore[reportAttributeAccessIssue] + except _winreg.error: # pyright: ignore[reportAttributeAccessIssue] break else: pidx = pidx + 1 @@ -688,105 +740,105 @@ def __init__(self): info[-1]["Model"] = int(srch.group("MDL")) info[-1]["Stepping"] = int(srch.group("STP")) except: - print(sys.exc_value, '(ignoring)') + print(sys.exc_info()[1], '(ignoring)') self.__class__.info = info - def _not_impl(self): + def _not_impl(self) -> None: pass # Athlon - def _is_AMD(self): + def _is_AMD(self) -> bool: return self.info[0]['VendorIdentifier'] == 'AuthenticAMD' - def _is_Am486(self): + def _is_Am486(self) -> bool: return self.is_AMD() and self.info[0]['Family'] == 4 - def _is_Am5x86(self): + def _is_Am5x86(self) -> bool: return self.is_AMD() and self.info[0]['Family'] == 4 - def _is_AMDK5(self): + def _is_AMDK5(self) -> bool: return
(self.is_AMD() and self.info[0]['Family'] == 5 and self.info[0]['Model'] in [0, 1, 2, 3]) - def _is_AMDK6(self): + def _is_AMDK6(self) -> bool: return (self.is_AMD() and self.info[0]['Family'] == 5 and self.info[0]['Model'] in [6, 7]) - def _is_AMDK6_2(self): + def _is_AMDK6_2(self) -> bool: return (self.is_AMD() and self.info[0]['Family'] == 5 and self.info[0]['Model'] == 8) - def _is_AMDK6_3(self): + def _is_AMDK6_3(self) -> bool: return (self.is_AMD() and self.info[0]['Family'] == 5 and self.info[0]['Model'] == 9) - def _is_AMDK7(self): + def _is_AMDK7(self) -> bool: return self.is_AMD() and self.info[0]['Family'] == 6 # To reliably distinguish between the different types of AMD64 chips # (Athlon64, Operton, Athlon64 X2, Semperon, Turion 64, etc.) would # require looking at the 'brand' from cpuid - def _is_AMD64(self): + def _is_AMD64(self) -> bool: return self.is_AMD() and self.info[0]['Family'] == 15 # Intel - def _is_Intel(self): + def _is_Intel(self) -> bool: return self.info[0]['VendorIdentifier'] == 'GenuineIntel' - def _is_i386(self): + def _is_i386(self) -> bool: return self.info[0]['Family'] == 3 - def _is_i486(self): + def _is_i486(self) -> bool: return self.info[0]['Family'] == 4 - def _is_i586(self): + def _is_i586(self) -> bool: return self.is_Intel() and self.info[0]['Family'] == 5 - def _is_i686(self): + def _is_i686(self) -> bool: return self.is_Intel() and self.info[0]['Family'] == 6 - def _is_Pentium(self): + def _is_Pentium(self) -> bool: return self.is_Intel() and self.info[0]['Family'] == 5 - def _is_PentiumMMX(self): + def _is_PentiumMMX(self) -> bool: return (self.is_Intel() and self.info[0]['Family'] == 5 and self.info[0]['Model'] == 4) - def _is_PentiumPro(self): + def _is_PentiumPro(self) -> bool: return (self.is_Intel() and self.info[0]['Family'] == 6 and self.info[0]['Model'] == 1) - def _is_PentiumII(self): + def _is_PentiumII(self) -> bool: return (self.is_Intel() and self.info[0]['Family'] == 6 and self.info[0]['Model'] in [3, 5, 
6]) - def _is_PentiumIII(self): + def _is_PentiumIII(self) -> bool: return (self.is_Intel() and self.info[0]['Family'] == 6 and self.info[0]['Model'] in [7, 8, 9, 10, 11]) - def _is_PentiumIV(self): + def _is_PentiumIV(self) -> bool: return self.is_Intel() and self.info[0]['Family'] == 15 - def _is_PentiumM(self): + def _is_PentiumM(self) -> bool: return (self.is_Intel() and self.info[0]['Family'] == 6 and self.info[0]['Model'] in [9, 13, 14]) - def _is_Core2(self): + def _is_Core2(self) -> bool: return (self.is_Intel() and self.info[0]['Family'] == 6 and self.info[0]['Model'] in [15, 16, 17]) # Varia - def _is_singleCPU(self): + def _is_singleCPU(self) -> bool: return len(self.info) == 1 - def _getNCPUs(self): + def _getNCPUs(self) -> int: return len(self.info) - def _has_mmx(self): + def _has_mmx(self) -> bool: if self.is_Intel(): return ((self.info[0]['Family'] == 5 and self.info[0]['Model'] == 4) or @@ -796,7 +848,7 @@ def _has_mmx(self): else: return False - def _has_sse(self): + def _has_sse(self) -> bool: if self.is_Intel(): return ((self.info[0]['Family'] == 6 and self.info[0]['Model'] in [7, 8, 9, 10, 11]) or @@ -808,7 +860,7 @@ def _has_sse(self): else: return False - def _has_sse2(self): + def _has_sse2(self) -> bool: if self.is_Intel(): return self.is_Pentium4() or self.is_PentiumM() or self.is_Core2() elif self.is_AMD(): @@ -816,10 +868,10 @@ def _has_sse2(self): else: return False - def _has_3dnow(self): + def _has_3dnow(self) -> bool: return self.is_AMD() and self.info[0]['Family'] in [5, 6, 15] - def _has_3dnowext(self): + def _has_3dnowext(self) -> bool: return self.is_AMD() and self.info[0]['Family'] in [6, 15] From a761d4e51ff0abf39f5192f6a897d3c4ec4f0072 Mon Sep 17 00:00:00 2001 From: jorenham Date: Wed, 24 Sep 2025 17:25:17 +0200 Subject: [PATCH 114/166] annotate `__init__.py` --- numexpr/__init__.py | 55 +++++++++++++++++++++++++++++++-------------- numexpr/utils.py | 2 +- 2 files changed, 39 insertions(+), 18 deletions(-) diff --git 
a/numexpr/__init__.py b/numexpr/__init__.py index 63bb9e9..a67c5cb 100644 --- a/numexpr/__init__.py +++ b/numexpr/__init__.py @@ -21,46 +21,67 @@ """ -from numexpr.interpreter import __BLOCK_SIZE1__, MAX_THREADS, use_vml +from typing import TYPE_CHECKING, Final +if TYPE_CHECKING: + import unittest -is_cpu_amd_intel = False # DEPRECATION WARNING: WILL BE REMOVED IN FUTURE RELEASE +# the `import _ as _` are needed for mypy to understand these are re-exports + +from numexpr.interpreter import ( + __BLOCK_SIZE1__ as __BLOCK_SIZE1__, + MAX_THREADS as MAX_THREADS, + use_vml as use_vml, +) + +is_cpu_amd_intel: Final = False # DEPRECATION WARNING: WILL BE REMOVED IN FUTURE RELEASE # cpuinfo imports were moved into the test submodule function that calls them # to improve import times. -from numexpr.expressions import E -from numexpr.necompiler import (NumExpr, disassemble, evaluate, re_evaluate, - validate) -from numexpr.utils import (_init_num_threads, detect_number_of_cores, - detect_number_of_threads, get_num_threads, - get_vml_version, set_num_threads, - set_vml_accuracy_mode, set_vml_num_threads) +from numexpr.expressions import E as E +from numexpr.necompiler import ( + NumExpr as NumExpr, + disassemble as disassemble, + evaluate as evaluate, + re_evaluate as re_evaluate, + validate as validate, +) +from numexpr.utils import ( + _init_num_threads, + detect_number_of_cores as detect_number_of_cores, + detect_number_of_threads as detect_number_of_threads, + get_num_threads as get_num_threads, + get_vml_version as get_vml_version, + set_num_threads as set_num_threads, + set_vml_accuracy_mode as set_vml_accuracy_mode, + set_vml_num_threads as set_vml_num_threads, +) # Detect the number of cores -ncores = detect_number_of_cores() +ncores: Final = detect_number_of_cores() # Initialize the number of threads to be used -nthreads = _init_num_threads() +nthreads: Final = _init_num_threads() # The default for VML is 1 thread (see #39) # set_vml_num_threads(1) -from . 
import version +from . import version as version -__version__ = version.version +__version__: Final = version.version -def print_versions(): +def print_versions() -> None: """Print the versions of software that numexpr relies on.""" try: import numexpr.tests - return numexpr.tests.print_versions() + return numexpr.tests.print_versions() # type: ignore[attr-defined, no-untyped-call] except ImportError: # To maintain Python 2.6 compatibility we have simple error handling raise ImportError('`numexpr.tests` could not be imported, likely it was excluded from the distribution.') -def test(verbosity=1): +def test(verbosity: int = 1) -> "unittest.result.TestResult": """Run all the tests in the test suite.""" try: import numexpr.tests - return numexpr.tests.test(verbosity=verbosity) + return numexpr.tests.test(verbosity=verbosity) # type: ignore[attr-defined, no-untyped-call] except ImportError: # To maintain Python 2.6 compatibility we have simple error handling raise ImportError('`numexpr.tests` could not be imported, likely it was excluded from the distribution.') diff --git a/numexpr/utils.py b/numexpr/utils.py index 9993b43..3162730 100644 --- a/numexpr/utils.py +++ b/numexpr/utils.py @@ -29,7 +29,7 @@ overload, ) -from numexpr import use_vml # type: ignore[attr-defined] +from numexpr import use_vml from numexpr.interpreter import MAX_THREADS, _get_num_threads, _set_num_threads from . import version From 06c00df6d1adc289029905fb26f1009a0e179088 Mon Sep 17 00:00:00 2001 From: Francesc Alted Date: Wed, 24 Sep 2025 18:08:50 +0200 Subject: [PATCH 115/166] Revise RELEASE_NOTES.rst for new features Updated release notes to reflect new functionality and contributions. 
--- RELEASE_NOTES.rst | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/RELEASE_NOTES.rst b/RELEASE_NOTES.rst index 8ed7222..9c4630a 100644 --- a/RELEASE_NOTES.rst +++ b/RELEASE_NOTES.rst @@ -14,10 +14,8 @@ Changes from 2.12.1 to 2.13.0 * New functionality has been added: * Bitwise operators (and, or, not, xor): `&, |, ~, ^` * New binary arithmetic operator for floor division: `//` - * New functions: `signbit`, `hypot`, `copysign`, `nextafter`, `maximum`, - `minimum`, `log2`, `trunc`, `round` and `sign`. - * Also enables integer outputs for integer inputs for - `abs`, `fmod`, `copy`, `ones_like`, `sign` and `round`. + * New functions: `signbit`, `hypot`, `copysign`, `nextafter`, `maximum`, `minimum`, `log2`, `trunc`, `round` and `sign`. + * Also enables integer outputs for integer inputs for `abs`, `fmod`, `copy`, `ones_like`, `sign` and `round`. Thanks to Luke Shaw for the contributions. From 6892ed6e87041eec9b8946370fd23a5a96fa8777 Mon Sep 17 00:00:00 2001 From: Francesc Alted Date: Wed, 24 Sep 2025 18:09:58 +0200 Subject: [PATCH 116/166] Update RELEASE_NOTES for version 2.13.0 --- RELEASE_NOTES.rst | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/RELEASE_NOTES.rst b/RELEASE_NOTES.rst index 9c4630a..d3effc3 100644 --- a/RELEASE_NOTES.rst +++ b/RELEASE_NOTES.rst @@ -12,12 +12,12 @@ Changes from 2.12.1 to 2.13.0 ----------------------------- * New functionality has been added: - * Bitwise operators (and, or, not, xor): `&, |, ~, ^` - * New binary arithmetic operator for floor division: `//` - * New functions: `signbit`, `hypot`, `copysign`, `nextafter`, `maximum`, `minimum`, `log2`, `trunc`, `round` and `sign`. - * Also enables integer outputs for integer inputs for `abs`, `fmod`, `copy`, `ones_like`, `sign` and `round`. 
+ * Bitwise operators (and, or, not, xor): `&, |, ~, ^` + * New binary arithmetic operator for floor division: `//` + * New functions: `signbit`, `hypot`, `copysign`, `nextafter`, `maximum`, `minimum`, `log2`, `trunc`, `round` and `sign`. + * Also enables integer outputs for integer inputs for `abs`, `fmod`, `copy`, `ones_like`, `sign` and `round`. - Thanks to Luke Shaw for the contributions. + Thanks to Luke Shaw for the contributions. * New wheels for Python 3.14 and 3.14t are provided. From 35800698ba899199ed5ad2b8e93209a3853daaf5 Mon Sep 17 00:00:00 2001 From: jorenham Date: Wed, 24 Sep 2025 19:50:55 +0200 Subject: [PATCH 117/166] annotate `expressions` --- numexpr/expressions.py | 235 +++++++++++++++++++++++++---------------- 1 file changed, 146 insertions(+), 89 deletions(-) diff --git a/numexpr/expressions.py b/numexpr/expressions.py index f11dd6c..2ee272e 100644 --- a/numexpr/expressions.py +++ b/numexpr/expressions.py @@ -11,34 +11,47 @@ __all__ = ['E'] import operator -import sys import threading +import types +from typing import ( + TYPE_CHECKING, + Any, + Callable, + ClassVar, + Final, + Iterable, + Mapping, + NoReturn, + TypeVar, + cast, +) +if TYPE_CHECKING: + from typing_extensions import Self, TypeIs + import numpy # Declare a double type that does not exist in Python space -double = numpy.double +double = numpy.float64 # The default kind for undeclared variables default_kind = 'double' int_ = numpy.int32 long_ = numpy.int64 -type_to_kind = {bool: 'bool', int_: 'int', long_: 'long', float: 'float', - double: 'double', complex: 'complex', bytes: 'bytes', str: 'str'} -kind_to_type = {'bool': bool, 'int': int_, 'long': long_, 'float': float, - 'double': double, 'complex': complex, 'bytes': bytes, 'str': str} -kind_rank = ('bool', 'int', 'long', 'float', 'double', 'complex', 'none') -scalar_constant_types = [bool, int_, int, float, double, complex, bytes, str] - -scalar_constant_types = tuple(scalar_constant_types) +type_to_kind: Final = {bool: 'bool', 
int_: 'int', long_: 'long', float: 'float', + double: 'double', complex: 'complex', bytes: 'bytes', str: 'str'} +kind_to_type: Final = {'bool': bool, 'int': int_, 'long': long_, 'float': float, + 'double': double, 'complex': complex, 'bytes': bytes, 'str': str} +kind_rank: Final = ('bool', 'int', 'long', 'float', 'double', 'complex', 'none') +scalar_constant_types: Final = (bool, int_, int, float, double, complex, bytes, str) from numexpr import interpreter -class Expression(): +class Expression: - def __getattr__(self, name): + def __getattr__(self, name: str) -> Any: if name.startswith('_'): try: return self.__dict__[name] @@ -48,38 +61,40 @@ def __getattr__(self, name): return VariableNode(name, default_kind) -E = Expression() +E: Final = Expression() class Context(threading.local): - def get(self, value, default): + def get(self, value: str, default: object) -> Any: return self.__dict__.get(value, default) - def get_current_context(self): + def get_current_context(self) -> dict[str, Any]: return self.__dict__ - def set_new_context(self, dict_): + def set_new_context(self, dict_: Mapping[str, Any]) -> None: self.__dict__.update(dict_) # This will be called each time the local object is used in a separate thread -_context = Context() +_context: Final = Context() -def get_optimization(): +def get_optimization() -> str: return _context.get('optimization', 'none') +_T = TypeVar('_T') + # helper functions for creating __magic__ methods -def ophelper(f): - def func(*args): - args = list(args) +def ophelper(f: Callable[..., _T]) -> Callable[..., _T]: + def func(*args: ExpressionNode) -> _T: + arglist = list(args) for i, x in enumerate(args): if isConstant(x): - args[i] = x = ConstantNode(x) + arglist[i] = x = ConstantNode(x) if not isinstance(x, ExpressionNode): raise TypeError("unsupported object type: %s" % type(x)) - return f(*args) + return f(*arglist) func.__name__ = f.__name__ func.__doc__ = f.__doc__ @@ -87,7 +102,7 @@ def func(*args): return func -def 
allConstantNodes(args): +def allConstantNodes(args: Iterable[object]) -> bool: "returns True if args are all ConstantNodes." for x in args: if not isinstance(x, ConstantNode): @@ -95,12 +110,12 @@ def allConstantNodes(args): return True -def isConstant(ex): +def isConstant(ex: object) -> "TypeIs[complex | bytes | str | numpy.number]": "Returns True if ex is a constant scalar of an allowed type." - return isinstance(ex, scalar_constant_types) + return isinstance(ex, scalar_constant_types) # pyright: ignore[reportArgumentType] -def commonKind(nodes): +def commonKind(nodes: Iterable['ExpressionNode | RawNode']) -> str: node_kinds = [node.astKind for node in nodes] str_count = node_kinds.count('bytes') + node_kinds.count('str') if 0 < str_count < len(node_kinds): # some args are strings, but not all @@ -117,7 +132,7 @@ def commonKind(nodes): min_int32 = -max_int32 - 1 -def bestConstantType(x): +def bestConstantType(x: object) -> type | None: # ``numpy.string_`` is a subclass of ``bytes`` if isinstance(x, (bytes, str)): return bytes @@ -130,14 +145,14 @@ def bestConstantType(x): # ``long`` objects are kept as is to allow the user to force # promotion of results by using long constants, e.g. by operating # a 32-bit array with a long (64-bit) constant. - if isinstance(x, (long_, numpy.int64)): + if isinstance(x, (long_, numpy.int64)): # type: ignore[misc] return long_ # ``double`` objects are kept as is to allow the user to force # promotion of results by using double constants, e.g. by operating # a float (32-bit) array with a double (64-bit) constant. if isinstance(x, double): return double - if isinstance(x, numpy.float32): + if isinstance(x, numpy.float32): # pyright: ignore[reportArgumentType] return float if isinstance(x, (int, numpy.integer)): # Constants needing more than 32 bits are always @@ -150,25 +165,29 @@ def bestConstantType(x): # ``double`` too. 
for converter in float, complex: try: - y = converter(x) + y = converter(x) # type: ignore[arg-type, call-overload] except Exception as err: continue if y == x or numpy.isnan(y): return converter + return None -def getKind(x): +def getKind(x: object) -> str: converter = bestConstantType(x) + assert converter is not None return type_to_kind[converter] -def binop(opname, reversed=False, kind=None): +def binop( + opname: str, reversed: bool = False, kind: str | None = None +) -> Callable[['ExpressionNode', 'ExpressionNode'], 'ExpressionNode']: # Getting the named method from self (after reversal) does not # always work (e.g. int constants do not have a __lt__ method). opfunc = getattr(operator, "__%s__" % opname) @ophelper - def operation(self, other): + def operation(self: ExpressionNode, other: ExpressionNode) -> ExpressionNode: if reversed: self, other = other, self if allConstantNodes([self, other]): @@ -179,9 +198,11 @@ def operation(self, other): return operation -def func(func, minkind=None, maxkind=None): +def func( + func: Callable[..., Any], minkind: str | None = None, maxkind: str | None = None +) -> Callable[..., 'FuncNode | ConstantNode']: @ophelper - def function(*args): + def function(*args: ExpressionNode) -> 'FuncNode | ConstantNode': if allConstantNodes(args): return ConstantNode(func(*[x.value for x in args])) kind = commonKind(args) @@ -204,20 +225,23 @@ def function(*args): @ophelper -def where_func(a, b, c): +def where_func( + a: 'ExpressionNode', b: 'ExpressionNode', c: 'ExpressionNode' +) -> 'ExpressionNode': if isinstance(a, ConstantNode): return b if a.value else c if allConstantNodes([a, b, c]): - return ConstantNode(numpy.where(a, b, c)) + return ConstantNode(numpy.where(a, b, c)) # type: ignore[call-overload] return FuncNode('where', [a, b, c]) -def encode_axis(axis): +def encode_axis(axis: 'ConstantNode | int | None') -> 'RawNode': if isinstance(axis, ConstantNode): axis = axis.value if axis is None: axis = interpreter.allaxes else: + 
assert isinstance(axis, int) if axis < 0: raise ValueError("negative axis are not supported") if axis > 254: @@ -225,24 +249,26 @@ def encode_axis(axis): return RawNode(axis) -def gen_reduce_axis_func(name): - def _func(a, axis=None): - axis = encode_axis(axis) +def gen_reduce_axis_func(name: str) -> Callable[..., 'ExpressionNode']: + def _func(a: object, axis: 'ConstantNode | int | None' = None) -> 'ExpressionNode': + _axis = encode_axis(axis) if isinstance(a, ConstantNode): return a - if isinstance(a, (bool, int_, long_, float, double, complex)): - a = ConstantNode(a) - return FuncNode(name, [a, axis], kind=a.astKind) + if isinstance(a, (bool, int_, long_, float, double, complex)): # type: ignore[misc] + _a = ConstantNode(a) + else: + _a = cast('ExpressionNode', a) + return FuncNode(name, [_a, _axis], kind=_a.astKind) return _func @ophelper -def contains_func(a, b): +def contains_func(a: 'ExpressionNode', b: 'ExpressionNode') -> 'FuncNode': return FuncNode('contains', [a, b], kind='bool') @ophelper -def div_op(a, b): +def div_op(a: 'ExpressionNode', b: 'ExpressionNode') -> 'OpNode': if get_optimization() in ('moderate', 'aggressive'): if (isinstance(b, ConstantNode) and (a.astKind == b.astKind) and @@ -252,7 +278,7 @@ def div_op(a, b): @ophelper -def truediv_op(a, b): +def truediv_op(a: 'ExpressionNode', b: 'ExpressionNode') -> 'OpNode': if get_optimization() in ('moderate', 'aggressive'): if (isinstance(b, ConstantNode) and (a.astKind == b.astKind) and @@ -265,12 +291,12 @@ def truediv_op(a, b): @ophelper -def rtruediv_op(a, b): +def rtruediv_op(a: 'ExpressionNode', b: 'ExpressionNode') -> 'OpNode': return truediv_op(b, a) @ophelper -def pow_op(a, b): +def pow_op(a: 'ExpressionNode', b: 'ExpressionNode') -> 'ExpressionNode': if isinstance(b, ConstantNode): x = b.value @@ -286,7 +312,9 @@ def pow_op(a, b): n = int_(abs(x)) ishalfpower = int_(abs(2 * x)) % 2 - def multiply(x, y): + def multiply( + x: ExpressionNode | None, y: ExpressionNode + ) -> ExpressionNode: 
if x is None: return y return OpNode('mul', [x, y]) @@ -327,7 +355,7 @@ def multiply(x, y): return OpNode('pow', [a, b]) # The functions and the minimum and maximum types accepted -numpy.expm1x = numpy.expm1 +numpy.expm1x = numpy.expm1 # type: ignore[attr-defined] functions = { 'copy': func(numpy.copy), 'ones_like': func(numpy.ones_like), @@ -390,58 +418,72 @@ def multiply(x, y): } -class ExpressionNode(): +class ExpressionNode: """ An object that represents a generic number object. This implements the number special methods so that we can keep track of how this object has been used. """ - astType = 'generic' - - def __init__(self, value=None, kind=None, children=None): + astType: ClassVar = 'generic' + astKind: Final[str] + + children: Final[tuple['ExpressionNode | RawNode', ...]] + value: Final[Any] + + def __init__( + self, + value: object | None = None, + kind: str | None = None, + children: Iterable['ExpressionNode | RawNode'] | None = None, + ) -> None: self.value = value if kind is None: kind = 'none' self.astKind = kind - if children is None: - self.children = () - else: - self.children = tuple(children) + self.children = () if children is None else tuple(children) - def get_real(self): + def get_real(self) -> 'OpNode | ConstantNode': if self.astType == 'constant': return ConstantNode(complex(self.value).real) return OpNode('real', (self,), 'double') - real = property(get_real) + if TYPE_CHECKING: + @property + def real(self) -> 'OpNode | ConstantNode': ... + else: + real = property(get_real) - def get_imag(self): + def get_imag(self) -> 'OpNode | ConstantNode': if self.astType == 'constant': return ConstantNode(complex(self.value).imag) return OpNode('imag', (self,), 'double') - imag = property(get_imag) + if TYPE_CHECKING: + @property + def imag(self) -> 'OpNode | ConstantNode': ... 
+ else: + imag = property(get_imag) - def __str__(self): + def __str__(self) -> str: return '%s(%s, %s, %s)' % (self.__class__.__name__, self.value, self.astKind, self.children) - def __repr__(self): + def __repr__(self) -> str: return self.__str__() - def __neg__(self): + def __neg__(self) -> 'OpNode': return OpNode('neg', (self,)) - def __invert__(self): + def __invert__(self) -> 'OpNode': return OpNode('invert', (self,)) - def __pos__(self): + def __pos__(self) -> 'Self': return self # The next check is commented out. See #24 for more info. - def __bool__(self): + def __bool__(self) -> NoReturn: raise TypeError("You can't use Python's standard boolean operators in " "NumExpr expressions. You should use their bitwise " "counterparts instead: '&' instead of 'and', " @@ -471,71 +513,86 @@ def __bool__(self): __gt__ = binop('gt', kind='bool') __ge__ = binop('ge', kind='bool') - __eq__ = binop('eq', kind='bool') - __ne__ = binop('ne', kind='bool') + __eq__ = binop('eq', kind='bool') # type: ignore[assignment] + __ne__ = binop('ne', kind='bool') # type: ignore[assignment] __lt__ = binop('gt', reversed=True, kind='bool') __le__ = binop('ge', reversed=True, kind='bool') class LeafNode(ExpressionNode): - leafNode = True + leafNode: ClassVar = True class VariableNode(LeafNode): - astType = 'variable' - - def __init__(self, value=None, kind=None, children=None): + astType: ClassVar = 'variable' + + def __init__( + self, + value: object | None = None, + kind: str | None = None, + children: None = None, + ) -> None: LeafNode.__init__(self, value=value, kind=kind) -class RawNode(): +class RawNode: """ Used to pass raw integers to interpreter. For instance, for selecting what function to use in func1. Purposely don't inherit from ExpressionNode, since we don't wan't this to be used for anything but being walked. 
""" - astType = 'raw' - astKind = 'none' + astType: ClassVar = 'raw' + astKind: ClassVar = 'none' - def __init__(self, value): + def __init__(self, value: object) -> None: self.value = value self.children = () - def __str__(self): + def __str__(self) -> str: return 'RawNode(%s)' % (self.value,) __repr__ = __str__ class ConstantNode(LeafNode): - astType = 'constant' + astType: ClassVar = 'constant' - def __init__(self, value=None, children=None): + def __init__(self, value: object | None = None, children: None = None): kind = getKind(value) # Python float constants are double precision by default if kind == 'float' and isinstance(value, float): kind = 'double' LeafNode.__init__(self, value=value, kind=kind) - def __neg__(self): + def __neg__(self) -> 'ConstantNode': # type: ignore[override] return ConstantNode(-self.value) - def __invert__(self): + def __invert__(self) -> 'ConstantNode': # type: ignore[override] return ConstantNode(~self.value) class OpNode(ExpressionNode): - astType = 'op' - - def __init__(self, opcode=None, args=None, kind=None): + astType: ClassVar = 'op' + + def __init__( + self, + opcode: str | None = None, + args: Iterable[ExpressionNode | RawNode] | None = None, + kind: str | None = None, + ) -> None: if (kind is None) and (args is not None): kind = commonKind(args) ExpressionNode.__init__(self, value=opcode, kind=kind, children=args) class FuncNode(OpNode): - def __init__(self, opcode=None, args=None, kind=None): + def __init__( + self, + opcode: str | None = None, + args: Iterable[ExpressionNode | RawNode] | None = None, + kind: str | None = None, + ) -> None: if (kind is None) and (args is not None): kind = commonKind(args) if opcode in ("isnan", "isfinite", "isinf", "signbit"): # bodge for boolean return functions From ea2c17bc8b69339f412eae834f6cc2a7c0536df4 Mon Sep 17 00:00:00 2001 From: jorenham Date: Wed, 24 Sep 2025 21:44:11 +0200 Subject: [PATCH 118/166] annotate `interpreter.NumExpr` --- numexpr/interpreter.pyi | 33 
+++++++++++++++++++++++++++++++-- 1 file changed, 31 insertions(+), 2 deletions(-) diff --git a/numexpr/interpreter.pyi b/numexpr/interpreter.pyi index 1b9b8af..64e1d89 100644 --- a/numexpr/interpreter.pyi +++ b/numexpr/interpreter.pyi @@ -1,5 +1,6 @@ -from typing import Final, Literal, TypeAlias - +import numpy.typing as npt +from collections.abc import Sequence +from typing import Any, Final, Literal, TypeAlias _VMLAccuracyMode: TypeAlias = Literal[0, 1, 2, 3] @@ -20,3 +21,31 @@ funccodes: Final[dict[bytes, int]] = ... maxdims: Final[int] = ... opcodes: Final[dict[bytes, int]] = ... use_vml: Final[bool] = ... + +class NumExpr: + signature: Final[bytes] + constsig: Final[bytes] + tempsig: Final[bytes] + fullsig: Final[bytes] + + program: Final[bytes] + constants: Final[Sequence[Any]] + input_names: Final[Sequence[str]] + + def __init__( + self, + signature: bytes, + tempsig: bytes, + program: bytes, + constants: Sequence[Any] = ..., + input_names: Sequence[str] | None = None, + ) -> None: ... + def run( + self, + *args: Any, + casting: str = ..., + order: str = ..., + ex_uses_vml: bool = ..., + out: npt.NDArray[Any] = ..., + ) -> Any: ... 
+ __call__ = run From ba6dc39a4d326c821c300f655c0aa2d91d235e3a Mon Sep 17 00:00:00 2001 From: jorenham Date: Wed, 24 Sep 2025 21:44:24 +0200 Subject: [PATCH 119/166] annotate `necompiler` --- numexpr/necompiler.py | 304 +++++++++++++++++++++++++----------------- 1 file changed, 183 insertions(+), 121 deletions(-) diff --git a/numexpr/necompiler.py b/numexpr/necompiler.py index 8b80737..d0e8c8b 100644 --- a/numexpr/necompiler.py +++ b/numexpr/necompiler.py @@ -14,17 +14,19 @@ import re import sys import threading -from typing import Dict, Optional + +from typing import TYPE_CHECKING, Any, ClassVar, Final, Generator, Iterable, Iterator, TypeAlias +if TYPE_CHECKING: + from typing_extensions import Unpack import numpy -is_cpu_amd_intel = False # DEPRECATION WARNING: WILL BE REMOVED IN FUTURE RELEASE +is_cpu_amd_intel: Final = False # DEPRECATION WARNING: WILL BE REMOVED IN FUTURE RELEASE from numexpr import expressions, interpreter, use_vml from numexpr.utils import CacheDict, ContextDict # Declare a double type that does not exist in Python space double = numpy.double -double = numpy.double int_ = numpy.int32 long_ = numpy.int64 @@ -86,7 +88,9 @@ ] -class ASTNode(): + + +class ASTNode: """Abstract Syntax Tree node. Members: @@ -98,16 +102,25 @@ class ASTNode(): children -- the children below this node reg -- the register assigned to the result for this node. """ - cmpnames = ['astType', 'astKind', 'value', 'children'] - - def __init__(self, astType='generic', astKind='unknown', value=None, children=()): + cmpnames: ClassVar = 'astType', 'astKind', 'value', 'children' + + astType: str + astKind: str + value: Any + children: tuple['ASTNode', ...] 
+ reg: 'Register | None' + + def __init__(self, astType: str = 'generic', + astKind: str = 'unknown', + value: object | None = None, + children: Iterable['ASTNode'] = ()) -> None: self.astType = astType self.astKind = astKind self.value = value self.children = tuple(children) self.reg = None - def __eq__(self, other): + def __eq__(self, other: 'ASTNode') -> bool: # type: ignore[override] if self.astType == 'alias': self = self.value if other.astType == 'alias': @@ -119,50 +132,50 @@ def __eq__(self, other): return False return True - def __lt__(self,other): + def __lt__(self, other: 'ASTNode') -> bool: # RAM: this is a fix for issue #88 whereby sorting on constants # that may be of astKind == 'complex' but type(self.value) == int or float # Here we let NumPy sort as it will cast data properly for comparison # when the Python built-ins will raise an error. if self.astType == 'constant': if self.astKind == other.astKind: - return numpy.array(self.value) < numpy.array(other.value) + return bool(numpy.array(self.value) < numpy.array(other.value)) return self.astKind < other.astKind else: raise TypeError('Sorting not implemented for astType: %s'%self.astType) - def __hash__(self): + def __hash__(self) -> int: if self.astType == 'alias': self = self.value return hash((self.astType, self.astKind, self.value, self.children)) - def __str__(self): + def __str__(self) -> str: return 'AST(%s, %s, %s, %s, %s)' % (self.astType, self.astKind, self.value, self.children, self.reg) - def __repr__(self): + def __repr__(self) -> str: return '' % id(self) - def key(self): + def key(self) -> tuple[str, str, Any, tuple['ASTNode', ...]]: return (self.astType, self.astKind, self.value, self.children) - def typecode(self): + def typecode(self) -> str: return kind_to_typecode[self.astKind] - def postorderWalk(self): + def postorderWalk(self) -> Iterator['ASTNode']: for c in self.children: for w in c.postorderWalk(): yield w yield self - def allOf(self, *astTypes): - astTypes = set(astTypes) 
+ def allOf(self, *astTypes: str) -> Iterator['ASTNode']: + astTypeSet = set(astTypes) for w in self.postorderWalk(): - if w.astType in astTypes: + if w.astType in astTypeSet: yield w -def expressionToAST(ex): +def expressionToAST(ex: expressions.ExpressionNode | expressions.RawNode) -> ASTNode: """Take an expression tree made out of expressions.ExpressionNode, and convert to an AST tree. @@ -173,7 +186,7 @@ def expressionToAST(ex): [expressionToAST(c) for c in ex.children]) -def sigPerms(s): +def sigPerms(s: str) -> Generator[str, None, None]: """Generate all possible signatures derived by upcasting the given signature. """ @@ -192,7 +205,7 @@ def sigPerms(s): yield s -def typeCompileAst(ast): +def typeCompileAst(ast: ASTNode) -> ASTNode: """Assign appropriate types to each node in the AST. Will convert opcodes and functions to appropriate upcast version, @@ -235,7 +248,7 @@ def typeCompileAst(ast): [typeCompileAst(c) for c in children]) -class Register(): +class Register: """Abstraction for a register in the VM. Members: @@ -246,13 +259,18 @@ class Register(): None if no number assigned yet. """ - def __init__(self, astnode, temporary=False): + node: Final[ASTNode] + temporary: bool + immediate: bool + n: int | None + + def __init__(self, astnode: ASTNode, temporary: bool = False) -> None: self.node = astnode self.temporary = temporary self.immediate = False self.n = None - def __str__(self): + def __str__(self) -> str: if self.temporary: name = 'Temporary' else: @@ -260,7 +278,7 @@ def __str__(self): return '%s(%s, %s, %s)' % (name, self.node.astType, self.node.astKind, self.n,) - def __repr__(self): + def __repr__(self) -> str: return self.__str__() @@ -269,11 +287,11 @@ class Immediate(Register): a register. 
""" - def __init__(self, astnode): + def __init__(self, astnode: ASTNode) -> None: Register.__init__(self, astnode) self.immediate = True - def __str__(self): + def __str__(self) -> str: return 'Immediate(%d)' % (self.node.value,) @@ -282,7 +300,8 @@ def __str__(self): _attr_pat = r'\.\b(?!(real|imag|(\d*[eE]?[+-]?\d+)|(\d*[eE]?[+-]?\d+j)|(\d*j))\b)' _blacklist_re = re.compile(f'{_flow_pat}|{_dunder_pat}|{_attr_pat}') -def stringToExpression(s, types, context, sanitize: bool=True): +def stringToExpression(s: str, types: dict[str, type], context: dict[str, Any], + sanitize: bool = True) -> expressions.ExpressionNode: """Given a string, convert it to a tree of ExpressionNode's. """ # sanitize the string for obvious attack vectors that NumExpr cannot @@ -306,7 +325,7 @@ def stringToExpression(s, types, context, sanitize: bool=True): flags = 0 c = compile(s, '', 'eval', flags) # make VariableNode's for the names - names = {} + names: dict[str, Any] = {} for name in c.co_names: if name == "None": names[name] = None @@ -320,10 +339,10 @@ def stringToExpression(s, types, context, sanitize: bool=True): names.update(expressions.functions) # now build the expression - ex = eval(c, names) + ex: expressions.ExpressionNode = eval(c, names) if expressions.isConstant(ex): - ex = expressions.ConstantNode(ex, expressions.getKind(ex)) + ex = expressions.ConstantNode(ex) elif not isinstance(ex, expressions.ExpressionNode): raise TypeError("unsupported expression type: %s" % type(ex)) finally: @@ -331,12 +350,12 @@ def stringToExpression(s, types, context, sanitize: bool=True): return ex -def isReduction(ast): +def isReduction(ast: ASTNode) -> bool: prefixes = (b'sum_', b'prod_', b'min_', b'max_') return any(ast.value.startswith(p) for p in prefixes) -def getInputOrder(ast, input_order=None): +def getInputOrder(ast: ASTNode, input_order: list[str] | None = None) -> list[ASTNode]: """ Derive the input order of the variables in an expression. 
""" @@ -359,16 +378,16 @@ def getInputOrder(ast, input_order=None): return ordered_variables -def convertConstantToKind(x, kind): +def convertConstantToKind(x: Any, kind: str) -> Any: # Exception for 'float' types that will return the NumPy float32 type if kind == 'float': return numpy.float32(x) - elif isinstance(x,str): + elif isinstance(x, str): return x.encode('ascii') return kind_to_type[kind](x) -def getConstants(ast): +def getConstants(ast: ASTNode) -> tuple[list[ASTNode], list[Any]]: """ RAM: implemented magic method __lt__ for ASTNode to fix issues #88 and #209. The following test code works now, as does the test suite. @@ -377,14 +396,17 @@ def getConstants(ast): a = 1 + 3j; b = 5.0 ne.evaluate('a*2 + 15j - b') """ - constant_registers = set([node.reg for node in ast.allOf("constant")]) - constants_order = sorted([r.node for r in constant_registers]) + constant_registers = {node.reg for node in ast.allOf("constant") + if node.reg is not None} + constants_order = sorted(r.node for r in constant_registers) constants = [convertConstantToKind(a.value, a.astKind) for a in constants_order] return constants_order, constants -def sortNodesByOrder(nodes, order): +# unused? +def sortNodesByOrder(nodes: Iterable[ASTNode], + order: Iterable[tuple[int, str, int]]) -> list[ASTNode]: order_map = {} for i, (_, v, _) in enumerate(order): order_map[v] = i @@ -393,11 +415,12 @@ def sortNodesByOrder(nodes, order): return [a[1] for a in dec_nodes] -def assignLeafRegisters(inodes, registerMaker): +def assignLeafRegisters(inodes: Iterable[ASTNode], + registerMaker: type[Register]) -> None: """ Assign new registers to each of the leaf nodes. 
""" - leafRegisters = {} + leafRegisters: dict[tuple[object, ...], Register] = {} for node in inodes: key = node.key() if key in leafRegisters: @@ -406,7 +429,8 @@ def assignLeafRegisters(inodes, registerMaker): node.reg = leafRegisters[key] = registerMaker(node) -def assignBranchRegisters(inodes, registerMaker): +def assignBranchRegisters(inodes: Iterable[ASTNode], + registerMaker: type[Register]) -> None: """ Assign temporary registers to each of the branch nodes. """ @@ -414,11 +438,11 @@ def assignBranchRegisters(inodes, registerMaker): node.reg = registerMaker(node, temporary=True) -def collapseDuplicateSubtrees(ast): +def collapseDuplicateSubtrees(ast: ASTNode) -> list[ASTNode]: """ Common subexpression elimination. """ - seen = {} + seen: dict[ASTNode, ASTNode] = {} aliases = [] for a in ast.allOf('op'): if a in seen: @@ -437,64 +461,68 @@ def collapseDuplicateSubtrees(ast): return aliases -def optimizeTemporariesAllocation(ast): +def optimizeTemporariesAllocation(ast: ASTNode) -> None: """ Attempt to minimize the number of temporaries needed, by reusing old ones. 
""" - nodes = [n for n in ast.postorderWalk() if n.reg.temporary] - users_of = dict((n.reg, set()) for n in nodes) + nodes = [n for n in ast.postorderWalk() if n.reg and n.reg.temporary] + users_of: dict[Register, set[ASTNode]] = {n.reg: set() for n in nodes if n.reg} - node_regs = dict((n, set(c.reg for c in n.children if c.reg.temporary)) - for n in nodes) if nodes and nodes[-1] is not ast: nodes_to_check = nodes + [ast] else: nodes_to_check = nodes for n in nodes_to_check: for c in n.children: - if c.reg.temporary: + if c.reg and c.reg.temporary: users_of[c.reg].add(n) - unused = dict([(tc, set()) for tc in scalar_constant_kinds]) + unused: dict[str, set[Register]] = {tc: set() for tc in scalar_constant_kinds} for n in nodes: for c in n.children: reg = c.reg - if reg.temporary: + if reg and reg.temporary: users = users_of[reg] users.discard(n) if not users: unused[reg.node.astKind].add(reg) if unused[n.astKind]: reg = unused[n.astKind].pop() - users_of[reg] = users_of[n.reg] + if n.reg: + users_of[reg] = users_of[n.reg] n.reg = reg -def setOrderedRegisterNumbers(order, start): +def setOrderedRegisterNumbers(order: Iterable[ASTNode], start: int) -> int: """ Given an order of nodes, assign register numbers. """ - for i, node in enumerate(order): - node.reg.n = start + i - return start + len(order) + i = -1 + for i, node in enumerate(order, start=start): + if node.reg: + node.reg.n = i + total = i + 1 + return total -def setRegisterNumbersForTemporaries(ast, start): +def setRegisterNumbersForTemporaries(ast: ASTNode, start: int) -> tuple[int, str]: """ Assign register numbers for temporary registers, keeping track of aliases and handling immediate operands. 
""" seen = 0 signature = '' - aliases = [] + aliases: list[ASTNode] = [] for node in ast.postorderWalk(): if node.astType == 'alias': aliases.append(node) node = node.value - if node.reg.immediate: - node.reg.n = node.value - continue reg = node.reg + if not reg: + continue + if reg.immediate: + reg.n = node.value + continue if reg.n is None: reg.n = start + seen seen += 1 @@ -504,7 +532,10 @@ def setRegisterNumbersForTemporaries(ast, start): return start + seen, signature -def convertASTtoThreeAddrForm(ast): +_ThreeAddressForm: TypeAlias = tuple[bytes, Register, Unpack[tuple[Register, ...]]] + + +def convertASTtoThreeAddrForm(ast: ASTNode) -> list[_ThreeAddressForm]: """ Convert an AST to a three address form. @@ -514,55 +545,59 @@ def convertASTtoThreeAddrForm(ast): I suppose this should be called three register form, but three address form is found in compiler theory. """ - return [(node.value, node.reg) + tuple([c.reg for c in node.children]) - for node in ast.allOf('op')] + return [(node.value, node.reg, *(c.reg for c in node.children if c.reg)) + for node in ast.allOf('op') if node.reg] -def compileThreeAddrForm(program): +def compileThreeAddrForm(program: Iterable[_ThreeAddressForm]) -> bytes: """ Given a three address form of the program, compile it a string that the VM understands. 
""" - def nToChr(reg): + def nToChr(reg: Register | None) -> bytes: if reg is None: return b'\xff' - elif reg.n < 0: + assert reg.n is not None + if reg.n < 0: raise ValueError("negative value for register number %s" % reg.n) - else: - return bytes([reg.n]) + return bytes([reg.n]) - def quadrupleToString(opcode, store, a1=None, a2=None): + def quadrupleToString(opcode: bytes, + store: Register | None, + a1: Register | None = None, + a2: Register | None = None) -> bytes: cop = chr(interpreter.opcodes[opcode]).encode('latin_1') cs = nToChr(store) ca1 = nToChr(a1) ca2 = nToChr(a2) return cop + cs + ca1 + ca2 - def toString(args): - while len(args) < 4: - args += (None,) - opcode, store, a1, a2 = args[:4] + def toString(args: _ThreeAddressForm) -> bytes: + opcode: bytes = args[0] + store: Register = args[1] + a1: Register | None = args[2] if len(args) > 2 else None + a2: Register | None = args[3] if len(args) > 3 else None + an: tuple[Register, ...] = args[4:] if len(args) > 4 else () s = quadrupleToString(opcode, store, a1, a2) l = [s] - args = args[4:] - while args: - s = quadrupleToString(b'noop', *args[:3]) + while an: + s = quadrupleToString(b'noop', *an[:3]) l.append(s) - args = args[3:] + an = an[3:] return b''.join(l) prog_str = b''.join([toString(t) for t in program]) return prog_str -context_info = [ +context_info: Final = ( ('optimization', ('none', 'moderate', 'aggressive'), 'aggressive'), - ('truediv', (False, True, 'auto'), 'auto') -] + ('truediv', (False, True, 'auto'), 'auto'), +) -def getContext(kwargs, _frame_depth=1): +def getContext(kwargs: dict[str, Any], _frame_depth: int = 1) -> dict[str, Any]: d = kwargs.copy() context = {} for name, allowed, default in context_info: @@ -581,11 +616,23 @@ def getContext(kwargs, _frame_depth=1): return context -def precompile(ex, signature=(), context={}, sanitize: bool=True): +_PrecompileResult: TypeAlias = tuple[ + list[_ThreeAddressForm], # threeAddrProgram + str, # inputsig + str, # tempsig + list[Any], # 
constants + tuple[str, ...], # input_names +] + + +def precompile(ex: expressions.ExpressionNode | str, + signature: Iterable[tuple[str, type]] = (), + context: dict[str, Any] = {}, + sanitize: bool = True) -> _PrecompileResult: """ Compile the expression to an intermediate form. """ - types = dict(signature) + types: dict[str, type] = dict(signature) input_order = [name for (name, type_) in signature] if isinstance(ex, str): @@ -614,14 +661,16 @@ def precompile(ex, signature=(), context={}, sanitize: bool=True): input_order = getInputOrder(ast, input_order) constants_order, constants = getConstants(ast) + assert ast.reg is not None + if isReduction(ast): ast.reg.temporary = False optimizeTemporariesAllocation(ast) ast.reg.temporary = False - r_output = 0 ast.reg.n = 0 + r_output = 0 r_inputs = r_output + 1 r_constants = setOrderedRegisterNumbers(input_order, r_inputs) @@ -630,12 +679,15 @@ def precompile(ex, signature=(), context={}, sanitize: bool=True): threeAddrProgram = convertASTtoThreeAddrForm(ast) input_names = tuple([a.value for a in input_order]) - signature = ''.join(type_to_typecode[types.get(x, default_type)] - for x in input_names) - return threeAddrProgram, signature, tempsig, constants, input_names + inputsig = ''.join(type_to_typecode[types.get(x, default_type)] + for x in input_names) + return threeAddrProgram, inputsig, tempsig, constants, input_names -def NumExpr(ex, signature=(), sanitize: bool=True, **kwargs): +def NumExpr(ex: expressions.ExpressionNode | str, + signature: Iterable[tuple[str, type]] = (), + sanitize: bool = True, + **kwargs: object) -> interpreter.NumExpr: """ Compile an expression built using E. variables to a function. @@ -659,23 +711,21 @@ def NumExpr(ex, signature=(), sanitize: bool=True, **kwargs): program, constants, input_names) -def disassemble(nex): +def disassemble(nex: interpreter.NumExpr) -> list[list[bytes | int | None]]: """ Given a NumExpr object, return a list which is the program disassembled. 
""" - rev_opcodes = {} - for op in interpreter.opcodes: - rev_opcodes[interpreter.opcodes[op]] = op + rev_opcodes = {code: op for op, code in interpreter.opcodes.items()} r_constants = 1 + len(nex.signature) r_temps = r_constants + len(nex.constants) - def parseOp(op): - name, sig = [*op.rsplit(b'_', 1), ''][:2] + def parseOp(op: bytes) -> tuple[bytes, bytes]: + name, sig, *_ = *op.rsplit(b'_', 1), b'' return name, sig - def getArg(pc, offset): + def getArg(pc: int, offset: int) -> int | bytes | None: arg = nex.program[pc + (offset if offset < 4 else offset+1)] - _, sig = parseOp(rev_opcodes.get(nex.program[pc])) + _, sig = parseOp(rev_opcodes[nex.program[pc]]) try: code = sig[offset - 1] except IndexError: @@ -699,9 +749,9 @@ def getArg(pc, offset): source = [] for pc in range(0, len(nex.program), 4): - op = rev_opcodes.get(nex.program[pc]) + op = rev_opcodes[nex.program[pc]] _, sig = parseOp(op) - parsed = [op] + parsed: list[bytes | int | None] = [op] for i in range(len(sig)): parsed.append(getArg(pc, 1 + i)) while len(parsed) < 4: @@ -710,7 +760,7 @@ def getArg(pc, offset): return source -def getType(a): +def getType(a: numpy.typing.NDArray[Any] | numpy.generic) -> type: kind = a.dtype.kind if kind == 'b': return bool @@ -733,7 +783,9 @@ def getType(a): raise ValueError("unknown type %s" % a.dtype.name) -def getExprNames(text, context, sanitize: bool=True): +def getExprNames(text: str, + context: dict[str, Any], + sanitize: bool = True) -> tuple[list[str], bool]: ex = stringToExpression(text, {}, context, sanitize) ast = expressionToAST(ex) input_order = getInputOrder(ast, None) @@ -751,7 +803,10 @@ def getExprNames(text, context, sanitize: bool=True): return [a.value for a in input_order], ex_uses_vml -def getArguments(names, local_dict=None, global_dict=None, _frame_depth: int=2): +def getArguments(names: Iterable[str], + local_dict: dict[str, Any] | None = None, + global_dict: dict[str, Any] | None = None, + _frame_depth: int = 2) -> 
list[numpy.typing.NDArray[Any]]: """ Get the arguments based on the names. """ @@ -795,14 +850,14 @@ def getArguments(names, local_dict=None, global_dict=None, _frame_depth: int=2): def validate(ex: str, - local_dict: Optional[Dict] = None, - global_dict: Optional[Dict] = None, - out: numpy.ndarray = None, + local_dict: dict[str, Any] | None = None, + global_dict: dict[str, Any] | None = None, + out: numpy.typing.NDArray[Any] | None = None, order: str = 'K', casting: str = 'safe', _frame_depth: int = 2, - sanitize: Optional[bool] = None, - **kwargs) -> Optional[Exception]: + sanitize: bool | None = None, + **kwargs: object) -> Exception | None: r""" Validate a NumExpr expression with the given `local_dict` or `locals()`. Returns `None` on success and the Exception object if one occurs. Note that @@ -849,7 +904,7 @@ def validate(ex: str, like float64 to float32, are allowed. * 'unsafe' means any data conversions may be done. - sanitize: Optional[bool] + sanitize: bool | None Both `validate` and by extension `evaluate` call `eval(ex)`, which is potentially dangerous on unsanitized inputs. As such, NumExpr by default performs simple sanitization, banning the character ':;[', the @@ -913,14 +968,14 @@ def validate(ex: str, return None def evaluate(ex: str, - local_dict: Optional[Dict] = None, - global_dict: Optional[Dict] = None, - out: numpy.ndarray = None, + local_dict: dict[str, Any] | None = None, + global_dict: dict[str, Any] | None = None, + out: numpy.typing.NDArray[Any] | None = None, order: str = 'K', casting: str = 'same_kind', - sanitize: Optional[bool] = None, + sanitize: bool | None = None, _frame_depth: int = 3, - **kwargs) -> numpy.ndarray: + **kwargs: object) -> numpy.typing.NDArray[Any]: r""" Evaluate a simple array expression element-wise using the virtual machine. 
@@ -990,9 +1045,9 @@ def evaluate(ex: str, else: raise e -def re_evaluate(local_dict: Optional[Dict] = None, - global_dict: Optional[Dict] = None, - _frame_depth: int=2) -> numpy.ndarray: +def re_evaluate(local_dict: dict[str, Any] | None = None, + global_dict: dict[str, Any] | None = None, + _frame_depth: int = 2) -> numpy.typing.NDArray[Any]: """ Re-evaluate the previous executed array expression without any check. @@ -1010,13 +1065,20 @@ def re_evaluate(local_dict: Optional[Dict] = None, """ if not hasattr(_numexpr_last, 'l'): _numexpr_last.l = ContextDict() + ctx: ContextDict[Any] = _numexpr_last.l try: - compiled_ex = _numexpr_last.l['ex'] + compiled_ex = ctx['ex'] except KeyError: raise RuntimeError("A previous evaluate() execution was not found, please call `validate` or `evaluate` once before `re_evaluate`") - argnames = _numexpr_last.l['argnames'] + assert compiled_ex is not None + + argnames = ctx['argnames'] + assert argnames is not None args = getArguments(argnames, local_dict, global_dict, _frame_depth=_frame_depth) - kwargs = _numexpr_last.l['kwargs'] + + kwargs = ctx['kwargs'] + assert kwargs is not None + # with evaluate_lock: return compiled_ex(*args, **kwargs) From 910a31c0b5d9199d2d18b70b728447b6fe2dbc34 Mon Sep 17 00:00:00 2001 From: jorenham Date: Wed, 24 Sep 2025 23:33:16 +0200 Subject: [PATCH 120/166] fix typing errors in the tests --- numexpr/__init__.py | 4 ++-- numexpr/tests/__init__.py | 7 +++++-- numexpr/tests/conftest.py | 2 +- numexpr/tests/test_numexpr.py | 1 + pyproject.toml | 16 ++++++++++++++++ 5 files changed, 25 insertions(+), 5 deletions(-) diff --git a/numexpr/__init__.py b/numexpr/__init__.py index a67c5cb..e998919 100644 --- a/numexpr/__init__.py +++ b/numexpr/__init__.py @@ -72,7 +72,7 @@ def print_versions() -> None: """Print the versions of software that numexpr relies on.""" try: import numexpr.tests - return numexpr.tests.print_versions() # type: ignore[attr-defined, no-untyped-call] + return 
numexpr.tests.print_versions() # type: ignore[no-untyped-call] except ImportError: # To maintain Python 2.6 compatibility we have simple error handling raise ImportError('`numexpr.tests` could not be imported, likely it was excluded from the distribution.') @@ -81,7 +81,7 @@ def test(verbosity: int = 1) -> "unittest.result.TestResult": """Run all the tests in the test suite.""" try: import numexpr.tests - return numexpr.tests.test(verbosity=verbosity) # type: ignore[attr-defined, no-untyped-call] + return numexpr.tests.test(verbosity=verbosity) # type: ignore[no-untyped-call] except ImportError: # To maintain Python 2.6 compatibility we have simple error handling raise ImportError('`numexpr.tests` could not be imported, likely it was excluded from the distribution.') diff --git a/numexpr/tests/__init__.py b/numexpr/tests/__init__.py index f47c8cc..a3606a7 100644 --- a/numexpr/tests/__init__.py +++ b/numexpr/tests/__init__.py @@ -8,7 +8,10 @@ # rights to use. #################################################################### -from numexpr.tests.test_numexpr import print_versions, test +from numexpr.tests.test_numexpr import ( + print_versions as print_versions, + test as test, +) if __name__ == '__main__': - test() + test() # type: ignore[no-untyped-call] diff --git a/numexpr/tests/conftest.py b/numexpr/tests/conftest.py index 3d32260..6cf5ca1 100644 --- a/numexpr/tests/conftest.py +++ b/numexpr/tests/conftest.py @@ -11,7 +11,7 @@ import pytest -def pytest_configure(config): +def pytest_configure(config: pytest.Config) -> None: config.addinivalue_line( "markers", "thread_unsafe: mark test as unsafe for parallel execution" ) diff --git a/numexpr/tests/test_numexpr.py b/numexpr/tests/test_numexpr.py index be0b055..5c1ad05 100644 --- a/numexpr/tests/test_numexpr.py +++ b/numexpr/tests/test_numexpr.py @@ -9,6 +9,7 @@ # rights to use. 
#################################################################### +# mypy: ignore-errors import os import platform diff --git a/pyproject.toml b/pyproject.toml index 264a999..2fd24e3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -62,3 +62,19 @@ test-command = ["python -m pytest --pyargs numexpr"] [[tool.cibuildwheel.overrides]] select = "cp31*t-*" test-command = ["python -m pytest --parallel-threads=4 --pyargs numexpr"] + +[tool.mypy] +exclude = ["bench", "build", "doc", "issues", "numexpr/tests"] +strict = true +disable_error_code = ["no-any-return"] +enable_error_code = ["ignore-without-code", "redundant-expr", "truthy-bool"] +warn_unreachable = false +local_partial_types = true +allow_redefinition_new = true + +[tool.pyright] +exclude = ["bench", "build", "doc", "issues", "numexpr/tests"] +ignore = [".venv"] +stubPath = "." +reportPrivateUsage = false +reportConstantRedefinition = false From 35e3dc21459ecaeef4eec91269e0ad960b43e8b8 Mon Sep 17 00:00:00 2001 From: jorenham Date: Wed, 24 Sep 2025 23:34:00 +0200 Subject: [PATCH 121/166] ignore mypy errors in `setup.py` --- setup.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/setup.py b/setup.py index 64d9f20..beeece3 100644 --- a/setup.py +++ b/setup.py @@ -9,6 +9,8 @@ # rights to use. 
#################################################################### +# mypy: ignore-errors + import configparser import os import os.path as op From f30461e2624c995a0b1738410215163b7df2e262 Mon Sep 17 00:00:00 2001 From: jorenham Date: Wed, 24 Sep 2025 23:35:11 +0200 Subject: [PATCH 122/166] configure mypy and pyright in `pyproject.toml` --- pyproject.toml | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 2fd24e3..96dae1f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -63,8 +63,10 @@ test-command = ["python -m pytest --pyargs numexpr"] select = "cp31*t-*" test-command = ["python -m pytest --parallel-threads=4 --pyargs numexpr"] + [tool.mypy] -exclude = ["bench", "build", "doc", "issues", "numexpr/tests"] +files = ["numexpr/*.py"] +exclude = ["^bench/", "^build/", "^doc/", "^issues/"] strict = true disable_error_code = ["no-any-return"] enable_error_code = ["ignore-without-code", "redundant-expr", "truthy-bool"] @@ -72,9 +74,11 @@ warn_unreachable = false local_partial_types = true allow_redefinition_new = true + [tool.pyright] -exclude = ["bench", "build", "doc", "issues", "numexpr/tests"] -ignore = [".venv"] +include = ["numexpr"] +exclude = ["bench", "build", "doc", "issues", "numexpr/tests", "setup.py"] +ignore = [".venv", "numexpr/tests/test_numexpr.py", "setup.py"] stubPath = "." 
reportPrivateUsage = false reportConstantRedefinition = false From 49bc1816e3e6b5b8d7859df02a6e4c4ec3886342 Mon Sep 17 00:00:00 2001 From: jorenham Date: Wed, 24 Sep 2025 23:43:03 +0200 Subject: [PATCH 123/166] run isort --- numexpr/__init__.py | 39 +++++++++++++++++---------------------- numexpr/cpuinfo.py | 15 ++------------- numexpr/expressions.py | 16 +++------------- numexpr/interpreter.pyi | 3 ++- numexpr/necompiler.py | 3 ++- numexpr/tests/__init__.py | 6 ++---- numexpr/utils.py | 15 ++------------- 7 files changed, 30 insertions(+), 67 deletions(-) diff --git a/numexpr/__init__.py b/numexpr/__init__.py index e998919..af9defc 100644 --- a/numexpr/__init__.py +++ b/numexpr/__init__.py @@ -22,16 +22,15 @@ """ from typing import TYPE_CHECKING, Final + if TYPE_CHECKING: import unittest # the `import _ as _` are needed for mypy to understand these are re-exports -from numexpr.interpreter import ( - __BLOCK_SIZE1__ as __BLOCK_SIZE1__, - MAX_THREADS as MAX_THREADS, - use_vml as use_vml, -) +from numexpr.interpreter import __BLOCK_SIZE1__ as __BLOCK_SIZE1__ +from numexpr.interpreter import MAX_THREADS as MAX_THREADS +from numexpr.interpreter import use_vml as use_vml is_cpu_amd_intel: Final = False # DEPRECATION WARNING: WILL BE REMOVED IN FUTURE RELEASE @@ -39,23 +38,19 @@ # to improve import times. 
from numexpr.expressions import E as E -from numexpr.necompiler import ( - NumExpr as NumExpr, - disassemble as disassemble, - evaluate as evaluate, - re_evaluate as re_evaluate, - validate as validate, -) -from numexpr.utils import ( - _init_num_threads, - detect_number_of_cores as detect_number_of_cores, - detect_number_of_threads as detect_number_of_threads, - get_num_threads as get_num_threads, - get_vml_version as get_vml_version, - set_num_threads as set_num_threads, - set_vml_accuracy_mode as set_vml_accuracy_mode, - set_vml_num_threads as set_vml_num_threads, -) +from numexpr.necompiler import NumExpr as NumExpr +from numexpr.necompiler import disassemble as disassemble +from numexpr.necompiler import evaluate as evaluate +from numexpr.necompiler import re_evaluate as re_evaluate +from numexpr.necompiler import validate as validate +from numexpr.utils import _init_num_threads +from numexpr.utils import detect_number_of_cores as detect_number_of_cores +from numexpr.utils import detect_number_of_threads as detect_number_of_threads +from numexpr.utils import get_num_threads as get_num_threads +from numexpr.utils import get_vml_version as get_vml_version +from numexpr.utils import set_num_threads as set_num_threads +from numexpr.utils import set_vml_accuracy_mode as set_vml_accuracy_mode +from numexpr.utils import set_vml_num_threads as set_vml_num_threads # Detect the number of cores ncores: Final = detect_number_of_cores() diff --git a/numexpr/cpuinfo.py b/numexpr/cpuinfo.py index 676e56e..9a4e5be 100755 --- a/numexpr/cpuinfo.py +++ b/numexpr/cpuinfo.py @@ -30,19 +30,8 @@ import subprocess import sys import warnings -from typing import ( - Any, - Callable, - ClassVar, - Container, - Final, - Generator, - NoReturn, - Sequence, - TypeAlias, - TypeVar, - overload, -) +from typing import (Any, Callable, ClassVar, Container, Final, Generator, + NoReturn, Sequence, TypeAlias, TypeVar, overload) _CMD: TypeAlias = str | Sequence[str] _Statuses: TypeAlias = 
Container[int] diff --git a/numexpr/expressions.py b/numexpr/expressions.py index 2ee272e..de276ce 100644 --- a/numexpr/expressions.py +++ b/numexpr/expressions.py @@ -12,20 +12,10 @@ import operator import threading - import types -from typing import ( - TYPE_CHECKING, - Any, - Callable, - ClassVar, - Final, - Iterable, - Mapping, - NoReturn, - TypeVar, - cast, -) +from typing import (TYPE_CHECKING, Any, Callable, ClassVar, Final, Iterable, + Mapping, NoReturn, TypeVar, cast) + if TYPE_CHECKING: from typing_extensions import Self, TypeIs diff --git a/numexpr/interpreter.pyi b/numexpr/interpreter.pyi index 64e1d89..90f8d80 100644 --- a/numexpr/interpreter.pyi +++ b/numexpr/interpreter.pyi @@ -1,7 +1,8 @@ -import numpy.typing as npt from collections.abc import Sequence from typing import Any, Final, Literal, TypeAlias +import numpy.typing as npt + _VMLAccuracyMode: TypeAlias = Literal[0, 1, 2, 3] MAX_THREADS: Final[int] = ... diff --git a/numexpr/necompiler.py b/numexpr/necompiler.py index d0e8c8b..63d1cd8 100644 --- a/numexpr/necompiler.py +++ b/numexpr/necompiler.py @@ -14,8 +14,9 @@ import re import sys import threading +from typing import (TYPE_CHECKING, Any, ClassVar, Final, Generator, Iterable, + Iterator, TypeAlias) -from typing import TYPE_CHECKING, Any, ClassVar, Final, Generator, Iterable, Iterator, TypeAlias if TYPE_CHECKING: from typing_extensions import Unpack diff --git a/numexpr/tests/__init__.py b/numexpr/tests/__init__.py index a3606a7..9ecc21d 100644 --- a/numexpr/tests/__init__.py +++ b/numexpr/tests/__init__.py @@ -8,10 +8,8 @@ # rights to use. 
#################################################################### -from numexpr.tests.test_numexpr import ( - print_versions as print_versions, - test as test, -) +from numexpr.tests.test_numexpr import print_versions as print_versions +from numexpr.tests.test_numexpr import test as test if __name__ == '__main__': test() # type: ignore[no-untyped-call] diff --git a/numexpr/utils.py b/numexpr/utils.py index 3162730..83ae1bd 100644 --- a/numexpr/utils.py +++ b/numexpr/utils.py @@ -15,19 +15,8 @@ import contextvars import os import subprocess -from typing import ( - Final, - Generic, - ItemsView, - Iterable, - Iterator, - KeysView, - Literal, - TypeVar, - ValuesView, - cast, - overload, -) +from typing import (Final, Generic, ItemsView, Iterable, Iterator, KeysView, + Literal, TypeVar, ValuesView, cast, overload) from numexpr import use_vml from numexpr.interpreter import MAX_THREADS, _get_num_threads, _set_num_threads From 10d949adb2362c866953e32eba50b2987ab08751 Mon Sep 17 00:00:00 2001 From: jorenham Date: Wed, 24 Sep 2025 23:43:42 +0200 Subject: [PATCH 124/166] re-enable mypy pre-commit hook :( --- .pre-commit-config.yaml | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 97f6d37..72de0d7 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -18,9 +18,10 @@ repos: hooks: - id: isort -# Too many things to fix, let's just ignore it for now -#- repo: https://github.com/pre-commit/mirrors-mypy -# rev: v1.8.0 -# hooks: -# - id: mypy -# exclude: ^(docs/|setup.py) +- repo: https://github.com/pre-commit/mirrors-mypy + rev: v1.18.2 + hooks: + - id: mypy + args: [--config-file=pyproject.toml] + exclude: ^(bench/|build/|doc/|issues/|setup.py) + additional_dependencies: [numpy, pytest] From 895df61a96d28b74b636d0c9b3e12eb8d3e86d4b Mon Sep 17 00:00:00 2001 From: jorenham Date: Wed, 24 Sep 2025 23:55:58 +0200 Subject: [PATCH 125/166] add the `py.typed` --- MANIFEST.in | 2 +- 
numexpr/py.typed | 0 2 files changed, 1 insertion(+), 1 deletion(-) create mode 100644 numexpr/py.typed diff --git a/MANIFEST.in b/MANIFEST.in index 11f9a15..886a494 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -2,7 +2,7 @@ include MANIFEST.in VERSION include *.rst *.txt *.cfg site.cfg.example -recursive-include numexpr *.cpp *.hpp *.py *.pyi +recursive-include numexpr *.cpp *.hpp *.py *.pyi py.typed recursive-include numexpr/win32 *.c *.h exclude numexpr/__config__.py RELEASING.txt site.cfg diff --git a/numexpr/py.typed b/numexpr/py.typed new file mode 100644 index 0000000..e69de29 From 9ace416a6237a1317f93f4f8e88679600b5fde81 Mon Sep 17 00:00:00 2001 From: jorenham Date: Thu, 25 Sep 2025 00:05:22 +0200 Subject: [PATCH 126/166] run mypy and pyright in CI --- .github/workflows/typecheck.yml | 36 +++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) create mode 100644 .github/workflows/typecheck.yml diff --git a/.github/workflows/typecheck.yml b/.github/workflows/typecheck.yml new file mode 100644 index 0000000..a6ca065 --- /dev/null +++ b/.github/workflows/typecheck.yml @@ -0,0 +1,36 @@ +name: Validate static types +permissions: read-all + +on: + pull_request: + paths: + - .github/workflows/typecheck.yml + - numexpr/* + - pyproject.toml + workflow_dispatch: + +concurrency: + group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} + cancel-in-progress: true + +jobs: + typecheck_quaddtype: + runs-on: ubuntu-latest + timeout-minutes: 2 + + steps: + - uses: actions/checkout@v5.0.0 + + - uses: astral-sh/setup-uv@v6.7.0 + with: + activate-environment: true + python-version: "3.10" + + - name: install + run: uv pip install mypy pyright pytest . + + - name: pyright + run: pyright + + - name: mypy + run: mypy --no-incremental --cache-dir=/dev/null . 
From a525d352c7f5524b0cc4e2b4df2d60dc965c4443 Mon Sep 17 00:00:00 2001 From: jorenham Date: Thu, 25 Sep 2025 00:07:36 +0200 Subject: [PATCH 127/166] add `Typing :: Typed` project metadata classifier --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index 96dae1f..e5a7d18 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -23,6 +23,7 @@ classifiers = [ "Intended Audience :: Science/Research", "Programming Language :: Python", "Topic :: Software Development :: Libraries :: Python Modules", + "Typing :: Typed", "Operating System :: Microsoft :: Windows", "Operating System :: Unix", "Programming Language :: Python :: 3", From 982118cc39bf0003e688032e8cd1373454d58d54 Mon Sep 17 00:00:00 2001 From: jorenham Date: Thu, 25 Sep 2025 00:12:56 +0200 Subject: [PATCH 128/166] add release note for the added static typing support --- RELEASE_NOTES.rst | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/RELEASE_NOTES.rst b/RELEASE_NOTES.rst index 8ed7222..654cc85 100644 --- a/RELEASE_NOTES.rst +++ b/RELEASE_NOTES.rst @@ -7,6 +7,10 @@ Changes from 2.13.0 to 2.13.1 * **Under development.** +* Static typing support has been added, making NumExpr compatible with + static type checkers like `mypy` and `pyright`. + Thanks to Joren Hammudoglu (@jorenham) for the work. 
+ Changes from 2.12.1 to 2.13.0 ----------------------------- From d8161ba2ea4d0cfbb9a95f2b642fd3045e78b294 Mon Sep 17 00:00:00 2001 From: jorenham Date: Thu, 25 Sep 2025 00:31:37 +0200 Subject: [PATCH 129/166] add missing annotation quotes --- numexpr/expressions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/numexpr/expressions.py b/numexpr/expressions.py index de276ce..ba77b5b 100644 --- a/numexpr/expressions.py +++ b/numexpr/expressions.py @@ -77,7 +77,7 @@ def get_optimization() -> str: # helper functions for creating __magic__ methods def ophelper(f: Callable[..., _T]) -> Callable[..., _T]: - def func(*args: ExpressionNode) -> _T: + def func(*args: 'ExpressionNode') -> _T: arglist = list(args) for i, x in enumerate(args): if isConstant(x): From 17c206a113cadd7c1afed082979649740bcdfc57 Mon Sep 17 00:00:00 2001 From: jorenham Date: Thu, 25 Sep 2025 00:46:10 +0200 Subject: [PATCH 130/166] stringify remaining problematic annotations --- numexpr/expressions.py | 4 ++-- numexpr/necompiler.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/numexpr/expressions.py b/numexpr/expressions.py index ba77b5b..fee72af 100644 --- a/numexpr/expressions.py +++ b/numexpr/expressions.py @@ -177,7 +177,7 @@ def binop( opfunc = getattr(operator, "__%s__" % opname) @ophelper - def operation(self: ExpressionNode, other: ExpressionNode) -> ExpressionNode: + def operation(self: 'ExpressionNode', other: 'ExpressionNode') -> 'ExpressionNode': if reversed: self, other = other, self if allConstantNodes([self, other]): @@ -192,7 +192,7 @@ def func( func: Callable[..., Any], minkind: str | None = None, maxkind: str | None = None ) -> Callable[..., 'FuncNode | ConstantNode']: @ophelper - def function(*args: ExpressionNode) -> 'FuncNode | ConstantNode': + def function(*args: 'ExpressionNode') -> 'FuncNode | ConstantNode': if allConstantNodes(args): return ConstantNode(func(*[x.value for x in args])) kind = commonKind(args) diff --git 
a/numexpr/necompiler.py b/numexpr/necompiler.py index 63d1cd8..e2f1767 100644 --- a/numexpr/necompiler.py +++ b/numexpr/necompiler.py @@ -533,7 +533,7 @@ def setRegisterNumbersForTemporaries(ast: ASTNode, start: int) -> tuple[int, str return start + seen, signature -_ThreeAddressForm: TypeAlias = tuple[bytes, Register, Unpack[tuple[Register, ...]]] +_ThreeAddressForm: TypeAlias = tuple[bytes, Register, 'Unpack[tuple[Register, ...]]'] def convertASTtoThreeAddrForm(ast: ASTNode) -> list[_ThreeAddressForm]: From f360f1240f460334c380bd1b81d38c5e38deefae Mon Sep 17 00:00:00 2001 From: Francesc Alted Date: Thu, 25 Sep 2025 09:32:56 +0200 Subject: [PATCH 131/166] Update RELEASE_NOTES to reflect changes in functionality Removed 'fmod' from the list of functions enabling integer outputs for integer inputs. --- RELEASE_NOTES.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/RELEASE_NOTES.rst b/RELEASE_NOTES.rst index d3effc3..0690e5c 100644 --- a/RELEASE_NOTES.rst +++ b/RELEASE_NOTES.rst @@ -15,7 +15,7 @@ Changes from 2.12.1 to 2.13.0 * Bitwise operators (and, or, not, xor): `&, |, ~, ^` * New binary arithmetic operator for floor division: `//` * New functions: `signbit`, `hypot`, `copysign`, `nextafter`, `maximum`, `minimum`, `log2`, `trunc`, `round` and `sign`. - * Also enables integer outputs for integer inputs for `abs`, `fmod`, `copy`, `ones_like`, `sign` and `round`. + * Also enables integer outputs for integer inputs for `abs`, `copy`, `ones_like`, `sign` and `round`. Thanks to Luke Shaw for the contributions. 
From 36ca8d1f16c7523a615e4df1d9ab9becfdab10fc Mon Sep 17 00:00:00 2001 From: jorenham Date: Thu, 25 Sep 2025 19:33:59 +0200 Subject: [PATCH 132/166] revert problematic functional changes in `setOrderedRegisterNumbers` --- numexpr/necompiler.py | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/numexpr/necompiler.py b/numexpr/necompiler.py index e2f1767..77f655b 100644 --- a/numexpr/necompiler.py +++ b/numexpr/necompiler.py @@ -15,7 +15,7 @@ import sys import threading from typing import (TYPE_CHECKING, Any, ClassVar, Final, Generator, Iterable, - Iterator, TypeAlias) + Iterator, Sequence, TypeAlias) if TYPE_CHECKING: from typing_extensions import Unpack @@ -494,16 +494,14 @@ def optimizeTemporariesAllocation(ast: ASTNode) -> None: n.reg = reg -def setOrderedRegisterNumbers(order: Iterable[ASTNode], start: int) -> int: +def setOrderedRegisterNumbers(order: Sequence[ASTNode], start: int) -> int: """ Given an order of nodes, assign register numbers. """ - i = -1 - for i, node in enumerate(order, start=start): - if node.reg: - node.reg.n = i - total = i + 1 - return total + for i, node in enumerate(order): + assert node.reg is not None + node.reg.n = start + i + return start + len(order) def setRegisterNumbersForTemporaries(ast: ASTNode, start: int) -> tuple[int, str]: From f114e9f49f759d543d80950f39ff01cff00ddd32 Mon Sep 17 00:00:00 2001 From: Luke Shaw Date: Tue, 30 Sep 2025 12:15:01 +0200 Subject: [PATCH 133/166] Add nan handling to maximum/minimum --- numexpr/bespoke_functions.hpp | 45 ++++++++++++++++ numexpr/functions.hpp | 8 +-- numexpr/msvc_function_stubs.hpp | 96 ++++++++++++++++++--------------- numexpr/numexpr_config.hpp | 10 ++-- numexpr/tests/test_numexpr.py | 3 ++ 5 files changed, 112 insertions(+), 50 deletions(-) diff --git a/numexpr/bespoke_functions.hpp b/numexpr/bespoke_functions.hpp index e404edc..26f784e 100644 --- a/numexpr/bespoke_functions.hpp +++ b/numexpr/bespoke_functions.hpp @@ -35,6 +35,51 @@ inline long 
fabsl(long x) {return x<0 ? -x: x;} // inline long fmodl(long x, long y) {return (long)fmodf((long)x, (long)y);} #ifdef USE_VML +// To match Numpy behaviour for NaNs +static void vsFmax_(MKL_INT n, const float* x1, const float* x2, float* dest) +{ + vsFmax(n, x1, x2, dest); + MKL_INT j; + for (j=0; j are actually #define'd and are not usable as function pointers :-/ */ -#if _MSC_VER < 1400 // 1310 == MSVC 7.1 -/* Apparently, single precision functions are not included in MSVC 7.1 */ - -#define sqrtf(x) ((float)sqrt((double)(x))) -#define sinf(x) ((float)sin((double)(x))) -#define cosf(x) ((float)cos((double)(x))) -#define tanf(x) ((float)tan((double)(x))) -#define asinf(x) ((float)asin((double)(x))) -#define acosf(x) ((float)acos((double)(x))) -#define atanf(x) ((float)atan((double)(x))) -#define sinhf(x) ((float)sinh((double)(x))) -#define coshf(x) ((float)cosh((double)(x))) -#define tanhf(x) ((float)tanh((double)(x))) -#define asinhf(x) ((float)asinh((double)(x))) -#define acoshf(x) ((float)acosh((double)(x))) -#define atanhf(x) ((float)atanh((double)(x))) -#define logf(x) ((float)log((double)(x))) -#define log1pf(x) ((float)log1p((double)(x))) -#define log10f(x) ((float)log10((double)(x))) -#define log2f(x) ((float)log2((double)(x))) -#define expf(x) ((float)exp((double)(x))) -#define expm1f(x) ((float)expm1((double)(x))) -#define fabsf(x) ((float)fabs((double)(x))) -#define fmodf(x, y) ((float)fmod((double)(x), (double)(y))) -#define atan2f(x, y) ((float)atan2((double)(x), (double)(y))) -#define hypotf(x, y) ((float)hypot((double)(x), (double)(y))) -#define copysignf(x, y) ((float)copysign((double)(x), (double)(y))) -#define nextafterf(x, y) ((float)nextafter((double)(x), (double)(y))) -#define fmaxf(x, y) ((float)fmaxd((double)(x), (double)(y))) -#define fminf(x, y) ((float)fmind((double)(x), (double)(y))) -#define ceilf(x) ((float)ceil((double)(x))) -#define hypotf(x) ((float)hypot((double)(x))) -#define rintf(x) ((float)rint((double)(x))) -#define truncf(x) 
((float)trunc((double)(x))) - - -/* The next are directly called from interp_body.cpp */ -#define powf(x, y) ((float)pow((double)(x), (double)(y))) -#define floorf(x) ((float)floor((double)(x))) -#endif // _MSC_VER < 1400 - /* Due to casting problems (normally return ints not bools, easiest to define non-overloaded wrappers for these functions) */ // MSVC version: use global ::isfinite / ::isnan @@ -67,6 +26,57 @@ inline bool isnand(double x) { return !!::_isnan(x); } inline bool isinfd(double x) { return !!::isinf(x); } inline bool isinff_(float x) { return !!::isinf(x); } +// To handle overloading of fmax/fmin in cmath and match NumPy behaviour for NaNs +inline double fmaxd(double x, double y) { return (isnand(x) | isnand(y))? NAN : fmax(x, y); } +inline double fmind(double x, double y) { return (isnand(x) | isnand(y))? NAN : fmin(x, y); } + + +#if _MSC_VER < 1400 // 1310 == MSVC 7.1 + /* Apparently, single precision functions are not included in MSVC 7.1 */ + + #define sqrtf(x) ((float)sqrt((double)(x))) + #define sinf(x) ((float)sin((double)(x))) + #define cosf(x) ((float)cos((double)(x))) + #define tanf(x) ((float)tan((double)(x))) + #define asinf(x) ((float)asin((double)(x))) + #define acosf(x) ((float)acos((double)(x))) + #define atanf(x) ((float)atan((double)(x))) + #define sinhf(x) ((float)sinh((double)(x))) + #define coshf(x) ((float)cosh((double)(x))) + #define tanhf(x) ((float)tanh((double)(x))) + #define asinhf(x) ((float)asinh((double)(x))) + #define acoshf(x) ((float)acosh((double)(x))) + #define atanhf(x) ((float)atanh((double)(x))) + #define logf(x) ((float)log((double)(x))) + #define log1pf(x) ((float)log1p((double)(x))) + #define log10f(x) ((float)log10((double)(x))) + #define log2f(x) ((float)log2((double)(x))) + #define expf(x) ((float)exp((double)(x))) + #define expm1f(x) ((float)expm1((double)(x))) + #define fabsf(x) ((float)fabs((double)(x))) + #define fmodf(x, y) ((float)fmod((double)(x), (double)(y))) + #define atan2f(x, y) 
((float)atan2((double)(x), (double)(y))) + #define hypotf(x, y) ((float)hypot((double)(x), (double)(y))) + #define copysignf(x, y) ((float)copysign((double)(x), (double)(y))) + #define nextafterf(x, y) ((float)nextafter((double)(x), (double)(y))) + #define ceilf(x) ((float)ceil((double)(x))) + #define hypotf(x) ((float)hypot((double)(x))) + #define rintf(x) ((float)rint((double)(x))) + #define truncf(x) ((float)trunc((double)(x))) + + + /* The next are directly called from interp_body.cpp */ + #define powf(x, y) ((float)pow((double)(x), (double)(y))) + #define floorf(x) ((float)floor((double)(x))) + + #define fmaxf_(x, y) ((float)fmaxd((double)(x), (double)(y))) // define fmaxf_ since fmaxf doesn't exist for early MSVC + #define fminf_(x, y) ((float)fmind((double)(x), (double)(y))) +#else + inline float fmaxf_(float x, float y) { return (isnanf_(x) | isnanf_(y))? NAN : fmaxf(x, y); } + inline float fminf_(float x, float y) { return (isnanf_(x) | isnanf_(y))? NAN : fminf(x, y); } +#endif // _MSC_VER < 1400 + + /* Now the actual stubs */ inline float sqrtf2(float x) { @@ -170,11 +180,11 @@ inline float copysignf2(float x, float y) { } inline float fmaxf2(float x, float y) { - return fmaxf(x, y); + return fmaxf_(x, y); } inline float fminf2(float x, float y) { - return fminf(x, y); + return fminf_(x, y); } diff --git a/numexpr/numexpr_config.hpp b/numexpr/numexpr_config.hpp index 4ed64ab..5df0c35 100644 --- a/numexpr/numexpr_config.hpp +++ b/numexpr/numexpr_config.hpp @@ -43,9 +43,7 @@ #include //no single precision version of signbit in C++ standard inline bool signbitf(float x) { return signbit((double)x); } -// To handle overloading of fmax/fmin in cmath -inline double fmaxd(double x, double y) { return fmax(x, y); } -inline double fmind(double x, double y) { return fmin(x, y); } + #ifdef _WIN32 #ifndef __MINGW32__ #include "missing_posix_functions.hpp" @@ -62,6 +60,12 @@ inline bool isfinited(double x) { return !!std::isfinite(x); } inline bool isnand(double x) { 
return !!std::isnan(x); } inline bool isinff_(float x) { return !!std::isinf(x); } inline bool isinfd(double x) { return !!std::isinf(x); } + +// To handle overloading of fmax/fmin in cmath and match NumPy behaviour for NaNs +inline double fmaxd(double x, double y) { return (isnand(x) | isnand(y))? NAN : fmax(x, y); } +inline double fmind(double x, double y) { return (isnand(x) | isnand(y))? NAN : fmin(x, y); } +inline float fmaxf_(float x, float y) { return (isnanf_(x) | isnanf_(y))? NAN : fmaxf(x, y); } +inline float fminf_(float x, float y) { return (isnanf_(x) | isnanf_(y))? NAN : fminf(x, y); } #endif #endif // NUMEXPR_CONFIG_HPP diff --git a/numexpr/tests/test_numexpr.py b/numexpr/tests/test_numexpr.py index be0b055..9af2c0e 100644 --- a/numexpr/tests/test_numexpr.py +++ b/numexpr/tests/test_numexpr.py @@ -484,6 +484,9 @@ def test_maximum_minimum(self): for dtype in [float, double, int, np.int64]: x = arange(10, dtype=dtype) y = 2 * arange(10, dtype=dtype)[::-1] + if dtype in (float, double): + y[5] = np.nan + x[2] = np.nan assert_array_equal(evaluate("maximum(x,y)"), maximum(x,y)) assert_array_equal(evaluate("minimum(x,y)"), minimum(x,y)) From f1487d4dc52f141e21eeeaff6fbbdc06755b65da Mon Sep 17 00:00:00 2001 From: Luke Shaw Date: Tue, 30 Sep 2025 14:43:12 +0200 Subject: [PATCH 134/166] Fix boolean add/mult --- numexpr/expressions.py | 3 +++ numexpr/tests/test_numexpr.py | 6 ++++++ 2 files changed, 9 insertions(+) diff --git a/numexpr/expressions.py b/numexpr/expressions.py index f11dd6c..cab0247 100644 --- a/numexpr/expressions.py +++ b/numexpr/expressions.py @@ -531,6 +531,9 @@ class OpNode(ExpressionNode): def __init__(self, opcode=None, args=None, kind=None): if (kind is None) and (args is not None): kind = commonKind(args) + if kind=='bool': # handle bool*bool and bool+bool cases + opcode = 'and' if opcode=='mul' else opcode + opcode = 'or' if opcode=='add' else opcode ExpressionNode.__init__(self, value=opcode, kind=kind, children=args) diff --git 
a/numexpr/tests/test_numexpr.py b/numexpr/tests/test_numexpr.py index 9af2c0e..c607835 100644 --- a/numexpr/tests/test_numexpr.py +++ b/numexpr/tests/test_numexpr.py @@ -490,6 +490,12 @@ def test_maximum_minimum(self): assert_array_equal(evaluate("maximum(x,y)"), maximum(x,y)) assert_array_equal(evaluate("minimum(x,y)"), minimum(x,y)) + def test_addmult_booleans(self): + x = np.asarray([0, 1, 0, 0, 1], dtype=bool) + y = x[::-1] + assert_array_equal(evaluate("x * y"), x * y) + assert_array_equal(evaluate("x + y"), x + y) + def test_sign_round(self): for dtype in [float, double, np.int32, np.int64, complex]: x = arange(10, dtype=dtype) From 74149e71047482ea36fe651bea55f5bea37010f9 Mon Sep 17 00:00:00 2001 From: Luke Shaw Date: Tue, 30 Sep 2025 15:27:24 +0200 Subject: [PATCH 135/166] Fix numerical stability for tan/tanh --- numexpr/complex_functions.hpp | 63 ++++++++++++++++++----------------- numexpr/tests/test_numexpr.py | 7 ++++ 2 files changed, 40 insertions(+), 30 deletions(-) diff --git a/numexpr/complex_functions.hpp b/numexpr/complex_functions.hpp index 42775e2..e0ff8a2 100644 --- a/numexpr/complex_functions.hpp +++ b/numexpr/complex_functions.hpp @@ -390,42 +390,45 @@ nc_sinh(std::complex *x, std::complex *r) static void nc_tan(std::complex *x, std::complex *r) { - double sr,cr,shi,chi; - double rs,is,rc,ic; - double d; - double xr=x->real(), xi=x->imag(); - sr = sin(xr); - cr = cos(xr); - shi = sinh(xi); - chi = cosh(xi); - rs = sr*chi; - is = cr*shi; - rc = cr*chi; - ic = -sr*shi; - d = rc*rc + ic*ic; - r->real((rs*rc+is*ic)/d); - r->imag((is*rc-rs*ic)/d); + double xr = x->real(); + double xi = x->imag(); + double imag_part; + + double denom = cos(2*xr) + cosh(2*xi); + // handle overflows + if (xi > 20) { + imag_part = 1.0 / (1.0 + exp(-4*xi)); + } else if (xi < -20) { + imag_part = -1.0 / (1.0 + exp(4*xi)); + } else { + imag_part = sinh(2*xi) / denom; + } + double real_part = sin(2*xr) / denom; + + r->real(real_part); + r->imag(imag_part); return; } static 
void nc_tanh(std::complex *x, std::complex *r) { - double si,ci,shr,chr; - double rs,is,rc,ic; - double d; - double xr=x->real(), xi=x->imag(); - si = sin(xi); - ci = cos(xi); - shr = sinh(xr); - chr = cosh(xr); - rs = ci*shr; - is = si*chr; - rc = ci*chr; - ic = si*shr; - d = rc*rc + ic*ic; - r->real((rs*rc+is*ic)/d); - r->imag((is*rc-rs*ic)/d); + double xr = x->real(); + double xi = x->imag(); + double real_part; + double denom = cosh(2*xr) + cos(2*xi); + // handle overflows + if (xr > 20) { + real_part = 1.0 / (1.0 + exp(-4*xr)); + } else if (xr < -20) { + real_part = -1.0 / (1.0 + exp(4*xr)); + } else { + real_part = sinh(2*xr) / denom; + } + double imag_part = sin(2*xi) / denom; + + r->real(real_part); + r->imag(imag_part); return; } diff --git a/numexpr/tests/test_numexpr.py b/numexpr/tests/test_numexpr.py index be0b055..e8e711a 100644 --- a/numexpr/tests/test_numexpr.py +++ b/numexpr/tests/test_numexpr.py @@ -480,6 +480,13 @@ def test_bitwise_operators(self): assert_array_equal(evaluate("x | y"), x | y) # or assert_array_equal(evaluate("~x"), ~x) # invert + def test_complex_tan(self): + # old version of NumExpr had overflow problems + x = np.arange(1, 400., step=16., dtype=np.complex128) + y = 1j*np.arange(1, 400., step=16., dtype=np.complex128) + assert_array_almost_equal(evaluate("tan(x + y)"), tan(x + y)) + assert_array_almost_equal(evaluate("tanh(x + y)"), tanh(x + y)) + def test_maximum_minimum(self): for dtype in [float, double, int, np.int64]: x = arange(10, dtype=dtype) From 780908af3daf970391715bced9621f06157307cc Mon Sep 17 00:00:00 2001 From: Luke Shaw Date: Tue, 30 Sep 2025 15:33:23 +0200 Subject: [PATCH 136/166] Make test stricter --- numexpr/tests/test_numexpr.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/numexpr/tests/test_numexpr.py b/numexpr/tests/test_numexpr.py index c607835..5e60800 100644 --- a/numexpr/tests/test_numexpr.py +++ b/numexpr/tests/test_numexpr.py @@ -493,8 +493,14 @@ def 
test_maximum_minimum(self): def test_addmult_booleans(self): x = np.asarray([0, 1, 0, 0, 1], dtype=bool) y = x[::-1] - assert_array_equal(evaluate("x * y"), x * y) - assert_array_equal(evaluate("x + y"), x + y) + res_ne = evaluate("x * y") + res_np = x * y + assert_array_equal(res_ne, res_np) + assert res_ne.dtype == res_np.dtype + res_ne = evaluate("x + y") + res_np = x + y + assert_array_equal(res_ne, res_np) + assert res_ne.dtype == res_np.dtype def test_sign_round(self): for dtype in [float, double, np.int32, np.int64, complex]: From aefd3e62f3a7e18362c83c6e22baf416e2c7d668 Mon Sep 17 00:00:00 2001 From: Luke Shaw Date: Tue, 30 Sep 2025 17:52:55 +0200 Subject: [PATCH 137/166] Announcing --- ANNOUNCE.rst | 7 +++---- RELEASE_NOTES.rst | 4 ++-- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/ANNOUNCE.rst b/ANNOUNCE.rst index a668477..c759b7b 100644 --- a/ANNOUNCE.rst +++ b/ANNOUNCE.rst @@ -1,12 +1,11 @@ ========================= -Announcing NumExpr 2.13.0 +Announcing NumExpr 2.13. ========================= Hi everyone, -NumExpr 2.13.0 introduced a bunch of new features including new -bitwise operators (&, |, ^, ~), floor division (//). It also adds -many new functions (like hypot, log2, maximum, minimum, nextafter...). +NumExpr 2.13.1 introduces a couple of patches for maximum/minimum and +multiplication/addition for booleans to match NumPy behaviour. Thanks to Luke Shaw for these contributions. 
Project documentation is available at: diff --git a/RELEASE_NOTES.rst b/RELEASE_NOTES.rst index 0690e5c..f1a358c 100644 --- a/RELEASE_NOTES.rst +++ b/RELEASE_NOTES.rst @@ -5,8 +5,8 @@ Release notes for NumExpr 2.13 series Changes from 2.13.0 to 2.13.1 ----------------------------- -* **Under development.** - +* Patch to maximum/minimum functions in order to match NumPy NaN handling +* Patch to convert '+'->'|' and '*'->'&' for booleans Changes from 2.12.1 to 2.13.0 ----------------------------- From 64eb4e1ec1a2591621ac353d251ed7946b6bc188 Mon Sep 17 00:00:00 2001 From: Luke Shaw Date: Tue, 30 Sep 2025 17:55:55 +0200 Subject: [PATCH 138/166] Correct version number --- VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/VERSION b/VERSION index 6dea080..94f15e9 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -2.13.1.dev0 +2.13.1 From 2d24f28f8ecb0981472fb005fd7aeca4db5832d5 Mon Sep 17 00:00:00 2001 From: lshaw8317 Date: Tue, 30 Sep 2025 20:40:41 +0200 Subject: [PATCH 139/166] Post 2.13.1 release actions done --- RELEASE_NOTES.rst | 5 +++++ VERSION | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/RELEASE_NOTES.rst b/RELEASE_NOTES.rst index f1a358c..dc61a25 100644 --- a/RELEASE_NOTES.rst +++ b/RELEASE_NOTES.rst @@ -2,6 +2,11 @@ Release notes for NumExpr 2.13 series ===================================== +Changes from 2.13.1 to 2.13.2 +----------------------------- + +* **Under development.** + Changes from 2.13.0 to 2.13.1 ----------------------------- diff --git a/VERSION b/VERSION index 94f15e9..aa9132d 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -2.13.1 +2.13.2.dev0 From 18053b21438677be65beeb05910c3164b081f240 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 6 Oct 2025 19:45:58 +0000 Subject: [PATCH 140/166] [pre-commit.ci] pre-commit autoupdate MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit updates: - 
[github.com/pycqa/isort: 6.0.1 → 6.1.0](https://github.com/pycqa/isort/compare/6.0.1...6.1.0) --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 72de0d7..35c3d40 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -14,7 +14,7 @@ repos: # - id: flake8 # - repo: https://github.com/pycqa/isort - rev: 6.0.1 + rev: 6.1.0 hooks: - id: isort From 4a5866550ca940b8f9cc38facffca4d58628d56a Mon Sep 17 00:00:00 2001 From: lshaw8317 Date: Mon, 13 Oct 2025 10:37:35 +0200 Subject: [PATCH 141/166] Getting ready for release 2.14.0 --- ANNOUNCE.rst | 24 ++++++++++-------------- RELEASE_NOTES.rst | 7 ++++--- VERSION | 2 +- 3 files changed, 15 insertions(+), 18 deletions(-) diff --git a/ANNOUNCE.rst b/ANNOUNCE.rst index c759b7b..b08aeaf 100644 --- a/ANNOUNCE.rst +++ b/ANNOUNCE.rst @@ -1,31 +1,27 @@ ========================= -Announcing NumExpr 2.13. +Announcing NumExpr 2.14. ========================= Hi everyone, -NumExpr 2.13.1 introduces a couple of patches for maximum/minimum and -multiplication/addition for booleans to match NumPy behaviour. -Thanks to Luke Shaw for these contributions. +NumExpr 2.14.0 introduces a couple of patches for tan/tanh and +adds static typing support. +Thanks to Luke Shaw and Joren Hammudoglu (@jorenham) for these contributions. Project documentation is available at: https://numexpr.readthedocs.io/ -Changes from 2.12.1 to 2.13.0 +Changes from 2.13.1 to 2.14.0 ----------------------------- -* New functionality has been added: - * Bitwise operators (and, or, not, xor): `&, |, ~, ^` - * New binary arithmetic operator for floor division: `//` - * New functions: `signbit`, `hypot`, `copysign`, `nextafter`, `maximum`, - `minimum`, `log2`, `trunc`, `round` and `sign`. - * Also enables integer outputs for integer inputs for - `abs`, `fmod`, `copy`, `ones_like`, `sign` and `round`. 
+* Numerical stability for overflow has been improved for ``tan`` and ``tanh`` + to handle possible overflows for complex numbers. - Thanks to Luke Shaw for the contributions. +* Static typing support has been added, making NumExpr compatible with + static type checkers like `mypy` and `pyright`. + Thanks to Joren Hammudoglu (@jorenham) for the work. -* New wheels for Python 3.14 and 3.14t are provided. What's Numexpr? --------------- diff --git a/RELEASE_NOTES.rst b/RELEASE_NOTES.rst index c937db2..b7faa31 100644 --- a/RELEASE_NOTES.rst +++ b/RELEASE_NOTES.rst @@ -1,11 +1,12 @@ ===================================== -Release notes for NumExpr 2.13 series +Release notes for NumExpr 2.14 series ===================================== -Changes from 2.13.1 to 2.13.2 +Changes from 2.13.1 to 2.14.0 ----------------------------- -* **Under development.** +* Numerical stability for overflow has been improved for ``tan`` and ``tanh`` + to handle possible overflows for complex numbers. * Static typing support has been added, making NumExpr compatible with static type checkers like `mypy` and `pyright`. diff --git a/VERSION b/VERSION index aa9132d..edcfe40 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -2.13.2.dev0 +2.14.0 From 4154b7b3e91d1b60873b588cb0d355c876b41235 Mon Sep 17 00:00:00 2001 From: lshaw8317 Date: Mon, 13 Oct 2025 10:45:20 +0200 Subject: [PATCH 142/166] Getting ready for release 2.14.0 --- RELEASE_NOTES.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/RELEASE_NOTES.rst b/RELEASE_NOTES.rst index b7faa31..aa63243 100644 --- a/RELEASE_NOTES.rst +++ b/RELEASE_NOTES.rst @@ -5,7 +5,7 @@ Release notes for NumExpr 2.14 series Changes from 2.13.1 to 2.14.0 ----------------------------- -* Numerical stability for overflow has been improved for ``tan`` and ``tanh`` +* Numerical stability for overflow has been improved for ``tan`` / ``tanh`` to handle possible overflows for complex numbers. 
* Static typing support has been added, making NumExpr compatible with From cd6c16d05a0de7f047cf03660844bd4253316ae8 Mon Sep 17 00:00:00 2001 From: lshaw8317 Date: Mon, 13 Oct 2025 13:23:49 +0200 Subject: [PATCH 143/166] Post 2.14.0 release actions done --- RELEASE_NOTES.rst | 6 ++++++ VERSION | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/RELEASE_NOTES.rst b/RELEASE_NOTES.rst index aa63243..1b11c43 100644 --- a/RELEASE_NOTES.rst +++ b/RELEASE_NOTES.rst @@ -2,6 +2,12 @@ Release notes for NumExpr 2.14 series ===================================== +Changes from 2.14.0 to 2.14.1 +----------------------------- + +* **Under development.** + + Changes from 2.13.1 to 2.14.0 ----------------------------- diff --git a/VERSION b/VERSION index edcfe40..7e52938 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -2.14.0 +2.14.1.dev0 From fcf995afc92f45048f1ce690eca50e960d829227 Mon Sep 17 00:00:00 2001 From: lshaw8317 Date: Mon, 13 Oct 2025 13:24:09 +0200 Subject: [PATCH 144/166] Post 2.14.0 release actions done --- ANNOUNCE.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ANNOUNCE.rst b/ANNOUNCE.rst index b08aeaf..7ce676c 100644 --- a/ANNOUNCE.rst +++ b/ANNOUNCE.rst @@ -4,7 +4,7 @@ Announcing NumExpr 2.14. Hi everyone, -NumExpr 2.14.0 introduces a couple of patches for tan/tanh and +NumExpr 2.14.0 introduces a couple of patches for tan / tanh and adds static typing support. Thanks to Luke Shaw and Joren Hammudoglu (@jorenham) for these contributions. @@ -15,7 +15,7 @@ https://numexpr.readthedocs.io/ Changes from 2.13.1 to 2.14.0 ----------------------------- -* Numerical stability for overflow has been improved for ``tan`` and ``tanh`` +* Numerical stability for overflow has been improved for ``tan`` and ``tanh`` to handle possible overflows for complex numbers. 
* Static typing support has been added, making NumExpr compatible with From bf8d7a6c8c74de742fd414b5a2726656073809b1 Mon Sep 17 00:00:00 2001 From: lshaw8317 Date: Mon, 13 Oct 2025 14:24:09 +0200 Subject: [PATCH 145/166] Add test for numpy 1.23 --- .github/workflows/build.yml | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index bfdd27d..48f781c 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -7,7 +7,7 @@ permissions: jobs: build_wheels: - name: Build wheels on ${{ matrix.os }} for ${{ matrix.arch }} + name: Build and test on ${{ matrix.os }}${{ matrix.numpy-version && format(' (numpy {0})', matrix.numpy-version) || '' }} for ${{ matrix.arch }} runs-on: ${{ matrix.runs-on || matrix.os }} permissions: contents: write @@ -23,21 +23,32 @@ jobs: - os: ubuntu-latest arch: x86_64 artifact_name: "linux-x86_64" + numpy-version: [null] + + # Linux x86_64 with numpy 1.23 + - os: ubuntu-latest + arch: x86_64 + artifact_name: "linux-x86_64_numpy1_23" + python-version: "3.12" + numpy-version: "1.23" # Linux ARM64 builds (native runners) - os: ubuntu-24.04-arm arch: aarch64 artifact_name: "linux-aarch64" + numpy-version: [null] # Windows builds - os: windows-latest arch: x86_64 artifact_name: "windows-x86_64" + numpy-version: [null] # macOS builds (universal2) - os: macos-latest arch: x86_64 artifact_name: "macos-universal2" + numpy-version: [null] steps: - uses: actions/checkout@v3 @@ -46,6 +57,10 @@ jobs: with: python-version: '3.x' + - name: Install specific numpy version + if: matrix.numpy-version + run: pip install "numpy==${{ matrix.numpy-version }}.*" + - name: Build wheels uses: pypa/cibuildwheel@v3.1.3 From 8461786f2b2465ac651c1a8ec384ead782cf73ad Mon Sep 17 00:00:00 2001 From: lshaw8317 Date: Mon, 13 Oct 2025 14:38:16 +0200 Subject: [PATCH 146/166] Correct build.yml file --- .github/workflows/build.yml | 11 +++++------ 1 file changed, 5 
insertions(+), 6 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 48f781c..90aedb3 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -23,39 +23,38 @@ jobs: - os: ubuntu-latest arch: x86_64 artifact_name: "linux-x86_64" - numpy-version: [null] + numpy-version: null # Linux x86_64 with numpy 1.23 - os: ubuntu-latest arch: x86_64 artifact_name: "linux-x86_64_numpy1_23" - python-version: "3.12" numpy-version: "1.23" # Linux ARM64 builds (native runners) - os: ubuntu-24.04-arm arch: aarch64 artifact_name: "linux-aarch64" - numpy-version: [null] + numpy-version: null # Windows builds - os: windows-latest arch: x86_64 artifact_name: "windows-x86_64" - numpy-version: [null] + numpy-version: null # macOS builds (universal2) - os: macos-latest arch: x86_64 artifact_name: "macos-universal2" - numpy-version: [null] + numpy-version: "null" steps: - uses: actions/checkout@v3 - uses: actions/setup-python@v3 name: Install Python with: - python-version: '3.x' + python-version: '3.12' - name: Install specific numpy version if: matrix.numpy-version From 48396e31124e25c40fc04eb72832ab3752760ba1 Mon Sep 17 00:00:00 2001 From: lshaw8317 Date: Mon, 13 Oct 2025 14:43:44 +0200 Subject: [PATCH 147/166] Correct build.yml file --- .github/workflows/build.yml | 4 ---- 1 file changed, 4 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 90aedb3..5ccbcc5 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -23,7 +23,6 @@ jobs: - os: ubuntu-latest arch: x86_64 artifact_name: "linux-x86_64" - numpy-version: null # Linux x86_64 with numpy 1.23 - os: ubuntu-latest @@ -35,19 +34,16 @@ jobs: - os: ubuntu-24.04-arm arch: aarch64 artifact_name: "linux-aarch64" - numpy-version: null # Windows builds - os: windows-latest arch: x86_64 artifact_name: "windows-x86_64" - numpy-version: null # macOS builds (universal2) - os: macos-latest arch: x86_64 artifact_name: 
"macos-universal2" - numpy-version: "null" steps: - uses: actions/checkout@v3 From e81babc832d02db64027c23339280c05bba87752 Mon Sep 17 00:00:00 2001 From: lshaw8317 Date: Mon, 13 Oct 2025 14:52:05 +0200 Subject: [PATCH 148/166] Push numpy requirement to 1.26 --- .github/workflows/build.yml | 2 +- requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 5ccbcc5..ad9c97f 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -28,7 +28,7 @@ jobs: - os: ubuntu-latest arch: x86_64 artifact_name: "linux-x86_64_numpy1_23" - numpy-version: "1.23" + numpy-version: "1.26" # Linux ARM64 builds (native runners) - os: ubuntu-24.04-arm diff --git a/requirements.txt b/requirements.txt index 1c52baf..a4c58eb 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1 +1 @@ -numpy >= 1.23.0 # keep in sync with NPY_TARGET_VERSION (setup.py) +numpy >= 1.26.0 # keep in sync with NPY_TARGET_VERSION (setup.py) From 40efd83c52a02c429bef7d52660f9d46605d6d3a Mon Sep 17 00:00:00 2001 From: lshaw8317 Date: Mon, 13 Oct 2025 15:23:21 +0200 Subject: [PATCH 149/166] Force use of numpy 1.26 --- .github/workflows/build.yml | 37 ++++++++++++++++++++++++++++++------- 1 file changed, 30 insertions(+), 7 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index ad9c97f..651390f 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -15,19 +15,28 @@ jobs: CIBW_ARCHS_LINUX: ${{ matrix.arch }} CIBW_ARCHS_MACOS: "x86_64 arm64" CIBW_ENABLE: cpython-freethreading + # Ensure cibuildwheel uses correct NumPy in isolated builds too + CIBW_BEFORE_BUILD: | + pip install --upgrade pip + if [ -n "${{ matrix.numpy-version }}" ]; then + pip install --force-reinstall "numpy==${{ matrix.numpy-version }}.*" + else + pip install --upgrade numpy + fi + python -c "import numpy; print('Using NumPy', numpy.__version__)" strategy: fail-fast: false matrix: 
include: - # Linux x86_64 builds + # Linux x86_64 builds (latest NumPy) - os: ubuntu-latest arch: x86_64 artifact_name: "linux-x86_64" - # Linux x86_64 with numpy 1.23 + # Linux x86_64 with NumPy 1.26 - os: ubuntu-latest arch: x86_64 - artifact_name: "linux-x86_64_numpy1_23" + artifact_name: "linux-x86_64_numpy1_26" numpy-version: "1.26" # Linux ARM64 builds (native runners) @@ -44,17 +53,31 @@ jobs: - os: macos-latest arch: x86_64 artifact_name: "macos-universal2" + steps: - uses: actions/checkout@v3 - - uses: actions/setup-python@v3 - name: Install Python + - name: Install Python + uses: actions/setup-python@v5 with: python-version: '3.12' + cache: false - - name: Install specific numpy version + - name: Install specific NumPy version if: matrix.numpy-version - run: pip install "numpy==${{ matrix.numpy-version }}.*" + run: | + pip install --upgrade pip + pip install --force-reinstall "numpy==${{ matrix.numpy-version }}.*" + python -c "import numpy; print('Installed NumPy version:', numpy.__version__)" + shell: bash + + - name: Ensure latest NumPy if not pinned + if: ${{ !matrix.numpy-version }} + run: | + pip install --upgrade pip + pip install --upgrade numpy + python -c "import numpy; print('Using latest NumPy:', numpy.__version__)" + shell: bash - name: Build wheels uses: pypa/cibuildwheel@v3.1.3 From 7307711b912ac1e139bfe83fdb7aff93f4f20f4e Mon Sep 17 00:00:00 2001 From: lshaw8317 Date: Mon, 13 Oct 2025 15:26:20 +0200 Subject: [PATCH 150/166] Fix cache error --- .github/workflows/build.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 651390f..7627d81 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -61,7 +61,6 @@ jobs: uses: actions/setup-python@v5 with: python-version: '3.12' - cache: false - name: Install specific NumPy version if: matrix.numpy-version From ed435570d3269402d326f40ce26c7df35265572a Mon Sep 17 00:00:00 2001 From: lshaw8317 Date: Mon, 13 Oct 2025 
15:29:26 +0200 Subject: [PATCH 151/166] Force local build with numpy 1.26 --- .github/workflows/build.yml | 40 +++++++++++-------------------------- 1 file changed, 12 insertions(+), 28 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 7627d81..806d017 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -15,28 +15,19 @@ jobs: CIBW_ARCHS_LINUX: ${{ matrix.arch }} CIBW_ARCHS_MACOS: "x86_64 arm64" CIBW_ENABLE: cpython-freethreading - # Ensure cibuildwheel uses correct NumPy in isolated builds too - CIBW_BEFORE_BUILD: | - pip install --upgrade pip - if [ -n "${{ matrix.numpy-version }}" ]; then - pip install --force-reinstall "numpy==${{ matrix.numpy-version }}.*" - else - pip install --upgrade numpy - fi - python -c "import numpy; print('Using NumPy', numpy.__version__)" strategy: fail-fast: false matrix: include: - # Linux x86_64 builds (latest NumPy) + # Linux x86_64 builds - os: ubuntu-latest arch: x86_64 artifact_name: "linux-x86_64" - # Linux x86_64 with NumPy 1.26 + # Linux x86_64 with numpy 1.23 - os: ubuntu-latest arch: x86_64 - artifact_name: "linux-x86_64_numpy1_26" + artifact_name: "linux-x86_64_numpy1_23" numpy-version: "1.26" # Linux ARM64 builds (native runners) @@ -53,30 +44,23 @@ jobs: - os: macos-latest arch: x86_64 artifact_name: "macos-universal2" - steps: - uses: actions/checkout@v3 - - name: Install Python - uses: actions/setup-python@v5 + - uses: actions/setup-python@v3 + name: Install Python with: python-version: '3.12' - - name: Install specific NumPy version + - name: Install specific numpy version if: matrix.numpy-version - run: | - pip install --upgrade pip - pip install --force-reinstall "numpy==${{ matrix.numpy-version }}.*" - python -c "import numpy; print('Installed NumPy version:', numpy.__version__)" - shell: bash + run: pip install "numpy==${{ matrix.numpy-version }}.*" - - name: Ensure latest NumPy if not pinned - if: ${{ !matrix.numpy-version }} - run: | - pip install 
--upgrade pip - pip install --upgrade numpy - python -c "import numpy; print('Using latest NumPy:', numpy.__version__)" - shell: bash + - name: Local Build + run: pip install -e . + + - name: Test + run: python -m pytest - name: Build wheels uses: pypa/cibuildwheel@v3.1.3 From 0e827b18865907dc8cce79f3aa31901392570187 Mon Sep 17 00:00:00 2001 From: lshaw8317 Date: Mon, 13 Oct 2025 15:33:27 +0200 Subject: [PATCH 152/166] Force tests with numpy 1.26 --- .github/workflows/build.yml | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 806d017..283c188 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -56,14 +56,15 @@ jobs: if: matrix.numpy-version run: pip install "numpy==${{ matrix.numpy-version }}.*" - - name: Local Build - run: pip install -e . - - - name: Test - run: python -m pytest - - name: Build wheels uses: pypa/cibuildwheel@v3.1.3 + env: + CIBW_BEFORE_BUILD: | + if [ -n "${{ matrix.numpy-version }}" ]; then + pip install --force-reinstall "numpy==${{ matrix.numpy-version }}.*" + python -c "import numpy; print('cibuildwheel NumPy version:', numpy.__version__)" + fi + - name: Make sdist if: ${{ matrix.os == 'windows-latest' }} From acd7eb0da381227ef5903fcee0c4163e0b18ffbb Mon Sep 17 00:00:00 2001 From: lshaw8317 Date: Mon, 13 Oct 2025 15:37:17 +0200 Subject: [PATCH 153/166] Force tests with numpy 1.26 --- .github/workflows/build.yml | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 283c188..b84d278 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -59,12 +59,9 @@ jobs: - name: Build wheels uses: pypa/cibuildwheel@v3.1.3 env: - CIBW_BEFORE_BUILD: | - if [ -n "${{ matrix.numpy-version }}" ]; then - pip install --force-reinstall "numpy==${{ matrix.numpy-version }}.*" - python -c "import numpy; print('cibuildwheel NumPy version:', 
numpy.__version__)" - fi - + CIBW_TEST_REQUIRES: "numpy==${{ matrix.numpy-version }}.* pytest" + CIBW_TEST_COMMAND: | + pytest - name: Make sdist if: ${{ matrix.os == 'windows-latest' }} From e27646b74237f97acae8c43b189079aa7baf5c34 Mon Sep 17 00:00:00 2001 From: lshaw8317 Date: Mon, 13 Oct 2025 15:42:50 +0200 Subject: [PATCH 154/166] Force local build and test with numpy 1.26 --- .github/workflows/build.yml | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index b84d278..976a42f 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -56,12 +56,16 @@ jobs: if: matrix.numpy-version run: pip install "numpy==${{ matrix.numpy-version }}.*" + - name: Local Build + run: pip install -e . + + - name: Test + run: | + pip install pytest + python -m pytest + - name: Build wheels uses: pypa/cibuildwheel@v3.1.3 - env: - CIBW_TEST_REQUIRES: "numpy==${{ matrix.numpy-version }}.* pytest" - CIBW_TEST_COMMAND: | - pytest - name: Make sdist if: ${{ matrix.os == 'windows-latest' }} From eb2f9997035c3865cc3e1ec8ed715f9f9dfe3b30 Mon Sep 17 00:00:00 2001 From: lshaw8317 Date: Mon, 13 Oct 2025 15:49:53 +0200 Subject: [PATCH 155/166] Revert "Merge pull request #534 from jorenham/static-typing" This reverts commit bce38b4317746a3e7a33c4489a3bc81898fd7c36, reversing changes made to cd730c826fa2ac16d145703a94a90b56e158cd5d. 
--- .github/workflows/typecheck.yml | 36 --- .pre-commit-config.yaml | 13 +- MANIFEST.in | 2 +- RELEASE_NOTES.rst | 1 - numexpr/__init__.py | 50 ++-- numexpr/cpuinfo.py | 513 +++++++++++++++----------------- numexpr/expressions.py | 225 ++++++-------- numexpr/interpreter.pyi | 52 ---- numexpr/necompiler.py | 297 ++++++++---------- numexpr/py.typed | 0 numexpr/tests/__init__.py | 5 +- numexpr/tests/conftest.py | 2 +- numexpr/tests/test_numexpr.py | 1 - numexpr/utils.py | 116 +++----- numexpr/version.pyi | 6 - pyproject.toml | 21 -- setup.py | 2 - 17 files changed, 510 insertions(+), 832 deletions(-) delete mode 100644 .github/workflows/typecheck.yml delete mode 100644 numexpr/interpreter.pyi delete mode 100644 numexpr/py.typed delete mode 100644 numexpr/version.pyi diff --git a/.github/workflows/typecheck.yml b/.github/workflows/typecheck.yml deleted file mode 100644 index a6ca065..0000000 --- a/.github/workflows/typecheck.yml +++ /dev/null @@ -1,36 +0,0 @@ -name: Validate static types -permissions: read-all - -on: - pull_request: - paths: - - .github/workflows/typecheck.yml - - numexpr/* - - pyproject.toml - workflow_dispatch: - -concurrency: - group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} - cancel-in-progress: true - -jobs: - typecheck_quaddtype: - runs-on: ubuntu-latest - timeout-minutes: 2 - - steps: - - uses: actions/checkout@v5.0.0 - - - uses: astral-sh/setup-uv@v6.7.0 - with: - activate-environment: true - python-version: "3.10" - - - name: install - run: uv pip install mypy pyright pytest . - - - name: pyright - run: pyright - - - name: mypy - run: mypy --no-incremental --cache-dir=/dev/null . 
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 35c3d40..3235b5d 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -18,10 +18,9 @@ repos: hooks: - id: isort -- repo: https://github.com/pre-commit/mirrors-mypy - rev: v1.18.2 - hooks: - - id: mypy - args: [--config-file=pyproject.toml] - exclude: ^(bench/|build/|doc/|issues/|setup.py) - additional_dependencies: [numpy, pytest] +# Too many things to fix, let's just ignore it for now +#- repo: https://github.com/pre-commit/mirrors-mypy +# rev: v1.8.0 +# hooks: +# - id: mypy +# exclude: ^(docs/|setup.py) diff --git a/MANIFEST.in b/MANIFEST.in index 886a494..4ec8d9f 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -2,7 +2,7 @@ include MANIFEST.in VERSION include *.rst *.txt *.cfg site.cfg.example -recursive-include numexpr *.cpp *.hpp *.py *.pyi py.typed +recursive-include numexpr *.cpp *.hpp *.py recursive-include numexpr/win32 *.c *.h exclude numexpr/__config__.py RELEASING.txt site.cfg diff --git a/RELEASE_NOTES.rst b/RELEASE_NOTES.rst index 1b11c43..0294048 100644 --- a/RELEASE_NOTES.rst +++ b/RELEASE_NOTES.rst @@ -25,7 +25,6 @@ Changes from 2.13.0 to 2.13.1 * Patch to maximum/minimum functions in order to match NumPy NaN handling * Patch to convert '+'->'|' and '*'->'&' for booleans - Changes from 2.12.1 to 2.13.0 ----------------------------- diff --git a/numexpr/__init__.py b/numexpr/__init__.py index af9defc..63bb9e9 100644 --- a/numexpr/__init__.py +++ b/numexpr/__init__.py @@ -21,62 +21,46 @@ """ -from typing import TYPE_CHECKING, Final +from numexpr.interpreter import __BLOCK_SIZE1__, MAX_THREADS, use_vml -if TYPE_CHECKING: - import unittest - -# the `import _ as _` are needed for mypy to understand these are re-exports - -from numexpr.interpreter import __BLOCK_SIZE1__ as __BLOCK_SIZE1__ -from numexpr.interpreter import MAX_THREADS as MAX_THREADS -from numexpr.interpreter import use_vml as use_vml - -is_cpu_amd_intel: Final = False # DEPRECATION WARNING: WILL BE REMOVED 
IN FUTURE RELEASE +is_cpu_amd_intel = False # DEPRECATION WARNING: WILL BE REMOVED IN FUTURE RELEASE # cpuinfo imports were moved into the test submodule function that calls them # to improve import times. -from numexpr.expressions import E as E -from numexpr.necompiler import NumExpr as NumExpr -from numexpr.necompiler import disassemble as disassemble -from numexpr.necompiler import evaluate as evaluate -from numexpr.necompiler import re_evaluate as re_evaluate -from numexpr.necompiler import validate as validate -from numexpr.utils import _init_num_threads -from numexpr.utils import detect_number_of_cores as detect_number_of_cores -from numexpr.utils import detect_number_of_threads as detect_number_of_threads -from numexpr.utils import get_num_threads as get_num_threads -from numexpr.utils import get_vml_version as get_vml_version -from numexpr.utils import set_num_threads as set_num_threads -from numexpr.utils import set_vml_accuracy_mode as set_vml_accuracy_mode -from numexpr.utils import set_vml_num_threads as set_vml_num_threads +from numexpr.expressions import E +from numexpr.necompiler import (NumExpr, disassemble, evaluate, re_evaluate, + validate) +from numexpr.utils import (_init_num_threads, detect_number_of_cores, + detect_number_of_threads, get_num_threads, + get_vml_version, set_num_threads, + set_vml_accuracy_mode, set_vml_num_threads) # Detect the number of cores -ncores: Final = detect_number_of_cores() +ncores = detect_number_of_cores() # Initialize the number of threads to be used -nthreads: Final = _init_num_threads() +nthreads = _init_num_threads() # The default for VML is 1 thread (see #39) # set_vml_num_threads(1) -from . import version as version +from . 
import version -__version__: Final = version.version +__version__ = version.version -def print_versions() -> None: +def print_versions(): """Print the versions of software that numexpr relies on.""" try: import numexpr.tests - return numexpr.tests.print_versions() # type: ignore[no-untyped-call] + return numexpr.tests.print_versions() except ImportError: # To maintain Python 2.6 compatibility we have simple error handling raise ImportError('`numexpr.tests` could not be imported, likely it was excluded from the distribution.') -def test(verbosity: int = 1) -> "unittest.result.TestResult": +def test(verbosity=1): """Run all the tests in the test suite.""" try: import numexpr.tests - return numexpr.tests.test(verbosity=verbosity) # type: ignore[no-untyped-call] + return numexpr.tests.test(verbosity=verbosity) except ImportError: # To maintain Python 2.6 compatibility we have simple error handling raise ImportError('`numexpr.tests` could not be imported, likely it was excluded from the distribution.') diff --git a/numexpr/cpuinfo.py b/numexpr/cpuinfo.py index 9a4e5be..897a4ca 100755 --- a/numexpr/cpuinfo.py +++ b/numexpr/cpuinfo.py @@ -29,33 +29,25 @@ import re import subprocess import sys +import types import warnings -from typing import (Any, Callable, ClassVar, Container, Final, Generator, - NoReturn, Sequence, TypeAlias, TypeVar, overload) -_CMD: TypeAlias = str | Sequence[str] -_Statuses: TypeAlias = Container[int] +is_cpu_amd_intel = False # DEPRECATION WARNING: WILL BE REMOVED IN FUTURE RELEASE -is_cpu_amd_intel: Final = False # DEPRECATION WARNING: WILL BE REMOVED IN FUTURE RELEASE - -def getoutput(cmd: _CMD, - successful_status: _Statuses = (0,), - stacklevel: int = 1) -> tuple[bool, bytes]: +def getoutput(cmd, successful_status=(0,), stacklevel=1): try: p = subprocess.Popen(cmd, stdout=subprocess.PIPE) output, _ = p.communicate() status = p.returncode except EnvironmentError as e: warnings.warn(str(e), UserWarning, stacklevel=stacklevel) - return False, b'' + 
return False, '' if os.WIFEXITED(status) and os.WEXITSTATUS(status) in successful_status: return True, output return False, output -def command_info(successful_status: _Statuses = (0,), - stacklevel: int = 1, - **kw: _CMD) -> dict[str, bytes]: +def command_info(successful_status=(0,), stacklevel=1, **kw): info = {} for key in kw: ok, output = getoutput(kw[key], successful_status=successful_status, @@ -65,9 +57,7 @@ def command_info(successful_status: _Statuses = (0,), return info -def command_by_line(cmd: _CMD, - successful_status: _Statuses = (0,), - stacklevel: int = 1) -> Generator[str, None, None]: +def command_by_line(cmd, successful_status=(0,), stacklevel=1): ok, output = getoutput(cmd, successful_status=successful_status, stacklevel=stacklevel + 1) if not ok: @@ -80,10 +70,8 @@ def command_by_line(cmd: _CMD, yield line.strip() -def key_value_from_command(cmd: _CMD, - sep: str, - successful_status: _Statuses = (0,), - stacklevel: int = 1) -> dict[str, str]: +def key_value_from_command(cmd, sep, successful_status=(0,), + stacklevel=1): d = {} for line in command_by_line(cmd, successful_status=successful_status, stacklevel=stacklevel + 1): @@ -93,25 +81,18 @@ def key_value_from_command(cmd: _CMD, return d -_T = TypeVar('_T') - - -class CPUInfoBase: +class CPUInfoBase(object): """Holds CPU information and provides methods for requiring the availability of various CPU features. """ - @overload - def _try_call(self, func: Callable[..., NoReturn]) -> None: ... - @overload - def _try_call(self, func: Callable[[], _T]) -> _T | None: ... 
- def _try_call(self, func: Callable[[], _T]) -> _T | None: + def _try_call(self, func): try: return func() except: - return None + pass - def __getattr__(self, name: str) -> Callable[..., Any]: + def __getattr__(self, name): if not name.startswith('_'): if hasattr(self, '_' + name): attr = getattr(self, '_' + name) @@ -121,27 +102,25 @@ def __getattr__(self, name: str) -> Callable[..., Any]: return lambda: None raise AttributeError(name) - def _getNCPUs(self) -> int: + def _getNCPUs(self): return 1 - def __get_nbits(self) -> str: + def __get_nbits(self): abits = platform.architecture()[0] - match = re.compile(r'(\d+)bit').search(abits) - assert match, abits - return match.group(1) + nbits = re.compile(r'(\d+)bit').search(abits).group(1) + return nbits - def _is_32bit(self) -> bool: + def _is_32bit(self): return self.__get_nbits() == '32' - def _is_64bit(self) -> bool: + def _is_64bit(self): return self.__get_nbits() == '64' class LinuxCPUInfo(CPUInfoBase): - # This will never be `None` on (initialized) instances - info: list[dict[str, str]] = None # type: ignore[assignment] + info = None - def __init__(self) -> None: + def __init__(self): if self.info is not None: return info = [{}] @@ -164,64 +143,64 @@ def __init__(self) -> None: fo.close() self.__class__.info = info - def _not_impl(self) -> None: + def _not_impl(self): pass # Athlon - def _is_AMD(self) -> bool: + def _is_AMD(self): return self.info[0]['vendor_id'] == 'AuthenticAMD' - def _is_AthlonK6_2(self) -> bool: + def _is_AthlonK6_2(self): return self._is_AMD() and self.info[0]['model'] == '2' - def _is_AthlonK6_3(self) -> bool: + def _is_AthlonK6_3(self): return self._is_AMD() and self.info[0]['model'] == '3' - def _is_AthlonK6(self) -> bool: + def _is_AthlonK6(self): return re.match(r'.*?AMD-K6', self.info[0]['model name']) is not None - def _is_AthlonK7(self) -> bool: + def _is_AthlonK7(self): return re.match(r'.*?AMD-K7', self.info[0]['model name']) is not None - def _is_AthlonMP(self) -> bool: + def 
_is_AthlonMP(self): return re.match(r'.*?Athlon\(tm\) MP\b', self.info[0]['model name']) is not None - def _is_AMD64(self) -> bool: + def _is_AMD64(self): return self.is_AMD() and self.info[0]['family'] == '15' - def _is_Athlon64(self) -> bool: + def _is_Athlon64(self): return re.match(r'.*?Athlon\(tm\) 64\b', self.info[0]['model name']) is not None - def _is_AthlonHX(self) -> bool: + def _is_AthlonHX(self): return re.match(r'.*?Athlon HX\b', self.info[0]['model name']) is not None - def _is_Opteron(self) -> bool: + def _is_Opteron(self): return re.match(r'.*?Opteron\b', self.info[0]['model name']) is not None - def _is_Hammer(self) -> bool: + def _is_Hammer(self): return re.match(r'.*?Hammer\b', self.info[0]['model name']) is not None # Alpha - def _is_Alpha(self) -> bool: + def _is_Alpha(self): return self.info[0]['cpu'] == 'Alpha' - def _is_EV4(self) -> bool: + def _is_EV4(self): return self.is_Alpha() and self.info[0]['cpu model'] == 'EV4' - def _is_EV5(self) -> bool: + def _is_EV5(self): return self.is_Alpha() and self.info[0]['cpu model'] == 'EV5' - def _is_EV56(self) -> bool: + def _is_EV56(self): return self.is_Alpha() and self.info[0]['cpu model'] == 'EV56' - def _is_PCA56(self) -> bool: + def _is_PCA56(self): return self.is_Alpha() and self.info[0]['cpu model'] == 'PCA56' # Intel @@ -229,377 +208,356 @@ def _is_PCA56(self) -> bool: #XXX _is_i386 = _not_impl - def _is_Intel(self) -> bool: + def _is_Intel(self): return self.info[0]['vendor_id'] == 'GenuineIntel' - def _is_i486(self) -> bool: + def _is_i486(self): return self.info[0]['cpu'] == 'i486' - def _is_i586(self) -> bool: + def _is_i586(self): return self.is_Intel() and self.info[0]['cpu family'] == '5' - def _is_i686(self) -> bool: + def _is_i686(self): return self.is_Intel() and self.info[0]['cpu family'] == '6' - def _is_Celeron(self) -> bool: + def _is_Celeron(self): return re.match(r'.*?Celeron', self.info[0]['model name']) is not None - def _is_Pentium(self) -> bool: + def _is_Pentium(self): 
return re.match(r'.*?Pentium', self.info[0]['model name']) is not None - def _is_PentiumII(self) -> bool: + def _is_PentiumII(self): return re.match(r'.*?Pentium.*?II\b', self.info[0]['model name']) is not None - def _is_PentiumPro(self) -> bool: + def _is_PentiumPro(self): return re.match(r'.*?PentiumPro\b', self.info[0]['model name']) is not None - def _is_PentiumMMX(self) -> bool: + def _is_PentiumMMX(self): return re.match(r'.*?Pentium.*?MMX\b', self.info[0]['model name']) is not None - def _is_PentiumIII(self) -> bool: + def _is_PentiumIII(self): return re.match(r'.*?Pentium.*?III\b', self.info[0]['model name']) is not None - def _is_PentiumIV(self) -> bool: + def _is_PentiumIV(self): return re.match(r'.*?Pentium.*?(IV|4)\b', self.info[0]['model name']) is not None - def _is_PentiumM(self) -> bool: + def _is_PentiumM(self): return re.match(r'.*?Pentium.*?M\b', self.info[0]['model name']) is not None - def _is_Prescott(self) -> bool: + def _is_Prescott(self): return self.is_PentiumIV() and self.has_sse3() - def _is_Nocona(self) -> bool: + def _is_Nocona(self): return (self.is_Intel() and self.info[0]['cpu family'] in ('6', '15') and # two s sse3; three s ssse3 not the same thing, this is fine (self.has_sse3() and not self.has_ssse3()) and re.match(r'.*?\blm\b', self.info[0]['flags']) is not None) - def _is_Core2(self) -> bool: + def _is_Core2(self): return (self.is_64bit() and self.is_Intel() and re.match(r'.*?Core\(TM\)2\b', self.info[0]['model name']) is not None) - def _is_Itanium(self) -> bool: + def _is_Itanium(self): return re.match(r'.*?Itanium\b', self.info[0]['family']) is not None - def _is_XEON(self) -> bool: + def _is_XEON(self): return re.match(r'.*?XEON\b', self.info[0]['model name'], re.IGNORECASE) is not None _is_Xeon = _is_XEON # Power - def _is_Power(self) -> bool: + def _is_Power(self): return re.match(r'.*POWER.*', self.info[0]['cpu']) is not None - def _is_Power7(self) -> bool: + def _is_Power7(self): return re.match(r'.*POWER7.*', 
self.info[0]['cpu']) is not None - def _is_Power8(self) -> bool: + def _is_Power8(self): return re.match(r'.*POWER8.*', self.info[0]['cpu']) is not None - def _is_Power9(self) -> bool: + def _is_Power9(self): return re.match(r'.*POWER9.*', self.info[0]['cpu']) is not None - def _has_Altivec(self) -> bool: + def _has_Altivec(self): return re.match(r'.*altivec\ supported.*', self.info[0]['cpu']) is not None # Varia - def _is_singleCPU(self) -> bool: + def _is_singleCPU(self): return len(self.info) == 1 - def _getNCPUs(self) -> int: + def _getNCPUs(self): return len(self.info) - def _has_fdiv_bug(self) -> bool: + def _has_fdiv_bug(self): return self.info[0]['fdiv_bug'] == 'yes' - def _has_f00f_bug(self) -> bool: + def _has_f00f_bug(self): return self.info[0]['f00f_bug'] == 'yes' - def _has_mmx(self) -> bool: + def _has_mmx(self): return re.match(r'.*?\bmmx\b', self.info[0]['flags']) is not None - def _has_sse(self) -> bool: + def _has_sse(self): return re.match(r'.*?\bsse\b', self.info[0]['flags']) is not None - def _has_sse2(self) -> bool: + def _has_sse2(self): return re.match(r'.*?\bsse2\b', self.info[0]['flags']) is not None - def _has_sse3(self) -> bool: + def _has_sse3(self): return re.match(r'.*?\bpni\b', self.info[0]['flags']) is not None - def _has_ssse3(self) -> bool: + def _has_ssse3(self): return re.match(r'.*?\bssse3\b', self.info[0]['flags']) is not None - def _has_3dnow(self) -> bool: + def _has_3dnow(self): return re.match(r'.*?\b3dnow\b', self.info[0]['flags']) is not None - def _has_3dnowext(self) -> bool: + def _has_3dnowext(self): return re.match(r'.*?\b3dnowext\b', self.info[0]['flags']) is not None class IRIXCPUInfo(CPUInfoBase): - # The first initialized instance will set this class variable - info: ClassVar[dict[str, str]] = None # type: ignore[assignment] + info = None - def __init__(self) -> None: + def __init__(self): if self.info is not None: return info = key_value_from_command('sysconf', sep=' ', successful_status=(0, 1)) 
self.__class__.info = info - def _not_impl(self) -> None: + def _not_impl(self): pass - def _is_singleCPU(self) -> bool: + def _is_singleCPU(self): return self.info.get('NUM_PROCESSORS') == '1' - def _getNCPUs(self) -> int: + def _getNCPUs(self): return int(self.info.get('NUM_PROCESSORS', 1)) - def __cputype(self, n: int | str) -> bool: - return self.info['PROCESSORS'].split()[0].lower() == 'r%s' % (n) + def __cputype(self, n): + return self.info.get('PROCESSORS').split()[0].lower() == 'r%s' % (n) - def _is_r2000(self) -> bool: + def _is_r2000(self): return self.__cputype(2000) - def _is_r3000(self) -> bool: + def _is_r3000(self): return self.__cputype(3000) - def _is_r3900(self) -> bool: + def _is_r3900(self): return self.__cputype(3900) - def _is_r4000(self) -> bool: + def _is_r4000(self): return self.__cputype(4000) - def _is_r4100(self) -> bool: + def _is_r4100(self): return self.__cputype(4100) - def _is_r4300(self) -> bool: + def _is_r4300(self): return self.__cputype(4300) - def _is_r4400(self) -> bool: + def _is_r4400(self): return self.__cputype(4400) - def _is_r4600(self) -> bool: + def _is_r4600(self): return self.__cputype(4600) - def _is_r4650(self) -> bool: + def _is_r4650(self): return self.__cputype(4650) - def _is_r5000(self) -> bool: + def _is_r5000(self): return self.__cputype(5000) - def _is_r6000(self) -> bool: + def _is_r6000(self): return self.__cputype(6000) - def _is_r8000(self) -> bool: + def _is_r8000(self): return self.__cputype(8000) - def _is_r10000(self) -> bool: + def _is_r10000(self): return self.__cputype(10000) - def _is_r12000(self) -> bool: + def _is_r12000(self): return self.__cputype(12000) - def _is_rorion(self) -> bool: + def _is_rorion(self): return self.__cputype('orion') - def get_ip(self) -> str | None: + def get_ip(self): try: return self.info.get('MACHINE') except: - return None + pass - def __machine(self, n: int) -> bool: - return self.info['MACHINE'].lower() == 'ip%s' % (n) + def __machine(self, n): + return 
self.info.get('MACHINE').lower() == 'ip%s' % (n) - def _is_IP19(self) -> bool: + def _is_IP19(self): return self.__machine(19) - def _is_IP20(self) -> bool: + def _is_IP20(self): return self.__machine(20) - def _is_IP21(self) -> bool: + def _is_IP21(self): return self.__machine(21) - def _is_IP22(self) -> bool: + def _is_IP22(self): return self.__machine(22) - def _is_IP22_4k(self) -> bool: + def _is_IP22_4k(self): return self.__machine(22) and self._is_r4000() - def _is_IP22_5k(self) -> bool: + def _is_IP22_5k(self): return self.__machine(22) and self._is_r5000() - def _is_IP24(self) -> bool: + def _is_IP24(self): return self.__machine(24) - def _is_IP25(self) -> bool: + def _is_IP25(self): return self.__machine(25) - def _is_IP26(self) -> bool: + def _is_IP26(self): return self.__machine(26) - def _is_IP27(self) -> bool: + def _is_IP27(self): return self.__machine(27) - def _is_IP28(self) -> bool: + def _is_IP28(self): return self.__machine(28) - def _is_IP30(self) -> bool: + def _is_IP30(self): return self.__machine(30) - def _is_IP32(self) -> bool: + def _is_IP32(self): return self.__machine(32) - def _is_IP32_5k(self) -> bool: + def _is_IP32_5k(self): return self.__machine(32) and self._is_r5000() - def _is_IP32_10k(self) -> bool: + def _is_IP32_10k(self): return self.__machine(32) and self._is_r10000() class DarwinCPUInfo(CPUInfoBase): - # The first initialized instance will set this class variable - info: ClassVar[dict[str, Any]] = None # type: ignore[assignment] + info = None - def __init__(self) -> None: + def __init__(self): if self.info is not None: return - info: dict[str, bytes | Any] = command_info(arch='arch', machine='machine') + info = command_info(arch='arch', + machine='machine') info['sysctl_hw'] = key_value_from_command(['sysctl', 'hw'], sep='=') self.__class__.info = info - def _not_impl(self) -> None: - pass + def _not_impl(self): pass - def _getNCPUs(self) -> int: + def _getNCPUs(self): return int(self.info['sysctl_hw'].get('hw.ncpu', 1)) - 
def _is_Power_Macintosh(self) -> bool: + def _is_Power_Macintosh(self): return self.info['sysctl_hw']['hw.machine'] == 'Power Macintosh' - def _is_i386(self) -> bool: + def _is_i386(self): return self.info['arch'] == 'i386' - def _is_ppc(self) -> bool: + def _is_ppc(self): return self.info['arch'] == 'ppc' - def __machine(self, n: int | str) -> bool: + def __machine(self, n): return self.info['machine'] == 'ppc%s' % n - def _is_ppc601(self) -> bool: - return self.__machine(601) + def _is_ppc601(self): return self.__machine(601) - def _is_ppc602(self) -> bool: - return self.__machine(602) + def _is_ppc602(self): return self.__machine(602) - def _is_ppc603(self) -> bool: - return self.__machine(603) + def _is_ppc603(self): return self.__machine(603) - def _is_ppc603e(self) -> bool: - return self.__machine('603e') + def _is_ppc603e(self): return self.__machine('603e') - def _is_ppc604(self) -> bool: - return self.__machine(604) + def _is_ppc604(self): return self.__machine(604) - def _is_ppc604e(self) -> bool: - return self.__machine('604e') + def _is_ppc604e(self): return self.__machine('604e') - def _is_ppc620(self) -> bool: - return self.__machine(620) + def _is_ppc620(self): return self.__machine(620) - def _is_ppc630(self) -> bool: - return self.__machine(630) + def _is_ppc630(self): return self.__machine(630) - def _is_ppc740(self) -> bool: - return self.__machine(740) + def _is_ppc740(self): return self.__machine(740) - def _is_ppc7400(self) -> bool: - return self.__machine(7400) + def _is_ppc7400(self): return self.__machine(7400) - def _is_ppc7450(self) -> bool: - return self.__machine(7450) + def _is_ppc7450(self): return self.__machine(7450) - def _is_ppc750(self) -> bool: - return self.__machine(750) + def _is_ppc750(self): return self.__machine(750) - def _is_ppc403(self) -> bool: - return self.__machine(403) + def _is_ppc403(self): return self.__machine(403) - def _is_ppc505(self) -> bool: - return self.__machine(505) + def _is_ppc505(self): return 
self.__machine(505) - def _is_ppc801(self) -> bool: - return self.__machine(801) + def _is_ppc801(self): return self.__machine(801) - def _is_ppc821(self) -> bool: - return self.__machine(821) + def _is_ppc821(self): return self.__machine(821) - def _is_ppc823(self) -> bool: - return self.__machine(823) + def _is_ppc823(self): return self.__machine(823) - def _is_ppc860(self) -> bool: - return self.__machine(860) + def _is_ppc860(self): return self.__machine(860) class NetBSDCPUInfo(CPUInfoBase): - # The first initialized instance will set this class variable - info: ClassVar[dict[str, Any]] = None # type: ignore[assignment] + info = None - def __init__(self) -> None: - if self.info is not None: - return - sysctl_hw = key_value_from_command(['sysctl', 'hw'], sep='=') - self.__class__.info = { - 'sysctl_hw': sysctl_hw, - 'arch': sysctl_hw.get('hw.machine_arch', 1), - 'machine': sysctl_hw.get('hw.machine', 1), - } - - def _not_impl(self) -> None: - pass + def __init__(self): + if self.info is not None: + return + info = {} + info['sysctl_hw'] = key_value_from_command(['sysctl', 'hw'], sep='=') + info['arch'] = info['sysctl_hw'].get('hw.machine_arch', 1) + info['machine'] = info['sysctl_hw'].get('hw.machine', 1) + self.__class__.info = info - def _getNCPUs(self) -> int: - return int(self.info['sysctl_hw'].get('hw.ncpu', 1)) + def _not_impl(self): pass + + def _getNCPUs(self): + return int(self.info['sysctl_hw'].get('hw.ncpu', 1)) - def _is_Intel(self) -> bool: - return self.info['sysctl_hw'].get('hw.model', "")[:5] == 'Intel' + def _is_Intel(self): + if self.info['sysctl_hw'].get('hw.model', "")[0:5] == 'Intel': + return True + return False - def _is_AMD(self) -> bool: - return self.info['sysctl_hw'].get('hw.model', "")[:3] == 'AMD' + def _is_AMD(self): + if self.info['sysctl_hw'].get('hw.model', "")[0:3] == 'AMD': + return True + return False class SunOSCPUInfo(CPUInfoBase): - # The first initialized instance will set this class variable - info: ClassVar[dict[str, 
Any]] = None # type: ignore[assignment] + info = None - def __init__(self) -> None: + def __init__(self): if self.info is not None: return - info: dict[str, Any] = command_info( - arch='arch', - mach='mach', - uname_i=['uname', '-i'], - isainfo_b=['isainfo', '-b'], - isainfo_n=['isainfo', '-n'], + info = command_info(arch='arch', + mach='mach', + uname_i=['uname', '-i'], + isainfo_b=['isainfo', '-b'], + isainfo_n=['isainfo', '-n'], ) info['uname_X'] = key_value_from_command(['uname', '-X'], sep='=') for line in command_by_line(['psrinfo', '-v', '0']): @@ -609,115 +567,116 @@ def __init__(self) -> None: break self.__class__.info = info - def _not_impl(self) -> None: + def _not_impl(self): pass - def _is_i386(self) -> bool: + def _is_i386(self): return self.info['isainfo_n'] == 'i386' - def _is_sparc(self) -> bool: + def _is_sparc(self): return self.info['isainfo_n'] == 'sparc' - def _is_sparcv9(self) -> bool: + def _is_sparcv9(self): return self.info['isainfo_n'] == 'sparcv9' - def _getNCPUs(self) -> int: + def _getNCPUs(self): return int(self.info['uname_X'].get('NumCPU', 1)) - def _is_sun4(self) -> bool: + def _is_sun4(self): return self.info['arch'] == 'sun4' - def _is_SUNW(self) -> bool: + def _is_SUNW(self): return re.match(r'SUNW', self.info['uname_i']) is not None - def _is_sparcstation5(self) -> bool: + def _is_sparcstation5(self): return re.match(r'.*SPARCstation-5', self.info['uname_i']) is not None - def _is_ultra1(self) -> bool: + def _is_ultra1(self): return re.match(r'.*Ultra-1', self.info['uname_i']) is not None - def _is_ultra250(self) -> bool: + def _is_ultra250(self): return re.match(r'.*Ultra-250', self.info['uname_i']) is not None - def _is_ultra2(self) -> bool: + def _is_ultra2(self): return re.match(r'.*Ultra-2', self.info['uname_i']) is not None - def _is_ultra30(self) -> bool: + def _is_ultra30(self): return re.match(r'.*Ultra-30', self.info['uname_i']) is not None - def _is_ultra4(self) -> bool: + def _is_ultra4(self): return 
re.match(r'.*Ultra-4', self.info['uname_i']) is not None - def _is_ultra5_10(self) -> bool: + def _is_ultra5_10(self): return re.match(r'.*Ultra-5_10', self.info['uname_i']) is not None - def _is_ultra5(self) -> bool: + def _is_ultra5(self): return re.match(r'.*Ultra-5', self.info['uname_i']) is not None - def _is_ultra60(self) -> bool: + def _is_ultra60(self): return re.match(r'.*Ultra-60', self.info['uname_i']) is not None - def _is_ultra80(self) -> bool: + def _is_ultra80(self): return re.match(r'.*Ultra-80', self.info['uname_i']) is not None - def _is_ultraenterprice(self) -> bool: + def _is_ultraenterprice(self): return re.match(r'.*Ultra-Enterprise', self.info['uname_i']) is not None - def _is_ultraenterprice10k(self) -> bool: + def _is_ultraenterprice10k(self): return re.match(r'.*Ultra-Enterprise-10000', self.info['uname_i']) is not None - def _is_sunfire(self) -> bool: + def _is_sunfire(self): return re.match(r'.*Sun-Fire', self.info['uname_i']) is not None - def _is_ultra(self) -> bool: + def _is_ultra(self): return re.match(r'.*Ultra', self.info['uname_i']) is not None - def _is_cpusparcv7(self) -> bool: + def _is_cpusparcv7(self): return self.info['processor'] == 'sparcv7' - def _is_cpusparcv8(self) -> bool: + def _is_cpusparcv8(self): return self.info['processor'] == 'sparcv8' - def _is_cpusparcv9(self) -> bool: + def _is_cpusparcv9(self): return self.info['processor'] == 'sparcv9' class Win32CPUInfo(CPUInfoBase): - # The first initialized instance will set this class variable - info: ClassVar[list[dict[str, Any]]] = None # type: ignore[assignment] - - pkey: ClassVar = r"HARDWARE\DESCRIPTION\System\CentralProcessor" + info = None + pkey = r"HARDWARE\DESCRIPTION\System\CentralProcessor" # XXX: what does the value of # HKEY_LOCAL_MACHINE\HARDWARE\DESCRIPTION\System\CentralProcessor\0 # mean? 
- def __init__(self) -> None: - import winreg as _winreg + def __init__(self): + try: + import _winreg + except ImportError: # Python 3 + import winreg as _winreg if self.info is not None: return - info: list[dict[str, Any]] = [] + info = [] try: #XXX: Bad style to use so long `try:...except:...`. Fix it! prgx = re.compile(r"family\s+(?P<FML>\d+)\s+model\s+(?P<MDL>\d+)" r"\s+stepping\s+(?P<STP>\d+)", re.IGNORECASE) - chnd = _winreg.OpenKey(_winreg.HKEY_LOCAL_MACHINE, self.pkey) # pyright: ignore[reportAttributeAccessIssue] + chnd = _winreg.OpenKey(_winreg.HKEY_LOCAL_MACHINE, self.pkey) pnum = 0 while 1: try: - proc = _winreg.EnumKey(chnd, pnum) # pyright: ignore[reportAttributeAccessIssue] - except _winreg.error: # pyright: ignore[reportAttributeAccessIssue] + proc = _winreg.EnumKey(chnd, pnum) + except _winreg.error: break else: pnum += 1 info.append({"Processor": proc}) - phnd = _winreg.OpenKey(chnd, proc) # pyright: ignore[reportAttributeAccessIssue] + phnd = _winreg.OpenKey(chnd, proc) pidx = 0 while True: try: - name, value, vtpe = _winreg.EnumValue(phnd, pidx) # pyright: ignore[reportAttributeAccessIssue] - except _winreg.error: # pyright: ignore[reportAttributeAccessIssue] + name, value, vtpe = _winreg.EnumValue(phnd, pidx) + except _winreg.error: break else: pidx = pidx + 1 @@ -729,105 +688,105 @@ def __init__(self) -> None: info[-1]["Model"] = int(srch.group("MDL")) info[-1]["Stepping"] = int(srch.group("STP")) except: - print(sys.exc_info()[1], '(ignoring)') + print(sys.exc_value, '(ignoring)') self.__class__.info = info - def _not_impl(self) -> None: + def _not_impl(self): pass # Athlon - def _is_AMD(self) -> bool: + def _is_AMD(self): return self.info[0]['VendorIdentifier'] == 'AuthenticAMD' - def _is_Am486(self) -> bool: + def _is_Am486(self): return self.is_AMD() and self.info[0]['Family'] == 4 - def _is_Am5x86(self) -> bool: + def _is_Am5x86(self): return self.is_AMD() and self.info[0]['Family'] == 4 - def _is_AMDK5(self) -> bool: + def _is_AMDK5(self): return
(self.is_AMD() and self.info[0]['Family'] == 5 and self.info[0]['Model'] in [0, 1, 2, 3]) - def _is_AMDK6(self) -> bool: + def _is_AMDK6(self): return (self.is_AMD() and self.info[0]['Family'] == 5 and self.info[0]['Model'] in [6, 7]) - def _is_AMDK6_2(self) -> bool: + def _is_AMDK6_2(self): return (self.is_AMD() and self.info[0]['Family'] == 5 and self.info[0]['Model'] == 8) - def _is_AMDK6_3(self) -> bool: + def _is_AMDK6_3(self): return (self.is_AMD() and self.info[0]['Family'] == 5 and self.info[0]['Model'] == 9) - def _is_AMDK7(self) -> bool: + def _is_AMDK7(self): return self.is_AMD() and self.info[0]['Family'] == 6 # To reliably distinguish between the different types of AMD64 chips # (Athlon64, Operton, Athlon64 X2, Semperon, Turion 64, etc.) would # require looking at the 'brand' from cpuid - def _is_AMD64(self) -> bool: + def _is_AMD64(self): return self.is_AMD() and self.info[0]['Family'] == 15 # Intel - def _is_Intel(self) -> bool: + def _is_Intel(self): return self.info[0]['VendorIdentifier'] == 'GenuineIntel' - def _is_i386(self) -> bool: + def _is_i386(self): return self.info[0]['Family'] == 3 - def _is_i486(self) -> bool: + def _is_i486(self): return self.info[0]['Family'] == 4 - def _is_i586(self) -> bool: + def _is_i586(self): return self.is_Intel() and self.info[0]['Family'] == 5 - def _is_i686(self) -> bool: + def _is_i686(self): return self.is_Intel() and self.info[0]['Family'] == 6 - def _is_Pentium(self) -> bool: + def _is_Pentium(self): return self.is_Intel() and self.info[0]['Family'] == 5 - def _is_PentiumMMX(self) -> bool: + def _is_PentiumMMX(self): return (self.is_Intel() and self.info[0]['Family'] == 5 and self.info[0]['Model'] == 4) - def _is_PentiumPro(self) -> bool: + def _is_PentiumPro(self): return (self.is_Intel() and self.info[0]['Family'] == 6 and self.info[0]['Model'] == 1) - def _is_PentiumII(self) -> bool: + def _is_PentiumII(self): return (self.is_Intel() and self.info[0]['Family'] == 6 and self.info[0]['Model'] in [3, 5, 
6]) - def _is_PentiumIII(self) -> bool: + def _is_PentiumIII(self): return (self.is_Intel() and self.info[0]['Family'] == 6 and self.info[0]['Model'] in [7, 8, 9, 10, 11]) - def _is_PentiumIV(self) -> bool: + def _is_PentiumIV(self): return self.is_Intel() and self.info[0]['Family'] == 15 - def _is_PentiumM(self) -> bool: + def _is_PentiumM(self): return (self.is_Intel() and self.info[0]['Family'] == 6 and self.info[0]['Model'] in [9, 13, 14]) - def _is_Core2(self) -> bool: + def _is_Core2(self): return (self.is_Intel() and self.info[0]['Family'] == 6 and self.info[0]['Model'] in [15, 16, 17]) # Varia - def _is_singleCPU(self) -> bool: + def _is_singleCPU(self): return len(self.info) == 1 - def _getNCPUs(self) -> int: + def _getNCPUs(self): return len(self.info) - def _has_mmx(self) -> bool: + def _has_mmx(self): if self.is_Intel(): return ((self.info[0]['Family'] == 5 and self.info[0]['Model'] == 4) or @@ -837,7 +796,7 @@ def _has_mmx(self) -> bool: else: return False - def _has_sse(self) -> bool: + def _has_sse(self): if self.is_Intel(): return ((self.info[0]['Family'] == 6 and self.info[0]['Model'] in [7, 8, 9, 10, 11]) or @@ -849,7 +808,7 @@ def _has_sse(self) -> bool: else: return False - def _has_sse2(self) -> bool: + def _has_sse2(self): if self.is_Intel(): return self.is_Pentium4() or self.is_PentiumM() or self.is_Core2() elif self.is_AMD(): @@ -857,10 +816,10 @@ def _has_sse2(self) -> bool: else: return False - def _has_3dnow(self) -> bool: + def _has_3dnow(self): return self.is_AMD() and self.info[0]['Family'] in [5, 6, 15] - def _has_3dnowext(self) -> bool: + def _has_3dnowext(self): return self.is_AMD() and self.info[0]['Family'] in [6, 15] diff --git a/numexpr/expressions.py b/numexpr/expressions.py index b62c3f8..cab0247 100644 --- a/numexpr/expressions.py +++ b/numexpr/expressions.py @@ -11,37 +11,34 @@ __all__ = ['E'] import operator +import sys import threading -import types -from typing import (TYPE_CHECKING, Any, Callable, ClassVar, Final, 
Iterable, - Mapping, NoReturn, TypeVar, cast) - -if TYPE_CHECKING: - from typing_extensions import Self, TypeIs import numpy # Declare a double type that does not exist in Python space -double = numpy.float64 +double = numpy.double # The default kind for undeclared variables default_kind = 'double' int_ = numpy.int32 long_ = numpy.int64 -type_to_kind: Final = {bool: 'bool', int_: 'int', long_: 'long', float: 'float', - double: 'double', complex: 'complex', bytes: 'bytes', str: 'str'} -kind_to_type: Final = {'bool': bool, 'int': int_, 'long': long_, 'float': float, - 'double': double, 'complex': complex, 'bytes': bytes, 'str': str} -kind_rank: Final = ('bool', 'int', 'long', 'float', 'double', 'complex', 'none') -scalar_constant_types: Final = (bool, int_, int, float, double, complex, bytes, str) +type_to_kind = {bool: 'bool', int_: 'int', long_: 'long', float: 'float', + double: 'double', complex: 'complex', bytes: 'bytes', str: 'str'} +kind_to_type = {'bool': bool, 'int': int_, 'long': long_, 'float': float, + 'double': double, 'complex': complex, 'bytes': bytes, 'str': str} +kind_rank = ('bool', 'int', 'long', 'float', 'double', 'complex', 'none') +scalar_constant_types = [bool, int_, int, float, double, complex, bytes, str] + +scalar_constant_types = tuple(scalar_constant_types) from numexpr import interpreter -class Expression: +class Expression(): - def __getattr__(self, name: str) -> Any: + def __getattr__(self, name): if name.startswith('_'): try: return self.__dict__[name] @@ -51,40 +48,38 @@ def __getattr__(self, name: str) -> Any: return VariableNode(name, default_kind) -E: Final = Expression() +E = Expression() class Context(threading.local): - def get(self, value: str, default: object) -> Any: + def get(self, value, default): return self.__dict__.get(value, default) - def get_current_context(self) -> dict[str, Any]: + def get_current_context(self): return self.__dict__ - def set_new_context(self, dict_: Mapping[str, Any]) -> None: + def 
set_new_context(self, dict_): self.__dict__.update(dict_) # This will be called each time the local object is used in a separate thread -_context: Final = Context() +_context = Context() -def get_optimization() -> str: +def get_optimization(): return _context.get('optimization', 'none') -_T = TypeVar('_T') - # helper functions for creating __magic__ methods -def ophelper(f: Callable[..., _T]) -> Callable[..., _T]: - def func(*args: 'ExpressionNode') -> _T: - arglist = list(args) +def ophelper(f): + def func(*args): + args = list(args) for i, x in enumerate(args): if isConstant(x): - arglist[i] = x = ConstantNode(x) + args[i] = x = ConstantNode(x) if not isinstance(x, ExpressionNode): raise TypeError("unsupported object type: %s" % type(x)) - return f(*arglist) + return f(*args) func.__name__ = f.__name__ func.__doc__ = f.__doc__ @@ -92,7 +87,7 @@ def func(*args: 'ExpressionNode') -> _T: return func -def allConstantNodes(args: Iterable[object]) -> bool: +def allConstantNodes(args): "returns True if args are all ConstantNodes." for x in args: if not isinstance(x, ConstantNode): @@ -100,12 +95,12 @@ def allConstantNodes(args: Iterable[object]) -> bool: return True -def isConstant(ex: object) -> "TypeIs[complex | bytes | str | numpy.number]": +def isConstant(ex): "Returns True if ex is a constant scalar of an allowed type." 
- return isinstance(ex, scalar_constant_types) # pyright: ignore[reportArgumentType] + return isinstance(ex, scalar_constant_types) -def commonKind(nodes: Iterable['ExpressionNode | RawNode']) -> str: +def commonKind(nodes): node_kinds = [node.astKind for node in nodes] str_count = node_kinds.count('bytes') + node_kinds.count('str') if 0 < str_count < len(node_kinds): # some args are strings, but not all @@ -122,7 +117,7 @@ def commonKind(nodes: Iterable['ExpressionNode | RawNode']) -> str: min_int32 = -max_int32 - 1 -def bestConstantType(x: object) -> type | None: +def bestConstantType(x): # ``numpy.string_`` is a subclass of ``bytes`` if isinstance(x, (bytes, str)): return bytes @@ -135,14 +130,14 @@ def bestConstantType(x: object) -> type | None: # ``long`` objects are kept as is to allow the user to force # promotion of results by using long constants, e.g. by operating # a 32-bit array with a long (64-bit) constant. - if isinstance(x, (long_, numpy.int64)): # type: ignore[misc] + if isinstance(x, (long_, numpy.int64)): return long_ # ``double`` objects are kept as is to allow the user to force # promotion of results by using double constants, e.g. by operating # a float (32-bit) array with a double (64-bit) constant. if isinstance(x, double): return double - if isinstance(x, numpy.float32): # pyright: ignore[reportArgumentType] + if isinstance(x, numpy.float32): return float if isinstance(x, (int, numpy.integer)): # Constants needing more than 32 bits are always @@ -155,29 +150,25 @@ def bestConstantType(x: object) -> type | None: # ``double`` too. 
for converter in float, complex: try: - y = converter(x) # type: ignore[arg-type, call-overload] + y = converter(x) except Exception as err: continue if y == x or numpy.isnan(y): return converter - return None -def getKind(x: object) -> str: +def getKind(x): converter = bestConstantType(x) - assert converter is not None return type_to_kind[converter] -def binop( - opname: str, reversed: bool = False, kind: str | None = None -) -> Callable[['ExpressionNode', 'ExpressionNode'], 'ExpressionNode']: +def binop(opname, reversed=False, kind=None): # Getting the named method from self (after reversal) does not # always work (e.g. int constants do not have a __lt__ method). opfunc = getattr(operator, "__%s__" % opname) @ophelper - def operation(self: 'ExpressionNode', other: 'ExpressionNode') -> 'ExpressionNode': + def operation(self, other): if reversed: self, other = other, self if allConstantNodes([self, other]): @@ -188,11 +179,9 @@ def operation(self: 'ExpressionNode', other: 'ExpressionNode') -> 'ExpressionNod return operation -def func( - func: Callable[..., Any], minkind: str | None = None, maxkind: str | None = None -) -> Callable[..., 'FuncNode | ConstantNode']: +def func(func, minkind=None, maxkind=None): @ophelper - def function(*args: 'ExpressionNode') -> 'FuncNode | ConstantNode': + def function(*args): if allConstantNodes(args): return ConstantNode(func(*[x.value for x in args])) kind = commonKind(args) @@ -215,23 +204,20 @@ def function(*args: 'ExpressionNode') -> 'FuncNode | ConstantNode': @ophelper -def where_func( - a: 'ExpressionNode', b: 'ExpressionNode', c: 'ExpressionNode' -) -> 'ExpressionNode': +def where_func(a, b, c): if isinstance(a, ConstantNode): return b if a.value else c if allConstantNodes([a, b, c]): - return ConstantNode(numpy.where(a, b, c)) # type: ignore[call-overload] + return ConstantNode(numpy.where(a, b, c)) return FuncNode('where', [a, b, c]) -def encode_axis(axis: 'ConstantNode | int | None') -> 'RawNode': +def encode_axis(axis): 
if isinstance(axis, ConstantNode): axis = axis.value if axis is None: axis = interpreter.allaxes else: - assert isinstance(axis, int) if axis < 0: raise ValueError("negative axis are not supported") if axis > 254: @@ -239,26 +225,24 @@ def encode_axis(axis: 'ConstantNode | int | None') -> 'RawNode': return RawNode(axis) -def gen_reduce_axis_func(name: str) -> Callable[..., 'ExpressionNode']: - def _func(a: object, axis: 'ConstantNode | int | None' = None) -> 'ExpressionNode': - _axis = encode_axis(axis) +def gen_reduce_axis_func(name): + def _func(a, axis=None): + axis = encode_axis(axis) if isinstance(a, ConstantNode): return a - if isinstance(a, (bool, int_, long_, float, double, complex)): # type: ignore[misc] - _a = ConstantNode(a) - else: - _a = cast('ExpressionNode', a) - return FuncNode(name, [_a, _axis], kind=_a.astKind) + if isinstance(a, (bool, int_, long_, float, double, complex)): + a = ConstantNode(a) + return FuncNode(name, [a, axis], kind=a.astKind) return _func @ophelper -def contains_func(a: 'ExpressionNode', b: 'ExpressionNode') -> 'FuncNode': +def contains_func(a, b): return FuncNode('contains', [a, b], kind='bool') @ophelper -def div_op(a: 'ExpressionNode', b: 'ExpressionNode') -> 'OpNode': +def div_op(a, b): if get_optimization() in ('moderate', 'aggressive'): if (isinstance(b, ConstantNode) and (a.astKind == b.astKind) and @@ -268,7 +252,7 @@ def div_op(a: 'ExpressionNode', b: 'ExpressionNode') -> 'OpNode': @ophelper -def truediv_op(a: 'ExpressionNode', b: 'ExpressionNode') -> 'OpNode': +def truediv_op(a, b): if get_optimization() in ('moderate', 'aggressive'): if (isinstance(b, ConstantNode) and (a.astKind == b.astKind) and @@ -281,12 +265,12 @@ def truediv_op(a: 'ExpressionNode', b: 'ExpressionNode') -> 'OpNode': @ophelper -def rtruediv_op(a: 'ExpressionNode', b: 'ExpressionNode') -> 'OpNode': +def rtruediv_op(a, b): return truediv_op(b, a) @ophelper -def pow_op(a: 'ExpressionNode', b: 'ExpressionNode') -> 'ExpressionNode': +def pow_op(a, 
b): if isinstance(b, ConstantNode): x = b.value @@ -302,9 +286,7 @@ def pow_op(a: 'ExpressionNode', b: 'ExpressionNode') -> 'ExpressionNode': n = int_(abs(x)) ishalfpower = int_(abs(2 * x)) % 2 - def multiply( - x: ExpressionNode | None, y: ExpressionNode - ) -> ExpressionNode: + def multiply(x, y): if x is None: return y return OpNode('mul', [x, y]) @@ -345,7 +327,7 @@ def multiply( return OpNode('pow', [a, b]) # The functions and the minimum and maximum types accepted -numpy.expm1x = numpy.expm1 # type: ignore[attr-defined] +numpy.expm1x = numpy.expm1 functions = { 'copy': func(numpy.copy), 'ones_like': func(numpy.ones_like), @@ -408,72 +390,58 @@ def multiply( } -class ExpressionNode: +class ExpressionNode(): """ An object that represents a generic number object. This implements the number special methods so that we can keep track of how this object has been used. """ - astType: ClassVar = 'generic' - astKind: Final[str] - - children: Final[tuple['ExpressionNode | RawNode', ...]] - value: Final[Any] - - def __init__( - self, - value: object | None = None, - kind: str | None = None, - children: Iterable['ExpressionNode | RawNode'] | None = None, - ) -> None: + astType = 'generic' + + def __init__(self, value=None, kind=None, children=None): self.value = value if kind is None: kind = 'none' self.astKind = kind - self.children = () if children is None else tuple(children) + if children is None: + self.children = () + else: + self.children = tuple(children) - def get_real(self) -> 'OpNode | ConstantNode': + def get_real(self): if self.astType == 'constant': return ConstantNode(complex(self.value).real) return OpNode('real', (self,), 'double') - if TYPE_CHECKING: - @property - def real(self) -> 'OpNode | ConstantNode': ... 
- else: - real = property(get_real) + real = property(get_real) - def get_imag(self) -> 'OpNode | ConstantNode': + def get_imag(self): if self.astType == 'constant': return ConstantNode(complex(self.value).imag) return OpNode('imag', (self,), 'double') - if TYPE_CHECKING: - @property - def imag(self) -> 'OpNode | ConstantNode': ... - else: - imag = property(get_imag) + imag = property(get_imag) - def __str__(self) -> str: + def __str__(self): return '%s(%s, %s, %s)' % (self.__class__.__name__, self.value, self.astKind, self.children) - def __repr__(self) -> str: + def __repr__(self): return self.__str__() - def __neg__(self) -> 'OpNode': + def __neg__(self): return OpNode('neg', (self,)) - def __invert__(self) -> 'OpNode': + def __invert__(self): return OpNode('invert', (self,)) - def __pos__(self) -> 'Self': + def __pos__(self): return self # The next check is commented out. See #24 for more info. - def __bool__(self) -> NoReturn: + def __bool__(self): raise TypeError("You can't use Python's standard boolean operators in " "NumExpr expressions. 
You should use their bitwise " "counterparts instead: '&' instead of 'and', " @@ -503,74 +471,64 @@ def __bool__(self) -> NoReturn: __gt__ = binop('gt', kind='bool') __ge__ = binop('ge', kind='bool') - __eq__ = binop('eq', kind='bool') # type: ignore[assignment] - __ne__ = binop('ne', kind='bool') # type: ignore[assignment] + __eq__ = binop('eq', kind='bool') + __ne__ = binop('ne', kind='bool') __lt__ = binop('gt', reversed=True, kind='bool') __le__ = binop('ge', reversed=True, kind='bool') class LeafNode(ExpressionNode): - leafNode: ClassVar = True + leafNode = True class VariableNode(LeafNode): - astType: ClassVar = 'variable' - - def __init__( - self, - value: object | None = None, - kind: str | None = None, - children: None = None, - ) -> None: + astType = 'variable' + + def __init__(self, value=None, kind=None, children=None): LeafNode.__init__(self, value=value, kind=kind) -class RawNode: +class RawNode(): """ Used to pass raw integers to interpreter. For instance, for selecting what function to use in func1. Purposely don't inherit from ExpressionNode, since we don't wan't this to be used for anything but being walked. 
""" - astType: ClassVar = 'raw' - astKind: ClassVar = 'none' + astType = 'raw' + astKind = 'none' - def __init__(self, value: object) -> None: + def __init__(self, value): self.value = value self.children = () - def __str__(self) -> str: + def __str__(self): return 'RawNode(%s)' % (self.value,) __repr__ = __str__ class ConstantNode(LeafNode): - astType: ClassVar = 'constant' + astType = 'constant' - def __init__(self, value: object | None = None, children: None = None): + def __init__(self, value=None, children=None): kind = getKind(value) # Python float constants are double precision by default if kind == 'float' and isinstance(value, float): kind = 'double' LeafNode.__init__(self, value=value, kind=kind) - def __neg__(self) -> 'ConstantNode': # type: ignore[override] + def __neg__(self): return ConstantNode(-self.value) - def __invert__(self) -> 'ConstantNode': # type: ignore[override] + def __invert__(self): return ConstantNode(~self.value) class OpNode(ExpressionNode): - astType: ClassVar = 'op' - - def __init__( - self, - opcode: str | None = None, - args: Iterable[ExpressionNode | RawNode] | None = None, - kind: str | None = None, - ) -> None: + astType = 'op' + + def __init__(self, opcode=None, args=None, kind=None): if (kind is None) and (args is not None): kind = commonKind(args) if kind=='bool': # handle bool*bool and bool+bool cases @@ -580,12 +538,7 @@ def __init__( class FuncNode(OpNode): - def __init__( - self, - opcode: str | None = None, - args: Iterable[ExpressionNode | RawNode] | None = None, - kind: str | None = None, - ) -> None: + def __init__(self, opcode=None, args=None, kind=None): if (kind is None) and (args is not None): kind = commonKind(args) if opcode in ("isnan", "isfinite", "isinf", "signbit"): # bodge for boolean return functions diff --git a/numexpr/interpreter.pyi b/numexpr/interpreter.pyi deleted file mode 100644 index 90f8d80..0000000 --- a/numexpr/interpreter.pyi +++ /dev/null @@ -1,52 +0,0 @@ -from collections.abc import 
Sequence -from typing import Any, Final, Literal, TypeAlias - -import numpy.typing as npt - -_VMLAccuracyMode: TypeAlias = Literal[0, 1, 2, 3] - -MAX_THREADS: Final[int] = ... -__BLOCK_SIZE1__: Final[int] = ... - -#ifdef USE_VML -def _get_vml_version() -> str: ... -def _set_vml_accuracy_mode(mode_in: _VMLAccuracyMode, /) -> _VMLAccuracyMode: ... -def _set_vml_num_threads(max_num_threads: int, /) -> None: ... -def _get_vml_num_threads() -> int: ... -#endif -def _get_num_threads() -> int: ... -def _set_num_threads(num_threads: int, /) -> int: ... - -allaxes: Final = 255 -funccodes: Final[dict[bytes, int]] = ... -maxdims: Final[int] = ... -opcodes: Final[dict[bytes, int]] = ... -use_vml: Final[bool] = ... - -class NumExpr: - signature: Final[bytes] - constsig: Final[bytes] - tempsig: Final[bytes] - fullsig: Final[bytes] - - program: Final[bytes] - constants: Final[Sequence[Any]] - input_names: Final[Sequence[str]] - - def __init__( - self, - signature: bytes, - tempsig: bytes, - program: bytes, - constants: Sequence[Any] = ..., - input_names: Sequence[str] | None = None, - ) -> None: ... - def run( - self, - *args: Any, - casting: str = ..., - order: str = ..., - ex_uses_vml: bool = ..., - out: npt.NDArray[Any] = ..., - ) -> Any: ... 
- __call__ = run diff --git a/numexpr/necompiler.py b/numexpr/necompiler.py index 77f655b..8b80737 100644 --- a/numexpr/necompiler.py +++ b/numexpr/necompiler.py @@ -14,20 +14,17 @@ import re import sys import threading -from typing import (TYPE_CHECKING, Any, ClassVar, Final, Generator, Iterable, - Iterator, Sequence, TypeAlias) - -if TYPE_CHECKING: - from typing_extensions import Unpack +from typing import Dict, Optional import numpy -is_cpu_amd_intel: Final = False # DEPRECATION WARNING: WILL BE REMOVED IN FUTURE RELEASE +is_cpu_amd_intel = False # DEPRECATION WARNING: WILL BE REMOVED IN FUTURE RELEASE from numexpr import expressions, interpreter, use_vml from numexpr.utils import CacheDict, ContextDict # Declare a double type that does not exist in Python space double = numpy.double +double = numpy.double int_ = numpy.int32 long_ = numpy.int64 @@ -89,9 +86,7 @@ ] - - -class ASTNode: +class ASTNode(): """Abstract Syntax Tree node. Members: @@ -103,25 +98,16 @@ class ASTNode: children -- the children below this node reg -- the register assigned to the result for this node. """ - cmpnames: ClassVar = 'astType', 'astKind', 'value', 'children' - - astType: str - astKind: str - value: Any - children: tuple['ASTNode', ...] 
- reg: 'Register | None' - - def __init__(self, astType: str = 'generic', - astKind: str = 'unknown', - value: object | None = None, - children: Iterable['ASTNode'] = ()) -> None: + cmpnames = ['astType', 'astKind', 'value', 'children'] + + def __init__(self, astType='generic', astKind='unknown', value=None, children=()): self.astType = astType self.astKind = astKind self.value = value self.children = tuple(children) self.reg = None - def __eq__(self, other: 'ASTNode') -> bool: # type: ignore[override] + def __eq__(self, other): if self.astType == 'alias': self = self.value if other.astType == 'alias': @@ -133,50 +119,50 @@ def __eq__(self, other: 'ASTNode') -> bool: # type: ignore[override] return False return True - def __lt__(self, other: 'ASTNode') -> bool: + def __lt__(self,other): # RAM: this is a fix for issue #88 whereby sorting on constants # that may be of astKind == 'complex' but type(self.value) == int or float # Here we let NumPy sort as it will cast data properly for comparison # when the Python built-ins will raise an error. 
if self.astType == 'constant': if self.astKind == other.astKind: - return bool(numpy.array(self.value) < numpy.array(other.value)) + return numpy.array(self.value) < numpy.array(other.value) return self.astKind < other.astKind else: raise TypeError('Sorting not implemented for astType: %s'%self.astType) - def __hash__(self) -> int: + def __hash__(self): if self.astType == 'alias': self = self.value return hash((self.astType, self.astKind, self.value, self.children)) - def __str__(self) -> str: + def __str__(self): return 'AST(%s, %s, %s, %s, %s)' % (self.astType, self.astKind, self.value, self.children, self.reg) - def __repr__(self) -> str: + def __repr__(self): return '' % id(self) - def key(self) -> tuple[str, str, Any, tuple['ASTNode', ...]]: + def key(self): return (self.astType, self.astKind, self.value, self.children) - def typecode(self) -> str: + def typecode(self): return kind_to_typecode[self.astKind] - def postorderWalk(self) -> Iterator['ASTNode']: + def postorderWalk(self): for c in self.children: for w in c.postorderWalk(): yield w yield self - def allOf(self, *astTypes: str) -> Iterator['ASTNode']: - astTypeSet = set(astTypes) + def allOf(self, *astTypes): + astTypes = set(astTypes) for w in self.postorderWalk(): - if w.astType in astTypeSet: + if w.astType in astTypes: yield w -def expressionToAST(ex: expressions.ExpressionNode | expressions.RawNode) -> ASTNode: +def expressionToAST(ex): """Take an expression tree made out of expressions.ExpressionNode, and convert to an AST tree. @@ -187,7 +173,7 @@ def expressionToAST(ex: expressions.ExpressionNode | expressions.RawNode) -> AST [expressionToAST(c) for c in ex.children]) -def sigPerms(s: str) -> Generator[str, None, None]: +def sigPerms(s): """Generate all possible signatures derived by upcasting the given signature. 
""" @@ -206,7 +192,7 @@ def sigPerms(s: str) -> Generator[str, None, None]: yield s -def typeCompileAst(ast: ASTNode) -> ASTNode: +def typeCompileAst(ast): """Assign appropriate types to each node in the AST. Will convert opcodes and functions to appropriate upcast version, @@ -249,7 +235,7 @@ def typeCompileAst(ast: ASTNode) -> ASTNode: [typeCompileAst(c) for c in children]) -class Register: +class Register(): """Abstraction for a register in the VM. Members: @@ -260,18 +246,13 @@ class Register: None if no number assigned yet. """ - node: Final[ASTNode] - temporary: bool - immediate: bool - n: int | None - - def __init__(self, astnode: ASTNode, temporary: bool = False) -> None: + def __init__(self, astnode, temporary=False): self.node = astnode self.temporary = temporary self.immediate = False self.n = None - def __str__(self) -> str: + def __str__(self): if self.temporary: name = 'Temporary' else: @@ -279,7 +260,7 @@ def __str__(self) -> str: return '%s(%s, %s, %s)' % (name, self.node.astType, self.node.astKind, self.n,) - def __repr__(self) -> str: + def __repr__(self): return self.__str__() @@ -288,11 +269,11 @@ class Immediate(Register): a register. """ - def __init__(self, astnode: ASTNode) -> None: + def __init__(self, astnode): Register.__init__(self, astnode) self.immediate = True - def __str__(self) -> str: + def __str__(self): return 'Immediate(%d)' % (self.node.value,) @@ -301,8 +282,7 @@ def __str__(self) -> str: _attr_pat = r'\.\b(?!(real|imag|(\d*[eE]?[+-]?\d+)|(\d*[eE]?[+-]?\d+j)|(\d*j))\b)' _blacklist_re = re.compile(f'{_flow_pat}|{_dunder_pat}|{_attr_pat}') -def stringToExpression(s: str, types: dict[str, type], context: dict[str, Any], - sanitize: bool = True) -> expressions.ExpressionNode: +def stringToExpression(s, types, context, sanitize: bool=True): """Given a string, convert it to a tree of ExpressionNode's. 
""" # sanitize the string for obvious attack vectors that NumExpr cannot @@ -326,7 +306,7 @@ def stringToExpression(s: str, types: dict[str, type], context: dict[str, Any], flags = 0 c = compile(s, '', 'eval', flags) # make VariableNode's for the names - names: dict[str, Any] = {} + names = {} for name in c.co_names: if name == "None": names[name] = None @@ -340,10 +320,10 @@ def stringToExpression(s: str, types: dict[str, type], context: dict[str, Any], names.update(expressions.functions) # now build the expression - ex: expressions.ExpressionNode = eval(c, names) + ex = eval(c, names) if expressions.isConstant(ex): - ex = expressions.ConstantNode(ex) + ex = expressions.ConstantNode(ex, expressions.getKind(ex)) elif not isinstance(ex, expressions.ExpressionNode): raise TypeError("unsupported expression type: %s" % type(ex)) finally: @@ -351,12 +331,12 @@ def stringToExpression(s: str, types: dict[str, type], context: dict[str, Any], return ex -def isReduction(ast: ASTNode) -> bool: +def isReduction(ast): prefixes = (b'sum_', b'prod_', b'min_', b'max_') return any(ast.value.startswith(p) for p in prefixes) -def getInputOrder(ast: ASTNode, input_order: list[str] | None = None) -> list[ASTNode]: +def getInputOrder(ast, input_order=None): """ Derive the input order of the variables in an expression. """ @@ -379,16 +359,16 @@ def getInputOrder(ast: ASTNode, input_order: list[str] | None = None) -> list[AS return ordered_variables -def convertConstantToKind(x: Any, kind: str) -> Any: +def convertConstantToKind(x, kind): # Exception for 'float' types that will return the NumPy float32 type if kind == 'float': return numpy.float32(x) - elif isinstance(x, str): + elif isinstance(x,str): return x.encode('ascii') return kind_to_type[kind](x) -def getConstants(ast: ASTNode) -> tuple[list[ASTNode], list[Any]]: +def getConstants(ast): """ RAM: implemented magic method __lt__ for ASTNode to fix issues #88 and #209. The following test code works now, as does the test suite. 
@@ -397,17 +377,14 @@ def getConstants(ast: ASTNode) -> tuple[list[ASTNode], list[Any]]: a = 1 + 3j; b = 5.0 ne.evaluate('a*2 + 15j - b') """ - constant_registers = {node.reg for node in ast.allOf("constant") - if node.reg is not None} - constants_order = sorted(r.node for r in constant_registers) + constant_registers = set([node.reg for node in ast.allOf("constant")]) + constants_order = sorted([r.node for r in constant_registers]) constants = [convertConstantToKind(a.value, a.astKind) for a in constants_order] return constants_order, constants -# unused? -def sortNodesByOrder(nodes: Iterable[ASTNode], - order: Iterable[tuple[int, str, int]]) -> list[ASTNode]: +def sortNodesByOrder(nodes, order): order_map = {} for i, (_, v, _) in enumerate(order): order_map[v] = i @@ -416,12 +393,11 @@ def sortNodesByOrder(nodes: Iterable[ASTNode], return [a[1] for a in dec_nodes] -def assignLeafRegisters(inodes: Iterable[ASTNode], - registerMaker: type[Register]) -> None: +def assignLeafRegisters(inodes, registerMaker): """ Assign new registers to each of the leaf nodes. """ - leafRegisters: dict[tuple[object, ...], Register] = {} + leafRegisters = {} for node in inodes: key = node.key() if key in leafRegisters: @@ -430,8 +406,7 @@ def assignLeafRegisters(inodes: Iterable[ASTNode], node.reg = leafRegisters[key] = registerMaker(node) -def assignBranchRegisters(inodes: Iterable[ASTNode], - registerMaker: type[Register]) -> None: +def assignBranchRegisters(inodes, registerMaker): """ Assign temporary registers to each of the branch nodes. """ @@ -439,11 +414,11 @@ def assignBranchRegisters(inodes: Iterable[ASTNode], node.reg = registerMaker(node, temporary=True) -def collapseDuplicateSubtrees(ast: ASTNode) -> list[ASTNode]: +def collapseDuplicateSubtrees(ast): """ Common subexpression elimination. 
""" - seen: dict[ASTNode, ASTNode] = {} + seen = {} aliases = [] for a in ast.allOf('op'): if a in seen: @@ -462,66 +437,64 @@ def collapseDuplicateSubtrees(ast: ASTNode) -> list[ASTNode]: return aliases -def optimizeTemporariesAllocation(ast: ASTNode) -> None: +def optimizeTemporariesAllocation(ast): """ Attempt to minimize the number of temporaries needed, by reusing old ones. """ - nodes = [n for n in ast.postorderWalk() if n.reg and n.reg.temporary] - users_of: dict[Register, set[ASTNode]] = {n.reg: set() for n in nodes if n.reg} + nodes = [n for n in ast.postorderWalk() if n.reg.temporary] + users_of = dict((n.reg, set()) for n in nodes) + node_regs = dict((n, set(c.reg for c in n.children if c.reg.temporary)) + for n in nodes) if nodes and nodes[-1] is not ast: nodes_to_check = nodes + [ast] else: nodes_to_check = nodes for n in nodes_to_check: for c in n.children: - if c.reg and c.reg.temporary: + if c.reg.temporary: users_of[c.reg].add(n) - unused: dict[str, set[Register]] = {tc: set() for tc in scalar_constant_kinds} + unused = dict([(tc, set()) for tc in scalar_constant_kinds]) for n in nodes: for c in n.children: reg = c.reg - if reg and reg.temporary: + if reg.temporary: users = users_of[reg] users.discard(n) if not users: unused[reg.node.astKind].add(reg) if unused[n.astKind]: reg = unused[n.astKind].pop() - if n.reg: - users_of[reg] = users_of[n.reg] + users_of[reg] = users_of[n.reg] n.reg = reg -def setOrderedRegisterNumbers(order: Sequence[ASTNode], start: int) -> int: +def setOrderedRegisterNumbers(order, start): """ Given an order of nodes, assign register numbers. """ for i, node in enumerate(order): - assert node.reg is not None node.reg.n = start + i return start + len(order) -def setRegisterNumbersForTemporaries(ast: ASTNode, start: int) -> tuple[int, str]: +def setRegisterNumbersForTemporaries(ast, start): """ Assign register numbers for temporary registers, keeping track of aliases and handling immediate operands. 
""" seen = 0 signature = '' - aliases: list[ASTNode] = [] + aliases = [] for node in ast.postorderWalk(): if node.astType == 'alias': aliases.append(node) node = node.value - reg = node.reg - if not reg: - continue - if reg.immediate: - reg.n = node.value + if node.reg.immediate: + node.reg.n = node.value continue + reg = node.reg if reg.n is None: reg.n = start + seen seen += 1 @@ -531,10 +504,7 @@ def setRegisterNumbersForTemporaries(ast: ASTNode, start: int) -> tuple[int, str return start + seen, signature -_ThreeAddressForm: TypeAlias = tuple[bytes, Register, 'Unpack[tuple[Register, ...]]'] - - -def convertASTtoThreeAddrForm(ast: ASTNode) -> list[_ThreeAddressForm]: +def convertASTtoThreeAddrForm(ast): """ Convert an AST to a three address form. @@ -544,59 +514,55 @@ def convertASTtoThreeAddrForm(ast: ASTNode) -> list[_ThreeAddressForm]: I suppose this should be called three register form, but three address form is found in compiler theory. """ - return [(node.value, node.reg, *(c.reg for c in node.children if c.reg)) - for node in ast.allOf('op') if node.reg] + return [(node.value, node.reg) + tuple([c.reg for c in node.children]) + for node in ast.allOf('op')] -def compileThreeAddrForm(program: Iterable[_ThreeAddressForm]) -> bytes: +def compileThreeAddrForm(program): """ Given a three address form of the program, compile it a string that the VM understands. 
""" - def nToChr(reg: Register | None) -> bytes: + def nToChr(reg): if reg is None: return b'\xff' - assert reg.n is not None - if reg.n < 0: + elif reg.n < 0: raise ValueError("negative value for register number %s" % reg.n) - return bytes([reg.n]) + else: + return bytes([reg.n]) - def quadrupleToString(opcode: bytes, - store: Register | None, - a1: Register | None = None, - a2: Register | None = None) -> bytes: + def quadrupleToString(opcode, store, a1=None, a2=None): cop = chr(interpreter.opcodes[opcode]).encode('latin_1') cs = nToChr(store) ca1 = nToChr(a1) ca2 = nToChr(a2) return cop + cs + ca1 + ca2 - def toString(args: _ThreeAddressForm) -> bytes: - opcode: bytes = args[0] - store: Register = args[1] - a1: Register | None = args[2] if len(args) > 2 else None - a2: Register | None = args[3] if len(args) > 3 else None - an: tuple[Register, ...] = args[4:] if len(args) > 4 else () + def toString(args): + while len(args) < 4: + args += (None,) + opcode, store, a1, a2 = args[:4] s = quadrupleToString(opcode, store, a1, a2) l = [s] - while an: - s = quadrupleToString(b'noop', *an[:3]) + args = args[4:] + while args: + s = quadrupleToString(b'noop', *args[:3]) l.append(s) - an = an[3:] + args = args[3:] return b''.join(l) prog_str = b''.join([toString(t) for t in program]) return prog_str -context_info: Final = ( +context_info = [ ('optimization', ('none', 'moderate', 'aggressive'), 'aggressive'), - ('truediv', (False, True, 'auto'), 'auto'), -) + ('truediv', (False, True, 'auto'), 'auto') +] -def getContext(kwargs: dict[str, Any], _frame_depth: int = 1) -> dict[str, Any]: +def getContext(kwargs, _frame_depth=1): d = kwargs.copy() context = {} for name, allowed, default in context_info: @@ -615,23 +581,11 @@ def getContext(kwargs: dict[str, Any], _frame_depth: int = 1) -> dict[str, Any]: return context -_PrecompileResult: TypeAlias = tuple[ - list[_ThreeAddressForm], # threeAddrProgram - str, # inputsig - str, # tempsig - list[Any], # constants - tuple[str, ...], # 
input_names -] - - -def precompile(ex: expressions.ExpressionNode | str, - signature: Iterable[tuple[str, type]] = (), - context: dict[str, Any] = {}, - sanitize: bool = True) -> _PrecompileResult: +def precompile(ex, signature=(), context={}, sanitize: bool=True): """ Compile the expression to an intermediate form. """ - types: dict[str, type] = dict(signature) + types = dict(signature) input_order = [name for (name, type_) in signature] if isinstance(ex, str): @@ -660,16 +614,14 @@ def precompile(ex: expressions.ExpressionNode | str, input_order = getInputOrder(ast, input_order) constants_order, constants = getConstants(ast) - assert ast.reg is not None - if isReduction(ast): ast.reg.temporary = False optimizeTemporariesAllocation(ast) ast.reg.temporary = False - ast.reg.n = 0 r_output = 0 + ast.reg.n = 0 r_inputs = r_output + 1 r_constants = setOrderedRegisterNumbers(input_order, r_inputs) @@ -678,15 +630,12 @@ def precompile(ex: expressions.ExpressionNode | str, threeAddrProgram = convertASTtoThreeAddrForm(ast) input_names = tuple([a.value for a in input_order]) - inputsig = ''.join(type_to_typecode[types.get(x, default_type)] - for x in input_names) - return threeAddrProgram, inputsig, tempsig, constants, input_names + signature = ''.join(type_to_typecode[types.get(x, default_type)] + for x in input_names) + return threeAddrProgram, signature, tempsig, constants, input_names -def NumExpr(ex: expressions.ExpressionNode | str, - signature: Iterable[tuple[str, type]] = (), - sanitize: bool = True, - **kwargs: object) -> interpreter.NumExpr: +def NumExpr(ex, signature=(), sanitize: bool=True, **kwargs): """ Compile an expression built using E. variables to a function. @@ -710,21 +659,23 @@ def NumExpr(ex: expressions.ExpressionNode | str, program, constants, input_names) -def disassemble(nex: interpreter.NumExpr) -> list[list[bytes | int | None]]: +def disassemble(nex): """ Given a NumExpr object, return a list which is the program disassembled. 
""" - rev_opcodes = {code: op for op, code in interpreter.opcodes.items()} + rev_opcodes = {} + for op in interpreter.opcodes: + rev_opcodes[interpreter.opcodes[op]] = op r_constants = 1 + len(nex.signature) r_temps = r_constants + len(nex.constants) - def parseOp(op: bytes) -> tuple[bytes, bytes]: - name, sig, *_ = *op.rsplit(b'_', 1), b'' + def parseOp(op): + name, sig = [*op.rsplit(b'_', 1), ''][:2] return name, sig - def getArg(pc: int, offset: int) -> int | bytes | None: + def getArg(pc, offset): arg = nex.program[pc + (offset if offset < 4 else offset+1)] - _, sig = parseOp(rev_opcodes[nex.program[pc]]) + _, sig = parseOp(rev_opcodes.get(nex.program[pc])) try: code = sig[offset - 1] except IndexError: @@ -748,9 +699,9 @@ def getArg(pc: int, offset: int) -> int | bytes | None: source = [] for pc in range(0, len(nex.program), 4): - op = rev_opcodes[nex.program[pc]] + op = rev_opcodes.get(nex.program[pc]) _, sig = parseOp(op) - parsed: list[bytes | int | None] = [op] + parsed = [op] for i in range(len(sig)): parsed.append(getArg(pc, 1 + i)) while len(parsed) < 4: @@ -759,7 +710,7 @@ def getArg(pc: int, offset: int) -> int | bytes | None: return source -def getType(a: numpy.typing.NDArray[Any] | numpy.generic) -> type: +def getType(a): kind = a.dtype.kind if kind == 'b': return bool @@ -782,9 +733,7 @@ def getType(a: numpy.typing.NDArray[Any] | numpy.generic) -> type: raise ValueError("unknown type %s" % a.dtype.name) -def getExprNames(text: str, - context: dict[str, Any], - sanitize: bool = True) -> tuple[list[str], bool]: +def getExprNames(text, context, sanitize: bool=True): ex = stringToExpression(text, {}, context, sanitize) ast = expressionToAST(ex) input_order = getInputOrder(ast, None) @@ -802,10 +751,7 @@ def getExprNames(text: str, return [a.value for a in input_order], ex_uses_vml -def getArguments(names: Iterable[str], - local_dict: dict[str, Any] | None = None, - global_dict: dict[str, Any] | None = None, - _frame_depth: int = 2) -> 
list[numpy.typing.NDArray[Any]]: +def getArguments(names, local_dict=None, global_dict=None, _frame_depth: int=2): """ Get the arguments based on the names. """ @@ -849,14 +795,14 @@ def getArguments(names: Iterable[str], def validate(ex: str, - local_dict: dict[str, Any] | None = None, - global_dict: dict[str, Any] | None = None, - out: numpy.typing.NDArray[Any] | None = None, + local_dict: Optional[Dict] = None, + global_dict: Optional[Dict] = None, + out: numpy.ndarray = None, order: str = 'K', casting: str = 'safe', _frame_depth: int = 2, - sanitize: bool | None = None, - **kwargs: object) -> Exception | None: + sanitize: Optional[bool] = None, + **kwargs) -> Optional[Exception]: r""" Validate a NumExpr expression with the given `local_dict` or `locals()`. Returns `None` on success and the Exception object if one occurs. Note that @@ -903,7 +849,7 @@ def validate(ex: str, like float64 to float32, are allowed. * 'unsafe' means any data conversions may be done. - sanitize: bool | None + sanitize: Optional[bool] Both `validate` and by extension `evaluate` call `eval(ex)`, which is potentially dangerous on unsanitized inputs. As such, NumExpr by default performs simple sanitization, banning the character ':;[', the @@ -967,14 +913,14 @@ def validate(ex: str, return None def evaluate(ex: str, - local_dict: dict[str, Any] | None = None, - global_dict: dict[str, Any] | None = None, - out: numpy.typing.NDArray[Any] | None = None, + local_dict: Optional[Dict] = None, + global_dict: Optional[Dict] = None, + out: numpy.ndarray = None, order: str = 'K', casting: str = 'same_kind', - sanitize: bool | None = None, + sanitize: Optional[bool] = None, _frame_depth: int = 3, - **kwargs: object) -> numpy.typing.NDArray[Any]: + **kwargs) -> numpy.ndarray: r""" Evaluate a simple array expression element-wise using the virtual machine. 
@@ -1044,9 +990,9 @@ def evaluate(ex: str, else: raise e -def re_evaluate(local_dict: dict[str, Any] | None = None, - global_dict: dict[str, Any] | None = None, - _frame_depth: int = 2) -> numpy.typing.NDArray[Any]: +def re_evaluate(local_dict: Optional[Dict] = None, + global_dict: Optional[Dict] = None, + _frame_depth: int=2) -> numpy.ndarray: """ Re-evaluate the previous executed array expression without any check. @@ -1064,20 +1010,13 @@ def re_evaluate(local_dict: dict[str, Any] | None = None, """ if not hasattr(_numexpr_last, 'l'): _numexpr_last.l = ContextDict() - ctx: ContextDict[Any] = _numexpr_last.l try: - compiled_ex = ctx['ex'] + compiled_ex = _numexpr_last.l['ex'] except KeyError: raise RuntimeError("A previous evaluate() execution was not found, please call `validate` or `evaluate` once before `re_evaluate`") - assert compiled_ex is not None - - argnames = ctx['argnames'] - assert argnames is not None + argnames = _numexpr_last.l['argnames'] args = getArguments(argnames, local_dict, global_dict, _frame_depth=_frame_depth) - - kwargs = ctx['kwargs'] - assert kwargs is not None - + kwargs = _numexpr_last.l['kwargs'] # with evaluate_lock: return compiled_ex(*args, **kwargs) diff --git a/numexpr/py.typed b/numexpr/py.typed deleted file mode 100644 index e69de29..0000000 diff --git a/numexpr/tests/__init__.py b/numexpr/tests/__init__.py index 9ecc21d..f47c8cc 100644 --- a/numexpr/tests/__init__.py +++ b/numexpr/tests/__init__.py @@ -8,8 +8,7 @@ # rights to use. 
#################################################################### -from numexpr.tests.test_numexpr import print_versions as print_versions -from numexpr.tests.test_numexpr import test as test +from numexpr.tests.test_numexpr import print_versions, test if __name__ == '__main__': - test() # type: ignore[no-untyped-call] + test() diff --git a/numexpr/tests/conftest.py b/numexpr/tests/conftest.py index 6cf5ca1..3d32260 100644 --- a/numexpr/tests/conftest.py +++ b/numexpr/tests/conftest.py @@ -11,7 +11,7 @@ import pytest -def pytest_configure(config: pytest.Config) -> None: +def pytest_configure(config): config.addinivalue_line( "markers", "thread_unsafe: mark test as unsafe for parallel execution" ) diff --git a/numexpr/tests/test_numexpr.py b/numexpr/tests/test_numexpr.py index b9efc87..e2c97f7 100644 --- a/numexpr/tests/test_numexpr.py +++ b/numexpr/tests/test_numexpr.py @@ -9,7 +9,6 @@ # rights to use. #################################################################### -# mypy: ignore-errors import os import platform diff --git a/numexpr/utils.py b/numexpr/utils.py index 83ae1bd..9e45fbe 100644 --- a/numexpr/utils.py +++ b/numexpr/utils.py @@ -15,8 +15,6 @@ import contextvars import os import subprocess -from typing import (Final, Generic, ItemsView, Iterable, Iterator, KeysView, - Literal, TypeVar, ValuesView, cast, overload) from numexpr import use_vml from numexpr.interpreter import MAX_THREADS, _get_num_threads, _set_num_threads @@ -27,25 +25,19 @@ from numexpr.interpreter import (_get_vml_num_threads, _get_vml_version, _set_vml_accuracy_mode, _set_vml_num_threads) -else: - # mypy does not understand this, whereas pyright does - _get_vml_num_threads = None # type: ignore[assignment] - _get_vml_version = None # type: ignore[assignment] - _set_vml_accuracy_mode = None # type: ignore[assignment] - _set_vml_num_threads = None # type: ignore[assignment] -def get_vml_version() -> str | None: +def get_vml_version(): """ Get the VML/MKL library version. 
""" - if _get_vml_version is not None: + if use_vml: return _get_vml_version() else: return None -def set_vml_accuracy_mode(mode: str | None) -> Literal['low', 'high', 'fast'] | None: +def set_vml_accuracy_mode(mode): """ Set the accuracy mode for VML operations. @@ -64,10 +56,8 @@ def set_vml_accuracy_mode(mode: str | None) -> Literal['low', 'high', 'fast'] | Returns old accuracy settings. """ - if _set_vml_accuracy_mode is not None: - acc_dict: dict[str | None, Literal[0, 1, 2, 3]] + if use_vml: acc_dict = {None: 0, 'low': 1, 'high': 2, 'fast': 3} - acc_reverse_dict: dict[int, Literal['low', 'high', 'fast']] acc_reverse_dict = {1: 'low', 2: 'high', 3: 'fast'} if mode not in list(acc_dict.keys()): raise ValueError( @@ -78,7 +68,7 @@ def set_vml_accuracy_mode(mode: str | None) -> Literal['low', 'high', 'fast'] | return None -def set_vml_num_threads(nthreads: int) -> None: +def set_vml_num_threads(nthreads): """ Suggests a maximum number of threads to be used in VML operations. @@ -90,11 +80,11 @@ def set_vml_num_threads(nthreads: int) -> None: for more info about it. """ - if _set_vml_num_threads is not None: + if use_vml: _set_vml_num_threads(nthreads) pass -def get_vml_num_threads() -> int | None: +def get_vml_num_threads(): """ Gets the maximum number of threads to be used in VML operations. @@ -106,11 +96,11 @@ def get_vml_num_threads() -> int | None: for more info about it. """ - if _get_vml_num_threads is not None: + if use_vml: return _get_vml_num_threads() return None -def set_num_threads(nthreads: int) -> int: +def set_num_threads(nthreads): """ Sets a number of threads to be used in operations. @@ -122,13 +112,13 @@ def set_num_threads(nthreads: int) -> int: old_nthreads = _set_num_threads(nthreads) return old_nthreads -def get_num_threads() -> int: +def get_num_threads(): """ Gets the number of threads currently in use for operations. 
""" return _get_num_threads() -def _init_num_threads() -> int: +def _init_num_threads(): """ Detects the environment variable 'NUMEXPR_MAX_THREADS' to set the threadpool size, and if necessary the slightly redundant 'NUMEXPR_NUM_THREADS' or @@ -178,7 +168,7 @@ def _init_num_threads() -> int: return requested_threads -def detect_number_of_cores() -> int: +def detect_number_of_cores(): """ Detects the number of cores on a system. Cribbed from pp. """ @@ -187,7 +177,7 @@ def detect_number_of_cores() -> int: if "SC_NPROCESSORS_ONLN" in os.sysconf_names: # Linux & Unix: ncpus = os.sysconf("SC_NPROCESSORS_ONLN") - if isinstance(ncpus, int) and ncpus > 0: # type: ignore[redundant-expr] + if isinstance(ncpus, int) and ncpus > 0: return ncpus else: # OSX: return int(subprocess.check_output(["sysctl", "-n", "hw.ncpu"])) @@ -201,7 +191,7 @@ def detect_number_of_cores() -> int: return 1 # Default -def detect_number_of_threads() -> int: +def detect_number_of_threads(): """ DEPRECATED: use `_init_num_threads` instead. If this is modified, please update the note in: https://github.com/pydata/numexpr/wiki/Numexpr-Users-Guide @@ -221,90 +211,64 @@ def detect_number_of_threads() -> int: return nthreads -_KT = TypeVar('_KT') -_VT = TypeVar('_VT') - - -class CacheDict(dict[_KT, _VT], Generic[_KT, _VT]): +class CacheDict(dict): """ A dictionary that prevents itself from growing too much. 
""" - maxentries: Final[int] - - def __init__(self, maxentries: int) -> None: + def __init__(self, maxentries): self.maxentries = maxentries - super().__init__(self) + super(CacheDict, self).__init__(self) - def __setitem__(self, key: _KT, value: _VT) -> None: + def __setitem__(self, key, value): # Protection against growing the cache too much if len(self) > self.maxentries: # Remove a 10% of (arbitrary) elements from the cache entries_to_remove = self.maxentries // 10 for k in list(self.keys())[:entries_to_remove]: - super().__delitem__(k) - super().__setitem__(key, value) + super(CacheDict, self).__delitem__(k) + super(CacheDict, self).__setitem__(key, value) -class ContextDict(Generic[_VT]): +class ContextDict: """ A context aware version dictionary """ - _context_data: contextvars.ContextVar[dict[str, _VT]] - - def __init__(self) -> None: + def __init__(self): self._context_data = contextvars.ContextVar('context_data', default={}) - @overload - def set(self, key: None = None, value: None = None, **kwargs: _VT) -> None: ... - @overload - def set(self, key: str, value: _VT, **kwargs: _VT) -> None: ... - def set(self, key: str | None = None, value: _VT | None = None, **kwargs: _VT) -> None: + def set(self, key=None, value=None, **kwargs): data = self._context_data.get().copy() if key is not None: - data[key] = cast('_VT', value) + data[key] = value for k, v in kwargs.items(): data[k] = v self._context_data.set(data) - @overload - def get(self, key: str, default: _VT) -> _VT: ... - @overload - def get(self, key: str, default: _VT | None = None) -> _VT | None: ... 
- def get(self, key: str, default: _VT | None = None) -> _VT | None: + def get(self, key, default=None): data = self._context_data.get() return data.get(key, default) - def delete(self, key: str) -> None: + def delete(self, key): data = self._context_data.get().copy() if key in data: del data[key] self._context_data.set(data) - def clear(self) -> None: + def clear(self): self._context_data.set({}) - def all(self) -> dict[str, _VT]: + def all(self): return self._context_data.get() - @overload - def update(self, **kwargs: _VT) -> None: ... - @overload - def update(self, other: dict[str, _VT], /, **kwargs: _VT) -> None: ... - @overload - def update(self, other: Iterable[tuple[str, _VT]], /, **kwargs: _VT) -> None: ... - def update( # type: ignore[misc] # false positive mypy error - self, - *args: dict[str, _VT] | Iterable[tuple[str, _VT]], - **kwargs: _VT, - ) -> None: + def update(self, *args, **kwargs): data = self._context_data.get().copy() if args: - if len(args) != 1: + if len(args) > 1: raise TypeError(f"update() takes at most 1 positional argument ({len(args)} given)") other = args[0] if isinstance(other, dict): @@ -316,32 +280,32 @@ def update( # type: ignore[misc] # false positive mypy error data.update(kwargs) self._context_data.set(data) - def keys(self) -> KeysView[str]: + def keys(self): return self._context_data.get().keys() - def values(self) -> ValuesView[_VT]: + def values(self): return self._context_data.get().values() - def items(self) -> ItemsView[str, _VT]: + def items(self): return self._context_data.get().items() - def __getitem__(self, key: str) -> _VT | None: + def __getitem__(self, key): return self.get(key) - def __setitem__(self, key: str, value: _VT) -> None: + def __setitem__(self, key, value): self.set(key, value) - def __delitem__(self, key: str) -> None: + def __delitem__(self, key): self.delete(key) - def __contains__(self, key: str) -> bool: + def __contains__(self, key): return key in self._context_data.get() - def __len__(self) -> 
int: + def __len__(self): return len(self._context_data.get()) - def __iter__(self) -> Iterator[str]: + def __iter__(self): return iter(self._context_data.get()) - def __repr__(self) -> str: + def __repr__(self): return repr(self._context_data.get()) diff --git a/numexpr/version.pyi b/numexpr/version.pyi deleted file mode 100644 index 2dfa994..0000000 --- a/numexpr/version.pyi +++ /dev/null @@ -1,6 +0,0 @@ -from typing import Final - -__version__: Final[str] = ... -version: Final[str] = ... -numpy_build_version: Final[str] = ... -platform_machine: Final[str] = ... diff --git a/pyproject.toml b/pyproject.toml index e5a7d18..264a999 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -23,7 +23,6 @@ classifiers = [ "Intended Audience :: Science/Research", "Programming Language :: Python", "Topic :: Software Development :: Libraries :: Python Modules", - "Typing :: Typed", "Operating System :: Microsoft :: Windows", "Operating System :: Unix", "Programming Language :: Python :: 3", @@ -63,23 +62,3 @@ test-command = ["python -m pytest --pyargs numexpr"] [[tool.cibuildwheel.overrides]] select = "cp31*t-*" test-command = ["python -m pytest --parallel-threads=4 --pyargs numexpr"] - - -[tool.mypy] -files = ["numexpr/*.py"] -exclude = ["^bench/", "^build/", "^doc/", "^issues/"] -strict = true -disable_error_code = ["no-any-return"] -enable_error_code = ["ignore-without-code", "redundant-expr", "truthy-bool"] -warn_unreachable = false -local_partial_types = true -allow_redefinition_new = true - - -[tool.pyright] -include = ["numexpr"] -exclude = ["bench", "build", "doc", "issues", "numexpr/tests", "setup.py"] -ignore = [".venv", "numexpr/tests/test_numexpr.py", "setup.py"] -stubPath = "." -reportPrivateUsage = false -reportConstantRedefinition = false diff --git a/setup.py b/setup.py index beeece3..64d9f20 100644 --- a/setup.py +++ b/setup.py @@ -9,8 +9,6 @@ # rights to use. 
#################################################################### -# mypy: ignore-errors - import configparser import os import os.path as op From 7cadd0f93b4bcd10bd585bae3bcfc5f9f8c42306 Mon Sep 17 00:00:00 2001 From: lshaw8317 Date: Mon, 13 Oct 2025 15:50:59 +0200 Subject: [PATCH 156/166] Revert "Merge pull request #534 from jorenham/static-typing" This reverts commit bce38b4317746a3e7a33c4489a3bc81898fd7c36, reversing changes made to cd730c826fa2ac16d145703a94a90b56e158cd5d. --- .github/workflows/typecheck.yml | 36 --- .pre-commit-config.yaml | 13 +- MANIFEST.in | 2 +- RELEASE_NOTES.rst | 1 - numexpr/__init__.py | 50 ++-- numexpr/cpuinfo.py | 513 +++++++++++++++----------------- numexpr/expressions.py | 225 ++++++-------- numexpr/interpreter.pyi | 52 ---- numexpr/necompiler.py | 297 ++++++++---------- numexpr/py.typed | 0 numexpr/tests/__init__.py | 5 +- numexpr/tests/conftest.py | 2 +- numexpr/tests/test_numexpr.py | 1 - numexpr/utils.py | 116 +++----- numexpr/version.pyi | 6 - pyproject.toml | 21 -- setup.py | 2 - 17 files changed, 510 insertions(+), 832 deletions(-) delete mode 100644 .github/workflows/typecheck.yml delete mode 100644 numexpr/interpreter.pyi delete mode 100644 numexpr/py.typed delete mode 100644 numexpr/version.pyi diff --git a/.github/workflows/typecheck.yml b/.github/workflows/typecheck.yml deleted file mode 100644 index a6ca065..0000000 --- a/.github/workflows/typecheck.yml +++ /dev/null @@ -1,36 +0,0 @@ -name: Validate static types -permissions: read-all - -on: - pull_request: - paths: - - .github/workflows/typecheck.yml - - numexpr/* - - pyproject.toml - workflow_dispatch: - -concurrency: - group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} - cancel-in-progress: true - -jobs: - typecheck_quaddtype: - runs-on: ubuntu-latest - timeout-minutes: 2 - - steps: - - uses: actions/checkout@v5.0.0 - - - uses: astral-sh/setup-uv@v6.7.0 - with: - activate-environment: true - python-version: "3.10" - - - name: install - run: uv 
pip install mypy pyright pytest . - - - name: pyright - run: pyright - - - name: mypy - run: mypy --no-incremental --cache-dir=/dev/null . diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 35c3d40..3235b5d 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -18,10 +18,9 @@ repos: hooks: - id: isort -- repo: https://github.com/pre-commit/mirrors-mypy - rev: v1.18.2 - hooks: - - id: mypy - args: [--config-file=pyproject.toml] - exclude: ^(bench/|build/|doc/|issues/|setup.py) - additional_dependencies: [numpy, pytest] +# Too many things to fix, let's just ignore it for now +#- repo: https://github.com/pre-commit/mirrors-mypy +# rev: v1.8.0 +# hooks: +# - id: mypy +# exclude: ^(docs/|setup.py) diff --git a/MANIFEST.in b/MANIFEST.in index 886a494..4ec8d9f 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -2,7 +2,7 @@ include MANIFEST.in VERSION include *.rst *.txt *.cfg site.cfg.example -recursive-include numexpr *.cpp *.hpp *.py *.pyi py.typed +recursive-include numexpr *.cpp *.hpp *.py recursive-include numexpr/win32 *.c *.h exclude numexpr/__config__.py RELEASING.txt site.cfg diff --git a/RELEASE_NOTES.rst b/RELEASE_NOTES.rst index 1b11c43..0294048 100644 --- a/RELEASE_NOTES.rst +++ b/RELEASE_NOTES.rst @@ -25,7 +25,6 @@ Changes from 2.13.0 to 2.13.1 * Patch to maximum/minimum functions in order to match NumPy NaN handling * Patch to convert '+'->'|' and '*'->'&' for booleans - Changes from 2.12.1 to 2.13.0 ----------------------------- diff --git a/numexpr/__init__.py b/numexpr/__init__.py index af9defc..63bb9e9 100644 --- a/numexpr/__init__.py +++ b/numexpr/__init__.py @@ -21,62 +21,46 @@ """ -from typing import TYPE_CHECKING, Final +from numexpr.interpreter import __BLOCK_SIZE1__, MAX_THREADS, use_vml -if TYPE_CHECKING: - import unittest - -# the `import _ as _` are needed for mypy to understand these are re-exports - -from numexpr.interpreter import __BLOCK_SIZE1__ as __BLOCK_SIZE1__ -from numexpr.interpreter import MAX_THREADS as 
MAX_THREADS -from numexpr.interpreter import use_vml as use_vml - -is_cpu_amd_intel: Final = False # DEPRECATION WARNING: WILL BE REMOVED IN FUTURE RELEASE +is_cpu_amd_intel = False # DEPRECATION WARNING: WILL BE REMOVED IN FUTURE RELEASE # cpuinfo imports were moved into the test submodule function that calls them # to improve import times. -from numexpr.expressions import E as E -from numexpr.necompiler import NumExpr as NumExpr -from numexpr.necompiler import disassemble as disassemble -from numexpr.necompiler import evaluate as evaluate -from numexpr.necompiler import re_evaluate as re_evaluate -from numexpr.necompiler import validate as validate -from numexpr.utils import _init_num_threads -from numexpr.utils import detect_number_of_cores as detect_number_of_cores -from numexpr.utils import detect_number_of_threads as detect_number_of_threads -from numexpr.utils import get_num_threads as get_num_threads -from numexpr.utils import get_vml_version as get_vml_version -from numexpr.utils import set_num_threads as set_num_threads -from numexpr.utils import set_vml_accuracy_mode as set_vml_accuracy_mode -from numexpr.utils import set_vml_num_threads as set_vml_num_threads +from numexpr.expressions import E +from numexpr.necompiler import (NumExpr, disassemble, evaluate, re_evaluate, + validate) +from numexpr.utils import (_init_num_threads, detect_number_of_cores, + detect_number_of_threads, get_num_threads, + get_vml_version, set_num_threads, + set_vml_accuracy_mode, set_vml_num_threads) # Detect the number of cores -ncores: Final = detect_number_of_cores() +ncores = detect_number_of_cores() # Initialize the number of threads to be used -nthreads: Final = _init_num_threads() +nthreads = _init_num_threads() # The default for VML is 1 thread (see #39) # set_vml_num_threads(1) -from . import version as version +from . 
import version -__version__: Final = version.version +__version__ = version.version -def print_versions() -> None: +def print_versions(): """Print the versions of software that numexpr relies on.""" try: import numexpr.tests - return numexpr.tests.print_versions() # type: ignore[no-untyped-call] + return numexpr.tests.print_versions() except ImportError: # To maintain Python 2.6 compatibility we have simple error handling raise ImportError('`numexpr.tests` could not be imported, likely it was excluded from the distribution.') -def test(verbosity: int = 1) -> "unittest.result.TestResult": +def test(verbosity=1): """Run all the tests in the test suite.""" try: import numexpr.tests - return numexpr.tests.test(verbosity=verbosity) # type: ignore[no-untyped-call] + return numexpr.tests.test(verbosity=verbosity) except ImportError: # To maintain Python 2.6 compatibility we have simple error handling raise ImportError('`numexpr.tests` could not be imported, likely it was excluded from the distribution.') diff --git a/numexpr/cpuinfo.py b/numexpr/cpuinfo.py index 9a4e5be..897a4ca 100755 --- a/numexpr/cpuinfo.py +++ b/numexpr/cpuinfo.py @@ -29,33 +29,25 @@ import re import subprocess import sys +import types import warnings -from typing import (Any, Callable, ClassVar, Container, Final, Generator, - NoReturn, Sequence, TypeAlias, TypeVar, overload) -_CMD: TypeAlias = str | Sequence[str] -_Statuses: TypeAlias = Container[int] +is_cpu_amd_intel = False # DEPRECATION WARNING: WILL BE REMOVED IN FUTURE RELEASE -is_cpu_amd_intel: Final = False # DEPRECATION WARNING: WILL BE REMOVED IN FUTURE RELEASE - -def getoutput(cmd: _CMD, - successful_status: _Statuses = (0,), - stacklevel: int = 1) -> tuple[bool, bytes]: +def getoutput(cmd, successful_status=(0,), stacklevel=1): try: p = subprocess.Popen(cmd, stdout=subprocess.PIPE) output, _ = p.communicate() status = p.returncode except EnvironmentError as e: warnings.warn(str(e), UserWarning, stacklevel=stacklevel) - return False, b'' + 
return False, '' if os.WIFEXITED(status) and os.WEXITSTATUS(status) in successful_status: return True, output return False, output -def command_info(successful_status: _Statuses = (0,), - stacklevel: int = 1, - **kw: _CMD) -> dict[str, bytes]: +def command_info(successful_status=(0,), stacklevel=1, **kw): info = {} for key in kw: ok, output = getoutput(kw[key], successful_status=successful_status, @@ -65,9 +57,7 @@ def command_info(successful_status: _Statuses = (0,), return info -def command_by_line(cmd: _CMD, - successful_status: _Statuses = (0,), - stacklevel: int = 1) -> Generator[str, None, None]: +def command_by_line(cmd, successful_status=(0,), stacklevel=1): ok, output = getoutput(cmd, successful_status=successful_status, stacklevel=stacklevel + 1) if not ok: @@ -80,10 +70,8 @@ def command_by_line(cmd: _CMD, yield line.strip() -def key_value_from_command(cmd: _CMD, - sep: str, - successful_status: _Statuses = (0,), - stacklevel: int = 1) -> dict[str, str]: +def key_value_from_command(cmd, sep, successful_status=(0,), + stacklevel=1): d = {} for line in command_by_line(cmd, successful_status=successful_status, stacklevel=stacklevel + 1): @@ -93,25 +81,18 @@ def key_value_from_command(cmd: _CMD, return d -_T = TypeVar('_T') - - -class CPUInfoBase: +class CPUInfoBase(object): """Holds CPU information and provides methods for requiring the availability of various CPU features. """ - @overload - def _try_call(self, func: Callable[..., NoReturn]) -> None: ... - @overload - def _try_call(self, func: Callable[[], _T]) -> _T | None: ... 
- def _try_call(self, func: Callable[[], _T]) -> _T | None: + def _try_call(self, func): try: return func() except: - return None + pass - def __getattr__(self, name: str) -> Callable[..., Any]: + def __getattr__(self, name): if not name.startswith('_'): if hasattr(self, '_' + name): attr = getattr(self, '_' + name) @@ -121,27 +102,25 @@ def __getattr__(self, name: str) -> Callable[..., Any]: return lambda: None raise AttributeError(name) - def _getNCPUs(self) -> int: + def _getNCPUs(self): return 1 - def __get_nbits(self) -> str: + def __get_nbits(self): abits = platform.architecture()[0] - match = re.compile(r'(\d+)bit').search(abits) - assert match, abits - return match.group(1) + nbits = re.compile(r'(\d+)bit').search(abits).group(1) + return nbits - def _is_32bit(self) -> bool: + def _is_32bit(self): return self.__get_nbits() == '32' - def _is_64bit(self) -> bool: + def _is_64bit(self): return self.__get_nbits() == '64' class LinuxCPUInfo(CPUInfoBase): - # This will never be `None` on (initialized) instances - info: list[dict[str, str]] = None # type: ignore[assignment] + info = None - def __init__(self) -> None: + def __init__(self): if self.info is not None: return info = [{}] @@ -164,64 +143,64 @@ def __init__(self) -> None: fo.close() self.__class__.info = info - def _not_impl(self) -> None: + def _not_impl(self): pass # Athlon - def _is_AMD(self) -> bool: + def _is_AMD(self): return self.info[0]['vendor_id'] == 'AuthenticAMD' - def _is_AthlonK6_2(self) -> bool: + def _is_AthlonK6_2(self): return self._is_AMD() and self.info[0]['model'] == '2' - def _is_AthlonK6_3(self) -> bool: + def _is_AthlonK6_3(self): return self._is_AMD() and self.info[0]['model'] == '3' - def _is_AthlonK6(self) -> bool: + def _is_AthlonK6(self): return re.match(r'.*?AMD-K6', self.info[0]['model name']) is not None - def _is_AthlonK7(self) -> bool: + def _is_AthlonK7(self): return re.match(r'.*?AMD-K7', self.info[0]['model name']) is not None - def _is_AthlonMP(self) -> bool: + def 
_is_AthlonMP(self): return re.match(r'.*?Athlon\(tm\) MP\b', self.info[0]['model name']) is not None - def _is_AMD64(self) -> bool: + def _is_AMD64(self): return self.is_AMD() and self.info[0]['family'] == '15' - def _is_Athlon64(self) -> bool: + def _is_Athlon64(self): return re.match(r'.*?Athlon\(tm\) 64\b', self.info[0]['model name']) is not None - def _is_AthlonHX(self) -> bool: + def _is_AthlonHX(self): return re.match(r'.*?Athlon HX\b', self.info[0]['model name']) is not None - def _is_Opteron(self) -> bool: + def _is_Opteron(self): return re.match(r'.*?Opteron\b', self.info[0]['model name']) is not None - def _is_Hammer(self) -> bool: + def _is_Hammer(self): return re.match(r'.*?Hammer\b', self.info[0]['model name']) is not None # Alpha - def _is_Alpha(self) -> bool: + def _is_Alpha(self): return self.info[0]['cpu'] == 'Alpha' - def _is_EV4(self) -> bool: + def _is_EV4(self): return self.is_Alpha() and self.info[0]['cpu model'] == 'EV4' - def _is_EV5(self) -> bool: + def _is_EV5(self): return self.is_Alpha() and self.info[0]['cpu model'] == 'EV5' - def _is_EV56(self) -> bool: + def _is_EV56(self): return self.is_Alpha() and self.info[0]['cpu model'] == 'EV56' - def _is_PCA56(self) -> bool: + def _is_PCA56(self): return self.is_Alpha() and self.info[0]['cpu model'] == 'PCA56' # Intel @@ -229,377 +208,356 @@ def _is_PCA56(self) -> bool: #XXX _is_i386 = _not_impl - def _is_Intel(self) -> bool: + def _is_Intel(self): return self.info[0]['vendor_id'] == 'GenuineIntel' - def _is_i486(self) -> bool: + def _is_i486(self): return self.info[0]['cpu'] == 'i486' - def _is_i586(self) -> bool: + def _is_i586(self): return self.is_Intel() and self.info[0]['cpu family'] == '5' - def _is_i686(self) -> bool: + def _is_i686(self): return self.is_Intel() and self.info[0]['cpu family'] == '6' - def _is_Celeron(self) -> bool: + def _is_Celeron(self): return re.match(r'.*?Celeron', self.info[0]['model name']) is not None - def _is_Pentium(self) -> bool: + def _is_Pentium(self): 
return re.match(r'.*?Pentium', self.info[0]['model name']) is not None - def _is_PentiumII(self) -> bool: + def _is_PentiumII(self): return re.match(r'.*?Pentium.*?II\b', self.info[0]['model name']) is not None - def _is_PentiumPro(self) -> bool: + def _is_PentiumPro(self): return re.match(r'.*?PentiumPro\b', self.info[0]['model name']) is not None - def _is_PentiumMMX(self) -> bool: + def _is_PentiumMMX(self): return re.match(r'.*?Pentium.*?MMX\b', self.info[0]['model name']) is not None - def _is_PentiumIII(self) -> bool: + def _is_PentiumIII(self): return re.match(r'.*?Pentium.*?III\b', self.info[0]['model name']) is not None - def _is_PentiumIV(self) -> bool: + def _is_PentiumIV(self): return re.match(r'.*?Pentium.*?(IV|4)\b', self.info[0]['model name']) is not None - def _is_PentiumM(self) -> bool: + def _is_PentiumM(self): return re.match(r'.*?Pentium.*?M\b', self.info[0]['model name']) is not None - def _is_Prescott(self) -> bool: + def _is_Prescott(self): return self.is_PentiumIV() and self.has_sse3() - def _is_Nocona(self) -> bool: + def _is_Nocona(self): return (self.is_Intel() and self.info[0]['cpu family'] in ('6', '15') and # two s sse3; three s ssse3 not the same thing, this is fine (self.has_sse3() and not self.has_ssse3()) and re.match(r'.*?\blm\b', self.info[0]['flags']) is not None) - def _is_Core2(self) -> bool: + def _is_Core2(self): return (self.is_64bit() and self.is_Intel() and re.match(r'.*?Core\(TM\)2\b', self.info[0]['model name']) is not None) - def _is_Itanium(self) -> bool: + def _is_Itanium(self): return re.match(r'.*?Itanium\b', self.info[0]['family']) is not None - def _is_XEON(self) -> bool: + def _is_XEON(self): return re.match(r'.*?XEON\b', self.info[0]['model name'], re.IGNORECASE) is not None _is_Xeon = _is_XEON # Power - def _is_Power(self) -> bool: + def _is_Power(self): return re.match(r'.*POWER.*', self.info[0]['cpu']) is not None - def _is_Power7(self) -> bool: + def _is_Power7(self): return re.match(r'.*POWER7.*', 
self.info[0]['cpu']) is not None - def _is_Power8(self) -> bool: + def _is_Power8(self): return re.match(r'.*POWER8.*', self.info[0]['cpu']) is not None - def _is_Power9(self) -> bool: + def _is_Power9(self): return re.match(r'.*POWER9.*', self.info[0]['cpu']) is not None - def _has_Altivec(self) -> bool: + def _has_Altivec(self): return re.match(r'.*altivec\ supported.*', self.info[0]['cpu']) is not None # Varia - def _is_singleCPU(self) -> bool: + def _is_singleCPU(self): return len(self.info) == 1 - def _getNCPUs(self) -> int: + def _getNCPUs(self): return len(self.info) - def _has_fdiv_bug(self) -> bool: + def _has_fdiv_bug(self): return self.info[0]['fdiv_bug'] == 'yes' - def _has_f00f_bug(self) -> bool: + def _has_f00f_bug(self): return self.info[0]['f00f_bug'] == 'yes' - def _has_mmx(self) -> bool: + def _has_mmx(self): return re.match(r'.*?\bmmx\b', self.info[0]['flags']) is not None - def _has_sse(self) -> bool: + def _has_sse(self): return re.match(r'.*?\bsse\b', self.info[0]['flags']) is not None - def _has_sse2(self) -> bool: + def _has_sse2(self): return re.match(r'.*?\bsse2\b', self.info[0]['flags']) is not None - def _has_sse3(self) -> bool: + def _has_sse3(self): return re.match(r'.*?\bpni\b', self.info[0]['flags']) is not None - def _has_ssse3(self) -> bool: + def _has_ssse3(self): return re.match(r'.*?\bssse3\b', self.info[0]['flags']) is not None - def _has_3dnow(self) -> bool: + def _has_3dnow(self): return re.match(r'.*?\b3dnow\b', self.info[0]['flags']) is not None - def _has_3dnowext(self) -> bool: + def _has_3dnowext(self): return re.match(r'.*?\b3dnowext\b', self.info[0]['flags']) is not None class IRIXCPUInfo(CPUInfoBase): - # The first initialized instance will set this class variable - info: ClassVar[dict[str, str]] = None # type: ignore[assignment] + info = None - def __init__(self) -> None: + def __init__(self): if self.info is not None: return info = key_value_from_command('sysconf', sep=' ', successful_status=(0, 1)) 
self.__class__.info = info - def _not_impl(self) -> None: + def _not_impl(self): pass - def _is_singleCPU(self) -> bool: + def _is_singleCPU(self): return self.info.get('NUM_PROCESSORS') == '1' - def _getNCPUs(self) -> int: + def _getNCPUs(self): return int(self.info.get('NUM_PROCESSORS', 1)) - def __cputype(self, n: int | str) -> bool: - return self.info['PROCESSORS'].split()[0].lower() == 'r%s' % (n) + def __cputype(self, n): + return self.info.get('PROCESSORS').split()[0].lower() == 'r%s' % (n) - def _is_r2000(self) -> bool: + def _is_r2000(self): return self.__cputype(2000) - def _is_r3000(self) -> bool: + def _is_r3000(self): return self.__cputype(3000) - def _is_r3900(self) -> bool: + def _is_r3900(self): return self.__cputype(3900) - def _is_r4000(self) -> bool: + def _is_r4000(self): return self.__cputype(4000) - def _is_r4100(self) -> bool: + def _is_r4100(self): return self.__cputype(4100) - def _is_r4300(self) -> bool: + def _is_r4300(self): return self.__cputype(4300) - def _is_r4400(self) -> bool: + def _is_r4400(self): return self.__cputype(4400) - def _is_r4600(self) -> bool: + def _is_r4600(self): return self.__cputype(4600) - def _is_r4650(self) -> bool: + def _is_r4650(self): return self.__cputype(4650) - def _is_r5000(self) -> bool: + def _is_r5000(self): return self.__cputype(5000) - def _is_r6000(self) -> bool: + def _is_r6000(self): return self.__cputype(6000) - def _is_r8000(self) -> bool: + def _is_r8000(self): return self.__cputype(8000) - def _is_r10000(self) -> bool: + def _is_r10000(self): return self.__cputype(10000) - def _is_r12000(self) -> bool: + def _is_r12000(self): return self.__cputype(12000) - def _is_rorion(self) -> bool: + def _is_rorion(self): return self.__cputype('orion') - def get_ip(self) -> str | None: + def get_ip(self): try: return self.info.get('MACHINE') except: - return None + pass - def __machine(self, n: int) -> bool: - return self.info['MACHINE'].lower() == 'ip%s' % (n) + def __machine(self, n): + return 
self.info.get('MACHINE').lower() == 'ip%s' % (n) - def _is_IP19(self) -> bool: + def _is_IP19(self): return self.__machine(19) - def _is_IP20(self) -> bool: + def _is_IP20(self): return self.__machine(20) - def _is_IP21(self) -> bool: + def _is_IP21(self): return self.__machine(21) - def _is_IP22(self) -> bool: + def _is_IP22(self): return self.__machine(22) - def _is_IP22_4k(self) -> bool: + def _is_IP22_4k(self): return self.__machine(22) and self._is_r4000() - def _is_IP22_5k(self) -> bool: + def _is_IP22_5k(self): return self.__machine(22) and self._is_r5000() - def _is_IP24(self) -> bool: + def _is_IP24(self): return self.__machine(24) - def _is_IP25(self) -> bool: + def _is_IP25(self): return self.__machine(25) - def _is_IP26(self) -> bool: + def _is_IP26(self): return self.__machine(26) - def _is_IP27(self) -> bool: + def _is_IP27(self): return self.__machine(27) - def _is_IP28(self) -> bool: + def _is_IP28(self): return self.__machine(28) - def _is_IP30(self) -> bool: + def _is_IP30(self): return self.__machine(30) - def _is_IP32(self) -> bool: + def _is_IP32(self): return self.__machine(32) - def _is_IP32_5k(self) -> bool: + def _is_IP32_5k(self): return self.__machine(32) and self._is_r5000() - def _is_IP32_10k(self) -> bool: + def _is_IP32_10k(self): return self.__machine(32) and self._is_r10000() class DarwinCPUInfo(CPUInfoBase): - # The first initialized instance will set this class variable - info: ClassVar[dict[str, Any]] = None # type: ignore[assignment] + info = None - def __init__(self) -> None: + def __init__(self): if self.info is not None: return - info: dict[str, bytes | Any] = command_info(arch='arch', machine='machine') + info = command_info(arch='arch', + machine='machine') info['sysctl_hw'] = key_value_from_command(['sysctl', 'hw'], sep='=') self.__class__.info = info - def _not_impl(self) -> None: - pass + def _not_impl(self): pass - def _getNCPUs(self) -> int: + def _getNCPUs(self): return int(self.info['sysctl_hw'].get('hw.ncpu', 1)) - 
def _is_Power_Macintosh(self) -> bool: + def _is_Power_Macintosh(self): return self.info['sysctl_hw']['hw.machine'] == 'Power Macintosh' - def _is_i386(self) -> bool: + def _is_i386(self): return self.info['arch'] == 'i386' - def _is_ppc(self) -> bool: + def _is_ppc(self): return self.info['arch'] == 'ppc' - def __machine(self, n: int | str) -> bool: + def __machine(self, n): return self.info['machine'] == 'ppc%s' % n - def _is_ppc601(self) -> bool: - return self.__machine(601) + def _is_ppc601(self): return self.__machine(601) - def _is_ppc602(self) -> bool: - return self.__machine(602) + def _is_ppc602(self): return self.__machine(602) - def _is_ppc603(self) -> bool: - return self.__machine(603) + def _is_ppc603(self): return self.__machine(603) - def _is_ppc603e(self) -> bool: - return self.__machine('603e') + def _is_ppc603e(self): return self.__machine('603e') - def _is_ppc604(self) -> bool: - return self.__machine(604) + def _is_ppc604(self): return self.__machine(604) - def _is_ppc604e(self) -> bool: - return self.__machine('604e') + def _is_ppc604e(self): return self.__machine('604e') - def _is_ppc620(self) -> bool: - return self.__machine(620) + def _is_ppc620(self): return self.__machine(620) - def _is_ppc630(self) -> bool: - return self.__machine(630) + def _is_ppc630(self): return self.__machine(630) - def _is_ppc740(self) -> bool: - return self.__machine(740) + def _is_ppc740(self): return self.__machine(740) - def _is_ppc7400(self) -> bool: - return self.__machine(7400) + def _is_ppc7400(self): return self.__machine(7400) - def _is_ppc7450(self) -> bool: - return self.__machine(7450) + def _is_ppc7450(self): return self.__machine(7450) - def _is_ppc750(self) -> bool: - return self.__machine(750) + def _is_ppc750(self): return self.__machine(750) - def _is_ppc403(self) -> bool: - return self.__machine(403) + def _is_ppc403(self): return self.__machine(403) - def _is_ppc505(self) -> bool: - return self.__machine(505) + def _is_ppc505(self): return 
self.__machine(505) - def _is_ppc801(self) -> bool: - return self.__machine(801) + def _is_ppc801(self): return self.__machine(801) - def _is_ppc821(self) -> bool: - return self.__machine(821) + def _is_ppc821(self): return self.__machine(821) - def _is_ppc823(self) -> bool: - return self.__machine(823) + def _is_ppc823(self): return self.__machine(823) - def _is_ppc860(self) -> bool: - return self.__machine(860) + def _is_ppc860(self): return self.__machine(860) class NetBSDCPUInfo(CPUInfoBase): - # The first initialized instance will set this class variable - info: ClassVar[dict[str, Any]] = None # type: ignore[assignment] + info = None - def __init__(self) -> None: - if self.info is not None: - return - sysctl_hw = key_value_from_command(['sysctl', 'hw'], sep='=') - self.__class__.info = { - 'sysctl_hw': sysctl_hw, - 'arch': sysctl_hw.get('hw.machine_arch', 1), - 'machine': sysctl_hw.get('hw.machine', 1), - } - - def _not_impl(self) -> None: - pass + def __init__(self): + if self.info is not None: + return + info = {} + info['sysctl_hw'] = key_value_from_command(['sysctl', 'hw'], sep='=') + info['arch'] = info['sysctl_hw'].get('hw.machine_arch', 1) + info['machine'] = info['sysctl_hw'].get('hw.machine', 1) + self.__class__.info = info - def _getNCPUs(self) -> int: - return int(self.info['sysctl_hw'].get('hw.ncpu', 1)) + def _not_impl(self): pass + + def _getNCPUs(self): + return int(self.info['sysctl_hw'].get('hw.ncpu', 1)) - def _is_Intel(self) -> bool: - return self.info['sysctl_hw'].get('hw.model', "")[:5] == 'Intel' + def _is_Intel(self): + if self.info['sysctl_hw'].get('hw.model', "")[0:5] == 'Intel': + return True + return False - def _is_AMD(self) -> bool: - return self.info['sysctl_hw'].get('hw.model', "")[:3] == 'AMD' + def _is_AMD(self): + if self.info['sysctl_hw'].get('hw.model', "")[0:3] == 'AMD': + return True + return False class SunOSCPUInfo(CPUInfoBase): - # The first initialized instance will set this class variable - info: ClassVar[dict[str, 
Any]] = None # type: ignore[assignment] + info = None - def __init__(self) -> None: + def __init__(self): if self.info is not None: return - info: dict[str, Any] = command_info( - arch='arch', - mach='mach', - uname_i=['uname', '-i'], - isainfo_b=['isainfo', '-b'], - isainfo_n=['isainfo', '-n'], + info = command_info(arch='arch', + mach='mach', + uname_i=['uname', '-i'], + isainfo_b=['isainfo', '-b'], + isainfo_n=['isainfo', '-n'], ) info['uname_X'] = key_value_from_command(['uname', '-X'], sep='=') for line in command_by_line(['psrinfo', '-v', '0']): @@ -609,115 +567,116 @@ def __init__(self) -> None: break self.__class__.info = info - def _not_impl(self) -> None: + def _not_impl(self): pass - def _is_i386(self) -> bool: + def _is_i386(self): return self.info['isainfo_n'] == 'i386' - def _is_sparc(self) -> bool: + def _is_sparc(self): return self.info['isainfo_n'] == 'sparc' - def _is_sparcv9(self) -> bool: + def _is_sparcv9(self): return self.info['isainfo_n'] == 'sparcv9' - def _getNCPUs(self) -> int: + def _getNCPUs(self): return int(self.info['uname_X'].get('NumCPU', 1)) - def _is_sun4(self) -> bool: + def _is_sun4(self): return self.info['arch'] == 'sun4' - def _is_SUNW(self) -> bool: + def _is_SUNW(self): return re.match(r'SUNW', self.info['uname_i']) is not None - def _is_sparcstation5(self) -> bool: + def _is_sparcstation5(self): return re.match(r'.*SPARCstation-5', self.info['uname_i']) is not None - def _is_ultra1(self) -> bool: + def _is_ultra1(self): return re.match(r'.*Ultra-1', self.info['uname_i']) is not None - def _is_ultra250(self) -> bool: + def _is_ultra250(self): return re.match(r'.*Ultra-250', self.info['uname_i']) is not None - def _is_ultra2(self) -> bool: + def _is_ultra2(self): return re.match(r'.*Ultra-2', self.info['uname_i']) is not None - def _is_ultra30(self) -> bool: + def _is_ultra30(self): return re.match(r'.*Ultra-30', self.info['uname_i']) is not None - def _is_ultra4(self) -> bool: + def _is_ultra4(self): return 
re.match(r'.*Ultra-4', self.info['uname_i']) is not None - def _is_ultra5_10(self) -> bool: + def _is_ultra5_10(self): return re.match(r'.*Ultra-5_10', self.info['uname_i']) is not None - def _is_ultra5(self) -> bool: + def _is_ultra5(self): return re.match(r'.*Ultra-5', self.info['uname_i']) is not None - def _is_ultra60(self) -> bool: + def _is_ultra60(self): return re.match(r'.*Ultra-60', self.info['uname_i']) is not None - def _is_ultra80(self) -> bool: + def _is_ultra80(self): return re.match(r'.*Ultra-80', self.info['uname_i']) is not None - def _is_ultraenterprice(self) -> bool: + def _is_ultraenterprice(self): return re.match(r'.*Ultra-Enterprise', self.info['uname_i']) is not None - def _is_ultraenterprice10k(self) -> bool: + def _is_ultraenterprice10k(self): return re.match(r'.*Ultra-Enterprise-10000', self.info['uname_i']) is not None - def _is_sunfire(self) -> bool: + def _is_sunfire(self): return re.match(r'.*Sun-Fire', self.info['uname_i']) is not None - def _is_ultra(self) -> bool: + def _is_ultra(self): return re.match(r'.*Ultra', self.info['uname_i']) is not None - def _is_cpusparcv7(self) -> bool: + def _is_cpusparcv7(self): return self.info['processor'] == 'sparcv7' - def _is_cpusparcv8(self) -> bool: + def _is_cpusparcv8(self): return self.info['processor'] == 'sparcv8' - def _is_cpusparcv9(self) -> bool: + def _is_cpusparcv9(self): return self.info['processor'] == 'sparcv9' class Win32CPUInfo(CPUInfoBase): - # The first initialized instance will set this class variable - info: ClassVar[list[dict[str, Any]]] = None # type: ignore[assignment] - - pkey: ClassVar = r"HARDWARE\DESCRIPTION\System\CentralProcessor" + info = None + pkey = r"HARDWARE\DESCRIPTION\System\CentralProcessor" # XXX: what does the value of # HKEY_LOCAL_MACHINE\HARDWARE\DESCRIPTION\System\CentralProcessor\0 # mean? 
- def __init__(self) -> None: - import winreg as _winreg + def __init__(self): + try: + import _winreg + except ImportError: # Python 3 + import winreg as _winreg if self.info is not None: return - info: list[dict[str, Any]] = [] + info = [] try: #XXX: Bad style to use so long `try:...except:...`. Fix it! prgx = re.compile(r"family\s+(?P<FML>\d+)\s+model\s+(?P<MDL>\d+)" r"\s+stepping\s+(?P<STP>\d+)", re.IGNORECASE) - chnd = _winreg.OpenKey(_winreg.HKEY_LOCAL_MACHINE, self.pkey) # pyright: ignore[reportAttributeAccessIssue] + chnd = _winreg.OpenKey(_winreg.HKEY_LOCAL_MACHINE, self.pkey) pnum = 0 while 1: try: - proc = _winreg.EnumKey(chnd, pnum) # pyright: ignore[reportAttributeAccessIssue] - except _winreg.error: # pyright: ignore[reportAttributeAccessIssue] + proc = _winreg.EnumKey(chnd, pnum) + except _winreg.error: break else: pnum += 1 info.append({"Processor": proc}) - phnd = _winreg.OpenKey(chnd, proc) # pyright: ignore[reportAttributeAccessIssue] + phnd = _winreg.OpenKey(chnd, proc) pidx = 0 while True: try: - name, value, vtpe = _winreg.EnumValue(phnd, pidx) # pyright: ignore[reportAttributeAccessIssue] - except _winreg.error: # pyright: ignore[reportAttributeAccessIssue] + name, value, vtpe = _winreg.EnumValue(phnd, pidx) + except _winreg.error: break else: pidx = pidx + 1 @@ -729,105 +688,105 @@ def __init__(self) -> None: info[-1]["Model"] = int(srch.group("MDL")) info[-1]["Stepping"] = int(srch.group("STP")) except: - print(sys.exc_info()[1], '(ignoring)') + print(sys.exc_value, '(ignoring)') self.__class__.info = info - def _not_impl(self) -> None: + def _not_impl(self): pass # Athlon - def _is_AMD(self) -> bool: + def _is_AMD(self): return self.info[0]['VendorIdentifier'] == 'AuthenticAMD' - def _is_Am486(self) -> bool: + def _is_Am486(self): return self.is_AMD() and self.info[0]['Family'] == 4 - def _is_Am5x86(self) -> bool: + def _is_Am5x86(self): return self.is_AMD() and self.info[0]['Family'] == 4 - def _is_AMDK5(self) -> bool: + def _is_AMDK5(self): return
(self.is_AMD() and self.info[0]['Family'] == 5 and self.info[0]['Model'] in [0, 1, 2, 3]) - def _is_AMDK6(self) -> bool: + def _is_AMDK6(self): return (self.is_AMD() and self.info[0]['Family'] == 5 and self.info[0]['Model'] in [6, 7]) - def _is_AMDK6_2(self) -> bool: + def _is_AMDK6_2(self): return (self.is_AMD() and self.info[0]['Family'] == 5 and self.info[0]['Model'] == 8) - def _is_AMDK6_3(self) -> bool: + def _is_AMDK6_3(self): return (self.is_AMD() and self.info[0]['Family'] == 5 and self.info[0]['Model'] == 9) - def _is_AMDK7(self) -> bool: + def _is_AMDK7(self): return self.is_AMD() and self.info[0]['Family'] == 6 # To reliably distinguish between the different types of AMD64 chips # (Athlon64, Operton, Athlon64 X2, Semperon, Turion 64, etc.) would # require looking at the 'brand' from cpuid - def _is_AMD64(self) -> bool: + def _is_AMD64(self): return self.is_AMD() and self.info[0]['Family'] == 15 # Intel - def _is_Intel(self) -> bool: + def _is_Intel(self): return self.info[0]['VendorIdentifier'] == 'GenuineIntel' - def _is_i386(self) -> bool: + def _is_i386(self): return self.info[0]['Family'] == 3 - def _is_i486(self) -> bool: + def _is_i486(self): return self.info[0]['Family'] == 4 - def _is_i586(self) -> bool: + def _is_i586(self): return self.is_Intel() and self.info[0]['Family'] == 5 - def _is_i686(self) -> bool: + def _is_i686(self): return self.is_Intel() and self.info[0]['Family'] == 6 - def _is_Pentium(self) -> bool: + def _is_Pentium(self): return self.is_Intel() and self.info[0]['Family'] == 5 - def _is_PentiumMMX(self) -> bool: + def _is_PentiumMMX(self): return (self.is_Intel() and self.info[0]['Family'] == 5 and self.info[0]['Model'] == 4) - def _is_PentiumPro(self) -> bool: + def _is_PentiumPro(self): return (self.is_Intel() and self.info[0]['Family'] == 6 and self.info[0]['Model'] == 1) - def _is_PentiumII(self) -> bool: + def _is_PentiumII(self): return (self.is_Intel() and self.info[0]['Family'] == 6 and self.info[0]['Model'] in [3, 5, 
6]) - def _is_PentiumIII(self) -> bool: + def _is_PentiumIII(self): return (self.is_Intel() and self.info[0]['Family'] == 6 and self.info[0]['Model'] in [7, 8, 9, 10, 11]) - def _is_PentiumIV(self) -> bool: + def _is_PentiumIV(self): return self.is_Intel() and self.info[0]['Family'] == 15 - def _is_PentiumM(self) -> bool: + def _is_PentiumM(self): return (self.is_Intel() and self.info[0]['Family'] == 6 and self.info[0]['Model'] in [9, 13, 14]) - def _is_Core2(self) -> bool: + def _is_Core2(self): return (self.is_Intel() and self.info[0]['Family'] == 6 and self.info[0]['Model'] in [15, 16, 17]) # Varia - def _is_singleCPU(self) -> bool: + def _is_singleCPU(self): return len(self.info) == 1 - def _getNCPUs(self) -> int: + def _getNCPUs(self): return len(self.info) - def _has_mmx(self) -> bool: + def _has_mmx(self): if self.is_Intel(): return ((self.info[0]['Family'] == 5 and self.info[0]['Model'] == 4) or @@ -837,7 +796,7 @@ def _has_mmx(self) -> bool: else: return False - def _has_sse(self) -> bool: + def _has_sse(self): if self.is_Intel(): return ((self.info[0]['Family'] == 6 and self.info[0]['Model'] in [7, 8, 9, 10, 11]) or @@ -849,7 +808,7 @@ def _has_sse(self) -> bool: else: return False - def _has_sse2(self) -> bool: + def _has_sse2(self): if self.is_Intel(): return self.is_Pentium4() or self.is_PentiumM() or self.is_Core2() elif self.is_AMD(): @@ -857,10 +816,10 @@ def _has_sse2(self) -> bool: else: return False - def _has_3dnow(self) -> bool: + def _has_3dnow(self): return self.is_AMD() and self.info[0]['Family'] in [5, 6, 15] - def _has_3dnowext(self) -> bool: + def _has_3dnowext(self): return self.is_AMD() and self.info[0]['Family'] in [6, 15] diff --git a/numexpr/expressions.py b/numexpr/expressions.py index b62c3f8..cab0247 100644 --- a/numexpr/expressions.py +++ b/numexpr/expressions.py @@ -11,37 +11,34 @@ __all__ = ['E'] import operator +import sys import threading -import types -from typing import (TYPE_CHECKING, Any, Callable, ClassVar, Final, 
Iterable, - Mapping, NoReturn, TypeVar, cast) - -if TYPE_CHECKING: - from typing_extensions import Self, TypeIs import numpy # Declare a double type that does not exist in Python space -double = numpy.float64 +double = numpy.double # The default kind for undeclared variables default_kind = 'double' int_ = numpy.int32 long_ = numpy.int64 -type_to_kind: Final = {bool: 'bool', int_: 'int', long_: 'long', float: 'float', - double: 'double', complex: 'complex', bytes: 'bytes', str: 'str'} -kind_to_type: Final = {'bool': bool, 'int': int_, 'long': long_, 'float': float, - 'double': double, 'complex': complex, 'bytes': bytes, 'str': str} -kind_rank: Final = ('bool', 'int', 'long', 'float', 'double', 'complex', 'none') -scalar_constant_types: Final = (bool, int_, int, float, double, complex, bytes, str) +type_to_kind = {bool: 'bool', int_: 'int', long_: 'long', float: 'float', + double: 'double', complex: 'complex', bytes: 'bytes', str: 'str'} +kind_to_type = {'bool': bool, 'int': int_, 'long': long_, 'float': float, + 'double': double, 'complex': complex, 'bytes': bytes, 'str': str} +kind_rank = ('bool', 'int', 'long', 'float', 'double', 'complex', 'none') +scalar_constant_types = [bool, int_, int, float, double, complex, bytes, str] + +scalar_constant_types = tuple(scalar_constant_types) from numexpr import interpreter -class Expression: +class Expression(): - def __getattr__(self, name: str) -> Any: + def __getattr__(self, name): if name.startswith('_'): try: return self.__dict__[name] @@ -51,40 +48,38 @@ def __getattr__(self, name: str) -> Any: return VariableNode(name, default_kind) -E: Final = Expression() +E = Expression() class Context(threading.local): - def get(self, value: str, default: object) -> Any: + def get(self, value, default): return self.__dict__.get(value, default) - def get_current_context(self) -> dict[str, Any]: + def get_current_context(self): return self.__dict__ - def set_new_context(self, dict_: Mapping[str, Any]) -> None: + def 
set_new_context(self, dict_): self.__dict__.update(dict_) # This will be called each time the local object is used in a separate thread -_context: Final = Context() +_context = Context() -def get_optimization() -> str: +def get_optimization(): return _context.get('optimization', 'none') -_T = TypeVar('_T') - # helper functions for creating __magic__ methods -def ophelper(f: Callable[..., _T]) -> Callable[..., _T]: - def func(*args: 'ExpressionNode') -> _T: - arglist = list(args) +def ophelper(f): + def func(*args): + args = list(args) for i, x in enumerate(args): if isConstant(x): - arglist[i] = x = ConstantNode(x) + args[i] = x = ConstantNode(x) if not isinstance(x, ExpressionNode): raise TypeError("unsupported object type: %s" % type(x)) - return f(*arglist) + return f(*args) func.__name__ = f.__name__ func.__doc__ = f.__doc__ @@ -92,7 +87,7 @@ def func(*args: 'ExpressionNode') -> _T: return func -def allConstantNodes(args: Iterable[object]) -> bool: +def allConstantNodes(args): "returns True if args are all ConstantNodes." for x in args: if not isinstance(x, ConstantNode): @@ -100,12 +95,12 @@ def allConstantNodes(args: Iterable[object]) -> bool: return True -def isConstant(ex: object) -> "TypeIs[complex | bytes | str | numpy.number]": +def isConstant(ex): "Returns True if ex is a constant scalar of an allowed type." 
- return isinstance(ex, scalar_constant_types) # pyright: ignore[reportArgumentType] + return isinstance(ex, scalar_constant_types) -def commonKind(nodes: Iterable['ExpressionNode | RawNode']) -> str: +def commonKind(nodes): node_kinds = [node.astKind for node in nodes] str_count = node_kinds.count('bytes') + node_kinds.count('str') if 0 < str_count < len(node_kinds): # some args are strings, but not all @@ -122,7 +117,7 @@ def commonKind(nodes: Iterable['ExpressionNode | RawNode']) -> str: min_int32 = -max_int32 - 1 -def bestConstantType(x: object) -> type | None: +def bestConstantType(x): # ``numpy.string_`` is a subclass of ``bytes`` if isinstance(x, (bytes, str)): return bytes @@ -135,14 +130,14 @@ def bestConstantType(x: object) -> type | None: # ``long`` objects are kept as is to allow the user to force # promotion of results by using long constants, e.g. by operating # a 32-bit array with a long (64-bit) constant. - if isinstance(x, (long_, numpy.int64)): # type: ignore[misc] + if isinstance(x, (long_, numpy.int64)): return long_ # ``double`` objects are kept as is to allow the user to force # promotion of results by using double constants, e.g. by operating # a float (32-bit) array with a double (64-bit) constant. if isinstance(x, double): return double - if isinstance(x, numpy.float32): # pyright: ignore[reportArgumentType] + if isinstance(x, numpy.float32): return float if isinstance(x, (int, numpy.integer)): # Constants needing more than 32 bits are always @@ -155,29 +150,25 @@ def bestConstantType(x: object) -> type | None: # ``double`` too. 
for converter in float, complex: try: - y = converter(x) # type: ignore[arg-type, call-overload] + y = converter(x) except Exception as err: continue if y == x or numpy.isnan(y): return converter - return None -def getKind(x: object) -> str: +def getKind(x): converter = bestConstantType(x) - assert converter is not None return type_to_kind[converter] -def binop( - opname: str, reversed: bool = False, kind: str | None = None -) -> Callable[['ExpressionNode', 'ExpressionNode'], 'ExpressionNode']: +def binop(opname, reversed=False, kind=None): # Getting the named method from self (after reversal) does not # always work (e.g. int constants do not have a __lt__ method). opfunc = getattr(operator, "__%s__" % opname) @ophelper - def operation(self: 'ExpressionNode', other: 'ExpressionNode') -> 'ExpressionNode': + def operation(self, other): if reversed: self, other = other, self if allConstantNodes([self, other]): @@ -188,11 +179,9 @@ def operation(self: 'ExpressionNode', other: 'ExpressionNode') -> 'ExpressionNod return operation -def func( - func: Callable[..., Any], minkind: str | None = None, maxkind: str | None = None -) -> Callable[..., 'FuncNode | ConstantNode']: +def func(func, minkind=None, maxkind=None): @ophelper - def function(*args: 'ExpressionNode') -> 'FuncNode | ConstantNode': + def function(*args): if allConstantNodes(args): return ConstantNode(func(*[x.value for x in args])) kind = commonKind(args) @@ -215,23 +204,20 @@ def function(*args: 'ExpressionNode') -> 'FuncNode | ConstantNode': @ophelper -def where_func( - a: 'ExpressionNode', b: 'ExpressionNode', c: 'ExpressionNode' -) -> 'ExpressionNode': +def where_func(a, b, c): if isinstance(a, ConstantNode): return b if a.value else c if allConstantNodes([a, b, c]): - return ConstantNode(numpy.where(a, b, c)) # type: ignore[call-overload] + return ConstantNode(numpy.where(a, b, c)) return FuncNode('where', [a, b, c]) -def encode_axis(axis: 'ConstantNode | int | None') -> 'RawNode': +def encode_axis(axis): 
if isinstance(axis, ConstantNode): axis = axis.value if axis is None: axis = interpreter.allaxes else: - assert isinstance(axis, int) if axis < 0: raise ValueError("negative axis are not supported") if axis > 254: @@ -239,26 +225,24 @@ def encode_axis(axis: 'ConstantNode | int | None') -> 'RawNode': return RawNode(axis) -def gen_reduce_axis_func(name: str) -> Callable[..., 'ExpressionNode']: - def _func(a: object, axis: 'ConstantNode | int | None' = None) -> 'ExpressionNode': - _axis = encode_axis(axis) +def gen_reduce_axis_func(name): + def _func(a, axis=None): + axis = encode_axis(axis) if isinstance(a, ConstantNode): return a - if isinstance(a, (bool, int_, long_, float, double, complex)): # type: ignore[misc] - _a = ConstantNode(a) - else: - _a = cast('ExpressionNode', a) - return FuncNode(name, [_a, _axis], kind=_a.astKind) + if isinstance(a, (bool, int_, long_, float, double, complex)): + a = ConstantNode(a) + return FuncNode(name, [a, axis], kind=a.astKind) return _func @ophelper -def contains_func(a: 'ExpressionNode', b: 'ExpressionNode') -> 'FuncNode': +def contains_func(a, b): return FuncNode('contains', [a, b], kind='bool') @ophelper -def div_op(a: 'ExpressionNode', b: 'ExpressionNode') -> 'OpNode': +def div_op(a, b): if get_optimization() in ('moderate', 'aggressive'): if (isinstance(b, ConstantNode) and (a.astKind == b.astKind) and @@ -268,7 +252,7 @@ def div_op(a: 'ExpressionNode', b: 'ExpressionNode') -> 'OpNode': @ophelper -def truediv_op(a: 'ExpressionNode', b: 'ExpressionNode') -> 'OpNode': +def truediv_op(a, b): if get_optimization() in ('moderate', 'aggressive'): if (isinstance(b, ConstantNode) and (a.astKind == b.astKind) and @@ -281,12 +265,12 @@ def truediv_op(a: 'ExpressionNode', b: 'ExpressionNode') -> 'OpNode': @ophelper -def rtruediv_op(a: 'ExpressionNode', b: 'ExpressionNode') -> 'OpNode': +def rtruediv_op(a, b): return truediv_op(b, a) @ophelper -def pow_op(a: 'ExpressionNode', b: 'ExpressionNode') -> 'ExpressionNode': +def pow_op(a, 
b): if isinstance(b, ConstantNode): x = b.value @@ -302,9 +286,7 @@ def pow_op(a: 'ExpressionNode', b: 'ExpressionNode') -> 'ExpressionNode': n = int_(abs(x)) ishalfpower = int_(abs(2 * x)) % 2 - def multiply( - x: ExpressionNode | None, y: ExpressionNode - ) -> ExpressionNode: + def multiply(x, y): if x is None: return y return OpNode('mul', [x, y]) @@ -345,7 +327,7 @@ def multiply( return OpNode('pow', [a, b]) # The functions and the minimum and maximum types accepted -numpy.expm1x = numpy.expm1 # type: ignore[attr-defined] +numpy.expm1x = numpy.expm1 functions = { 'copy': func(numpy.copy), 'ones_like': func(numpy.ones_like), @@ -408,72 +390,58 @@ def multiply( } -class ExpressionNode: +class ExpressionNode(): """ An object that represents a generic number object. This implements the number special methods so that we can keep track of how this object has been used. """ - astType: ClassVar = 'generic' - astKind: Final[str] - - children: Final[tuple['ExpressionNode | RawNode', ...]] - value: Final[Any] - - def __init__( - self, - value: object | None = None, - kind: str | None = None, - children: Iterable['ExpressionNode | RawNode'] | None = None, - ) -> None: + astType = 'generic' + + def __init__(self, value=None, kind=None, children=None): self.value = value if kind is None: kind = 'none' self.astKind = kind - self.children = () if children is None else tuple(children) + if children is None: + self.children = () + else: + self.children = tuple(children) - def get_real(self) -> 'OpNode | ConstantNode': + def get_real(self): if self.astType == 'constant': return ConstantNode(complex(self.value).real) return OpNode('real', (self,), 'double') - if TYPE_CHECKING: - @property - def real(self) -> 'OpNode | ConstantNode': ... 
- else: - real = property(get_real) + real = property(get_real) - def get_imag(self) -> 'OpNode | ConstantNode': + def get_imag(self): if self.astType == 'constant': return ConstantNode(complex(self.value).imag) return OpNode('imag', (self,), 'double') - if TYPE_CHECKING: - @property - def imag(self) -> 'OpNode | ConstantNode': ... - else: - imag = property(get_imag) + imag = property(get_imag) - def __str__(self) -> str: + def __str__(self): return '%s(%s, %s, %s)' % (self.__class__.__name__, self.value, self.astKind, self.children) - def __repr__(self) -> str: + def __repr__(self): return self.__str__() - def __neg__(self) -> 'OpNode': + def __neg__(self): return OpNode('neg', (self,)) - def __invert__(self) -> 'OpNode': + def __invert__(self): return OpNode('invert', (self,)) - def __pos__(self) -> 'Self': + def __pos__(self): return self # The next check is commented out. See #24 for more info. - def __bool__(self) -> NoReturn: + def __bool__(self): raise TypeError("You can't use Python's standard boolean operators in " "NumExpr expressions. 
You should use their bitwise " "counterparts instead: '&' instead of 'and', " @@ -503,74 +471,64 @@ def __bool__(self) -> NoReturn: __gt__ = binop('gt', kind='bool') __ge__ = binop('ge', kind='bool') - __eq__ = binop('eq', kind='bool') # type: ignore[assignment] - __ne__ = binop('ne', kind='bool') # type: ignore[assignment] + __eq__ = binop('eq', kind='bool') + __ne__ = binop('ne', kind='bool') __lt__ = binop('gt', reversed=True, kind='bool') __le__ = binop('ge', reversed=True, kind='bool') class LeafNode(ExpressionNode): - leafNode: ClassVar = True + leafNode = True class VariableNode(LeafNode): - astType: ClassVar = 'variable' - - def __init__( - self, - value: object | None = None, - kind: str | None = None, - children: None = None, - ) -> None: + astType = 'variable' + + def __init__(self, value=None, kind=None, children=None): LeafNode.__init__(self, value=value, kind=kind) -class RawNode: +class RawNode(): """ Used to pass raw integers to interpreter. For instance, for selecting what function to use in func1. Purposely don't inherit from ExpressionNode, since we don't wan't this to be used for anything but being walked. 
""" - astType: ClassVar = 'raw' - astKind: ClassVar = 'none' + astType = 'raw' + astKind = 'none' - def __init__(self, value: object) -> None: + def __init__(self, value): self.value = value self.children = () - def __str__(self) -> str: + def __str__(self): return 'RawNode(%s)' % (self.value,) __repr__ = __str__ class ConstantNode(LeafNode): - astType: ClassVar = 'constant' + astType = 'constant' - def __init__(self, value: object | None = None, children: None = None): + def __init__(self, value=None, children=None): kind = getKind(value) # Python float constants are double precision by default if kind == 'float' and isinstance(value, float): kind = 'double' LeafNode.__init__(self, value=value, kind=kind) - def __neg__(self) -> 'ConstantNode': # type: ignore[override] + def __neg__(self): return ConstantNode(-self.value) - def __invert__(self) -> 'ConstantNode': # type: ignore[override] + def __invert__(self): return ConstantNode(~self.value) class OpNode(ExpressionNode): - astType: ClassVar = 'op' - - def __init__( - self, - opcode: str | None = None, - args: Iterable[ExpressionNode | RawNode] | None = None, - kind: str | None = None, - ) -> None: + astType = 'op' + + def __init__(self, opcode=None, args=None, kind=None): if (kind is None) and (args is not None): kind = commonKind(args) if kind=='bool': # handle bool*bool and bool+bool cases @@ -580,12 +538,7 @@ def __init__( class FuncNode(OpNode): - def __init__( - self, - opcode: str | None = None, - args: Iterable[ExpressionNode | RawNode] | None = None, - kind: str | None = None, - ) -> None: + def __init__(self, opcode=None, args=None, kind=None): if (kind is None) and (args is not None): kind = commonKind(args) if opcode in ("isnan", "isfinite", "isinf", "signbit"): # bodge for boolean return functions diff --git a/numexpr/interpreter.pyi b/numexpr/interpreter.pyi deleted file mode 100644 index 90f8d80..0000000 --- a/numexpr/interpreter.pyi +++ /dev/null @@ -1,52 +0,0 @@ -from collections.abc import 
Sequence -from typing import Any, Final, Literal, TypeAlias - -import numpy.typing as npt - -_VMLAccuracyMode: TypeAlias = Literal[0, 1, 2, 3] - -MAX_THREADS: Final[int] = ... -__BLOCK_SIZE1__: Final[int] = ... - -#ifdef USE_VML -def _get_vml_version() -> str: ... -def _set_vml_accuracy_mode(mode_in: _VMLAccuracyMode, /) -> _VMLAccuracyMode: ... -def _set_vml_num_threads(max_num_threads: int, /) -> None: ... -def _get_vml_num_threads() -> int: ... -#endif -def _get_num_threads() -> int: ... -def _set_num_threads(num_threads: int, /) -> int: ... - -allaxes: Final = 255 -funccodes: Final[dict[bytes, int]] = ... -maxdims: Final[int] = ... -opcodes: Final[dict[bytes, int]] = ... -use_vml: Final[bool] = ... - -class NumExpr: - signature: Final[bytes] - constsig: Final[bytes] - tempsig: Final[bytes] - fullsig: Final[bytes] - - program: Final[bytes] - constants: Final[Sequence[Any]] - input_names: Final[Sequence[str]] - - def __init__( - self, - signature: bytes, - tempsig: bytes, - program: bytes, - constants: Sequence[Any] = ..., - input_names: Sequence[str] | None = None, - ) -> None: ... - def run( - self, - *args: Any, - casting: str = ..., - order: str = ..., - ex_uses_vml: bool = ..., - out: npt.NDArray[Any] = ..., - ) -> Any: ... 
- __call__ = run diff --git a/numexpr/necompiler.py b/numexpr/necompiler.py index 77f655b..8b80737 100644 --- a/numexpr/necompiler.py +++ b/numexpr/necompiler.py @@ -14,20 +14,17 @@ import re import sys import threading -from typing import (TYPE_CHECKING, Any, ClassVar, Final, Generator, Iterable, - Iterator, Sequence, TypeAlias) - -if TYPE_CHECKING: - from typing_extensions import Unpack +from typing import Dict, Optional import numpy -is_cpu_amd_intel: Final = False # DEPRECATION WARNING: WILL BE REMOVED IN FUTURE RELEASE +is_cpu_amd_intel = False # DEPRECATION WARNING: WILL BE REMOVED IN FUTURE RELEASE from numexpr import expressions, interpreter, use_vml from numexpr.utils import CacheDict, ContextDict # Declare a double type that does not exist in Python space double = numpy.double +double = numpy.double int_ = numpy.int32 long_ = numpy.int64 @@ -89,9 +86,7 @@ ] - - -class ASTNode: +class ASTNode(): """Abstract Syntax Tree node. Members: @@ -103,25 +98,16 @@ class ASTNode: children -- the children below this node reg -- the register assigned to the result for this node. """ - cmpnames: ClassVar = 'astType', 'astKind', 'value', 'children' - - astType: str - astKind: str - value: Any - children: tuple['ASTNode', ...] 
- reg: 'Register | None' - - def __init__(self, astType: str = 'generic', - astKind: str = 'unknown', - value: object | None = None, - children: Iterable['ASTNode'] = ()) -> None: + cmpnames = ['astType', 'astKind', 'value', 'children'] + + def __init__(self, astType='generic', astKind='unknown', value=None, children=()): self.astType = astType self.astKind = astKind self.value = value self.children = tuple(children) self.reg = None - def __eq__(self, other: 'ASTNode') -> bool: # type: ignore[override] + def __eq__(self, other): if self.astType == 'alias': self = self.value if other.astType == 'alias': @@ -133,50 +119,50 @@ def __eq__(self, other: 'ASTNode') -> bool: # type: ignore[override] return False return True - def __lt__(self, other: 'ASTNode') -> bool: + def __lt__(self,other): # RAM: this is a fix for issue #88 whereby sorting on constants # that may be of astKind == 'complex' but type(self.value) == int or float # Here we let NumPy sort as it will cast data properly for comparison # when the Python built-ins will raise an error. 
if self.astType == 'constant': if self.astKind == other.astKind: - return bool(numpy.array(self.value) < numpy.array(other.value)) + return numpy.array(self.value) < numpy.array(other.value) return self.astKind < other.astKind else: raise TypeError('Sorting not implemented for astType: %s'%self.astType) - def __hash__(self) -> int: + def __hash__(self): if self.astType == 'alias': self = self.value return hash((self.astType, self.astKind, self.value, self.children)) - def __str__(self) -> str: + def __str__(self): return 'AST(%s, %s, %s, %s, %s)' % (self.astType, self.astKind, self.value, self.children, self.reg) - def __repr__(self) -> str: + def __repr__(self): return '' % id(self) - def key(self) -> tuple[str, str, Any, tuple['ASTNode', ...]]: + def key(self): return (self.astType, self.astKind, self.value, self.children) - def typecode(self) -> str: + def typecode(self): return kind_to_typecode[self.astKind] - def postorderWalk(self) -> Iterator['ASTNode']: + def postorderWalk(self): for c in self.children: for w in c.postorderWalk(): yield w yield self - def allOf(self, *astTypes: str) -> Iterator['ASTNode']: - astTypeSet = set(astTypes) + def allOf(self, *astTypes): + astTypes = set(astTypes) for w in self.postorderWalk(): - if w.astType in astTypeSet: + if w.astType in astTypes: yield w -def expressionToAST(ex: expressions.ExpressionNode | expressions.RawNode) -> ASTNode: +def expressionToAST(ex): """Take an expression tree made out of expressions.ExpressionNode, and convert to an AST tree. @@ -187,7 +173,7 @@ def expressionToAST(ex: expressions.ExpressionNode | expressions.RawNode) -> AST [expressionToAST(c) for c in ex.children]) -def sigPerms(s: str) -> Generator[str, None, None]: +def sigPerms(s): """Generate all possible signatures derived by upcasting the given signature. 
""" @@ -206,7 +192,7 @@ def sigPerms(s: str) -> Generator[str, None, None]: yield s -def typeCompileAst(ast: ASTNode) -> ASTNode: +def typeCompileAst(ast): """Assign appropriate types to each node in the AST. Will convert opcodes and functions to appropriate upcast version, @@ -249,7 +235,7 @@ def typeCompileAst(ast: ASTNode) -> ASTNode: [typeCompileAst(c) for c in children]) -class Register: +class Register(): """Abstraction for a register in the VM. Members: @@ -260,18 +246,13 @@ class Register: None if no number assigned yet. """ - node: Final[ASTNode] - temporary: bool - immediate: bool - n: int | None - - def __init__(self, astnode: ASTNode, temporary: bool = False) -> None: + def __init__(self, astnode, temporary=False): self.node = astnode self.temporary = temporary self.immediate = False self.n = None - def __str__(self) -> str: + def __str__(self): if self.temporary: name = 'Temporary' else: @@ -279,7 +260,7 @@ def __str__(self) -> str: return '%s(%s, %s, %s)' % (name, self.node.astType, self.node.astKind, self.n,) - def __repr__(self) -> str: + def __repr__(self): return self.__str__() @@ -288,11 +269,11 @@ class Immediate(Register): a register. """ - def __init__(self, astnode: ASTNode) -> None: + def __init__(self, astnode): Register.__init__(self, astnode) self.immediate = True - def __str__(self) -> str: + def __str__(self): return 'Immediate(%d)' % (self.node.value,) @@ -301,8 +282,7 @@ def __str__(self) -> str: _attr_pat = r'\.\b(?!(real|imag|(\d*[eE]?[+-]?\d+)|(\d*[eE]?[+-]?\d+j)|(\d*j))\b)' _blacklist_re = re.compile(f'{_flow_pat}|{_dunder_pat}|{_attr_pat}') -def stringToExpression(s: str, types: dict[str, type], context: dict[str, Any], - sanitize: bool = True) -> expressions.ExpressionNode: +def stringToExpression(s, types, context, sanitize: bool=True): """Given a string, convert it to a tree of ExpressionNode's. 
""" # sanitize the string for obvious attack vectors that NumExpr cannot @@ -326,7 +306,7 @@ def stringToExpression(s: str, types: dict[str, type], context: dict[str, Any], flags = 0 c = compile(s, '', 'eval', flags) # make VariableNode's for the names - names: dict[str, Any] = {} + names = {} for name in c.co_names: if name == "None": names[name] = None @@ -340,10 +320,10 @@ def stringToExpression(s: str, types: dict[str, type], context: dict[str, Any], names.update(expressions.functions) # now build the expression - ex: expressions.ExpressionNode = eval(c, names) + ex = eval(c, names) if expressions.isConstant(ex): - ex = expressions.ConstantNode(ex) + ex = expressions.ConstantNode(ex, expressions.getKind(ex)) elif not isinstance(ex, expressions.ExpressionNode): raise TypeError("unsupported expression type: %s" % type(ex)) finally: @@ -351,12 +331,12 @@ def stringToExpression(s: str, types: dict[str, type], context: dict[str, Any], return ex -def isReduction(ast: ASTNode) -> bool: +def isReduction(ast): prefixes = (b'sum_', b'prod_', b'min_', b'max_') return any(ast.value.startswith(p) for p in prefixes) -def getInputOrder(ast: ASTNode, input_order: list[str] | None = None) -> list[ASTNode]: +def getInputOrder(ast, input_order=None): """ Derive the input order of the variables in an expression. """ @@ -379,16 +359,16 @@ def getInputOrder(ast: ASTNode, input_order: list[str] | None = None) -> list[AS return ordered_variables -def convertConstantToKind(x: Any, kind: str) -> Any: +def convertConstantToKind(x, kind): # Exception for 'float' types that will return the NumPy float32 type if kind == 'float': return numpy.float32(x) - elif isinstance(x, str): + elif isinstance(x,str): return x.encode('ascii') return kind_to_type[kind](x) -def getConstants(ast: ASTNode) -> tuple[list[ASTNode], list[Any]]: +def getConstants(ast): """ RAM: implemented magic method __lt__ for ASTNode to fix issues #88 and #209. The following test code works now, as does the test suite. 
@@ -397,17 +377,14 @@ def getConstants(ast: ASTNode) -> tuple[list[ASTNode], list[Any]]: a = 1 + 3j; b = 5.0 ne.evaluate('a*2 + 15j - b') """ - constant_registers = {node.reg for node in ast.allOf("constant") - if node.reg is not None} - constants_order = sorted(r.node for r in constant_registers) + constant_registers = set([node.reg for node in ast.allOf("constant")]) + constants_order = sorted([r.node for r in constant_registers]) constants = [convertConstantToKind(a.value, a.astKind) for a in constants_order] return constants_order, constants -# unused? -def sortNodesByOrder(nodes: Iterable[ASTNode], - order: Iterable[tuple[int, str, int]]) -> list[ASTNode]: +def sortNodesByOrder(nodes, order): order_map = {} for i, (_, v, _) in enumerate(order): order_map[v] = i @@ -416,12 +393,11 @@ def sortNodesByOrder(nodes: Iterable[ASTNode], return [a[1] for a in dec_nodes] -def assignLeafRegisters(inodes: Iterable[ASTNode], - registerMaker: type[Register]) -> None: +def assignLeafRegisters(inodes, registerMaker): """ Assign new registers to each of the leaf nodes. """ - leafRegisters: dict[tuple[object, ...], Register] = {} + leafRegisters = {} for node in inodes: key = node.key() if key in leafRegisters: @@ -430,8 +406,7 @@ def assignLeafRegisters(inodes: Iterable[ASTNode], node.reg = leafRegisters[key] = registerMaker(node) -def assignBranchRegisters(inodes: Iterable[ASTNode], - registerMaker: type[Register]) -> None: +def assignBranchRegisters(inodes, registerMaker): """ Assign temporary registers to each of the branch nodes. """ @@ -439,11 +414,11 @@ def assignBranchRegisters(inodes: Iterable[ASTNode], node.reg = registerMaker(node, temporary=True) -def collapseDuplicateSubtrees(ast: ASTNode) -> list[ASTNode]: +def collapseDuplicateSubtrees(ast): """ Common subexpression elimination. 
""" - seen: dict[ASTNode, ASTNode] = {} + seen = {} aliases = [] for a in ast.allOf('op'): if a in seen: @@ -462,66 +437,64 @@ def collapseDuplicateSubtrees(ast: ASTNode) -> list[ASTNode]: return aliases -def optimizeTemporariesAllocation(ast: ASTNode) -> None: +def optimizeTemporariesAllocation(ast): """ Attempt to minimize the number of temporaries needed, by reusing old ones. """ - nodes = [n for n in ast.postorderWalk() if n.reg and n.reg.temporary] - users_of: dict[Register, set[ASTNode]] = {n.reg: set() for n in nodes if n.reg} + nodes = [n for n in ast.postorderWalk() if n.reg.temporary] + users_of = dict((n.reg, set()) for n in nodes) + node_regs = dict((n, set(c.reg for c in n.children if c.reg.temporary)) + for n in nodes) if nodes and nodes[-1] is not ast: nodes_to_check = nodes + [ast] else: nodes_to_check = nodes for n in nodes_to_check: for c in n.children: - if c.reg and c.reg.temporary: + if c.reg.temporary: users_of[c.reg].add(n) - unused: dict[str, set[Register]] = {tc: set() for tc in scalar_constant_kinds} + unused = dict([(tc, set()) for tc in scalar_constant_kinds]) for n in nodes: for c in n.children: reg = c.reg - if reg and reg.temporary: + if reg.temporary: users = users_of[reg] users.discard(n) if not users: unused[reg.node.astKind].add(reg) if unused[n.astKind]: reg = unused[n.astKind].pop() - if n.reg: - users_of[reg] = users_of[n.reg] + users_of[reg] = users_of[n.reg] n.reg = reg -def setOrderedRegisterNumbers(order: Sequence[ASTNode], start: int) -> int: +def setOrderedRegisterNumbers(order, start): """ Given an order of nodes, assign register numbers. """ for i, node in enumerate(order): - assert node.reg is not None node.reg.n = start + i return start + len(order) -def setRegisterNumbersForTemporaries(ast: ASTNode, start: int) -> tuple[int, str]: +def setRegisterNumbersForTemporaries(ast, start): """ Assign register numbers for temporary registers, keeping track of aliases and handling immediate operands. 
""" seen = 0 signature = '' - aliases: list[ASTNode] = [] + aliases = [] for node in ast.postorderWalk(): if node.astType == 'alias': aliases.append(node) node = node.value - reg = node.reg - if not reg: - continue - if reg.immediate: - reg.n = node.value + if node.reg.immediate: + node.reg.n = node.value continue + reg = node.reg if reg.n is None: reg.n = start + seen seen += 1 @@ -531,10 +504,7 @@ def setRegisterNumbersForTemporaries(ast: ASTNode, start: int) -> tuple[int, str return start + seen, signature -_ThreeAddressForm: TypeAlias = tuple[bytes, Register, 'Unpack[tuple[Register, ...]]'] - - -def convertASTtoThreeAddrForm(ast: ASTNode) -> list[_ThreeAddressForm]: +def convertASTtoThreeAddrForm(ast): """ Convert an AST to a three address form. @@ -544,59 +514,55 @@ def convertASTtoThreeAddrForm(ast: ASTNode) -> list[_ThreeAddressForm]: I suppose this should be called three register form, but three address form is found in compiler theory. """ - return [(node.value, node.reg, *(c.reg for c in node.children if c.reg)) - for node in ast.allOf('op') if node.reg] + return [(node.value, node.reg) + tuple([c.reg for c in node.children]) + for node in ast.allOf('op')] -def compileThreeAddrForm(program: Iterable[_ThreeAddressForm]) -> bytes: +def compileThreeAddrForm(program): """ Given a three address form of the program, compile it a string that the VM understands. 
""" - def nToChr(reg: Register | None) -> bytes: + def nToChr(reg): if reg is None: return b'\xff' - assert reg.n is not None - if reg.n < 0: + elif reg.n < 0: raise ValueError("negative value for register number %s" % reg.n) - return bytes([reg.n]) + else: + return bytes([reg.n]) - def quadrupleToString(opcode: bytes, - store: Register | None, - a1: Register | None = None, - a2: Register | None = None) -> bytes: + def quadrupleToString(opcode, store, a1=None, a2=None): cop = chr(interpreter.opcodes[opcode]).encode('latin_1') cs = nToChr(store) ca1 = nToChr(a1) ca2 = nToChr(a2) return cop + cs + ca1 + ca2 - def toString(args: _ThreeAddressForm) -> bytes: - opcode: bytes = args[0] - store: Register = args[1] - a1: Register | None = args[2] if len(args) > 2 else None - a2: Register | None = args[3] if len(args) > 3 else None - an: tuple[Register, ...] = args[4:] if len(args) > 4 else () + def toString(args): + while len(args) < 4: + args += (None,) + opcode, store, a1, a2 = args[:4] s = quadrupleToString(opcode, store, a1, a2) l = [s] - while an: - s = quadrupleToString(b'noop', *an[:3]) + args = args[4:] + while args: + s = quadrupleToString(b'noop', *args[:3]) l.append(s) - an = an[3:] + args = args[3:] return b''.join(l) prog_str = b''.join([toString(t) for t in program]) return prog_str -context_info: Final = ( +context_info = [ ('optimization', ('none', 'moderate', 'aggressive'), 'aggressive'), - ('truediv', (False, True, 'auto'), 'auto'), -) + ('truediv', (False, True, 'auto'), 'auto') +] -def getContext(kwargs: dict[str, Any], _frame_depth: int = 1) -> dict[str, Any]: +def getContext(kwargs, _frame_depth=1): d = kwargs.copy() context = {} for name, allowed, default in context_info: @@ -615,23 +581,11 @@ def getContext(kwargs: dict[str, Any], _frame_depth: int = 1) -> dict[str, Any]: return context -_PrecompileResult: TypeAlias = tuple[ - list[_ThreeAddressForm], # threeAddrProgram - str, # inputsig - str, # tempsig - list[Any], # constants - tuple[str, ...], # 
input_names -] - - -def precompile(ex: expressions.ExpressionNode | str, - signature: Iterable[tuple[str, type]] = (), - context: dict[str, Any] = {}, - sanitize: bool = True) -> _PrecompileResult: +def precompile(ex, signature=(), context={}, sanitize: bool=True): """ Compile the expression to an intermediate form. """ - types: dict[str, type] = dict(signature) + types = dict(signature) input_order = [name for (name, type_) in signature] if isinstance(ex, str): @@ -660,16 +614,14 @@ def precompile(ex: expressions.ExpressionNode | str, input_order = getInputOrder(ast, input_order) constants_order, constants = getConstants(ast) - assert ast.reg is not None - if isReduction(ast): ast.reg.temporary = False optimizeTemporariesAllocation(ast) ast.reg.temporary = False - ast.reg.n = 0 r_output = 0 + ast.reg.n = 0 r_inputs = r_output + 1 r_constants = setOrderedRegisterNumbers(input_order, r_inputs) @@ -678,15 +630,12 @@ def precompile(ex: expressions.ExpressionNode | str, threeAddrProgram = convertASTtoThreeAddrForm(ast) input_names = tuple([a.value for a in input_order]) - inputsig = ''.join(type_to_typecode[types.get(x, default_type)] - for x in input_names) - return threeAddrProgram, inputsig, tempsig, constants, input_names + signature = ''.join(type_to_typecode[types.get(x, default_type)] + for x in input_names) + return threeAddrProgram, signature, tempsig, constants, input_names -def NumExpr(ex: expressions.ExpressionNode | str, - signature: Iterable[tuple[str, type]] = (), - sanitize: bool = True, - **kwargs: object) -> interpreter.NumExpr: +def NumExpr(ex, signature=(), sanitize: bool=True, **kwargs): """ Compile an expression built using E. variables to a function. @@ -710,21 +659,23 @@ def NumExpr(ex: expressions.ExpressionNode | str, program, constants, input_names) -def disassemble(nex: interpreter.NumExpr) -> list[list[bytes | int | None]]: +def disassemble(nex): """ Given a NumExpr object, return a list which is the program disassembled. 
""" - rev_opcodes = {code: op for op, code in interpreter.opcodes.items()} + rev_opcodes = {} + for op in interpreter.opcodes: + rev_opcodes[interpreter.opcodes[op]] = op r_constants = 1 + len(nex.signature) r_temps = r_constants + len(nex.constants) - def parseOp(op: bytes) -> tuple[bytes, bytes]: - name, sig, *_ = *op.rsplit(b'_', 1), b'' + def parseOp(op): + name, sig = [*op.rsplit(b'_', 1), ''][:2] return name, sig - def getArg(pc: int, offset: int) -> int | bytes | None: + def getArg(pc, offset): arg = nex.program[pc + (offset if offset < 4 else offset+1)] - _, sig = parseOp(rev_opcodes[nex.program[pc]]) + _, sig = parseOp(rev_opcodes.get(nex.program[pc])) try: code = sig[offset - 1] except IndexError: @@ -748,9 +699,9 @@ def getArg(pc: int, offset: int) -> int | bytes | None: source = [] for pc in range(0, len(nex.program), 4): - op = rev_opcodes[nex.program[pc]] + op = rev_opcodes.get(nex.program[pc]) _, sig = parseOp(op) - parsed: list[bytes | int | None] = [op] + parsed = [op] for i in range(len(sig)): parsed.append(getArg(pc, 1 + i)) while len(parsed) < 4: @@ -759,7 +710,7 @@ def getArg(pc: int, offset: int) -> int | bytes | None: return source -def getType(a: numpy.typing.NDArray[Any] | numpy.generic) -> type: +def getType(a): kind = a.dtype.kind if kind == 'b': return bool @@ -782,9 +733,7 @@ def getType(a: numpy.typing.NDArray[Any] | numpy.generic) -> type: raise ValueError("unknown type %s" % a.dtype.name) -def getExprNames(text: str, - context: dict[str, Any], - sanitize: bool = True) -> tuple[list[str], bool]: +def getExprNames(text, context, sanitize: bool=True): ex = stringToExpression(text, {}, context, sanitize) ast = expressionToAST(ex) input_order = getInputOrder(ast, None) @@ -802,10 +751,7 @@ def getExprNames(text: str, return [a.value for a in input_order], ex_uses_vml -def getArguments(names: Iterable[str], - local_dict: dict[str, Any] | None = None, - global_dict: dict[str, Any] | None = None, - _frame_depth: int = 2) -> 
list[numpy.typing.NDArray[Any]]: +def getArguments(names, local_dict=None, global_dict=None, _frame_depth: int=2): """ Get the arguments based on the names. """ @@ -849,14 +795,14 @@ def getArguments(names: Iterable[str], def validate(ex: str, - local_dict: dict[str, Any] | None = None, - global_dict: dict[str, Any] | None = None, - out: numpy.typing.NDArray[Any] | None = None, + local_dict: Optional[Dict] = None, + global_dict: Optional[Dict] = None, + out: numpy.ndarray = None, order: str = 'K', casting: str = 'safe', _frame_depth: int = 2, - sanitize: bool | None = None, - **kwargs: object) -> Exception | None: + sanitize: Optional[bool] = None, + **kwargs) -> Optional[Exception]: r""" Validate a NumExpr expression with the given `local_dict` or `locals()`. Returns `None` on success and the Exception object if one occurs. Note that @@ -903,7 +849,7 @@ def validate(ex: str, like float64 to float32, are allowed. * 'unsafe' means any data conversions may be done. - sanitize: bool | None + sanitize: Optional[bool] Both `validate` and by extension `evaluate` call `eval(ex)`, which is potentially dangerous on unsanitized inputs. As such, NumExpr by default performs simple sanitization, banning the character ':;[', the @@ -967,14 +913,14 @@ def validate(ex: str, return None def evaluate(ex: str, - local_dict: dict[str, Any] | None = None, - global_dict: dict[str, Any] | None = None, - out: numpy.typing.NDArray[Any] | None = None, + local_dict: Optional[Dict] = None, + global_dict: Optional[Dict] = None, + out: numpy.ndarray = None, order: str = 'K', casting: str = 'same_kind', - sanitize: bool | None = None, + sanitize: Optional[bool] = None, _frame_depth: int = 3, - **kwargs: object) -> numpy.typing.NDArray[Any]: + **kwargs) -> numpy.ndarray: r""" Evaluate a simple array expression element-wise using the virtual machine. 
@@ -1044,9 +990,9 @@ def evaluate(ex: str, else: raise e -def re_evaluate(local_dict: dict[str, Any] | None = None, - global_dict: dict[str, Any] | None = None, - _frame_depth: int = 2) -> numpy.typing.NDArray[Any]: +def re_evaluate(local_dict: Optional[Dict] = None, + global_dict: Optional[Dict] = None, + _frame_depth: int=2) -> numpy.ndarray: """ Re-evaluate the previous executed array expression without any check. @@ -1064,20 +1010,13 @@ def re_evaluate(local_dict: dict[str, Any] | None = None, """ if not hasattr(_numexpr_last, 'l'): _numexpr_last.l = ContextDict() - ctx: ContextDict[Any] = _numexpr_last.l try: - compiled_ex = ctx['ex'] + compiled_ex = _numexpr_last.l['ex'] except KeyError: raise RuntimeError("A previous evaluate() execution was not found, please call `validate` or `evaluate` once before `re_evaluate`") - assert compiled_ex is not None - - argnames = ctx['argnames'] - assert argnames is not None + argnames = _numexpr_last.l['argnames'] args = getArguments(argnames, local_dict, global_dict, _frame_depth=_frame_depth) - - kwargs = ctx['kwargs'] - assert kwargs is not None - + kwargs = _numexpr_last.l['kwargs'] # with evaluate_lock: return compiled_ex(*args, **kwargs) diff --git a/numexpr/py.typed b/numexpr/py.typed deleted file mode 100644 index e69de29..0000000 diff --git a/numexpr/tests/__init__.py b/numexpr/tests/__init__.py index 9ecc21d..f47c8cc 100644 --- a/numexpr/tests/__init__.py +++ b/numexpr/tests/__init__.py @@ -8,8 +8,7 @@ # rights to use. 
#################################################################### -from numexpr.tests.test_numexpr import print_versions as print_versions -from numexpr.tests.test_numexpr import test as test +from numexpr.tests.test_numexpr import print_versions, test if __name__ == '__main__': - test() # type: ignore[no-untyped-call] + test() diff --git a/numexpr/tests/conftest.py b/numexpr/tests/conftest.py index 6cf5ca1..3d32260 100644 --- a/numexpr/tests/conftest.py +++ b/numexpr/tests/conftest.py @@ -11,7 +11,7 @@ import pytest -def pytest_configure(config: pytest.Config) -> None: +def pytest_configure(config): config.addinivalue_line( "markers", "thread_unsafe: mark test as unsafe for parallel execution" ) diff --git a/numexpr/tests/test_numexpr.py b/numexpr/tests/test_numexpr.py index b9efc87..e2c97f7 100644 --- a/numexpr/tests/test_numexpr.py +++ b/numexpr/tests/test_numexpr.py @@ -9,7 +9,6 @@ # rights to use. #################################################################### -# mypy: ignore-errors import os import platform diff --git a/numexpr/utils.py b/numexpr/utils.py index 83ae1bd..9e45fbe 100644 --- a/numexpr/utils.py +++ b/numexpr/utils.py @@ -15,8 +15,6 @@ import contextvars import os import subprocess -from typing import (Final, Generic, ItemsView, Iterable, Iterator, KeysView, - Literal, TypeVar, ValuesView, cast, overload) from numexpr import use_vml from numexpr.interpreter import MAX_THREADS, _get_num_threads, _set_num_threads @@ -27,25 +25,19 @@ from numexpr.interpreter import (_get_vml_num_threads, _get_vml_version, _set_vml_accuracy_mode, _set_vml_num_threads) -else: - # mypy does not understand this, whereas pyright does - _get_vml_num_threads = None # type: ignore[assignment] - _get_vml_version = None # type: ignore[assignment] - _set_vml_accuracy_mode = None # type: ignore[assignment] - _set_vml_num_threads = None # type: ignore[assignment] -def get_vml_version() -> str | None: +def get_vml_version(): """ Get the VML/MKL library version. 
""" - if _get_vml_version is not None: + if use_vml: return _get_vml_version() else: return None -def set_vml_accuracy_mode(mode: str | None) -> Literal['low', 'high', 'fast'] | None: +def set_vml_accuracy_mode(mode): """ Set the accuracy mode for VML operations. @@ -64,10 +56,8 @@ def set_vml_accuracy_mode(mode: str | None) -> Literal['low', 'high', 'fast'] | Returns old accuracy settings. """ - if _set_vml_accuracy_mode is not None: - acc_dict: dict[str | None, Literal[0, 1, 2, 3]] + if use_vml: acc_dict = {None: 0, 'low': 1, 'high': 2, 'fast': 3} - acc_reverse_dict: dict[int, Literal['low', 'high', 'fast']] acc_reverse_dict = {1: 'low', 2: 'high', 3: 'fast'} if mode not in list(acc_dict.keys()): raise ValueError( @@ -78,7 +68,7 @@ def set_vml_accuracy_mode(mode: str | None) -> Literal['low', 'high', 'fast'] | return None -def set_vml_num_threads(nthreads: int) -> None: +def set_vml_num_threads(nthreads): """ Suggests a maximum number of threads to be used in VML operations. @@ -90,11 +80,11 @@ def set_vml_num_threads(nthreads: int) -> None: for more info about it. """ - if _set_vml_num_threads is not None: + if use_vml: _set_vml_num_threads(nthreads) pass -def get_vml_num_threads() -> int | None: +def get_vml_num_threads(): """ Gets the maximum number of threads to be used in VML operations. @@ -106,11 +96,11 @@ def get_vml_num_threads() -> int | None: for more info about it. """ - if _get_vml_num_threads is not None: + if use_vml: return _get_vml_num_threads() return None -def set_num_threads(nthreads: int) -> int: +def set_num_threads(nthreads): """ Sets a number of threads to be used in operations. @@ -122,13 +112,13 @@ def set_num_threads(nthreads: int) -> int: old_nthreads = _set_num_threads(nthreads) return old_nthreads -def get_num_threads() -> int: +def get_num_threads(): """ Gets the number of threads currently in use for operations. 
""" return _get_num_threads() -def _init_num_threads() -> int: +def _init_num_threads(): """ Detects the environment variable 'NUMEXPR_MAX_THREADS' to set the threadpool size, and if necessary the slightly redundant 'NUMEXPR_NUM_THREADS' or @@ -178,7 +168,7 @@ def _init_num_threads() -> int: return requested_threads -def detect_number_of_cores() -> int: +def detect_number_of_cores(): """ Detects the number of cores on a system. Cribbed from pp. """ @@ -187,7 +177,7 @@ def detect_number_of_cores() -> int: if "SC_NPROCESSORS_ONLN" in os.sysconf_names: # Linux & Unix: ncpus = os.sysconf("SC_NPROCESSORS_ONLN") - if isinstance(ncpus, int) and ncpus > 0: # type: ignore[redundant-expr] + if isinstance(ncpus, int) and ncpus > 0: return ncpus else: # OSX: return int(subprocess.check_output(["sysctl", "-n", "hw.ncpu"])) @@ -201,7 +191,7 @@ def detect_number_of_cores() -> int: return 1 # Default -def detect_number_of_threads() -> int: +def detect_number_of_threads(): """ DEPRECATED: use `_init_num_threads` instead. If this is modified, please update the note in: https://github.com/pydata/numexpr/wiki/Numexpr-Users-Guide @@ -221,90 +211,64 @@ def detect_number_of_threads() -> int: return nthreads -_KT = TypeVar('_KT') -_VT = TypeVar('_VT') - - -class CacheDict(dict[_KT, _VT], Generic[_KT, _VT]): +class CacheDict(dict): """ A dictionary that prevents itself from growing too much. 
""" - maxentries: Final[int] - - def __init__(self, maxentries: int) -> None: + def __init__(self, maxentries): self.maxentries = maxentries - super().__init__(self) + super(CacheDict, self).__init__(self) - def __setitem__(self, key: _KT, value: _VT) -> None: + def __setitem__(self, key, value): # Protection against growing the cache too much if len(self) > self.maxentries: # Remove a 10% of (arbitrary) elements from the cache entries_to_remove = self.maxentries // 10 for k in list(self.keys())[:entries_to_remove]: - super().__delitem__(k) - super().__setitem__(key, value) + super(CacheDict, self).__delitem__(k) + super(CacheDict, self).__setitem__(key, value) -class ContextDict(Generic[_VT]): +class ContextDict: """ A context aware version dictionary """ - _context_data: contextvars.ContextVar[dict[str, _VT]] - - def __init__(self) -> None: + def __init__(self): self._context_data = contextvars.ContextVar('context_data', default={}) - @overload - def set(self, key: None = None, value: None = None, **kwargs: _VT) -> None: ... - @overload - def set(self, key: str, value: _VT, **kwargs: _VT) -> None: ... - def set(self, key: str | None = None, value: _VT | None = None, **kwargs: _VT) -> None: + def set(self, key=None, value=None, **kwargs): data = self._context_data.get().copy() if key is not None: - data[key] = cast('_VT', value) + data[key] = value for k, v in kwargs.items(): data[k] = v self._context_data.set(data) - @overload - def get(self, key: str, default: _VT) -> _VT: ... - @overload - def get(self, key: str, default: _VT | None = None) -> _VT | None: ... 
- def get(self, key: str, default: _VT | None = None) -> _VT | None: + def get(self, key, default=None): data = self._context_data.get() return data.get(key, default) - def delete(self, key: str) -> None: + def delete(self, key): data = self._context_data.get().copy() if key in data: del data[key] self._context_data.set(data) - def clear(self) -> None: + def clear(self): self._context_data.set({}) - def all(self) -> dict[str, _VT]: + def all(self): return self._context_data.get() - @overload - def update(self, **kwargs: _VT) -> None: ... - @overload - def update(self, other: dict[str, _VT], /, **kwargs: _VT) -> None: ... - @overload - def update(self, other: Iterable[tuple[str, _VT]], /, **kwargs: _VT) -> None: ... - def update( # type: ignore[misc] # false positive mypy error - self, - *args: dict[str, _VT] | Iterable[tuple[str, _VT]], - **kwargs: _VT, - ) -> None: + def update(self, *args, **kwargs): data = self._context_data.get().copy() if args: - if len(args) != 1: + if len(args) > 1: raise TypeError(f"update() takes at most 1 positional argument ({len(args)} given)") other = args[0] if isinstance(other, dict): @@ -316,32 +280,32 @@ def update( # type: ignore[misc] # false positive mypy error data.update(kwargs) self._context_data.set(data) - def keys(self) -> KeysView[str]: + def keys(self): return self._context_data.get().keys() - def values(self) -> ValuesView[_VT]: + def values(self): return self._context_data.get().values() - def items(self) -> ItemsView[str, _VT]: + def items(self): return self._context_data.get().items() - def __getitem__(self, key: str) -> _VT | None: + def __getitem__(self, key): return self.get(key) - def __setitem__(self, key: str, value: _VT) -> None: + def __setitem__(self, key, value): self.set(key, value) - def __delitem__(self, key: str) -> None: + def __delitem__(self, key): self.delete(key) - def __contains__(self, key: str) -> bool: + def __contains__(self, key): return key in self._context_data.get() - def __len__(self) -> 
int: + def __len__(self): return len(self._context_data.get()) - def __iter__(self) -> Iterator[str]: + def __iter__(self): return iter(self._context_data.get()) - def __repr__(self) -> str: + def __repr__(self): return repr(self._context_data.get()) diff --git a/numexpr/version.pyi b/numexpr/version.pyi deleted file mode 100644 index 2dfa994..0000000 --- a/numexpr/version.pyi +++ /dev/null @@ -1,6 +0,0 @@ -from typing import Final - -__version__: Final[str] = ... -version: Final[str] = ... -numpy_build_version: Final[str] = ... -platform_machine: Final[str] = ... diff --git a/pyproject.toml b/pyproject.toml index e5a7d18..264a999 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -23,7 +23,6 @@ classifiers = [ "Intended Audience :: Science/Research", "Programming Language :: Python", "Topic :: Software Development :: Libraries :: Python Modules", - "Typing :: Typed", "Operating System :: Microsoft :: Windows", "Operating System :: Unix", "Programming Language :: Python :: 3", @@ -63,23 +62,3 @@ test-command = ["python -m pytest --pyargs numexpr"] [[tool.cibuildwheel.overrides]] select = "cp31*t-*" test-command = ["python -m pytest --parallel-threads=4 --pyargs numexpr"] - - -[tool.mypy] -files = ["numexpr/*.py"] -exclude = ["^bench/", "^build/", "^doc/", "^issues/"] -strict = true -disable_error_code = ["no-any-return"] -enable_error_code = ["ignore-without-code", "redundant-expr", "truthy-bool"] -warn_unreachable = false -local_partial_types = true -allow_redefinition_new = true - - -[tool.pyright] -include = ["numexpr"] -exclude = ["bench", "build", "doc", "issues", "numexpr/tests", "setup.py"] -ignore = [".venv", "numexpr/tests/test_numexpr.py", "setup.py"] -stubPath = "." -reportPrivateUsage = false -reportConstantRedefinition = false diff --git a/setup.py b/setup.py index beeece3..64d9f20 100644 --- a/setup.py +++ b/setup.py @@ -9,8 +9,6 @@ # rights to use. 
#################################################################### -# mypy: ignore-errors - import configparser import os import os.path as op From 898b4d9dbfdb654391999c65744f678f5bd05508 Mon Sep 17 00:00:00 2001 From: lshaw8317 Date: Mon, 13 Oct 2025 16:12:48 +0200 Subject: [PATCH 157/166] Update tests to pass for numpy 1.26 --- numexpr/tests/test_numexpr.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/numexpr/tests/test_numexpr.py b/numexpr/tests/test_numexpr.py index e2c97f7..9e98ff1 100644 --- a/numexpr/tests/test_numexpr.py +++ b/numexpr/tests/test_numexpr.py @@ -963,7 +963,11 @@ def test_expressions( # "overflows" or "divide by zero" in plain eval(). warnings.simplefilter("ignore") try: - npval = eval(expr, globals(), locals()) + npexpr = expr + if "sign" in expr and dtype==complex and np.__version__<"2.0": + #definition of sign changed in numpy 2.0 for complex numbers + npexpr = expr.replace("sign(b+c)", "(b+c)/abs(b+c)") + npval = eval(npexpr, globals(), locals()) except Exception as ex: np_exception = ex npval = None From d9cf0f0785f9c8de8d790800ceebff89eced8354 Mon Sep 17 00:00:00 2001 From: Luke Shaw Date: Mon, 13 Oct 2025 17:10:54 +0200 Subject: [PATCH 158/166] Getting ready for release 2.14.1 --- ANNOUNCE.rst | 18 ++++++------------ RELEASE_NOTES.rst | 3 ++- VERSION | 2 +- 3 files changed, 9 insertions(+), 14 deletions(-) diff --git a/ANNOUNCE.rst b/ANNOUNCE.rst index 7ce676c..f6f4c7e 100644 --- a/ANNOUNCE.rst +++ b/ANNOUNCE.rst @@ -1,27 +1,21 @@ ========================= -Announcing NumExpr 2.14. +Announcing NumExpr 2.14.1 ========================= Hi everyone, -NumExpr 2.14.0 introduces a couple of patches for tan / tanh and -adds static typing support. -Thanks to Luke Shaw and Joren Hammudoglu (@jorenham) for these contributions. +NumExpr 2.14.1 introduces a patch to ensure compatibility with NumPy 1.26, +rolling back static typing support. 
Project documentation is available at: https://numexpr.readthedocs.io/ -Changes from 2.13.1 to 2.14.0 +Changes from 2.14.0 to 2.14.1 ----------------------------- -* Numerical stability for overflow has been improved for ``tan`` and ``tanh`` - to handle possible overflows for complex numbers. - -* Static typing support has been added, making NumExpr compatible with - static type checkers like `mypy` and `pyright`. - Thanks to Joren Hammudoglu (@jorenham) for the work. - +* Rolled back static typing support to ensure compatibiity with NumPy 1.26. +* Added CI tests for NumPy 1.26 What's Numexpr? --------------- diff --git a/RELEASE_NOTES.rst b/RELEASE_NOTES.rst index 0294048..f433ada 100644 --- a/RELEASE_NOTES.rst +++ b/RELEASE_NOTES.rst @@ -5,7 +5,8 @@ Release notes for NumExpr 2.14 series Changes from 2.14.0 to 2.14.1 ----------------------------- -* **Under development.** +* Rolled back static typing support to ensure compatibiity with NumPy 1.26. +* Added CI tests for NumPy 1.26 Changes from 2.13.1 to 2.14.0 diff --git a/VERSION b/VERSION index 7e52938..b70ae75 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -2.14.1.dev0 +2.14.1 From 048a5c42b09e4c5a4ccb5097162ff7c571973fde Mon Sep 17 00:00:00 2001 From: Luke Shaw Date: Mon, 13 Oct 2025 17:21:55 +0200 Subject: [PATCH 159/166] Getting ready for release 2.14.1 --- ANNOUNCE.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ANNOUNCE.rst b/ANNOUNCE.rst index f6f4c7e..c3fda2c 100644 --- a/ANNOUNCE.rst +++ b/ANNOUNCE.rst @@ -4,7 +4,7 @@ Announcing NumExpr 2.14.1 Hi everyone, -NumExpr 2.14.1 introduces a patch to ensure compatibility with NumPy 1.26, +NumExpr 2.14.1 introduces patches to ensure compatibility with NumPy 1.26, rolling back static typing support. 
Project documentation is available at: From 15ee846b5e8a295b9c4ae863db769aa662ac11f1 Mon Sep 17 00:00:00 2001 From: Luke Shaw Date: Mon, 13 Oct 2025 17:42:40 +0200 Subject: [PATCH 160/166] Streamline build --- .github/workflows/build.yml | 41 ++++++++++++++++++++++--------------- 1 file changed, 25 insertions(+), 16 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 976a42f..8e88f52 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -15,71 +15,80 @@ jobs: CIBW_ARCHS_LINUX: ${{ matrix.arch }} CIBW_ARCHS_MACOS: "x86_64 arm64" CIBW_ENABLE: cpython-freethreading + strategy: fail-fast: false matrix: include: - # Linux x86_64 builds + # Linux x86_64 (build wheels) - os: ubuntu-latest arch: x86_64 artifact_name: "linux-x86_64" + python-version: "3.x" - # Linux x86_64 with numpy 1.23 + # Linux x86_64 (test numpy 1.26) - os: ubuntu-latest arch: x86_64 - artifact_name: "linux-x86_64_numpy1_23" + artifact_name: "linux-x86_64_numpy1_26" + python-version: "3.12" numpy-version: "1.26" - # Linux ARM64 builds (native runners) + # Linux ARM64 (build wheels) - os: ubuntu-24.04-arm arch: aarch64 artifact_name: "linux-aarch64" + python-version: "3.x" - # Windows builds + # Windows (build wheels) - os: windows-latest arch: x86_64 artifact_name: "windows-x86_64" + python-version: "3.x" - # macOS builds (universal2) + # macOS (build wheels) - os: macos-latest arch: x86_64 artifact_name: "macos-universal2" + python-version: "3.x" + steps: - uses: actions/checkout@v3 - uses: actions/setup-python@v3 name: Install Python with: - python-version: '3.12' + python-version: ${{ matrix.python-version }} - - name: Install specific numpy version + # Run tests with specific numpy version + - name: Install and test with specific numpy version if: matrix.numpy-version - run: pip install "numpy==${{ matrix.numpy-version }}.*" - - - name: Local Build - run: pip install -e . 
- - - name: Test run: | + pip install "numpy==${{ matrix.numpy-version }}.*" + pip install -e . pip install pytest python -m pytest + # Build wheels only if: + # - No numpy version is specified + # - Python version is "3.x" - name: Build wheels + if: ${{ !matrix.numpy-version }} uses: pypa/cibuildwheel@v3.1.3 - name: Make sdist - if: ${{ matrix.os == 'windows-latest' }} + if: ${{ matrix.os == 'windows-latest' && !matrix.numpy-version }} run: | python -m pip install build python -m build --sdist --outdir wheelhouse . - uses: actions/upload-artifact@v4 + if: ${{ !matrix.numpy-version }} with: name: ${{ matrix.artifact_name }} path: ./wheelhouse/* - name: Upload to GitHub Release + if: startsWith(github.ref, 'refs/tags/') && !matrix.numpy-version uses: softprops/action-gh-release@v1 - if: startsWith(github.ref, 'refs/tags/') with: files: wheelhouse/* From cedcf1617bb17a404e0963b63be06a12703ddf16 Mon Sep 17 00:00:00 2001 From: Luke Shaw Date: Mon, 13 Oct 2025 18:20:03 +0200 Subject: [PATCH 161/166] Post 2.14.1 release actions done --- RELEASE_NOTES.rst | 5 +++++ VERSION | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/RELEASE_NOTES.rst b/RELEASE_NOTES.rst index f433ada..43ca0d1 100644 --- a/RELEASE_NOTES.rst +++ b/RELEASE_NOTES.rst @@ -2,6 +2,11 @@ Release notes for NumExpr 2.14 series ===================================== +Changes from 2.14.1 to 2.14.2 +----------------------------- + +* **Under development.** + Changes from 2.14.0 to 2.14.1 ----------------------------- diff --git a/VERSION b/VERSION index b70ae75..3047337 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -2.14.1 +2.14.2.dev0 From 0b98caf833d9ceaf6e0bc36d3c7b11ce9364e14b Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 13 Oct 2025 19:15:40 +0000 Subject: [PATCH 162/166] [pre-commit.ci] pre-commit autoupdate MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit updates: - 
[github.com/pycqa/isort: 6.1.0 → 7.0.0](https://github.com/pycqa/isort/compare/6.1.0...7.0.0) --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 3235b5d..8b00c7e 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -14,7 +14,7 @@ repos: # - id: flake8 # - repo: https://github.com/pycqa/isort - rev: 6.1.0 + rev: 7.0.0 hooks: - id: isort From d1aa68b1a79944c3b4b706bc138db3639e327a00 Mon Sep 17 00:00:00 2001 From: 27rabbitlt <27rabbitlt@gmail.com> Date: Fri, 7 Nov 2025 17:03:11 +0100 Subject: [PATCH 163/166] FEAT: add `disable_cache` parameter in `evaluate` function. When set to True, we disable the internal cache mechenism nad evaluate the expression directly --- numexpr/necompiler.py | 32 ++++++++++++++++++++++++++++++++ numexpr/tests/test_numexpr.py | 9 +++++++++ 2 files changed, 41 insertions(+) diff --git a/numexpr/necompiler.py b/numexpr/necompiler.py index 8b80737..4307d37 100644 --- a/numexpr/necompiler.py +++ b/numexpr/necompiler.py @@ -920,6 +920,7 @@ def evaluate(ex: str, casting: str = 'same_kind', sanitize: Optional[bool] = None, _frame_depth: int = 3, + disable_cache: bool = False, **kwargs) -> numpy.ndarray: r""" Evaluate a simple array expression element-wise using the virtual machine. @@ -978,10 +979,41 @@ def evaluate(ex: str, The calling frame depth. Unless you are a NumExpr developer you should not set this value. + disable_cache: bool + If set to be `True`, disables the uses of internal expression cache. + + By default, NumExpr caches compiled expressions and associated metadata + (via the internal `_numexpr_last`, `_numexpr_cache`, and `_names_cache` + structures). This allows repeated evaluations of the same expression + to skip recompilation, improving performance in workloads where the same + expression is executed multiple times. 
+ + However, caching retains references to input and output arrays in order + to support re-evaluation. As a result, this can increase their reference + counts and may prevent them from being garbage-collected immediately. + In situations where precise control over object lifetimes or memory + management is required, set `disable_cache=True` to avoid this behavior. + + Default is `False`. + """ # We could avoid code duplication if we called validate and then re_evaluate # here, but we have difficulties with the `sys.getframe(2)` call in # `getArguments` + + # If dissable_cache set to be True, we evaluate the expression here + # Otherwise we validate and then re_evaluate + if disable_cache: + context = getContext(kwargs) + names, ex_uses_vml = getExprNames(ex, context, sanitize=sanitize) + arguments = getArguments(names, local_dict, global_dict, _frame_depth=_frame_depth - 1) + signature = [(name, getType(arg)) for (name, arg) in + zip(names, arguments)] + compiled_ex = NumExpr(ex, signature, sanitize=sanitize, **context) + kwargs = {'out': out, 'order': order, 'casting': casting, + 'ex_uses_vml': ex_uses_vml} + return compiled_ex(*arguments, **kwargs) + e = validate(ex, local_dict=local_dict, global_dict=global_dict, out=out, order=order, casting=casting, _frame_depth=_frame_depth, sanitize=sanitize, **kwargs) diff --git a/numexpr/tests/test_numexpr.py b/numexpr/tests/test_numexpr.py index 9e98ff1..9ae9170 100644 --- a/numexpr/tests/test_numexpr.py +++ b/numexpr/tests/test_numexpr.py @@ -336,6 +336,15 @@ def _test_refcount(self): assert sys.getrefcount(a) == 2 evaluate('1') assert sys.getrefcount(a) == 2 + + # Test if `disable_cache` works correctly with refcount, see issue #521 + @unittest.skipIf(hasattr(sys, "pypy_version_info"), + "PyPy does not have sys.getrefcount()") + def test_refcount_disable_cache(self): + a = array([1]) + b = array([1]) + evaluate('a', out=b, disable_cache=True) + assert sys.getrefcount(b) == 2 @pytest.mark.thread_unsafe def 
test_locals_clears_globals(self): From 20f31d46fd499aa83def2328829411c4ecc2a105 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 7 Nov 2025 16:05:26 +0000 Subject: [PATCH 164/166] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- numexpr/necompiler.py | 2 +- numexpr/tests/test_numexpr.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/numexpr/necompiler.py b/numexpr/necompiler.py index 4307d37..96c66f6 100644 --- a/numexpr/necompiler.py +++ b/numexpr/necompiler.py @@ -1007,7 +1007,7 @@ def evaluate(ex: str, context = getContext(kwargs) names, ex_uses_vml = getExprNames(ex, context, sanitize=sanitize) arguments = getArguments(names, local_dict, global_dict, _frame_depth=_frame_depth - 1) - signature = [(name, getType(arg)) for (name, arg) in + signature = [(name, getType(arg)) for (name, arg) in zip(names, arguments)] compiled_ex = NumExpr(ex, signature, sanitize=sanitize, **context) kwargs = {'out': out, 'order': order, 'casting': casting, diff --git a/numexpr/tests/test_numexpr.py b/numexpr/tests/test_numexpr.py index 9ae9170..c4f63d2 100644 --- a/numexpr/tests/test_numexpr.py +++ b/numexpr/tests/test_numexpr.py @@ -336,7 +336,7 @@ def _test_refcount(self): assert sys.getrefcount(a) == 2 evaluate('1') assert sys.getrefcount(a) == 2 - + # Test if `disable_cache` works correctly with refcount, see issue #521 @unittest.skipIf(hasattr(sys, "pypy_version_info"), "PyPy does not have sys.getrefcount()") From c657605cd1d61f3ee871d71595074513750f3dab Mon Sep 17 00:00:00 2001 From: 27rabbitlt <27rabbitlt@gmail.com> Date: Fri, 7 Nov 2025 19:25:15 +0100 Subject: [PATCH 165/166] FIX: comment out refcount since Python optimizes refcounts. 
Keep the test for future reference purpose --- numexpr/tests/test_numexpr.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/numexpr/tests/test_numexpr.py b/numexpr/tests/test_numexpr.py index c4f63d2..46fad29 100644 --- a/numexpr/tests/test_numexpr.py +++ b/numexpr/tests/test_numexpr.py @@ -338,9 +338,10 @@ def _test_refcount(self): assert sys.getrefcount(a) == 2 # Test if `disable_cache` works correctly with refcount, see issue #521 + # Comment out as modern Python optimizes handling refcounts. @unittest.skipIf(hasattr(sys, "pypy_version_info"), "PyPy does not have sys.getrefcount()") - def test_refcount_disable_cache(self): + def _test_refcount_disable_cache(self): a = array([1]) b = array([1]) evaluate('a', out=b, disable_cache=True) From 1e3241d9883eeabc08b15d2d5b0f28a2804381fc Mon Sep 17 00:00:00 2001 From: Francesc Alted Date: Tue, 2 Dec 2025 12:52:05 +0100 Subject: [PATCH 166/166] Print versions during tests execution --- numexpr/tests/conftest.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/numexpr/tests/conftest.py b/numexpr/tests/conftest.py index 3d32260..ea8b30f 100644 --- a/numexpr/tests/conftest.py +++ b/numexpr/tests/conftest.py @@ -10,8 +10,12 @@ import pytest +import numexpr + def pytest_configure(config): config.addinivalue_line( "markers", "thread_unsafe: mark test as unsafe for parallel execution" ) + print("") + numexpr.print_versions()