commit 39d9f0d71a22e89a0eda80c6ede687f6eef35ff1 Author: vvs Date: Sat May 9 14:02:02 2026 +0100 Initial release: precompiled Linux x86_64 / CPython 3.12 binary + Python wrapper OpenMP-parallel numerical kernel library. No build step required at install: drop-in shared object plus thin numpy-facing wrapper. Contents: - sem_cython12/sem_core12.cpython-312-x86_64-linux-gnu.so - sem_cython12/wrapper.py - sem_cython12/__init__.py - requirements.txt - README.md diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..ad61f71 --- /dev/null +++ b/.gitignore @@ -0,0 +1,12 @@ +__pycache__/ +*.pyc +*.pyo +.venv/ +venv/ +build/ +dist/ +*.egg-info/ +.pytest_cache/ +.idea/ +.vscode/ +.DS_Store diff --git a/README.md b/README.md new file mode 100644 index 0000000..649b7cc --- /dev/null +++ b/README.md @@ -0,0 +1,135 @@ +# sem_cython12 + +OpenMP-parallel numerical kernel library for Python. Pre-built Linux +shared object included; no compilation required at install time. + +## Contents + +- `sem_cython12/sem_core12.cpython-312-x86_64-linux-gnu.so` - + compiled extension (Linux, CPython 3.12, x86_64). +- `sem_cython12/wrapper.py` - Python API. +- `sem_cython12/__init__.py` - package entry. + +## Requirements + +- Linux x86_64. +- CPython 3.12. +- numpy >= 1.23 (see `requirements.txt`). +- A modern glibc + libgomp. Both ship with Ubuntu 20.04 LTS and + later. No other system libraries needed. + +The Windows / macOS binaries are not included in this distribution. + +## Install + +```bash +git clone https://git.sevana.biz/vvs/sem_cython12.git +cd sem_cython12 +pip install -r requirements.txt +# Make the package importable, either: +pip install -e . 
# if pyproject.toml/setup.py is added +# or just put the package on PYTHONPATH: +export PYTHONPATH=$PWD:$PYTHONPATH +``` + +## Quick start + +```python +import numpy as np +from sem_cython12 import wrapper as cy + +# Sanity check +assert cy.available(), "compiled extension did not load" +print("backend:", cy.backend()) + +# Thread count (defaults to ~50% of logical cores; set explicitly via +# either the SEM_NUM_THREADS env var or set_num_threads()): +cy.set_num_threads(8) +print("threads:", cy.get_num_threads()) + +# Example workload +rng = np.random.default_rng(0) +Q = rng.standard_normal((1000, 32)) # 1000 queries +M = rng.standard_normal((5000, 32)) # 5000 reference points + +# For each query: max similarity to any reference, with kernel scale lam. +sim = cy.batch_max_similarity(Q, M, lam=1.0) +print(sim.shape, sim.dtype) # (1000,) float64 +``` + +## API reference + +All functions accept either Python lists or numpy arrays; the wrapper casts array inputs to +contiguous `float64`, except the matrices inside `member_mats`, which it forwards to the compiled core as given. Outputs are numpy arrays. 
+ +### Configuration + +| Function | Purpose | +|---|---| +| `available() -> bool` | True iff the compiled extension loaded | +| `backend() -> str` | `'cython12'` or `'python-fallback'` | +| `get_num_threads() -> int` | Active OpenMP worker count (1 if the extension is unavailable) | +| `set_num_threads(n: int)` | Set OpenMP worker count (n >= 1); no-op if the extension is unavailable | + +### Distance / similarity + +| Function | Inputs | Output | +|---|---|---| +| `batch_max_similarity(X_query, X_members, lam)` | `(Q, D)`, `(M, D)`, `lam > 0` | `(Q,)` - per-query max over member rows of `exp(-d / lam)`, `d` = query-to-member distance; zeros if `X_members` is empty | +| `concept_support_matrix(X_query, member_mats, lam)` | `(Q, D)`, list of `(M_k, D)`, `lam > 0` | `(Q, K)` - one column per member matrix | +| `pairwise_distances(X)` | `(N, D)` | `(N, N)` - symmetric Euclidean matrix | +| `nn_distances(X)` | `(N, D)` | `(N,)` - min positive distance per row; `inf` if none | + +### Pareto / dominance + +| Function | Inputs | Output | +|---|---|---| +| `pareto_core_mask(S)` | `(N, k)` | `(N,)` byte mask: `1` iff row not strictly dominated | +| `one_sided_mask(S)` | `(N, k)` | `(N, k)` byte mask: see docstring | +| `non_redundant_witnesses(S)` | `(N, k)` | int32 array of row indices | + +### Vector reduction + +| Function | Inputs | Output | +|---|---|---| +| `extend_frontier_kernel(cur_centers, cur_radii, new_emb, cur_arity)` | `(F, D)`, `(F,)`, `(A, D)`, `int` | `(flat_centers (F*A, D), flat_radii (F*A,))` | + +See the wrapper docstrings for exact semantics of each function. + +## Performance notes + +Threads are configured globally per process; calling +`set_num_threads(n)` updates the OpenMP team size for all subsequent +calls. The default uses approximately 50% of the host's logical +cores so other processes are not starved on shared machines. + +For workloads dominated by `pairwise_distances` and +`pareto_core_mask`, near-linear scaling up to ~8 threads is typical +on commodity x86 hardware. `batch_max_similarity` is BLAS-friendly +and benefits most from larger `M` (reference set) at fixed `D`. 
+ +## Memory / threading model + +- All arrays are processed in shared memory; no inter-process + serialisation. +- Each routine releases the GIL during its inner loops, so calling + it concurrently from Python threads is safe. +- The compiled extension links against the system OpenMP runtime + (`libgomp`); avoid mixing with conda's `intel-openmp` in the same + process if possible. + +## Diagnostics + +`backend()` returns `'python-fallback'` only when the `.so` failed +to import (wrong architecture, glibc too old, missing libgomp). In +that state every numerical function raises `RuntimeError`, whereas +`set_num_threads()` silently does nothing and `get_num_threads()` +returns 1; check `available()` at startup to fail loudly and early. + +## Licence + +Proprietary. Internal use only. + +## Support + +Open an issue at https://git.sevana.biz/vvs/sem_cython12. diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..5b82150 --- /dev/null +++ b/requirements.txt @@ -0,0 +1 @@ +numpy>=1.23 diff --git a/sem_cython12/__init__.py b/sem_cython12/__init__.py new file mode 100644 index 0000000..f5ac44b --- /dev/null +++ b/sem_cython12/__init__.py @@ -0,0 +1,8 @@ +"""sem_cython12 - OpenMP-parallel numerical kernel library. + +Pre-compiled C-level routines callable from Python. A Linux +shared-object ships with the package; no build step required at +install time. + +Public API: see ``sem_cython12.wrapper``. +""" diff --git a/sem_cython12/sem_core12.cpython-312-x86_64-linux-gnu.so b/sem_cython12/sem_core12.cpython-312-x86_64-linux-gnu.so new file mode 100644 index 0000000..c343a77 Binary files /dev/null and b/sem_cython12/sem_core12.cpython-312-x86_64-linux-gnu.so differ diff --git a/sem_cython12/wrapper.py b/sem_cython12/wrapper.py new file mode 100644 index 0000000..b7b4f90 --- /dev/null +++ b/sem_cython12/wrapper.py @@ -0,0 +1,162 @@ +"""sem_cython12.wrapper - thin Python API over the compiled core. + +Numpy-facing entry points for OpenMP-parallel numerical routines. 
+Each function accepts numpy arrays, marshals them to contiguous +float64 form, and returns numpy arrays. + +Thread count is shared globally via ``set_num_threads`` / the +``SEM_NUM_THREADS`` environment variable; default is roughly 50% of +the host's logical cores. +""" + +from __future__ import annotations + +import numpy as np + +# Fail gracefully when the compiled extension is not present. +try: + from sem_cython12.sem_core12 import ( + cy12_get_num_threads, + cy12_set_num_threads, + cy12_batch_max_similarity, + cy12_concept_support_matrix, + cy12_pareto_core_mask, + cy12_one_sided_mask, + cy12_non_redundant_witnesses, + cy12_pairwise_distances, + cy12_nn_distances, + cy12_extend_frontier_kernel, + ) + _HAS_CYTHON = True +except ImportError: + _HAS_CYTHON = False + + +def available() -> bool: + """Return True iff the compiled extension imported successfully.""" + return _HAS_CYTHON + + +def backend() -> str: + """Return ``'cython12'`` when the compiled extension is loaded, + otherwise ``'python-fallback'``. In the fallback case the + numpy-facing helpers below raise RuntimeError; callers should + branch on ``available()`` and provide their own pure-Python path + when False.""" + return "cython12" if _HAS_CYTHON else "python-fallback" + + +def get_num_threads() -> int: + """Return the active OpenMP worker count.""" + if not _HAS_CYTHON: + return 1 + return int(cy12_get_num_threads()) + + +def set_num_threads(n: int) -> None: + """Set the OpenMP worker count. No-op when the extension is + unavailable. ``n`` must be >= 1.""" + if not _HAS_CYTHON: + return + cy12_set_num_threads(int(n)) + + +# ---- numpy-facing helpers --------------------------------------- + + +def batch_max_similarity(X_query, X_members, lam: float) -> np.ndarray: + """For each row of ``X_query`` (shape ``(Q, D)``), return the + maximum of ``exp(-||q - m|| / lam)`` over rows of ``X_members`` + (shape ``(M, D)``). Output shape: ``(Q,)``. Empty members + array yields zeros. 
``lam`` must be > 0.""" + if not _HAS_CYTHON: + raise RuntimeError("sem_cython12 not available; import guarded caller") + Xq = np.ascontiguousarray(X_query, dtype=np.float64) + Xm = np.ascontiguousarray(X_members, dtype=np.float64) + if Xm.size == 0: + return np.zeros(Xq.shape[0], dtype=np.float64) + return cy12_batch_max_similarity(Xq, Xm, float(lam)) + + +def concept_support_matrix(X_query, member_mats, lam: float) -> np.ndarray: + """For each row of ``X_query`` (shape ``(Q, D)``) and each member + matrix in ``member_mats`` (a list of ``(M_k, D)`` arrays; M_k may + differ), return the ``(Q, K)`` matrix whose entry ``(q, k)`` is + ``batch_max_similarity(X_query[q:q+1], member_mats[k], lam)``. + Empty member matrices contribute a column of zeros.""" + if not _HAS_CYTHON: + raise RuntimeError("sem_cython12 not available; import guarded caller") + Xq = np.ascontiguousarray(X_query, dtype=np.float64) + return cy12_concept_support_matrix(Xq, list(member_mats), float(lam)) + + +def pareto_core_mask(S) -> np.ndarray: + """Given a real-valued matrix ``S`` of shape ``(N, k)``, return a + byte mask of shape ``(N,)`` where ``mask[i] == 1`` iff row ``i`` + is not strictly dominated by any other row in the + maximisation order (``a`` dominates ``b`` iff ``a >= b`` + componentwise and ``a != b``).""" + if not _HAS_CYTHON: + raise RuntimeError("sem_cython12 not available; import guarded caller") + return cy12_pareto_core_mask(np.ascontiguousarray(S, dtype=np.float64)) + + +def one_sided_mask(S) -> np.ndarray: + """Given ``S`` of shape ``(N, k)``, return a byte mask of shape + ``(N, k)`` where ``mask[w, i] == 1`` iff row ``w`` attains the + column-i maximum AND, on every other column ``j``, the value + ``S[w, j]`` is strictly below the max of column ``j`` taken over + all rows other than ``w``.""" + if not _HAS_CYTHON: + raise RuntimeError("sem_cython12 not available; import guarded caller") + return cy12_one_sided_mask(np.ascontiguousarray(S, dtype=np.float64)) + + +def 
non_redundant_witnesses(S) -> np.ndarray: + """Given ``S`` of shape ``(N, k)``, return a 1-D int32 array of + row indices that are in the Pareto core AND not flagged by + ``one_sided_mask`` on any column.""" + if not _HAS_CYTHON: + raise RuntimeError("sem_cython12 not available; import guarded caller") + return cy12_non_redundant_witnesses(np.ascontiguousarray(S, dtype=np.float64)) + + +def pairwise_distances(X) -> np.ndarray: + """Symmetric ``(N, N)`` Euclidean distance matrix from rows of + ``X`` (shape ``(N, D)``). Computed with parallel upper-triangle + iteration; the lower triangle is mirrored.""" + if not _HAS_CYTHON: + raise RuntimeError("sem_cython12 not available; import guarded caller") + return cy12_pairwise_distances(np.ascontiguousarray(X, dtype=np.float64)) + + +def nn_distances(X) -> np.ndarray: + """For each row of ``X`` (shape ``(N, D)``), return the minimum + POSITIVE Euclidean distance to any other row. Rows for which no + positive-distance neighbour exists receive ``inf`` so the caller + can filter them with ``np.isfinite``.""" + if not _HAS_CYTHON: + raise RuntimeError("sem_cython12 not available; import guarded caller") + return cy12_nn_distances(np.ascontiguousarray(X, dtype=np.float64)) + + +def extend_frontier_kernel(cur_centers, cur_radii, new_emb, cur_arity: int): + """Fused (centroid + radius) bulk computation. For each pair + ``(f, a)`` with ``f`` in ``range(F)`` and ``a`` in ``range(A)``: + + new_center = (cur_arity * cur_centers[f] + new_emb[a]) / (cur_arity + 1) + norm_new = || new_emb[a] - new_center || + new_radius = max(1e-12, + (cur_radii[f] * cur_arity + norm_new) / (cur_arity + 1)) + + Returns ``(flat_new_centers, flat_radii)`` with shapes + ``((F*A, D), (F*A,))`` packed in row-major (f, a) order. 
+ OpenMP-parallel over the outer ``f`` index.""" + if not _HAS_CYTHON: + raise RuntimeError("sem_cython12 not available; import guarded caller") + return cy12_extend_frontier_kernel( + np.ascontiguousarray(cur_centers, dtype=np.float64), + np.ascontiguousarray(cur_radii, dtype=np.float64), + np.ascontiguousarray(new_emb, dtype=np.float64), + int(cur_arity), + )