Initial commit: three sem_cython12 demos (Iris boundary, anomaly detection, multi-criteria selection)

2026-05-09 15:23:21 +01:00
commit 0909b835d8
6 changed files with 665 additions and 0 deletions
@@ -0,0 +1,106 @@
+"""Demo 3 - Multi-criteria candidate selection.
+
+You have 100 candidates evaluated on 4 independent criteria
+(quality, cost-efficiency, robustness, compatibility - or whatever
+your domain calls them).  You want to pick the ones worth a deeper
+look.
+
+Naive ranking by total score finds the high-mean candidates - which
+are often single-criterion peaks whose one high score compensates for
+weakness on the rest.
+
+SEM's two-stage filter
+  1) best-tradeoff filter ('Pareto core')
+  2) cross-criterion filter ('non-redundant witnesses')
+finds the genuine all-rounders: candidates that are not strictly
+worse than another on every axis AND that contribute meaningfully on
+multiple axes (not just one).
+
+Run:
+    python 03_multicriteria_selection.py
+"""
+
+from __future__ import annotations
+
+import numpy as np
+from sem_cython12 import wrapper as cy
+
+
+def main() -> int:
+    if not cy.available():
+        print("ERROR: sem_cython12 compiled extension did not load.")
+        return 1
+
+    rng = np.random.default_rng(7)
+
+    N, K = 100, 4
+    criteria_names = ["Quality", "Cost-efficiency", "Robustness", "Compatibility"]
+
+    # Most candidates: noisy uniform draws across the criteria
+    S = rng.uniform(0.30, 0.95, size=(N, K))
+
+    # Inject 5 hidden 'all-rounders' that score moderately well on EVERY
+    # criterion - none top any single axis, but they're well-balanced.
+    S[0:5] = rng.uniform(0.65, 0.85, size=(5, K))
+
+    # ---- Naive ranking by sum of scores ---------------------------------
+    naive_order = np.argsort(S.sum(axis=1))[::-1]
+    naive_top10 = naive_order[:10]
+
+    # ---- SEM ranking ----------------------------------------------------
+    pareto_mask = cy.pareto_core_mask(S)
+    pareto_idx = np.where(pareto_mask == 1)[0]
+
+    nrw = cy.non_redundant_witnesses(S)
+
+    # ---- Reporting ------------------------------------------------------
+    print(f"Candidates                       : {N}")
+    print(f"Criteria                         : {K} ({', '.join(criteria_names)})")
+    print()
+    print(f"Best-tradeoff frontier size      : {len(pareto_idx)}")
+    print(f"Cross-criterion winners (NRW)    : {len(nrw)}")
+    print(f"Hidden all-rounders we injected  : 5 (indices 0-4)")
+    print()
+
+    overlap_with_hidden = set(nrw.tolist()) & set(range(5))
+    naive_overlap_with_hidden = set(naive_top10.tolist()) & set(range(5))
+    print(f"NRW recovered hidden all-rounders     : "
+          f"{len(overlap_with_hidden)}/5  {sorted(overlap_with_hidden)}")
+    print(f"Naive top-10 found hidden all-rounders: "
+          f"{len(naive_overlap_with_hidden)}/5  {sorted(naive_overlap_with_hidden)}")
+    print()
+
+    # Profile of NRW candidates
+    print("Cross-criterion winners (NRW) - score profiles:")
+    print(f"  {'idx':>4}  " + " ".join(f"{n[:8]:>9}" for n in criteria_names) +
+          f"   {'min':>5}  {'mean':>5}")
+    for i in nrw:
+        scores = S[i]
+        print(f"  {int(i):>4}  " +
+              " ".join(f"{v:9.3f}" for v in scores) +
+              f"   {scores.min():5.2f}  {scores.mean():5.2f}")
+    print()
+
+    print("Naive top-3 (by total score) - score profiles for comparison:")
+    print(f"  {'idx':>4}  " + " ".join(f"{n[:8]:>9}" for n in criteria_names) +
+          f"   {'min':>5}  {'mean':>5}")
+    for i in naive_top10[:3]:
+        scores = S[i]
+        print(f"  {int(i):>4}  " +
+              " ".join(f"{v:9.3f}" for v in scores) +
+              f"   {scores.min():5.2f}  {scores.mean():5.2f}")
+    print()
+
+    # Wow line - honest comparison
+    n_nrw_hits = len(overlap_with_hidden)
+    n_naive_hits = len(naive_overlap_with_hidden)
+    print(f"*** SEM's NRW filter recovered {n_nrw_hits}/5 hidden all-rounders. ***")
+    print(f"*** Naive sum-of-scores top-10 found only {n_naive_hits}/5.            ***")
+    if n_nrw_hits > n_naive_hits:
+        print(f"*** SEM surfaces {n_nrw_hits - n_naive_hits} candidates the naive ranking misses     ***")
+        print(f"*** because they don't peak on any single criterion.        ***")
+    return 0
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())