# sem_cython12-demos/03_multicriteria_selection.py

"""Demo 3 - Multi-criteria candidate selection.

You have 100 candidates evaluated on 4 independent criteria
(quality, cost-efficiency, robustness, compatibility - or whatever
your domain calls them).  You want to pick the ones worth a deeper
look.

Naive ranking by total score finds the high-mean candidates - which
are often single-criterion peaks that compensate with weakness on
the rest.

SEM's two-stage filter
  1) best-tradeoff filter ('Pareto core')
  2) cross-criterion filter ('non-redundant witnesses')
finds the genuine all-rounders: candidates that are not strictly
worse than another on every axis AND that contribute meaningfully on
multiple axes (not just one).

Run:
    python 03_multicriteria_selection.py
"""

from __future__ import annotations

import numpy as np
from sem_cython12 import wrapper as cy


def _print_profiles(title: str, S: np.ndarray, indices, criteria_names: list[str]) -> None:
    """Print a per-criterion score table for the candidates in *indices*.

    Each row shows the candidate index, its score on every criterion, and
    the minimum and mean of that row of ``S``.  A blank line follows the
    table.
    """
    print(title)
    # Criterion names are cut to 8 characters so the header fits the
    # 9-wide numeric columns printed below it.
    print(f"  {'idx':>4}  " + " ".join(f"{n[:8]:>9}" for n in criteria_names) +
          f"   {'min':>5}  {'mean':>5}")
    for i in indices:
        scores = S[i]
        print(f"  {int(i):>4}  " +
              " ".join(f"{v:9.3f}" for v in scores) +
              f"   {scores.min():5.2f}  {scores.mean():5.2f}")
    print()


def main() -> int:
    """Run the multi-criteria selection demo and print a comparison report.

    Builds a seeded synthetic score matrix with a few injected balanced
    candidates, ranks candidates naively by total score, runs the two
    ``sem_cython12`` filters on the same matrix, and prints how many of
    the injected candidates each approach recovers.

    Returns:
        1 if the compiled extension is not available, otherwise 0.
    """
    if not cy.available():
        print("ERROR: sem_cython12 compiled extension did not load.")
        return 1

    rng = np.random.default_rng(7)

    N, K = 100, 4
    criteria_names = ["Quality", "Cost-efficiency", "Robustness", "Compatibility"]
    # Number of injected candidates; they occupy rows 0 .. n_hidden-1 of S.
    # Kept in one place so the slice, the index set and the report agree.
    n_hidden = 5
    hidden = set(range(n_hidden))

    # Most candidates: noisy uniform draws across the criteria
    S = rng.uniform(0.30, 0.95, size=(N, K))

    # Inject hidden 'all-rounders' drawn from a narrower, moderately high
    # band on EVERY criterion - well-balanced rather than peaked.
    S[:n_hidden] = rng.uniform(0.65, 0.85, size=(n_hidden, K))

    # ---- Naive ranking by sum of scores ---------------------------------
    naive_order = np.argsort(S.sum(axis=1))[::-1]  # descending total score
    naive_top10 = naive_order[:10]

    # ---- SEM ranking ----------------------------------------------------
    # NOTE(review): pareto_core_mask is treated here as a per-candidate
    # 0/1 mask and non_redundant_witnesses as an array of row indices into
    # S - confirm against the wrapper's documentation.
    pareto_mask = cy.pareto_core_mask(S)
    pareto_idx = np.where(pareto_mask == 1)[0]

    nrw = cy.non_redundant_witnesses(S)

    # ---- Reporting ------------------------------------------------------
    print(f"Candidates                       : {N}")
    print(f"Criteria                         : {K} ({', '.join(criteria_names)})")
    print()
    print(f"Best-tradeoff frontier size      : {len(pareto_idx)}")
    print(f"Cross-criterion winners (NRW)    : {len(nrw)}")
    print(f"Hidden all-rounders we injected  : {n_hidden} (indices 0-{n_hidden - 1})")
    print()

    overlap_with_hidden = set(nrw.tolist()) & hidden
    naive_overlap_with_hidden = set(naive_top10.tolist()) & hidden
    print(f"NRW recovered hidden all-rounders     : "
          f"{len(overlap_with_hidden)}/{n_hidden}  {sorted(overlap_with_hidden)}")
    print(f"Naive top-10 found hidden all-rounders: "
          f"{len(naive_overlap_with_hidden)}/{n_hidden}  {sorted(naive_overlap_with_hidden)}")
    print()

    _print_profiles("Cross-criterion winners (NRW) - score profiles:",
                    S, nrw, criteria_names)
    _print_profiles("Naive top-3 (by total score) - score profiles for comparison:",
                    S, naive_top10[:3], criteria_names)

    # Wow line - honest comparison
    n_nrw_hits = len(overlap_with_hidden)
    n_naive_hits = len(naive_overlap_with_hidden)
    # Say "only" just when the naive ranking really did find fewer.
    only = "only " if n_naive_hits < n_nrw_hits else ""
    print(f"*** SEM's NRW filter recovered {n_nrw_hits}/{n_hidden} hidden all-rounders. ***")
    print(f"*** Naive sum-of-scores top-10 found {only}{n_naive_hits}/{n_hidden}.            ***")
    if n_nrw_hits > n_naive_hits:
        # Count the candidates NRW found that naive did not (set difference).
        # A difference of the two counts under-reports when the hit sets
        # are not nested.
        missed_by_naive = len(overlap_with_hidden - naive_overlap_with_hidden)
        noun = "candidate" if missed_by_naive == 1 else "candidates"
        print(f"*** SEM surfaces {missed_by_naive} {noun} the naive ranking misses     ***")
        print("*** because they don't peak on any single criterion.        ***")
    return 0


# Script entry point: hand main()'s return value to the interpreter as the
# process exit status.
if __name__ == "__main__":
    exit_status = main()
    raise SystemExit(exit_status)