{
  "@context": "https://schema.org",
  "@type": "Dataset",
  "name": "agent-agentic-os — evals",
  "dateModified": "2026-06-12T01:05:59.820Z",
  "benchmarks": [
    "GAIA",
    "AgentBench",
    "WebArena",
    "OSWorld",
    "SWE-bench Multi",
    "Cybench"
  ],
  "avg_score_pct": 82,
  "regression_gates": 124,
  "ship_blocker_drop_pct": 2
}