{
  "@context": "https://schema.org",
  "@type": "Dataset",
  "name": "Benchmarks",
  "dateModified": "2026-06-02T10:00:37.351Z",
  "benchmarks": [
    {
      "id": "TH-LEGAL-QA",
      "name": "Thai Legal QA",
      "size": 3200,
      "metric": "exact-match+LLM-judge"
    },
    {
      "id": "NOTARY-INTENT",
      "name": "Notary Intent Routing",
      "size": 8400,
      "metric": "F1"
    },
    {
      "id": "NAATI-EVAL",
      "name": "NAATI Translation Quality",
      "size": 1200,
      "metric": "COMET-22+human"
    },
    {
      "id": "SAFETY-COMP",
      "name": "Safety Composite",
      "size": 4820,
      "metric": "attack-success-rate (lower=better)"
    },
    {
      "id": "TOOL-USE",
      "name": "Tool-Use Agentic",
      "size": 540,
      "metric": "task-success"
    },
    {
      "id": "COST-LAT",
      "name": "Cost-Latency Pareto",
      "metric": "pareto-front"
    }
  ]
}