{
  "assamese": {
    "n_samples": 500,
    "wer_raw": 37.44,
    "wer_norm": 32.05,
    "wer_numcanon": 32.05,
    "mer": 14.89,
    "cer_norm": 14.28,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 5.4,
      "norm_to_numcanon": 0.0,
      "norm_to_mer": 17.15
    },
    "wer_norm_nonum": 31.37,
    "mer_nonum": 14.29,
    "numeric_samples_dropped": 14,
    "space_norm_wer": 25.94
  },
  "bengali": {
    "n_samples": 500,
    "wer_raw": 19.36,
    "wer_norm": 13.65,
    "wer_numcanon": 13.65,
    "mer": 3.89,
    "cer_norm": 3.89,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 5.71,
      "norm_to_numcanon": 0.0,
      "norm_to_mer": 9.76
    },
    "wer_norm_nonum": 11.13,
    "mer_nonum": 2.1,
    "numeric_samples_dropped": 63,
    "space_norm_wer": 9.79
  },
  "english": {
    "n_samples": 500,
    "wer_raw": 22.73,
    "wer_norm": 12.35,
    "wer_numcanon": 12.35,
    "mer": 7.79,
    "cer_norm": 7.87,
    "empty_hypotheses": 1,
    "normalization_delta": {
      "raw_to_norm": 10.38,
      "norm_to_numcanon": 0.0,
      "norm_to_mer": 4.56
    },
    "wer_norm_nonum": 11.14,
    "mer_nonum": 6.87,
    "numeric_samples_dropped": 98,
    "space_norm_wer": 7.31
  },
  "gujarati": {
    "n_samples": 500,
    "wer_raw": 22.61,
    "wer_norm": 14.8,
    "wer_numcanon": 14.8,
    "mer": 4.63,
    "cer_norm": 4.53,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 7.81,
      "norm_to_numcanon": 0.0,
      "norm_to_mer": 10.17
    },
    "wer_norm_nonum": 13.11,
    "mer_nonum": 3.28,
    "numeric_samples_dropped": 30,
    "space_norm_wer": 11.32
  },
  "hindi": {
    "n_samples": 500,
    "wer_raw": 17.06,
    "wer_norm": 9.31,
    "wer_numcanon": 9.31,
    "mer": 2.95,
    "cer_norm": 2.81,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 7.75,
      "norm_to_numcanon": 0.0,
      "norm_to_mer": 6.36
    },
    "wer_norm_nonum": 8.83,
    "mer_nonum": 2.56,
    "numeric_samples_dropped": 6,
    "space_norm_wer": 6.94
  },
  "kannada": {
    "n_samples": 500,
    "wer_raw": 28.62,
    "wer_norm": 20.61,
    "wer_numcanon": 20.61,
    "mer": 4.37,
    "cer_norm": 4.62,
    "empty_hypotheses": 1,
    "normalization_delta": {
      "raw_to_norm": 8.01,
      "norm_to_numcanon": 0.0,
      "norm_to_mer": 16.24
    },
    "wer_norm_nonum": 19.77,
    "mer_nonum": 3.55,
    "numeric_samples_dropped": 12,
    "space_norm_wer": 13.01
  },
  "malayalam": {
    "n_samples": 500,
    "wer_raw": 43.29,
    "wer_norm": 35.9,
    "wer_numcanon": 35.9,
    "mer": 6.59,
    "cer_norm": 6.6,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 7.39,
      "norm_to_numcanon": 0.0,
      "norm_to_mer": 29.31
    },
    "wer_norm_nonum": 35.86,
    "mer_nonum": 6.58,
    "numeric_samples_dropped": 1,
    "space_norm_wer": 27.49
  },
  "marathi": {
    "n_samples": 500,
    "wer_raw": 26.93,
    "wer_norm": 19.24,
    "wer_numcanon": 19.24,
    "mer": 6.3,
    "cer_norm": 6.38,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 7.69,
      "norm_to_numcanon": 0.0,
      "norm_to_mer": 12.94
    },
    "wer_norm_nonum": 15.11,
    "mer_nonum": 3.32,
    "numeric_samples_dropped": 66,
    "space_norm_wer": 15.0
  },
  "odia": {
    "n_samples": 500,
    "wer_raw": 29.14,
    "wer_norm": 21.11,
    "wer_numcanon": 21.11,
    "mer": 4.52,
    "cer_norm": 4.85,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 8.03,
      "norm_to_numcanon": 0.0,
      "norm_to_mer": 16.59
    },
    "wer_norm_nonum": 19.31,
    "mer_nonum": 3.11,
    "numeric_samples_dropped": 52,
    "space_norm_wer": 12.83
  },
  "punjabi": {
    "n_samples": 500,
    "wer_raw": 22.33,
    "wer_norm": 14.79,
    "wer_numcanon": 14.79,
    "mer": 4.74,
    "cer_norm": 4.47,
    "empty_hypotheses": 1,
    "normalization_delta": {
      "raw_to_norm": 7.54,
      "norm_to_numcanon": 0.0,
      "norm_to_mer": 10.05
    },
    "wer_norm_nonum": 14.57,
    "mer_nonum": 4.56,
    "numeric_samples_dropped": 2,
    "space_norm_wer": 11.42
  },
  "tamil": {
    "n_samples": 500,
    "wer_raw": 30.06,
    "wer_norm": 23.23,
    "wer_numcanon": 23.23,
    "mer": 4.06,
    "cer_norm": 4.3,
    "empty_hypotheses": 1,
    "normalization_delta": {
      "raw_to_norm": 6.83,
      "norm_to_numcanon": 0.0,
      "norm_to_mer": 19.17
    },
    "wer_norm_nonum": 23.12,
    "mer_nonum": 4.03,
    "numeric_samples_dropped": 1,
    "space_norm_wer": 14.83
  },
  "telugu": {
    "n_samples": 500,
    "wer_raw": 29.77,
    "wer_norm": 21.48,
    "wer_numcanon": 21.48,
    "mer": 3.06,
    "cer_norm": 3.57,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 8.29,
      "norm_to_numcanon": 0.0,
      "norm_to_mer": 18.42
    },
    "wer_norm_nonum": 21.48,
    "mer_nonum": 3.06,
    "numeric_samples_dropped": 0,
    "space_norm_wer": 11.73
  },
  "__overall__": {
    "n_samples": 6000,
    "wer_raw": 27.62,
    "wer_norm": 20.12,
    "wer_numcanon": 20.12,
    "mer": 5.71,
    "cer_norm": 5.79,
    "wer_norm_nonum": 19.27,
    "mer_nonum": 4.91,
    "numeric_samples_dropped": 345,
    "space_norm_wer": 14.25
  },
  "__macro_avg__": {
    "n_languages": 12,
    "wer_raw": 27.44,
    "wer_norm": 19.88,
    "wer_numcanon": 19.88,
    "mer": 5.65,
    "cer_norm": 5.68,
    "wer_norm_nonum": 18.73,
    "mer_nonum": 4.78,
    "space_norm_wer": 13.97
  },
  "__meta__": {
    "checkpoint": "api/gemini-3-flash-strict",
    "checkpoint_name": "baseline",
    "model_id": "gemini-3-flash-strict",
    "model_type": "gemini-3-flash-preview",
    "dataset": "BayAreaBoys/indic-asr-benchmark-6k",
    "batch_size": 1,
    "inference_time_sec": 0,
    "total_audio_sec": 40354.42,
    "rtf": 0,
    "timestamp": "2026-03-27T07:17:35Z",
    "normalization_version": "v1",
    "framework": "api"
  }
}