{
  "assamese": {
    "n_samples": 500,
    "wer_raw": 61.07,
    "wer_norm": 59.09,
    "wer_numcanon": 59.09,
    "mer": 40.04,
    "cer_norm": 40.07,
    "empty_hypotheses": 1,
    "normalization_delta": {
      "raw_to_norm": 1.99,
      "norm_to_numcanon": 0.0,
      "norm_to_mer": 19.05
    },
    "wer_norm_nonum": 30.89,
    "mer_nonum": 14.65,
    "numeric_samples_dropped": 14,
    "space_norm_wer": 25.96
  },
  "bengali": {
    "n_samples": 500,
    "wer_raw": 13.46,
    "wer_norm": 12.81,
    "wer_numcanon": 12.81,
    "mer": 3.54,
    "cer_norm": 3.53,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.65,
      "norm_to_numcanon": 0.0,
      "norm_to_mer": 9.27
    },
    "wer_norm_nonum": 10.54,
    "mer_nonum": 1.96,
    "numeric_samples_dropped": 60,
    "space_norm_wer": 9.44
  },
  "english": {
    "n_samples": 500,
    "wer_raw": 26.45,
    "wer_norm": 10.94,
    "wer_numcanon": 10.94,
    "mer": 6.54,
    "cer_norm": 6.55,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 15.51,
      "norm_to_numcanon": 0.0,
      "norm_to_mer": 4.4
    },
    "wer_norm_nonum": 8.39,
    "mer_nonum": 3.84,
    "numeric_samples_dropped": 99,
    "space_norm_wer": 7.58
  },
  "gujarati": {
    "n_samples": 500,
    "wer_raw": 14.71,
    "wer_norm": 14.65,
    "wer_numcanon": 14.65,
    "mer": 4.12,
    "cer_norm": 4.12,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.07,
      "norm_to_numcanon": 0.0,
      "norm_to_mer": 10.53
    },
    "wer_norm_nonum": 13.32,
    "mer_nonum": 3.06,
    "numeric_samples_dropped": 24,
    "space_norm_wer": 10.77
  },
  "hindi": {
    "n_samples": 500,
    "wer_raw": 8.39,
    "wer_norm": 8.36,
    "wer_numcanon": 8.36,
    "mer": 2.78,
    "cer_norm": 2.62,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.03,
      "norm_to_numcanon": 0.0,
      "norm_to_mer": 5.58
    },
    "wer_norm_nonum": 8.04,
    "mer_nonum": 2.49,
    "numeric_samples_dropped": 4,
    "space_norm_wer": 6.52
  },
  "kannada": {
    "n_samples": 500,
    "wer_raw": 315.47,
    "wer_norm": 290.99,
    "wer_numcanon": 290.99,
    "mer": 320.79,
    "cer_norm": 318.96,
    "empty_hypotheses": 2,
    "normalization_delta": {
      "raw_to_norm": 24.48,
      "norm_to_numcanon": 0.0,
      "norm_to_mer": -29.79
    },
    "wer_norm_nonum": 19.55,
    "mer_nonum": 3.48,
    "numeric_samples_dropped": 13,
    "space_norm_wer": 12.73
  },
  "malayalam": {
    "n_samples": 500,
    "wer_raw": 325.86,
    "wer_norm": 324.26,
    "wer_numcanon": 324.26,
    "mer": 497.36,
    "cer_norm": 481.73,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 1.6,
      "norm_to_numcanon": 0.0,
      "norm_to_mer": -173.11
    },
    "wer_norm_nonum": 35.39,
    "mer_nonum": 6.46,
    "numeric_samples_dropped": 4,
    "space_norm_wer": 27.12
  },
  "marathi": {
    "n_samples": 500,
    "wer_raw": 18.73,
    "wer_norm": 18.73,
    "wer_numcanon": 18.73,
    "mer": 5.7,
    "cer_norm": 5.79,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.0,
      "norm_to_numcanon": 0.0,
      "norm_to_mer": 13.03
    },
    "wer_norm_nonum": 15.45,
    "mer_nonum": 3.49,
    "numeric_samples_dropped": 52,
    "space_norm_wer": 14.16
  },
  "odia": {
    "n_samples": 500,
    "wer_raw": 26.79,
    "wer_norm": 24.34,
    "wer_numcanon": 24.34,
    "mer": 7.74,
    "cer_norm": 8.1,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 2.45,
      "norm_to_numcanon": 0.0,
      "norm_to_mer": 16.6
    },
    "wer_norm_nonum": 19.13,
    "mer_nonum": 3.16,
    "numeric_samples_dropped": 53,
    "space_norm_wer": 13.29
  },
  "punjabi": {
    "n_samples": 500,
    "wer_raw": 14.79,
    "wer_norm": 14.79,
    "wer_numcanon": 14.79,
    "mer": 4.79,
    "cer_norm": 4.49,
    "empty_hypotheses": 1,
    "normalization_delta": {
      "raw_to_norm": 0.0,
      "norm_to_numcanon": 0.0,
      "norm_to_mer": 10.0
    },
    "wer_norm_nonum": 14.57,
    "mer_nonum": 4.61,
    "numeric_samples_dropped": 2,
    "space_norm_wer": 11.59
  },
  "tamil": {
    "n_samples": 500,
    "wer_raw": 22.94,
    "wer_norm": 22.9,
    "wer_numcanon": 22.9,
    "mer": 4.04,
    "cer_norm": 4.31,
    "empty_hypotheses": 2,
    "normalization_delta": {
      "raw_to_norm": 0.04,
      "norm_to_numcanon": 0.0,
      "norm_to_mer": 18.86
    },
    "wer_norm_nonum": 22.9,
    "mer_nonum": 4.04,
    "numeric_samples_dropped": 0,
    "space_norm_wer": 14.16
  },
  "telugu": {
    "n_samples": 500,
    "wer_raw": 21.83,
    "wer_norm": 21.61,
    "wer_numcanon": 21.61,
    "mer": 3.03,
    "cer_norm": 3.54,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.22,
      "norm_to_numcanon": 0.0,
      "norm_to_mer": 18.58
    },
    "wer_norm_nonum": 21.63,
    "mer_nonum": 3.01,
    "numeric_samples_dropped": 1,
    "space_norm_wer": 11.66
  },
  "__overall__": {
    "n_samples": 6000,
    "wer_raw": 71.47,
    "wer_norm": 67.68,
    "wer_numcanon": 67.68,
    "mer": 100.7,
    "cer_norm": 95.0,
    "wer_norm_nonum": 18.85,
    "mer_nonum": 4.74,
    "numeric_samples_dropped": 326,
    "space_norm_wer": 14.04
  },
  "__macro_avg__": {
    "n_languages": 12,
    "wer_raw": 72.54,
    "wer_norm": 68.62,
    "wer_numcanon": 68.62,
    "mer": 75.04,
    "cer_norm": 73.65,
    "wer_norm_nonum": 18.32,
    "mer_nonum": 4.52,
    "space_norm_wer": 13.75
  },
  "__meta__": {
    "checkpoint": "api/gemini-3-flash-preview",
    "checkpoint_name": "baseline",
    "model_id": "gemini-3-flash-preview",
    "model_type": "gemini-3-flash-preview",
    "dataset": "BayAreaBoys/indic-asr-benchmark-6k",
    "batch_size": 1,
    "inference_time_sec": 0,
    "total_audio_sec": 40354.42,
    "rtf": 0,
    "timestamp": "2026-03-27T07:17:38Z",
    "normalization_version": "v1",
    "framework": "api"
  }
}