{
  "assamese": {
    "n_samples": 500,
    "wer_raw": 82.8,
    "wer_norm": 82.39,
    "wer_numcanon": 82.39,
    "space_norm_wer": 53.47,
    "mer": 52.08,
    "cer_norm": 50.99,
    "empty_hypotheses": 1,
    "normalization_delta": {
      "raw_to_norm": 0.41,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 28.92,
      "norm_to_mer": 30.31
    }
  },
  "bengali": {
    "n_samples": 500,
    "wer_raw": 76.56,
    "wer_norm": 75.82,
    "wer_numcanon": 75.82,
    "space_norm_wer": 57.19,
    "mer": 39.93,
    "cer_norm": 38.44,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.74,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 18.63,
      "norm_to_mer": 35.89
    }
  },
  "english": {
    "n_samples": 500,
    "wer_raw": 81.51,
    "wer_norm": 74.38,
    "wer_numcanon": 74.21,
    "space_norm_wer": 50.72,
    "mer": 47.41,
    "cer_norm": 46.38,
    "empty_hypotheses": 4,
    "normalization_delta": {
      "raw_to_norm": 7.13,
      "norm_to_numcanon": 0.17,
      "norm_to_space_norm": 23.66,
      "norm_to_mer": 26.97
    }
  },
  "gujarati": {
    "n_samples": 500,
    "wer_raw": 69.48,
    "wer_norm": 69.22,
    "wer_numcanon": 69.22,
    "space_norm_wer": 53.37,
    "mer": 43.79,
    "cer_norm": 41.25,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.26,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 15.85,
      "norm_to_mer": 25.43
    }
  },
  "hindi": {
    "n_samples": 500,
    "wer_raw": 61.92,
    "wer_norm": 61.39,
    "wer_numcanon": 61.39,
    "space_norm_wer": 45.93,
    "mer": 47.65,
    "cer_norm": 43.42,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.53,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 15.46,
      "norm_to_mer": 13.74
    }
  },
  "kannada": {
    "n_samples": 500,
    "wer_raw": 118.07,
    "wer_norm": 117.8,
    "wer_numcanon": 117.8,
    "space_norm_wer": 70.86,
    "mer": 67.76,
    "cer_norm": 67.58,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.27,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 46.94,
      "norm_to_mer": 50.04
    }
  },
  "malayalam": {
    "n_samples": 500,
    "wer_raw": 93.64,
    "wer_norm": 92.77,
    "wer_numcanon": 92.77,
    "space_norm_wer": 70.65,
    "mer": 47.46,
    "cer_norm": 46.23,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.87,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 22.12,
      "norm_to_mer": 45.31
    }
  },
  "marathi": {
    "n_samples": 500,
    "wer_raw": 88.76,
    "wer_norm": 88.18,
    "wer_numcanon": 88.18,
    "space_norm_wer": 61.19,
    "mer": 52.15,
    "cer_norm": 50.96,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.58,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 26.99,
      "norm_to_mer": 36.03
    }
  },
  "odia": {
    "n_samples": 500,
    "wer_raw": 72.76,
    "wer_norm": 72.43,
    "wer_numcanon": 72.43,
    "space_norm_wer": 52.48,
    "mer": 43.14,
    "cer_norm": 41.44,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.33,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 19.95,
      "norm_to_mer": 29.29
    }
  },
  "punjabi": {
    "n_samples": 500,
    "wer_raw": 89.09,
    "wer_norm": 88.83,
    "wer_numcanon": 88.83,
    "space_norm_wer": 54.95,
    "mer": 55.53,
    "cer_norm": 54.04,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.26,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 33.88,
      "norm_to_mer": 33.3
    }
  },
  "tamil": {
    "n_samples": 500,
    "wer_raw": 117.76,
    "wer_norm": 117.02,
    "wer_numcanon": 117.02,
    "space_norm_wer": 79.57,
    "mer": 65.94,
    "cer_norm": 65.22,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.74,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 37.45,
      "norm_to_mer": 51.08
    }
  },
  "telugu": {
    "n_samples": 500,
    "wer_raw": 105.79,
    "wer_norm": 104.94,
    "wer_numcanon": 104.94,
    "space_norm_wer": 68.53,
    "mer": 63.36,
    "cer_norm": 62.16,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.85,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 36.41,
      "norm_to_mer": 41.58
    }
  },
  "__overall__": {
    "n_samples": 6000,
    "wer_raw": 87.81,
    "wer_norm": 86.77,
    "wer_numcanon": 86.76,
    "space_norm_wer": 59.61,
    "mer": 53.07,
    "cer_norm": 51.54
  },
  "__macro_avg__": {
    "n_languages": 12,
    "wer_raw": 88.18,
    "wer_norm": 87.1,
    "wer_numcanon": 87.08,
    "space_norm_wer": 59.91,
    "mer": 52.18,
    "cer_norm": 50.68
  },
  "__meta__": {
    "checkpoint": "/home/ubuntu/training/checkpoints/cohere-transcribe-ckpt-20000",
    "checkpoint_name": "ckpt-20000",
    "model_id": "cohere-transcribe",
    "model_type": "Cohere-Transcribe-Indic-2B",
    "dataset": "BayAreaBoys/indic-asr-benchmark-6k",
    "batch_size": 64,
    "inference_time_sec": 190.46,
    "total_audio_sec": 40354.46,
    "rtf": 0.0047,
    "timestamp": "2026-04-07T17:24:06Z",
    "gpu": "NVIDIA H200 80GB",
    "framework": "transformers",
    "normalization_version": "v1",
    "jiwer_version": "3.1.0"
  }
}