{
  "assamese": {
    "n_samples": 500,
    "wer_raw": 48.55,
    "wer_norm": 48.04,
    "wer_numcanon": 48.04,
    "space_norm_wer": 37.84,
    "mer": 27.09,
    "cer_norm": 25.82,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.51,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 10.2,
      "norm_to_mer": 20.95
    }
  },
  "bengali": {
    "n_samples": 500,
    "wer_raw": 44.59,
    "wer_norm": 44.03,
    "wer_numcanon": 44.03,
    "space_norm_wer": 34.1,
    "mer": 15.79,
    "cer_norm": 15.37,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.56,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 9.93,
      "norm_to_mer": 28.24
    }
  },
  "english": {
    "n_samples": 500,
    "wer_raw": 62.1,
    "wer_norm": 53.74,
    "wer_numcanon": 53.4,
    "space_norm_wer": 28.92,
    "mer": 32.83,
    "cer_norm": 33.18,
    "empty_hypotheses": 2,
    "normalization_delta": {
      "raw_to_norm": 8.36,
      "norm_to_numcanon": 0.34,
      "norm_to_space_norm": 24.82,
      "norm_to_mer": 20.91
    }
  },
  "gujarati": {
    "n_samples": 500,
    "wer_raw": 45.77,
    "wer_norm": 44.71,
    "wer_numcanon": 44.71,
    "space_norm_wer": 31.98,
    "mer": 18.3,
    "cer_norm": 18.29,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 1.06,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 12.73,
      "norm_to_mer": 26.41
    }
  },
  "hindi": {
    "n_samples": 500,
    "wer_raw": 38.11,
    "wer_norm": 38.04,
    "wer_numcanon": 38.04,
    "space_norm_wer": 29.56,
    "mer": 19.02,
    "cer_norm": 17.68,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.07,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 8.48,
      "norm_to_mer": 19.02
    }
  },
  "kannada": {
    "n_samples": 500,
    "wer_raw": 74.34,
    "wer_norm": 73.62,
    "wer_numcanon": 73.62,
    "space_norm_wer": 50.74,
    "mer": 33.6,
    "cer_norm": 33.41,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.72,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 22.88,
      "norm_to_mer": 40.02
    }
  },
  "malayalam": {
    "n_samples": 500,
    "wer_raw": 70.87,
    "wer_norm": 69.79,
    "wer_numcanon": 69.79,
    "space_norm_wer": 51.82,
    "mer": 22.19,
    "cer_norm": 22.15,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 1.08,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 17.97,
      "norm_to_mer": 47.6
    }
  },
  "marathi": {
    "n_samples": 500,
    "wer_raw": 54.98,
    "wer_norm": 54.85,
    "wer_numcanon": 54.85,
    "space_norm_wer": 41.93,
    "mer": 26.28,
    "cer_norm": 25.48,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.13,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 12.92,
      "norm_to_mer": 28.57
    }
  },
  "odia": {
    "n_samples": 500,
    "wer_raw": 52.65,
    "wer_norm": 52.45,
    "wer_numcanon": 52.45,
    "space_norm_wer": 33.9,
    "mer": 21.81,
    "cer_norm": 21.78,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.2,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 18.55,
      "norm_to_mer": 30.64
    }
  },
  "punjabi": {
    "n_samples": 500,
    "wer_raw": 44.85,
    "wer_norm": 44.71,
    "wer_numcanon": 44.71,
    "space_norm_wer": 33.52,
    "mer": 21.56,
    "cer_norm": 20.55,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.14,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 11.19,
      "norm_to_mer": 23.15
    }
  },
  "tamil": {
    "n_samples": 500,
    "wer_raw": 73.98,
    "wer_norm": 72.67,
    "wer_numcanon": 72.67,
    "space_norm_wer": 56.21,
    "mer": 26.94,
    "cer_norm": 26.77,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 1.31,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 16.46,
      "norm_to_mer": 45.73
    }
  },
  "telugu": {
    "n_samples": 500,
    "wer_raw": 63.55,
    "wer_norm": 61.92,
    "wer_numcanon": 61.92,
    "space_norm_wer": 43.89,
    "mer": 22.99,
    "cer_norm": 22.95,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 1.63,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 18.03,
      "norm_to_mer": 38.93
    }
  },
  "__overall__": {
    "n_samples": 6000,
    "wer_raw": 55.77,
    "wer_norm": 54.5,
    "wer_numcanon": 54.48,
    "space_norm_wer": 39.38,
    "mer": 24.34,
    "cer_norm": 23.92
  },
  "__macro_avg__": {
    "n_languages": 12,
    "wer_raw": 56.19,
    "wer_norm": 54.88,
    "wer_numcanon": 54.85,
    "space_norm_wer": 39.53,
    "mer": 24.03,
    "cer_norm": 23.62
  },
  "__meta__": {
    "checkpoint": "/home/ubuntu/training/checkpoints/cohere-transcribe-ckpt-40000",
    "checkpoint_name": "ckpt-40000",
    "model_id": "cohere-transcribe",
    "model_type": "Cohere-Transcribe-Indic-2B",
    "dataset": "BayAreaBoys/indic-asr-benchmark-6k",
    "batch_size": 64,
    "inference_time_sec": 137.34,
    "total_audio_sec": 40354.46,
    "rtf": 0.0034,
    "timestamp": "2026-04-07T17:26:53Z",
    "gpu": "NVIDIA H200 80GB",
    "framework": "transformers",
    "normalization_version": "v1",
    "jiwer_version": "3.1.0"
  }
}