{
  "assamese": {
    "n_samples": 500,
    "wer_raw": 41.55,
    "wer_norm": 41.06,
    "wer_numcanon": 41.06,
    "space_norm_wer": 29.52,
    "mer": 19.87,
    "cer_norm": 19.51,
    "empty_hypotheses": 1,
    "normalization_delta": {
      "raw_to_norm": 0.49,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 11.54,
      "norm_to_mer": 21.19
    }
  },
  "bengali": {
    "n_samples": 500,
    "wer_raw": 35.05,
    "wer_norm": 34.46,
    "wer_numcanon": 34.46,
    "space_norm_wer": 26.48,
    "mer": 11.35,
    "cer_norm": 11.03,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.59,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 7.98,
      "norm_to_mer": 23.11
    }
  },
  "english": {
    "n_samples": 500,
    "wer_raw": 40.59,
    "wer_norm": 29.96,
    "wer_numcanon": 29.55,
    "space_norm_wer": 18.07,
    "mer": 18.32,
    "cer_norm": 18.33,
    "empty_hypotheses": 1,
    "normalization_delta": {
      "raw_to_norm": 10.63,
      "norm_to_numcanon": 0.41,
      "norm_to_space_norm": 11.89,
      "norm_to_mer": 11.64
    }
  },
  "gujarati": {
    "n_samples": 500,
    "wer_raw": 29.21,
    "wer_norm": 29.15,
    "wer_numcanon": 29.15,
    "space_norm_wer": 23.3,
    "mer": 11.0,
    "cer_norm": 10.55,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.06,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 5.85,
      "norm_to_mer": 18.15
    }
  },
  "hindi": {
    "n_samples": 500,
    "wer_raw": 22.36,
    "wer_norm": 22.24,
    "wer_numcanon": 22.24,
    "space_norm_wer": 18.26,
    "mer": 11.07,
    "cer_norm": 10.02,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.12,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 3.98,
      "norm_to_mer": 11.17
    }
  },
  "kannada": {
    "n_samples": 500,
    "wer_raw": 55.04,
    "wer_norm": 54.73,
    "wer_numcanon": 54.73,
    "space_norm_wer": 39.53,
    "mer": 20.15,
    "cer_norm": 20.24,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.31,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 15.2,
      "norm_to_mer": 34.58
    }
  },
  "malayalam": {
    "n_samples": 500,
    "wer_raw": 50.26,
    "wer_norm": 48.12,
    "wer_numcanon": 48.12,
    "space_norm_wer": 38.65,
    "mer": 13.1,
    "cer_norm": 12.77,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 2.14,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 9.47,
      "norm_to_mer": 35.02
    }
  },
  "marathi": {
    "n_samples": 500,
    "wer_raw": 44.53,
    "wer_norm": 44.46,
    "wer_numcanon": 44.46,
    "space_norm_wer": 31.67,
    "mer": 16.85,
    "cer_norm": 16.93,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.07,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 12.79,
      "norm_to_mer": 27.61
    }
  },
  "odia": {
    "n_samples": 500,
    "wer_raw": 32.64,
    "wer_norm": 32.53,
    "wer_numcanon": 32.53,
    "space_norm_wer": 24.49,
    "mer": 10.75,
    "cer_norm": 10.48,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.11,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 8.04,
      "norm_to_mer": 21.78
    }
  },
  "punjabi": {
    "n_samples": 500,
    "wer_raw": 30.59,
    "wer_norm": 30.35,
    "wer_numcanon": 30.35,
    "space_norm_wer": 25.17,
    "mer": 14.21,
    "cer_norm": 12.98,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.24,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 5.18,
      "norm_to_mer": 16.14
    }
  },
  "tamil": {
    "n_samples": 500,
    "wer_raw": 52.66,
    "wer_norm": 51.16,
    "wer_numcanon": 51.16,
    "space_norm_wer": 36.78,
    "mer": 15.59,
    "cer_norm": 15.75,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 1.5,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 14.38,
      "norm_to_mer": 35.57
    }
  },
  "telugu": {
    "n_samples": 500,
    "wer_raw": 44.63,
    "wer_norm": 43.63,
    "wer_numcanon": 43.63,
    "space_norm_wer": 31.8,
    "mer": 13.96,
    "cer_norm": 13.92,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 1.0,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 11.83,
      "norm_to_mer": 29.67
    }
  },
  "__overall__": {
    "n_samples": 6000,
    "wer_raw": 39.8,
    "wer_norm": 38.42,
    "wer_numcanon": 38.39,
    "space_norm_wer": 28.6,
    "mer": 14.86,
    "cer_norm": 14.59
  },
  "__macro_avg__": {
    "n_languages": 12,
    "wer_raw": 39.93,
    "wer_norm": 38.49,
    "wer_numcanon": 38.45,
    "space_norm_wer": 28.64,
    "mer": 14.69,
    "cer_norm": 14.38
  },
  "__meta__": {
    "checkpoint": "/home/ubuntu/training/checkpoints/cohere-transcribe-ckpt-160000",
    "checkpoint_name": "ckpt-160000",
    "model_id": "cohere-transcribe",
    "model_type": "Cohere-Transcribe-Indic-2B",
    "dataset": "BayAreaBoys/indic-asr-benchmark-6k",
    "batch_size": 64,
    "inference_time_sec": 192.54,
    "total_audio_sec": 40354.46,
    "rtf": 0.0048,
    "timestamp": "2026-04-08T16:27:22Z",
    "gpu": "NVIDIA H200 80GB",
    "framework": "transformers",
    "normalization_version": "v1",
    "jiwer_version": "3.1.0"
  }
}