{
  "assamese": {
    "n_samples": 500,
    "wer_raw": 50.69,
    "wer_norm": 50.41,
    "wer_numcanon": 50.41,
    "space_norm_wer": 37.32,
    "mer": 29.5,
    "cer_norm": 28.3,
    "empty_hypotheses": 1,
    "normalization_delta": {
      "raw_to_norm": 0.28,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 13.09,
      "norm_to_mer": 20.91
    }
  },
  "bengali": {
    "n_samples": 500,
    "wer_raw": 39.53,
    "wer_norm": 38.84,
    "wer_numcanon": 38.84,
    "space_norm_wer": 29.7,
    "mer": 15.77,
    "cer_norm": 15.1,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.69,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 9.14,
      "norm_to_mer": 23.07
    }
  },
  "english": {
    "n_samples": 500,
    "wer_raw": 76.15,
    "wer_norm": 65.66,
    "wer_numcanon": 65.25,
    "space_norm_wer": 26.22,
    "mer": 44.24,
    "cer_norm": 45.26,
    "empty_hypotheses": 3,
    "normalization_delta": {
      "raw_to_norm": 10.49,
      "norm_to_numcanon": 0.41,
      "norm_to_space_norm": 39.44,
      "norm_to_mer": 21.42
    }
  },
  "gujarati": {
    "n_samples": 500,
    "wer_raw": 36.72,
    "wer_norm": 36.57,
    "wer_numcanon": 36.57,
    "space_norm_wer": 26.04,
    "mer": 16.22,
    "cer_norm": 15.86,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.15,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 10.53,
      "norm_to_mer": 20.35
    }
  },
  "hindi": {
    "n_samples": 500,
    "wer_raw": 27.86,
    "wer_norm": 27.76,
    "wer_numcanon": 27.76,
    "space_norm_wer": 22.36,
    "mer": 13.63,
    "cer_norm": 12.55,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.1,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 5.4,
      "norm_to_mer": 14.13
    }
  },
  "kannada": {
    "n_samples": 500,
    "wer_raw": 49.34,
    "wer_norm": 49.16,
    "wer_numcanon": 49.16,
    "space_norm_wer": 40.08,
    "mer": 18.25,
    "cer_norm": 17.9,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.18,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 9.08,
      "norm_to_mer": 30.91
    }
  },
  "malayalam": {
    "n_samples": 500,
    "wer_raw": 54.89,
    "wer_norm": 54.34,
    "wer_numcanon": 54.34,
    "space_norm_wer": 43.71,
    "mer": 14.89,
    "cer_norm": 14.67,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.55,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 10.63,
      "norm_to_mer": 39.45
    }
  },
  "marathi": {
    "n_samples": 500,
    "wer_raw": 43.04,
    "wer_norm": 42.97,
    "wer_numcanon": 42.97,
    "space_norm_wer": 34.14,
    "mer": 15.9,
    "cer_norm": 15.68,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.07,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 8.83,
      "norm_to_mer": 27.07
    }
  },
  "odia": {
    "n_samples": 500,
    "wer_raw": 38.73,
    "wer_norm": 38.53,
    "wer_numcanon": 38.53,
    "space_norm_wer": 29.34,
    "mer": 13.99,
    "cer_norm": 13.58,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.2,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 9.19,
      "norm_to_mer": 24.54
    }
  },
  "punjabi": {
    "n_samples": 500,
    "wer_raw": 34.9,
    "wer_norm": 34.71,
    "wer_numcanon": 34.71,
    "space_norm_wer": 27.49,
    "mer": 19.08,
    "cer_norm": 17.39,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.19,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 7.22,
      "norm_to_mer": 15.63
    }
  },
  "tamil": {
    "n_samples": 500,
    "wer_raw": 55.3,
    "wer_norm": 54.93,
    "wer_numcanon": 54.93,
    "space_norm_wer": 44.52,
    "mer": 17.67,
    "cer_norm": 17.48,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.37,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 10.41,
      "norm_to_mer": 37.26
    }
  },
  "telugu": {
    "n_samples": 500,
    "wer_raw": 45.0,
    "wer_norm": 44.03,
    "wer_numcanon": 44.03,
    "space_norm_wer": 33.01,
    "mer": 13.08,
    "cer_norm": 13.02,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.97,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 11.02,
      "norm_to_mer": 30.95
    }
  },
  "__overall__": {
    "n_samples": 6000,
    "wer_raw": 45.91,
    "wer_norm": 44.77,
    "wer_numcanon": 44.74,
    "space_norm_wer": 32.85,
    "mer": 18.83,
    "cer_norm": 18.51
  },
  "__macro_avg__": {
    "n_languages": 12,
    "wer_raw": 46.01,
    "wer_norm": 44.83,
    "wer_numcanon": 44.79,
    "space_norm_wer": 32.83,
    "mer": 19.35,
    "cer_norm": 18.9
  },
  "__meta__": {
    "checkpoint": "/home/ubuntu/training/checkpoints/cohere-transcribe-ckpt-80000",
    "checkpoint_name": "ckpt-80000",
    "model_id": "cohere-transcribe",
    "model_type": "Cohere-Transcribe-Indic-2B",
    "dataset": "BayAreaBoys/indic-asr-benchmark-6k",
    "batch_size": 64,
    "inference_time_sec": 122.58,
    "total_audio_sec": 40354.46,
    "rtf": 0.003,
    "timestamp": "2026-04-07T23:12:08Z",
    "gpu": "NVIDIA H200 80GB",
    "framework": "transformers",
    "normalization_version": "v1",
    "jiwer_version": "3.1.0"
  }
}