{
  "assamese": {
    "n_samples": 500,
    "wer_raw": 41.56,
    "wer_norm": 40.9,
    "wer_numcanon": 40.9,
    "space_norm_wer": 30.03,
    "mer": 20.01,
    "cer_norm": 19.42,
    "empty_hypotheses": 3,
    "normalization_delta": {
      "raw_to_norm": 0.66,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 10.87,
      "norm_to_mer": 20.89
    }
  },
  "bengali": {
    "n_samples": 500,
    "wer_raw": 37.07,
    "wer_norm": 36.22,
    "wer_numcanon": 36.22,
    "space_norm_wer": 27.46,
    "mer": 12.6,
    "cer_norm": 12.25,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.85,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 8.76,
      "norm_to_mer": 23.62
    }
  },
  "english": {
    "n_samples": 500,
    "wer_raw": 53.25,
    "wer_norm": 43.32,
    "wer_numcanon": 42.89,
    "space_norm_wer": 22.84,
    "mer": 28.94,
    "cer_norm": 28.85,
    "empty_hypotheses": 5,
    "normalization_delta": {
      "raw_to_norm": 9.93,
      "norm_to_numcanon": 0.43,
      "norm_to_space_norm": 20.48,
      "norm_to_mer": 14.38
    }
  },
  "gujarati": {
    "n_samples": 500,
    "wer_raw": 31.6,
    "wer_norm": 31.35,
    "wer_numcanon": 31.35,
    "space_norm_wer": 24.44,
    "mer": 11.95,
    "cer_norm": 11.59,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.25,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 6.91,
      "norm_to_mer": 19.4
    }
  },
  "hindi": {
    "n_samples": 500,
    "wer_raw": 25.8,
    "wer_norm": 25.72,
    "wer_numcanon": 25.7,
    "space_norm_wer": 20.99,
    "mer": 12.55,
    "cer_norm": 11.48,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.08,
      "norm_to_numcanon": 0.02,
      "norm_to_space_norm": 4.73,
      "norm_to_mer": 13.17
    }
  },
  "kannada": {
    "n_samples": 500,
    "wer_raw": 45.27,
    "wer_norm": 44.72,
    "wer_numcanon": 44.72,
    "space_norm_wer": 35.25,
    "mer": 13.36,
    "cer_norm": 13.35,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.55,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 9.47,
      "norm_to_mer": 31.36
    }
  },
  "malayalam": {
    "n_samples": 500,
    "wer_raw": 59.91,
    "wer_norm": 57.93,
    "wer_numcanon": 57.93,
    "space_norm_wer": 43.21,
    "mer": 15.17,
    "cer_norm": 15.29,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 1.98,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 14.72,
      "norm_to_mer": 42.76
    }
  },
  "marathi": {
    "n_samples": 500,
    "wer_raw": 43.97,
    "wer_norm": 43.74,
    "wer_numcanon": 43.74,
    "space_norm_wer": 34.91,
    "mer": 16.96,
    "cer_norm": 16.58,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.23,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 8.83,
      "norm_to_mer": 26.78
    }
  },
  "odia": {
    "n_samples": 500,
    "wer_raw": 37.81,
    "wer_norm": 37.57,
    "wer_numcanon": 37.57,
    "space_norm_wer": 28.1,
    "mer": 12.86,
    "cer_norm": 12.55,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.24,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 9.47,
      "norm_to_mer": 24.71
    }
  },
  "punjabi": {
    "n_samples": 500,
    "wer_raw": 33.81,
    "wer_norm": 33.64,
    "wer_numcanon": 33.64,
    "space_norm_wer": 26.59,
    "mer": 16.92,
    "cer_norm": 15.54,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.17,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 7.05,
      "norm_to_mer": 16.72
    }
  },
  "tamil": {
    "n_samples": 500,
    "wer_raw": 50.75,
    "wer_norm": 49.51,
    "wer_numcanon": 49.51,
    "space_norm_wer": 38.75,
    "mer": 14.5,
    "cer_norm": 14.44,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 1.24,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 10.76,
      "norm_to_mer": 35.01
    }
  },
  "telugu": {
    "n_samples": 500,
    "wer_raw": 47.33,
    "wer_norm": 45.59,
    "wer_numcanon": 45.59,
    "space_norm_wer": 33.51,
    "mer": 12.96,
    "cer_norm": 13.02,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 1.74,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 12.08,
      "norm_to_mer": 32.63
    }
  },
  "__overall__": {
    "n_samples": 6000,
    "wer_raw": 42.13,
    "wer_norm": 40.69,
    "wer_numcanon": 40.66,
    "space_norm_wer": 30.42,
    "mer": 15.5,
    "cer_norm": 15.23
  },
  "__macro_avg__": {
    "n_languages": 12,
    "wer_raw": 42.34,
    "wer_norm": 40.85,
    "wer_numcanon": 40.81,
    "space_norm_wer": 30.51,
    "mer": 15.73,
    "cer_norm": 15.36
  },
  "__meta__": {
    "checkpoint": "/home/ubuntu/training/checkpoints/cohere-transcribe-ckpt-100000",
    "checkpoint_name": "ckpt-100000",
    "model_id": "cohere-transcribe",
    "model_type": "Cohere-Transcribe-Indic-2B",
    "dataset": "BayAreaBoys/indic-asr-benchmark-6k",
    "batch_size": 64,
    "inference_time_sec": 104.52,
    "total_audio_sec": 40354.46,
    "rtf": 0.0026,
    "timestamp": "2026-04-08T01:02:12Z",
    "gpu": "NVIDIA H200 80GB",
    "framework": "transformers",
    "normalization_version": "v1",
    "jiwer_version": "3.1.0"
  }
}