{
  "assamese": {
    "n_samples": 500,
    "wer_raw": 45.08,
    "wer_norm": 43.7,
    "wer_numcanon": 43.7,
    "space_norm_wer": 34.52,
    "mer": 24.44,
    "cer_norm": 23.11,
    "empty_hypotheses": 2,
    "normalization_delta": {
      "raw_to_norm": 1.38,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 9.18,
      "norm_to_mer": 19.26
    }
  },
  "bengali": {
    "n_samples": 500,
    "wer_raw": 42.13,
    "wer_norm": 40.57,
    "wer_numcanon": 40.57,
    "space_norm_wer": 30.7,
    "mer": 14.41,
    "cer_norm": 13.99,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 1.56,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 9.87,
      "norm_to_mer": 26.16
    }
  },
  "english": {
    "n_samples": 500,
    "wer_raw": 58.36,
    "wer_norm": 47.74,
    "wer_numcanon": 47.38,
    "space_norm_wer": 25.28,
    "mer": 27.22,
    "cer_norm": 28.0,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 10.62,
      "norm_to_numcanon": 0.36,
      "norm_to_space_norm": 22.46,
      "norm_to_mer": 20.52
    }
  },
  "gujarati": {
    "n_samples": 500,
    "wer_raw": 43.75,
    "wer_norm": 43.25,
    "wer_numcanon": 43.25,
    "space_norm_wer": 34.14,
    "mer": 16.08,
    "cer_norm": 15.72,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.5,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 9.11,
      "norm_to_mer": 27.17
    }
  },
  "hindi": {
    "n_samples": 500,
    "wer_raw": 28.44,
    "wer_norm": 28.42,
    "wer_numcanon": 28.42,
    "space_norm_wer": 21.91,
    "mer": 12.2,
    "cer_norm": 11.47,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.02,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 6.51,
      "norm_to_mer": 16.22
    }
  },
  "kannada": {
    "n_samples": 500,
    "wer_raw": 52.99,
    "wer_norm": 52.55,
    "wer_numcanon": 52.55,
    "space_norm_wer": 42.46,
    "mer": 15.98,
    "cer_norm": 16.14,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.44,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 10.09,
      "norm_to_mer": 36.57
    }
  },
  "malayalam": {
    "n_samples": 500,
    "wer_raw": 60.28,
    "wer_norm": 58.09,
    "wer_numcanon": 58.09,
    "space_norm_wer": 46.87,
    "mer": 15.43,
    "cer_norm": 15.22,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 2.19,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 11.22,
      "norm_to_mer": 42.66
    }
  },
  "marathi": {
    "n_samples": 500,
    "wer_raw": 46.83,
    "wer_norm": 46.83,
    "wer_numcanon": 46.83,
    "space_norm_wer": 37.16,
    "mer": 18.39,
    "cer_norm": 17.94,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.0,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 9.67,
      "norm_to_mer": 28.44
    }
  },
  "odia": {
    "n_samples": 500,
    "wer_raw": 39.51,
    "wer_norm": 39.38,
    "wer_numcanon": 39.38,
    "space_norm_wer": 30.3,
    "mer": 14.65,
    "cer_norm": 14.22,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.13,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 9.08,
      "norm_to_mer": 24.73
    }
  },
  "punjabi": {
    "n_samples": 500,
    "wer_raw": 34.77,
    "wer_norm": 34.57,
    "wer_numcanon": 34.57,
    "space_norm_wer": 27.68,
    "mer": 15.06,
    "cer_norm": 14.19,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.2,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 6.89,
      "norm_to_mer": 19.51
    }
  },
  "tamil": {
    "n_samples": 500,
    "wer_raw": 57.1,
    "wer_norm": 56.78,
    "wer_numcanon": 56.78,
    "space_norm_wer": 45.76,
    "mer": 17.29,
    "cer_norm": 17.26,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.32,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 11.02,
      "norm_to_mer": 39.49
    }
  },
  "telugu": {
    "n_samples": 500,
    "wer_raw": 49.53,
    "wer_norm": 46.86,
    "wer_numcanon": 46.86,
    "space_norm_wer": 35.0,
    "mer": 13.97,
    "cer_norm": 14.04,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 2.67,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 11.86,
      "norm_to_mer": 32.89
    }
  },
  "__overall__": {
    "n_samples": 6000,
    "wer_raw": 46.35,
    "wer_norm": 44.73,
    "wer_numcanon": 44.7,
    "space_norm_wer": 34.25,
    "mer": 17.02,
    "cer_norm": 16.75
  },
  "__macro_avg__": {
    "n_languages": 12,
    "wer_raw": 46.56,
    "wer_norm": 44.9,
    "wer_numcanon": 44.86,
    "space_norm_wer": 34.32,
    "mer": 17.09,
    "cer_norm": 16.77
  },
  "__meta__": {
    "checkpoint": "/home/ubuntu/training/checkpoints/cohere-transcribe-ckpt-60000",
    "checkpoint_name": "ckpt-60000",
    "model_id": "cohere-transcribe",
    "model_type": "Cohere-Transcribe-Indic-2B",
    "dataset": "BayAreaBoys/indic-asr-benchmark-6k",
    "batch_size": 64,
    "inference_time_sec": 111.54,
    "total_audio_sec": 40354.46,
    "rtf": 0.0028,
    "timestamp": "2026-04-07T17:29:13Z",
    "gpu": "NVIDIA H200 80GB",
    "framework": "transformers",
    "normalization_version": "v1",
    "jiwer_version": "3.1.0"
  }
}