{
  "assamese": {
    "n_samples": 500,
    "wer_raw": 44.54,
    "wer_norm": 44.21,
    "wer_numcanon": 44.21,
    "space_norm_wer": 29.35,
    "mer": 22.19,
    "cer_norm": 21.98,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.33,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 14.86,
      "norm_to_mer": 22.02
    }
  },
  "bengali": {
    "n_samples": 500,
    "wer_raw": 32.82,
    "wer_norm": 32.42,
    "wer_numcanon": 32.42,
    "space_norm_wer": 23.57,
    "mer": 8.82,
    "cer_norm": 8.73,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.4,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 8.85,
      "norm_to_mer": 23.6
    }
  },
  "english": {
    "n_samples": 500,
    "wer_raw": 44.05,
    "wer_norm": 33.54,
    "wer_numcanon": 33.18,
    "space_norm_wer": 20.09,
    "mer": 20.6,
    "cer_norm": 20.68,
    "empty_hypotheses": 2,
    "normalization_delta": {
      "raw_to_norm": 10.51,
      "norm_to_numcanon": 0.36,
      "norm_to_space_norm": 13.45,
      "norm_to_mer": 12.94
    }
  },
  "gujarati": {
    "n_samples": 500,
    "wer_raw": 34.65,
    "wer_norm": 34.64,
    "wer_numcanon": 34.64,
    "space_norm_wer": 23.64,
    "mer": 12.32,
    "cer_norm": 12.45,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.01,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 11.0,
      "norm_to_mer": 22.32
    }
  },
  "hindi": {
    "n_samples": 500,
    "wer_raw": 27.31,
    "wer_norm": 27.24,
    "wer_numcanon": 27.24,
    "space_norm_wer": 19.28,
    "mer": 12.07,
    "cer_norm": 11.52,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.07,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 7.96,
      "norm_to_mer": 15.17
    }
  },
  "kannada": {
    "n_samples": 500,
    "wer_raw": 46.56,
    "wer_norm": 46.52,
    "wer_numcanon": 46.52,
    "space_norm_wer": 35.95,
    "mer": 12.86,
    "cer_norm": 12.98,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.04,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 10.57,
      "norm_to_mer": 33.66
    }
  },
  "malayalam": {
    "n_samples": 500,
    "wer_raw": 53.18,
    "wer_norm": 51.4,
    "wer_numcanon": 51.4,
    "space_norm_wer": 40.27,
    "mer": 11.73,
    "cer_norm": 11.7,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 1.78,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 11.13,
      "norm_to_mer": 39.67
    }
  },
  "marathi": {
    "n_samples": 500,
    "wer_raw": 38.74,
    "wer_norm": 38.73,
    "wer_numcanon": 38.73,
    "space_norm_wer": 29.62,
    "mer": 13.58,
    "cer_norm": 13.28,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.01,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 9.11,
      "norm_to_mer": 25.15
    }
  },
  "odia": {
    "n_samples": 500,
    "wer_raw": 35.04,
    "wer_norm": 35.02,
    "wer_numcanon": 35.02,
    "space_norm_wer": 25.76,
    "mer": 11.39,
    "cer_norm": 11.15,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.02,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 9.26,
      "norm_to_mer": 23.63
    }
  },
  "punjabi": {
    "n_samples": 500,
    "wer_raw": 33.74,
    "wer_norm": 33.45,
    "wer_numcanon": 33.45,
    "space_norm_wer": 24.27,
    "mer": 18.39,
    "cer_norm": 17.14,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.29,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 9.18,
      "norm_to_mer": 15.06
    }
  },
  "tamil": {
    "n_samples": 500,
    "wer_raw": 49.36,
    "wer_norm": 47.41,
    "wer_numcanon": 47.41,
    "space_norm_wer": 36.46,
    "mer": 12.69,
    "cer_norm": 12.75,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 1.95,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 10.95,
      "norm_to_mer": 34.72
    }
  },
  "telugu": {
    "n_samples": 500,
    "wer_raw": 43.05,
    "wer_norm": 42.55,
    "wer_numcanon": 42.55,
    "space_norm_wer": 30.71,
    "mer": 10.67,
    "cer_norm": 10.87,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.5,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 11.84,
      "norm_to_mer": 31.88
    }
  },
  "__overall__": {
    "n_samples": 6000,
    "wer_raw": 40.24,
    "wer_norm": 38.98,
    "wer_numcanon": 38.95,
    "space_norm_wer": 28.21,
    "mer": 13.67,
    "cer_norm": 13.61
  },
  "__macro_avg__": {
    "n_languages": 12,
    "wer_raw": 40.25,
    "wer_norm": 38.93,
    "wer_numcanon": 38.9,
    "space_norm_wer": 28.25,
    "mer": 13.94,
    "cer_norm": 13.77
  },
  "__meta__": {
    "checkpoint": "/home/ubuntu/training/checkpoints/cohere-transcribe-ckpt-120000",
    "checkpoint_name": "ckpt-120000",
    "model_id": "cohere-transcribe",
    "model_type": "Cohere-Transcribe-Indic-2B",
    "dataset": "BayAreaBoys/indic-asr-benchmark-6k",
    "batch_size": 64,
    "inference_time_sec": 108.98,
    "total_audio_sec": 40354.46,
    "rtf": 0.0027,
    "timestamp": "2026-04-08T07:04:06Z",
    "gpu": "NVIDIA H200 80GB",
    "framework": "transformers",
    "normalization_version": "v1",
    "jiwer_version": "3.1.0"
  }
}