{
  "assamese": {
    "n_samples": 500,
    "wer_raw": 46.7,
    "wer_norm": 45.87,
    "wer_numcanon": 45.87,
    "space_norm_wer": 28.65,
    "mer": 21.71,
    "cer_norm": 21.98,
    "empty_hypotheses": 1,
    "normalization_delta": {
      "raw_to_norm": 0.83,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 17.22,
      "norm_to_mer": 24.16
    }
  },
  "bengali": {
    "n_samples": 500,
    "wer_raw": 33.04,
    "wer_norm": 32.4,
    "wer_numcanon": 32.4,
    "space_norm_wer": 23.13,
    "mer": 9.18,
    "cer_norm": 9.15,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.64,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 9.27,
      "norm_to_mer": 23.22
    }
  },
  "english": {
    "n_samples": 500,
    "wer_raw": 37.77,
    "wer_norm": 27.52,
    "wer_numcanon": 27.17,
    "space_norm_wer": 19.88,
    "mer": 21.07,
    "cer_norm": 19.93,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 10.25,
      "norm_to_numcanon": 0.35,
      "norm_to_space_norm": 7.64,
      "norm_to_mer": 6.45
    }
  },
  "gujarati": {
    "n_samples": 500,
    "wer_raw": 27.87,
    "wer_norm": 27.85,
    "wer_numcanon": 27.85,
    "space_norm_wer": 21.72,
    "mer": 9.8,
    "cer_norm": 9.57,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.02,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 6.13,
      "norm_to_mer": 18.05
    }
  },
  "hindi": {
    "n_samples": 500,
    "wer_raw": 22.14,
    "wer_norm": 21.98,
    "wer_numcanon": 21.98,
    "space_norm_wer": 17.48,
    "mer": 9.83,
    "cer_norm": 9.03,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.16,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 4.5,
      "norm_to_mer": 12.15
    }
  },
  "kannada": {
    "n_samples": 500,
    "wer_raw": 41.91,
    "wer_norm": 41.44,
    "wer_numcanon": 41.44,
    "space_norm_wer": 31.47,
    "mer": 11.15,
    "cer_norm": 11.32,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.47,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 9.97,
      "norm_to_mer": 30.29
    }
  },
  "malayalam": {
    "n_samples": 500,
    "wer_raw": 54.08,
    "wer_norm": 49.06,
    "wer_numcanon": 49.06,
    "space_norm_wer": 38.78,
    "mer": 11.95,
    "cer_norm": 11.78,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 5.02,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 10.28,
      "norm_to_mer": 37.11
    }
  },
  "marathi": {
    "n_samples": 500,
    "wer_raw": 36.93,
    "wer_norm": 36.92,
    "wer_numcanon": 36.92,
    "space_norm_wer": 28.84,
    "mer": 12.52,
    "cer_norm": 12.32,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.01,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 8.08,
      "norm_to_mer": 24.4
    }
  },
  "odia": {
    "n_samples": 500,
    "wer_raw": 32.83,
    "wer_norm": 32.7,
    "wer_numcanon": 32.7,
    "space_norm_wer": 24.38,
    "mer": 10.3,
    "cer_norm": 10.13,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.13,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 8.32,
      "norm_to_mer": 22.4
    }
  },
  "punjabi": {
    "n_samples": 500,
    "wer_raw": 26.92,
    "wer_norm": 26.73,
    "wer_numcanon": 26.73,
    "space_norm_wer": 21.32,
    "mer": 10.12,
    "cer_norm": 9.46,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.19,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 5.41,
      "norm_to_mer": 16.61
    }
  },
  "tamil": {
    "n_samples": 500,
    "wer_raw": 47.23,
    "wer_norm": 44.37,
    "wer_numcanon": 44.37,
    "space_norm_wer": 34.74,
    "mer": 11.79,
    "cer_norm": 11.77,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 2.86,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 9.63,
      "norm_to_mer": 32.58
    }
  },
  "telugu": {
    "n_samples": 500,
    "wer_raw": 42.24,
    "wer_norm": 40.87,
    "wer_numcanon": 40.87,
    "space_norm_wer": 29.6,
    "mer": 10.37,
    "cer_norm": 10.56,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 1.37,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 11.27,
      "norm_to_mer": 30.5
    }
  },
  "__overall__": {
    "n_samples": 6000,
    "wer_raw": 37.54,
    "wer_norm": 35.78,
    "wer_numcanon": 35.76,
    "space_norm_wer": 26.64,
    "mer": 12.45,
    "cer_norm": 12.31
  },
  "__macro_avg__": {
    "n_languages": 12,
    "wer_raw": 37.47,
    "wer_norm": 35.64,
    "wer_numcanon": 35.61,
    "space_norm_wer": 26.67,
    "mer": 12.48,
    "cer_norm": 12.25
  },
  "__meta__": {
    "checkpoint": "/home/ubuntu/training/checkpoints/cohere-transcribe-ckpt-200000",
    "checkpoint_name": "ckpt-200000",
    "model_id": "cohere-transcribe",
    "model_type": "Cohere-Transcribe-Indic-2B",
    "dataset": "BayAreaBoys/indic-asr-benchmark-6k",
    "batch_size": 64,
    "inference_time_sec": 105.14,
    "total_audio_sec": 40354.46,
    "rtf": 0.0026,
    "timestamp": "2026-04-09T00:49:48Z",
    "gpu": "NVIDIA H200 80GB",
    "framework": "transformers",
    "normalization_version": "v1",
    "jiwer_version": "3.1.0"
  }
}