{
  "assamese": {
    "n_samples": 500,
    "wer_raw": 29.11,
    "wer_norm": 28.96,
    "wer_numcanon": 28.96,
    "space_norm_wer": 24.21,
    "mer": 12.88,
    "cer_norm": 12.17,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.15,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 4.75,
      "norm_to_mer": 16.08
    }
  },
  "bengali": {
    "n_samples": 500,
    "wer_raw": 23.04,
    "wer_norm": 22.27,
    "wer_numcanon": 22.27,
    "space_norm_wer": 15.37,
    "mer": 6.09,
    "cer_norm": 6.17,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.77,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 6.9,
      "norm_to_mer": 16.18
    }
  },
  "english": {
    "n_samples": 500,
    "wer_raw": 26.82,
    "wer_norm": 14.98,
    "wer_numcanon": 14.5,
    "space_norm_wer": 8.92,
    "mer": 8.83,
    "cer_norm": 8.78,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 11.84,
      "norm_to_numcanon": 0.48,
      "norm_to_space_norm": 6.06,
      "norm_to_mer": 6.15
    }
  },
  "gujarati": {
    "n_samples": 500,
    "wer_raw": 20.35,
    "wer_norm": 19.85,
    "wer_numcanon": 19.85,
    "space_norm_wer": 14.85,
    "mer": 6.06,
    "cer_norm": 5.97,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.5,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 5.0,
      "norm_to_mer": 13.79
    }
  },
  "hindi": {
    "n_samples": 500,
    "wer_raw": 13.46,
    "wer_norm": 13.45,
    "wer_numcanon": 13.45,
    "space_norm_wer": 10.31,
    "mer": 5.01,
    "cer_norm": 4.65,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.01,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 3.14,
      "norm_to_mer": 8.44
    }
  },
  "kannada": {
    "n_samples": 500,
    "wer_raw": 34.86,
    "wer_norm": 34.1,
    "wer_numcanon": 34.1,
    "space_norm_wer": 24.59,
    "mer": 8.16,
    "cer_norm": 8.42,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.76,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 9.51,
      "norm_to_mer": 25.94
    }
  },
  "malayalam": {
    "n_samples": 500,
    "wer_raw": 45.33,
    "wer_norm": 44.48,
    "wer_numcanon": 44.48,
    "space_norm_wer": 33.47,
    "mer": 9.13,
    "cer_norm": 9.17,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.85,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 11.01,
      "norm_to_mer": 35.35
    }
  },
  "marathi": {
    "n_samples": 500,
    "wer_raw": 28.6,
    "wer_norm": 28.37,
    "wer_numcanon": 28.37,
    "space_norm_wer": 21.83,
    "mer": 8.69,
    "cer_norm": 8.71,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.23,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 6.54,
      "norm_to_mer": 19.68
    }
  },
  "odia": {
    "n_samples": 500,
    "wer_raw": 27.33,
    "wer_norm": 27.02,
    "wer_numcanon": 27.02,
    "space_norm_wer": 17.73,
    "mer": 6.85,
    "cer_norm": 7.07,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.31,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 9.29,
      "norm_to_mer": 20.17
    }
  },
  "punjabi": {
    "n_samples": 500,
    "wer_raw": 20.82,
    "wer_norm": 20.67,
    "wer_numcanon": 20.67,
    "space_norm_wer": 15.78,
    "mer": 6.67,
    "cer_norm": 6.38,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.15,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 4.89,
      "norm_to_mer": 14.0
    }
  },
  "tamil": {
    "n_samples": 500,
    "wer_raw": 39.45,
    "wer_norm": 38.39,
    "wer_numcanon": 38.39,
    "space_norm_wer": 28.04,
    "mer": 8.93,
    "cer_norm": 9.07,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 1.06,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 10.35,
      "norm_to_mer": 29.46
    }
  },
  "telugu": {
    "n_samples": 500,
    "wer_raw": 32.97,
    "wer_norm": 31.8,
    "wer_numcanon": 31.8,
    "space_norm_wer": 20.06,
    "mer": 6.52,
    "cer_norm": 6.92,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 1.17,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 11.74,
      "norm_to_mer": 25.28
    }
  },
  "__overall__": {
    "n_samples": 6000,
    "wer_raw": 28.37,
    "wer_norm": 26.96,
    "wer_numcanon": 26.92,
    "space_norm_wer": 19.65,
    "mer": 8.03,
    "cer_norm": 8.01
  },
  "__macro_avg__": {
    "n_languages": 12,
    "wer_raw": 28.51,
    "wer_norm": 27.03,
    "wer_numcanon": 26.99,
    "space_norm_wer": 19.6,
    "mer": 7.82,
    "cer_norm": 7.79
  },
  "__meta__": {
    "checkpoint": "/home/ubuntu/training/checkpoints/qwen3-asr-ckpt-300000",
    "checkpoint_name": "ckpt-300000",
    "model_id": "qwen3-asr",
    "model_type": "Qwen3-ASR-1.7B",
    "dataset": "BayAreaBoys/indic-asr-benchmark-6k",
    "batch_size": 64,
    "inference_time_sec": 260.71,
    "total_audio_sec": 40354.46,
    "rtf": 0.0065,
    "timestamp": "2026-03-28T06:51:25Z",
    "gpu": "NVIDIA H200 80GB",
    "framework": "vllm",
    "normalization_version": "v1",
    "jiwer_version": "3.1.0"
  }
}