{
  "assamese": {
    "n_samples": 500,
    "wer_raw": 52.96,
    "wer_norm": 52.78,
    "wer_numcanon": 52.78,
    "space_norm_wer": 43.94,
    "mer": 25.48,
    "cer_norm": 23.91,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.18,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 8.84,
      "norm_to_mer": 27.3
    }
  },
  "bengali": {
    "n_samples": 500,
    "wer_raw": 33.07,
    "wer_norm": 32.42,
    "wer_numcanon": 32.42,
    "space_norm_wer": 24.33,
    "mer": 11.2,
    "cer_norm": 10.89,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.65,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 8.09,
      "norm_to_mer": 21.22
    }
  },
  "english": {
    "n_samples": 500,
    "wer_raw": 26.17,
    "wer_norm": 14.63,
    "wer_numcanon": 14.14,
    "space_norm_wer": 8.7,
    "mer": 8.83,
    "cer_norm": 8.89,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 11.54,
      "norm_to_numcanon": 0.49,
      "norm_to_space_norm": 5.93,
      "norm_to_mer": 5.8
    }
  },
  "gujarati": {
    "n_samples": 500,
    "wer_raw": 26.88,
    "wer_norm": 26.58,
    "wer_numcanon": 26.58,
    "space_norm_wer": 20.52,
    "mer": 8.75,
    "cer_norm": 8.57,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.3,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 6.06,
      "norm_to_mer": 17.83
    }
  },
  "hindi": {
    "n_samples": 500,
    "wer_raw": 14.02,
    "wer_norm": 13.93,
    "wer_numcanon": 13.91,
    "space_norm_wer": 10.85,
    "mer": 5.42,
    "cer_norm": 5.0,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.09,
      "norm_to_numcanon": 0.02,
      "norm_to_space_norm": 3.08,
      "norm_to_mer": 8.51
    }
  },
  "kannada": {
    "n_samples": 500,
    "wer_raw": 47.09,
    "wer_norm": 46.4,
    "wer_numcanon": 46.4,
    "space_norm_wer": 34.7,
    "mer": 12.68,
    "cer_norm": 12.95,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.69,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 11.7,
      "norm_to_mer": 33.72
    }
  },
  "malayalam": {
    "n_samples": 500,
    "wer_raw": 53.6,
    "wer_norm": 52.34,
    "wer_numcanon": 52.34,
    "space_norm_wer": 41.61,
    "mer": 12.79,
    "cer_norm": 12.63,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 1.26,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 10.73,
      "norm_to_mer": 39.55
    }
  },
  "marathi": {
    "n_samples": 500,
    "wer_raw": 40.19,
    "wer_norm": 39.91,
    "wer_numcanon": 39.91,
    "space_norm_wer": 31.41,
    "mer": 13.27,
    "cer_norm": 13.25,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.28,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 8.5,
      "norm_to_mer": 26.64
    }
  },
  "odia": {
    "n_samples": 500,
    "wer_raw": 41.04,
    "wer_norm": 40.91,
    "wer_numcanon": 40.91,
    "space_norm_wer": 31.21,
    "mer": 13.42,
    "cer_norm": 13.17,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.13,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 9.7,
      "norm_to_mer": 27.49
    }
  },
  "punjabi": {
    "n_samples": 500,
    "wer_raw": 28.7,
    "wer_norm": 28.53,
    "wer_numcanon": 28.53,
    "space_norm_wer": 21.5,
    "mer": 12.18,
    "cer_norm": 11.53,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.17,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 7.03,
      "norm_to_mer": 16.35
    }
  },
  "tamil": {
    "n_samples": 500,
    "wer_raw": 50.68,
    "wer_norm": 49.16,
    "wer_numcanon": 49.16,
    "space_norm_wer": 37.87,
    "mer": 13.32,
    "cer_norm": 13.36,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 1.52,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 11.29,
      "norm_to_mer": 35.84
    }
  },
  "telugu": {
    "n_samples": 500,
    "wer_raw": 41.55,
    "wer_norm": 40.24,
    "wer_numcanon": 40.24,
    "space_norm_wer": 27.55,
    "mer": 9.72,
    "cer_norm": 10.08,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 1.31,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 12.69,
      "norm_to_mer": 30.52
    }
  },
  "__overall__": {
    "n_samples": 6000,
    "wer_raw": 38.2,
    "wer_norm": 36.77,
    "wer_numcanon": 36.73,
    "space_norm_wer": 28.18,
    "mer": 12.61,
    "cer_norm": 12.4
  },
  "__macro_avg__": {
    "n_languages": 12,
    "wer_raw": 38.0,
    "wer_norm": 36.49,
    "wer_numcanon": 36.44,
    "space_norm_wer": 27.85,
    "mer": 12.25,
    "cer_norm": 12.02
  },
  "__meta__": {
    "checkpoint": "/home/ubuntu/training/checkpoints/qwen3-asr-ckpt-72000",
    "checkpoint_name": "ckpt-72000",
    "model_id": "qwen3-asr",
    "model_type": "Qwen3-ASR-1.7B",
    "dataset": "BayAreaBoys/indic-asr-benchmark-6k",
    "batch_size": 64,
    "inference_time_sec": 148.35,
    "total_audio_sec": 40354.46,
    "rtf": 0.0037,
    "timestamp": "2026-03-28T06:50:04Z",
    "gpu": "NVIDIA H200 80GB",
    "framework": "vllm",
    "normalization_version": "v1",
    "jiwer_version": "3.1.0"
  }
}