{
  "assamese": {
    "n_samples": 500,
    "wer_raw": 27.26,
    "wer_norm": 27.11,
    "wer_numcanon": 27.11,
    "space_norm_wer": 22.88,
    "mer": 12.29,
    "cer_norm": 11.58,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.15,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 4.23,
      "norm_to_mer": 14.82
    }
  },
  "bengali": {
    "n_samples": 500,
    "wer_raw": 20.58,
    "wer_norm": 20.05,
    "wer_numcanon": 20.05,
    "space_norm_wer": 13.29,
    "mer": 4.5,
    "cer_norm": 4.65,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.52,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 6.76,
      "norm_to_mer": 15.55
    }
  },
  "english": {
    "n_samples": 500,
    "wer_raw": 25.78,
    "wer_norm": 16.88,
    "wer_numcanon": 16.88,
    "space_norm_wer": 11.83,
    "mer": 11.02,
    "cer_norm": 10.67,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 8.9,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 5.05,
      "norm_to_mer": 5.86
    }
  },
  "gujarati": {
    "n_samples": 500,
    "wer_raw": 19.7,
    "wer_norm": 19.63,
    "wer_numcanon": 19.63,
    "space_norm_wer": 14.23,
    "mer": 5.47,
    "cer_norm": 5.5,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.07,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 5.4,
      "norm_to_mer": 14.16
    }
  },
  "hindi": {
    "n_samples": 500,
    "wer_raw": 15.49,
    "wer_norm": 15.47,
    "wer_numcanon": 15.47,
    "space_norm_wer": 11.08,
    "mer": 4.95,
    "cer_norm": 4.77,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.02,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 4.4,
      "norm_to_mer": 10.53
    }
  },
  "kannada": {
    "n_samples": 500,
    "wer_raw": 31.36,
    "wer_norm": 31.36,
    "wer_numcanon": 31.36,
    "space_norm_wer": 21.7,
    "mer": 6.56,
    "cer_norm": 6.97,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.0,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 9.66,
      "norm_to_mer": 24.8
    }
  },
  "malayalam": {
    "n_samples": 500,
    "wer_raw": 42.37,
    "wer_norm": 42.11,
    "wer_numcanon": 42.11,
    "space_norm_wer": 31.74,
    "mer": 8.16,
    "cer_norm": 8.28,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.26,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 10.37,
      "norm_to_mer": 33.95
    }
  },
  "marathi": {
    "n_samples": 500,
    "wer_raw": 27.84,
    "wer_norm": 27.84,
    "wer_numcanon": 27.84,
    "space_norm_wer": 19.87,
    "mer": 7.38,
    "cer_norm": 7.56,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.0,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 7.97,
      "norm_to_mer": 20.46
    }
  },
  "odia": {
    "n_samples": 500,
    "wer_raw": 26.22,
    "wer_norm": 26.11,
    "wer_numcanon": 26.11,
    "space_norm_wer": 16.39,
    "mer": 5.59,
    "cer_norm": 5.98,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.11,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 9.73,
      "norm_to_mer": 20.52
    }
  },
  "punjabi": {
    "n_samples": 500,
    "wer_raw": 20.3,
    "wer_norm": 20.18,
    "wer_numcanon": 20.18,
    "space_norm_wer": 15.33,
    "mer": 6.3,
    "cer_norm": 6.09,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.12,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 4.85,
      "norm_to_mer": 13.88
    }
  },
  "tamil": {
    "n_samples": 500,
    "wer_raw": 37.47,
    "wer_norm": 36.93,
    "wer_numcanon": 36.93,
    "space_norm_wer": 26.54,
    "mer": 8.05,
    "cer_norm": 8.21,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.54,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 10.39,
      "norm_to_mer": 28.88
    }
  },
  "telugu": {
    "n_samples": 500,
    "wer_raw": 29.94,
    "wer_norm": 29.9,
    "wer_numcanon": 29.9,
    "space_norm_wer": 19.32,
    "mer": 5.72,
    "cer_norm": 6.12,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.04,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 10.58,
      "norm_to_mer": 24.18
    }
  },
  "__overall__": {
    "n_samples": 6000,
    "wer_raw": 26.9,
    "wer_norm": 26.05,
    "wer_numcanon": 26.05,
    "space_norm_wer": 18.73,
    "mer": 7.27,
    "cer_norm": 7.33
  },
  "__macro_avg__": {
    "n_languages": 12,
    "wer_raw": 27.03,
    "wer_norm": 26.13,
    "wer_numcanon": 26.13,
    "space_norm_wer": 18.68,
    "mer": 7.17,
    "cer_norm": 7.2
  },
  "__meta__": {
    "checkpoint": "/home/ubuntu/training/checkpoints/qwen3-asr-mixed-ckpt-50000",
    "checkpoint_name": "ckpt-50000",
    "model_id": "qwen3-asr-mixed",
    "model_type": "qwen3-asr-1.7B-mixed",
    "dataset": "BayAreaBoys/indic-asr-benchmark-6k",
    "batch_size": 8,
    "inference_time_sec": 1824.59,
    "total_audio_sec": 40354.46,
    "rtf": 0.0452,
    "timestamp": "2026-03-30T21:07:40Z",
    "gpu": "NVIDIA A100-SXM4-80GB",
    "framework": "transformers",
    "normalization_version": "v1"
  }
}