{
  "assamese": {
    "n_samples": 500,
    "wer_raw": 17.06,
    "wer_norm": 16.91,
    "wer_numcanon": 16.91,
    "space_norm_wer": 14.21,
    "mer": 7.25,
    "cer_norm": 6.79,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.15,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 2.71,
      "norm_to_mer": 9.67
    }
  },
  "bengali": {
    "n_samples": 500,
    "wer_raw": 17.93,
    "wer_norm": 17.37,
    "wer_numcanon": 17.37,
    "space_norm_wer": 11.29,
    "mer": 3.49,
    "cer_norm": 3.68,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.57,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 6.07,
      "norm_to_mer": 13.88
    }
  },
  "english": {
    "n_samples": 500,
    "wer_raw": 28.66,
    "wer_norm": 12.86,
    "wer_numcanon": 12.86,
    "space_norm_wer": 8.46,
    "mer": 8.74,
    "cer_norm": 8.52,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 15.81,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 4.4,
      "norm_to_mer": 4.12
    }
  },
  "gujarati": {
    "n_samples": 500,
    "wer_raw": 17.04,
    "wer_norm": 17.04,
    "wer_numcanon": 17.04,
    "space_norm_wer": 11.97,
    "mer": 4.15,
    "cer_norm": 4.19,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.0,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 5.07,
      "norm_to_mer": 12.9
    }
  },
  "hindi": {
    "n_samples": 500,
    "wer_raw": 13.05,
    "wer_norm": 13.03,
    "wer_numcanon": 13.03,
    "space_norm_wer": 9.02,
    "mer": 3.83,
    "cer_norm": 3.76,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.02,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 4.02,
      "norm_to_mer": 9.2
    }
  },
  "kannada": {
    "n_samples": 500,
    "wer_raw": 26.64,
    "wer_norm": 26.51,
    "wer_numcanon": 26.51,
    "space_norm_wer": 17.87,
    "mer": 5.02,
    "cer_norm": 5.42,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.13,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 8.64,
      "norm_to_mer": 21.49
    }
  },
  "malayalam": {
    "n_samples": 500,
    "wer_raw": 40.75,
    "wer_norm": 40.7,
    "wer_numcanon": 40.7,
    "space_norm_wer": 30.67,
    "mer": 7.38,
    "cer_norm": 7.49,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.06,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 10.02,
      "norm_to_mer": 33.31
    }
  },
  "marathi": {
    "n_samples": 500,
    "wer_raw": 22.16,
    "wer_norm": 22.16,
    "wer_numcanon": 22.16,
    "space_norm_wer": 15.65,
    "mer": 5.57,
    "cer_norm": 5.74,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.0,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 6.51,
      "norm_to_mer": 16.59
    }
  },
  "odia": {
    "n_samples": 500,
    "wer_raw": 22.96,
    "wer_norm": 22.88,
    "wer_numcanon": 22.88,
    "space_norm_wer": 13.75,
    "mer": 4.43,
    "cer_norm": 4.83,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.07,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 9.13,
      "norm_to_mer": 18.45
    }
  },
  "punjabi": {
    "n_samples": 500,
    "wer_raw": 17.22,
    "wer_norm": 16.75,
    "wer_numcanon": 16.75,
    "space_norm_wer": 12.96,
    "mer": 4.89,
    "cer_norm": 4.63,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.47,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 3.79,
      "norm_to_mer": 11.86
    }
  },
  "tamil": {
    "n_samples": 500,
    "wer_raw": 30.4,
    "wer_norm": 30.17,
    "wer_numcanon": 30.17,
    "space_norm_wer": 20.28,
    "mer": 5.71,
    "cer_norm": 5.93,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.22,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 9.89,
      "norm_to_mer": 24.46
    }
  },
  "telugu": {
    "n_samples": 500,
    "wer_raw": 25.0,
    "wer_norm": 24.77,
    "wer_numcanon": 24.77,
    "space_norm_wer": 15.3,
    "mer": 4.38,
    "cer_norm": 4.78,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.22,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 9.47,
      "norm_to_mer": 20.4
    }
  },
  "__overall__": {
    "n_samples": 6000,
    "wer_raw": 22.94,
    "wer_norm": 21.54,
    "wer_numcanon": 21.54,
    "space_norm_wer": 15.04,
    "mer": 5.5,
    "cer_norm": 5.59
  },
  "__macro_avg__": {
    "n_languages": 12,
    "wer_raw": 23.24,
    "wer_norm": 21.76,
    "wer_numcanon": 21.76,
    "space_norm_wer": 15.12,
    "mer": 5.4,
    "cer_norm": 5.48
  },
  "__meta__": {
    "checkpoint": "/home/ubuntu/training/checkpoints/qwen3-asr-mixed-v2-ckpt-130000",
    "checkpoint_name": "ckpt-130000",
    "model_id": "qwen3-asr-mixed-v2",
    "model_type": "qwen3-asr-1.7B-mixed-v2",
    "dataset": "BayAreaBoys/indic-asr-benchmark-6k",
    "batch_size": 128,
    "inference_time_sec": 684.1,
    "total_audio_sec": 40354.46,
    "rtf": 0.017,
    "timestamp": "2026-04-04T19:21:53Z",
    "gpu": "NVIDIA A100-SXM4-80GB",
    "framework": "transformers",
    "normalization_version": "v1"
  }
}