{
  "assamese": {
    "n_samples": 500,
    "wer_raw": 16.67,
    "wer_norm": 16.49,
    "wer_numcanon": 16.49,
    "space_norm_wer": 13.98,
    "mer": 7.19,
    "cer_norm": 6.73,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.18,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 2.52,
      "norm_to_mer": 9.31
    }
  },
  "bengali": {
    "n_samples": 500,
    "wer_raw": 17.16,
    "wer_norm": 16.65,
    "wer_numcanon": 16.65,
    "space_norm_wer": 10.61,
    "mer": 3.22,
    "cer_norm": 3.42,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.51,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 6.04,
      "norm_to_mer": 13.43
    }
  },
  "english": {
    "n_samples": 500,
    "wer_raw": 27.08,
    "wer_norm": 12.95,
    "wer_numcanon": 12.95,
    "space_norm_wer": 8.42,
    "mer": 8.73,
    "cer_norm": 8.54,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 14.13,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 4.53,
      "norm_to_mer": 4.22
    }
  },
  "gujarati": {
    "n_samples": 500,
    "wer_raw": 15.9,
    "wer_norm": 15.9,
    "wer_numcanon": 15.9,
    "space_norm_wer": 11.24,
    "mer": 3.94,
    "cer_norm": 3.98,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.0,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 4.66,
      "norm_to_mer": 11.96
    }
  },
  "hindi": {
    "n_samples": 500,
    "wer_raw": 13.4,
    "wer_norm": 13.4,
    "wer_numcanon": 13.4,
    "space_norm_wer": 9.78,
    "mer": 4.01,
    "cer_norm": 3.87,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.0,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 3.62,
      "norm_to_mer": 9.38
    }
  },
  "kannada": {
    "n_samples": 500,
    "wer_raw": 25.82,
    "wer_norm": 25.77,
    "wer_numcanon": 25.77,
    "space_norm_wer": 16.85,
    "mer": 4.76,
    "cer_norm": 5.15,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.05,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 8.92,
      "norm_to_mer": 21.01
    }
  },
  "malayalam": {
    "n_samples": 500,
    "wer_raw": 39.61,
    "wer_norm": 39.55,
    "wer_numcanon": 39.55,
    "space_norm_wer": 29.61,
    "mer": 7.16,
    "cer_norm": 7.27,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.06,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 9.95,
      "norm_to_mer": 32.39
    }
  },
  "marathi": {
    "n_samples": 500,
    "wer_raw": 22.86,
    "wer_norm": 22.85,
    "wer_numcanon": 22.85,
    "space_norm_wer": 15.85,
    "mer": 5.68,
    "cer_norm": 5.9,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.02,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 7.0,
      "norm_to_mer": 17.17
    }
  },
  "odia": {
    "n_samples": 500,
    "wer_raw": 22.81,
    "wer_norm": 22.75,
    "wer_numcanon": 22.75,
    "space_norm_wer": 13.51,
    "mer": 4.23,
    "cer_norm": 4.65,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.06,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 9.25,
      "norm_to_mer": 18.53
    }
  },
  "punjabi": {
    "n_samples": 500,
    "wer_raw": 16.82,
    "wer_norm": 16.42,
    "wer_numcanon": 16.42,
    "space_norm_wer": 12.63,
    "mer": 4.63,
    "cer_norm": 4.43,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.4,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 3.79,
      "norm_to_mer": 11.79
    }
  },
  "tamil": {
    "n_samples": 500,
    "wer_raw": 29.84,
    "wer_norm": 29.73,
    "wer_numcanon": 29.73,
    "space_norm_wer": 20.52,
    "mer": 5.7,
    "cer_norm": 5.91,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.11,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 9.2,
      "norm_to_mer": 24.03
    }
  },
  "telugu": {
    "n_samples": 500,
    "wer_raw": 25.31,
    "wer_norm": 24.98,
    "wer_numcanon": 24.98,
    "space_norm_wer": 15.62,
    "mer": 4.4,
    "cer_norm": 4.8,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.33,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 9.36,
      "norm_to_mer": 20.57
    }
  },
  "__overall__": {
    "n_samples": 6000,
    "wer_raw": 22.48,
    "wer_norm": 21.23,
    "wer_numcanon": 21.23,
    "space_norm_wer": 14.81,
    "mer": 5.4,
    "cer_norm": 5.49
  },
  "__macro_avg__": {
    "n_languages": 12,
    "wer_raw": 22.77,
    "wer_norm": 21.45,
    "wer_numcanon": 21.45,
    "space_norm_wer": 14.88,
    "mer": 5.3,
    "cer_norm": 5.39
  },
  "__meta__": {
    "checkpoint": "/home/ubuntu/training/checkpoints/qwen3-asr-mixed-v2-ckpt-150000",
    "checkpoint_name": "ckpt-150000",
    "model_id": "qwen3-asr-mixed-v2",
    "model_type": "qwen3-asr-1.7B-mixed-v2",
    "dataset": "BayAreaBoys/indic-asr-benchmark-6k",
    "batch_size": 128,
    "inference_time_sec": 690.39,
    "total_audio_sec": 40354.46,
    "rtf": 0.0171,
    "timestamp": "2026-04-05T00:48:53Z",
    "gpu": "NVIDIA A100-SXM4-80GB",
    "framework": "transformers",
    "normalization_version": "v1"
  }
}