{
  "assamese": {
    "n_samples": 500,
    "wer_raw": 18.08,
    "wer_norm": 17.89,
    "wer_numcanon": 17.89,
    "space_norm_wer": 14.81,
    "mer": 7.6,
    "cer_norm": 7.18,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.19,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 3.09,
      "norm_to_mer": 10.29
    }
  },
  "bengali": {
    "n_samples": 500,
    "wer_raw": 15.73,
    "wer_norm": 15.24,
    "wer_numcanon": 15.24,
    "space_norm_wer": 9.39,
    "mer": 2.63,
    "cer_norm": 2.85,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.5,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 5.85,
      "norm_to_mer": 12.61
    }
  },
  "english": {
    "n_samples": 500,
    "wer_raw": 25.2,
    "wer_norm": 12.06,
    "wer_numcanon": 12.06,
    "space_norm_wer": 7.49,
    "mer": 8.17,
    "cer_norm": 8.03,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 13.15,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 4.56,
      "norm_to_mer": 3.88
    }
  },
  "gujarati": {
    "n_samples": 500,
    "wer_raw": 15.07,
    "wer_norm": 15.07,
    "wer_numcanon": 15.07,
    "space_norm_wer": 10.83,
    "mer": 3.63,
    "cer_norm": 3.62,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.0,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 4.25,
      "norm_to_mer": 11.44
    }
  },
  "hindi": {
    "n_samples": 500,
    "wer_raw": 13.03,
    "wer_norm": 13.03,
    "wer_numcanon": 13.03,
    "space_norm_wer": 9.26,
    "mer": 3.8,
    "cer_norm": 3.69,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.0,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 3.77,
      "norm_to_mer": 9.23
    }
  },
  "kannada": {
    "n_samples": 500,
    "wer_raw": 24.77,
    "wer_norm": 24.73,
    "wer_numcanon": 24.73,
    "space_norm_wer": 15.93,
    "mer": 4.22,
    "cer_norm": 4.65,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.04,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 8.81,
      "norm_to_mer": 20.51
    }
  },
  "malayalam": {
    "n_samples": 500,
    "wer_raw": 38.86,
    "wer_norm": 38.86,
    "wer_numcanon": 38.86,
    "space_norm_wer": 29.28,
    "mer": 6.85,
    "cer_norm": 6.95,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.0,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 9.58,
      "norm_to_mer": 32.01
    }
  },
  "marathi": {
    "n_samples": 500,
    "wer_raw": 21.26,
    "wer_norm": 21.26,
    "wer_numcanon": 21.26,
    "space_norm_wer": 14.54,
    "mer": 5.17,
    "cer_norm": 5.41,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.0,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 6.72,
      "norm_to_mer": 16.09
    }
  },
  "odia": {
    "n_samples": 500,
    "wer_raw": 23.44,
    "wer_norm": 23.4,
    "wer_numcanon": 23.4,
    "space_norm_wer": 13.29,
    "mer": 4.04,
    "cer_norm": 4.55,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.04,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 10.11,
      "norm_to_mer": 19.36
    }
  },
  "punjabi": {
    "n_samples": 500,
    "wer_raw": 15.88,
    "wer_norm": 15.36,
    "wer_numcanon": 15.36,
    "space_norm_wer": 11.99,
    "mer": 4.45,
    "cer_norm": 4.19,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.52,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 3.38,
      "norm_to_mer": 10.91
    }
  },
  "tamil": {
    "n_samples": 500,
    "wer_raw": 28.54,
    "wer_norm": 28.54,
    "wer_numcanon": 28.54,
    "space_norm_wer": 19.02,
    "mer": 5.18,
    "cer_norm": 5.41,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.0,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 9.52,
      "norm_to_mer": 23.36
    }
  },
  "telugu": {
    "n_samples": 500,
    "wer_raw": 23.05,
    "wer_norm": 23.05,
    "wer_numcanon": 23.05,
    "space_norm_wer": 13.93,
    "mer": 3.85,
    "cer_norm": 4.25,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.0,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 9.12,
      "norm_to_mer": 19.2
    }
  },
  "__overall__": {
    "n_samples": 6000,
    "wer_raw": 21.68,
    "wer_norm": 20.54,
    "wer_numcanon": 20.54,
    "space_norm_wer": 14.12,
    "mer": 5.05,
    "cer_norm": 5.17
  },
  "__macro_avg__": {
    "n_languages": 12,
    "wer_raw": 21.91,
    "wer_norm": 20.71,
    "wer_numcanon": 20.71,
    "space_norm_wer": 14.15,
    "mer": 4.97,
    "cer_norm": 5.07
  },
  "__meta__": {
    "checkpoint": "/home/ubuntu/training/checkpoints/qwen3-asr-mixed-v2-ckpt-350000",
    "checkpoint_name": "ckpt-350000",
    "model_id": "qwen3-asr-mixed-v2",
    "model_type": "qwen3-asr-1.7B-mixed-v2",
    "dataset": "BayAreaBoys/indic-asr-benchmark-6k",
    "batch_size": 128,
    "inference_time_sec": 681.06,
    "total_audio_sec": 40354.46,
    "rtf": 0.0169,
    "timestamp": "2026-04-07T08:14:07Z",
    "gpu": "NVIDIA A100-SXM4-80GB",
    "framework": "transformers",
    "normalization_version": "v1"
  }
}