{
  "assamese": {
    "n_samples": 500,
    "wer_raw": 18.16,
    "wer_norm": 18.0,
    "wer_numcanon": 18.0,
    "space_norm_wer": 15.11,
    "mer": 7.65,
    "cer_norm": 7.22,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.16,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 2.9,
      "norm_to_mer": 10.35
    }
  },
  "bengali": {
    "n_samples": 500,
    "wer_raw": 18.93,
    "wer_norm": 18.46,
    "wer_numcanon": 18.46,
    "space_norm_wer": 11.89,
    "mer": 3.67,
    "cer_norm": 3.88,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.47,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 6.57,
      "norm_to_mer": 14.79
    }
  },
  "english": {
    "n_samples": 500,
    "wer_raw": 31.45,
    "wer_norm": 13.4,
    "wer_numcanon": 13.4,
    "space_norm_wer": 8.72,
    "mer": 8.85,
    "cer_norm": 8.69,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 18.05,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 4.68,
      "norm_to_mer": 4.54
    }
  },
  "gujarati": {
    "n_samples": 500,
    "wer_raw": 16.7,
    "wer_norm": 16.7,
    "wer_numcanon": 16.7,
    "space_norm_wer": 12.28,
    "mer": 4.22,
    "cer_norm": 4.16,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.0,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 4.42,
      "norm_to_mer": 12.48
    }
  },
  "hindi": {
    "n_samples": 500,
    "wer_raw": 13.45,
    "wer_norm": 13.45,
    "wer_numcanon": 13.45,
    "space_norm_wer": 9.36,
    "mer": 4.18,
    "cer_norm": 4.06,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.0,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 4.08,
      "norm_to_mer": 9.27
    }
  },
  "kannada": {
    "n_samples": 500,
    "wer_raw": 27.67,
    "wer_norm": 27.64,
    "wer_numcanon": 27.64,
    "space_norm_wer": 18.36,
    "mer": 5.15,
    "cer_norm": 5.57,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.04,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 9.28,
      "norm_to_mer": 22.49
    }
  },
  "malayalam": {
    "n_samples": 500,
    "wer_raw": 39.81,
    "wer_norm": 39.81,
    "wer_numcanon": 39.81,
    "space_norm_wer": 30.47,
    "mer": 7.31,
    "cer_norm": 7.39,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.0,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 9.34,
      "norm_to_mer": 32.5
    }
  },
  "marathi": {
    "n_samples": 500,
    "wer_raw": 23.71,
    "wer_norm": 23.69,
    "wer_numcanon": 23.69,
    "space_norm_wer": 16.69,
    "mer": 6.03,
    "cer_norm": 6.18,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.02,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 7.0,
      "norm_to_mer": 17.66
    }
  },
  "odia": {
    "n_samples": 500,
    "wer_raw": 23.18,
    "wer_norm": 23.09,
    "wer_numcanon": 23.09,
    "space_norm_wer": 13.47,
    "mer": 4.23,
    "cer_norm": 4.68,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.09,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 9.61,
      "norm_to_mer": 18.86
    }
  },
  "punjabi": {
    "n_samples": 500,
    "wer_raw": 17.15,
    "wer_norm": 16.66,
    "wer_numcanon": 16.66,
    "space_norm_wer": 13.16,
    "mer": 4.95,
    "cer_norm": 4.63,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.49,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 3.5,
      "norm_to_mer": 11.72
    }
  },
  "tamil": {
    "n_samples": 500,
    "wer_raw": 31.86,
    "wer_norm": 31.86,
    "wer_numcanon": 31.86,
    "space_norm_wer": 21.93,
    "mer": 5.93,
    "cer_norm": 6.16,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.0,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 9.93,
      "norm_to_mer": 25.93
    }
  },
  "telugu": {
    "n_samples": 500,
    "wer_raw": 25.48,
    "wer_norm": 25.48,
    "wer_numcanon": 25.48,
    "space_norm_wer": 15.71,
    "mer": 4.4,
    "cer_norm": 4.82,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.0,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 9.77,
      "norm_to_mer": 21.08
    }
  },
  "__overall__": {
    "n_samples": 6000,
    "wer_raw": 23.66,
    "wer_norm": 22.14,
    "wer_numcanon": 22.14,
    "space_norm_wer": 15.53,
    "mer": 5.64,
    "cer_norm": 5.73
  },
  "__macro_avg__": {
    "n_languages": 12,
    "wer_raw": 23.96,
    "wer_norm": 22.35,
    "wer_numcanon": 22.35,
    "space_norm_wer": 15.6,
    "mer": 5.55,
    "cer_norm": 5.62
  },
  "__meta__": {
    "checkpoint": "/home/ubuntu/training/checkpoints/qwen3-asr-mixed-v2-ckpt-100000",
    "checkpoint_name": "ckpt-100000",
    "model_id": "qwen3-asr-mixed-v2",
    "model_type": "qwen3-asr-1.7B-mixed-v2",
    "dataset": "BayAreaBoys/indic-asr-benchmark-6k",
    "batch_size": 128,
    "inference_time_sec": 687.69,
    "total_audio_sec": 40354.46,
    "rtf": 0.017,
    "timestamp": "2026-04-04T19:09:46Z",
    "gpu": "NVIDIA A100-SXM4-80GB",
    "framework": "transformers",
    "normalization_version": "v1"
  }
}