#!/usr/bin/env python3
"""Extract speaker WAVs preserving original timeline (silence where speaker is not talking)."""
import json
import os

import numpy as np

INPUT_FILE = "/home/ubuntu/bob5_vocals_16k.wav"
DIAR_JSON = "/home/ubuntu/bob5_pyannote/diarization_exclusive.json"
OUTPUT_DIR = "/home/ubuntu/bob5_pyannote_timeline"


def group_segments(segments):
    """Group diarization segments by speaker.

    Args:
        segments: list of dicts with "speaker", "start", "end" keys
            (start/end in seconds).

    Returns:
        dict mapping speaker label -> list of (start, end) tuples,
        preserving original segment order.
    """
    speakers = {}
    for seg in segments:
        speakers.setdefault(seg["speaker"], []).append((seg["start"], seg["end"]))
    return speakers


def build_track(audio, spans, sr, total_samples):
    """Build a full-length mono track that is silent except during `spans`.

    Args:
        audio: 1-D float32 sample array of length `total_samples`.
        spans: iterable of (start, end) times in seconds.
        sr: sample rate in Hz.
        total_samples: length of the output track in samples.

    Returns:
        np.ndarray of float32, length `total_samples`.
    """
    track = np.zeros(total_samples, dtype=np.float32)
    for start, end in spans:
        # Clamp to the valid sample range; a negative start would otherwise
        # slice from the end of the array. Skip spans that are empty after
        # clamping (entirely outside the audio, or zero-length).
        si = max(0, int(start * sr))
        ei = min(int(end * sr), total_samples)
        if si < ei:
            track[si:ei] = audio[si:ei]
    return track


def main():
    """Read diarization JSON + source WAV, write one timeline WAV per speaker."""
    import soundfile as sf  # local import: keeps the pure helpers importable without soundfile

    os.makedirs(OUTPUT_DIR, exist_ok=True)

    with open(DIAR_JSON) as f:
        segments = json.load(f)

    audio, sr = sf.read(INPUT_FILE, dtype='float32')
    if audio.ndim > 1:
        # Downmix multi-channel audio to mono by averaging channels.
        audio = audio.mean(axis=1)

    total_samples = len(audio)
    print(f"Audio: {total_samples/sr:.1f}s at {sr}Hz")

    speakers = group_segments(segments)
    print(f"Found {len(speakers)} speakers (exclusive, no overlaps)\n")

    # Compute each speaker's total speech once (the original recomputed the
    # sum inside the sort key and again per iteration); emit loudest first.
    durations = {spk: sum(e - s for s, e in spans) for spk, spans in speakers.items()}
    for spk in sorted(speakers, key=durations.get, reverse=True):
        # Silent full-duration track with only this speaker's segments pasted in.
        track = build_track(audio, speakers[spk], sr, total_samples)
        path = os.path.join(OUTPUT_DIR, f"{spk}.wav")
        sf.write(path, track, sr)
        print(f"  {spk}: {durations[spk]:.1f}s speech in {total_samples/sr:.1f}s track -> {path}")

    # Keep a copy of the diarization next to the rendered tracks.
    with open(os.path.join(OUTPUT_DIR, "diarization_exclusive.json"), "w") as f:
        json.dump(segments, f, indent=2)

    print("\nDone! Each track is full-length with silence where speaker isn't talking.")


if __name__ == "__main__":
    main()