o
    1i1                     @  sf   d Z ddlmZ ddlmZmZ ddlmZmZm	Z	 ddl
Z
eG dd dZe	G dd	 d	eZdS )
zEUnified codec interface: TokenBatch dataclass + NeuralCodec protocol.    )annotations)	dataclassfield)AnyProtocolruntime_checkableNc                   @  sl   e Zd ZU dZded< ded< ded< eedZd	ed
< edddZ	edddZ
dddZdddZdS )
TokenBatchaX  Container for codec token output, supporting diverse token structures.

    tokens can be:
      - torch.LongTensor [B, T]           single-stream (XCodec2, WavTokenizer)
      - list[torch.LongTensor]            multi-scale (SNAC: different T per level)
      - dict[str, torch.LongTensor]       named streams (BiCodec: semantic + global)
    str
codec_nameintsample_rater   tokens)default_factoryzdict[str, Any]auxreturnc                 C  sr   t | jtjr| jjd S t | jtrtt| j jd S t | jt	t
fr/| jd jd S tdt| j )Nr   z)Cannot infer batch_size from tokens type )
isinstancer   torchTensorshapedictnextitervalueslisttuple
ValueErrortypeself r   3/home/ubuntu/bench-codecs/codecbench/codecs/base.py
batch_size   s   zTokenBatch.batch_sizec                 C  sd   t | jtjr| j S t | jtrtdd | j D S t | jtt	fr0tdd | jD S dS )z9Total number of token elements across all streams/levels.c                 s      | ]}|  V  qd S Nnumel).0vr   r   r    	<genexpr>*       z)TokenBatch.token_count.<locals>.<genexpr>c                 s  r"   r#   r$   r&   tr   r   r    r(   ,   r)   r   )
r   r   r   r   r%   r   sumr   r   r   r   r   r   r    token_count$   s   
zTokenBatch.token_counttuple[int, int]c                 C  s   g }t | jtjr| jg}nt | jtrt| j }nt | jttfr)t| j}tdd |D }t	dd |D }t
|t
|fS )z@Return (min_token, max_token) observed across all token tensors.c                 s      | ]	}|   V  qd S r#   )minitemr*   r   r   r    r(   8       z,TokenBatch.observed_vocab.<locals>.<genexpr>c                 s  r/   r#   )maxr1   r*   r   r   r    r(   9   r2   )r   r   r   r   r   r   r   r   r0   r3   r   )r   tensorsall_minall_maxr   r   r    observed_vocab/   s   

zTokenBatch.observed_vocabc                 C  sj   t | jtjrtt| jjS t | jtr!tdd | j D S t | jtt	fr3tdd | jD S dS )Nc                 S  s   i | ]
\}}|t |jqS r   r   r   )r&   kr'   r   r   r    
<dictcomp>@   s    z-TokenBatch.shapes_summary.<locals>.<dictcomp>c                 S  s   g | ]}t |jqS r   r8   r*   r   r   r    
<listcomp>B   s    z-TokenBatch.shapes_summary.<locals>.<listcomp>unknown)
r   r   r   r   r	   r   r   r   itemsr   r   r   r   r    shapes_summary<   s   zTokenBatch.shapes_summaryN)r   r   )r   r.   )r   r	   )__name__
__module____qualname____doc____annotations__r   r   r   propertyr!   r-   r7   r>   r   r   r   r    r      s   
 	

r   c                   @  sT   e Zd ZU dZded< ded< dddZd ddZd!ddZd"ddZd#ddZ	dS )$NeuralCodecz3Unified interface every codec wrapper must satisfy.r	   namer   	native_srdevicedtypetorch.dtyper   Nonec                 C     dS )z1Load model weights and move to device. Call once.Nr   )r   rH   rI   r   r   r    loadM      zNeuralCodec.loadbatch_secondsfloatr!   c                 C  rL   )z9Run throwaway forward passes so JIT/CUDA caches are warm.Nr   )r   rO   r!   r   r   r    warmupQ   rN   zNeuralCodec.warmupwavtorch.Tensorsrr   c                 C  rL   )zEncode waveform to tokens.

        Args:
            wav: float32 [-1, 1], shape [B, 1, T]
            sr:  sample rate of wav (wrapper resamples if needed)
        Nr   )r   rR   rT   r   r   r    encodeU   s   zNeuralCodec.encodetbc                 C  rL   )z3Decode tokens back to waveform. Returns [B, 1, T'].Nr   r   rV   r   r   r    decode^   rN   zNeuralCodec.decodetorch.LongTensorc                 C  rL   )zFlatten token structure to a single 1-D sequence per batch element.

        Returns [B, T_flat]. Used to estimate modeling complexity.
        Nr   rW   r   r   r    flatten_for_lmb   s   zNeuralCodec.flatten_for_lmN)rH   r	   rI   rJ   r   rK   )rO   rP   r!   r   r   rK   )rR   rS   rT   r   r   r   )rV   r   r   rS   )rV   r   r   rY   )
r?   r@   rA   rB   rC   rM   rQ   rU   rX   rZ   r   r   r   r    rE   F   s   
 



	rE   )rB   
__future__r   dataclassesr   r   typingr   r   r   r   r   rE   r   r   r   r    <module>   s    :