o
    iKT                     @   s   d dl mZmZmZmZ d dlZd dlmZ ddl	m
Z
mZmZmZmZ ddlmZ ddlmZ ddlmZmZmZmZ ddlmZ dd	lmZ dd
lmZ ddlmZ e e!Z"G dd deZ#dS )    )AnyCallableOptionalUnionN   )(DiaClassifierFreeGuidanceLogitsProcessor"DiaEOSChannelFilterLogitsProcessor!DiaEOSDelayPatternLogitsProcessorLogitsProcessorListTemperatureLogitsWarper)StoppingCriteriaList)BaseStreamer)GenerateOutputGenerationConfigGenerationMixinGenerationMode)is_deepspeed_zero3_enabled)is_fsdp_managed_module)PreTrainedModel)loggingc                       s  e Zd ZdZ								d-dedee deej dee	eej
gee f  dee dee deeeef  d	eej
 d
eej
 def fddZ	d.dee dee dedeeef f fddZ			d/deej
 deej
 deeeej
f  deej
ee eeej
f f f fddZ	d.dededeeej
f dej
deej deejeeej
f f fddZ		d0 fdd	Zedej
dedeej
 dej
fd d!Z												d1deej
 dee dee d"ee dee	eej
gee f  d#ee d$ed% d&ed' d	eej
 d
eej
 dee d(ee fd)d*Ze 												d1deej
 dee dee d"ee dee	eej
gee f  d#ee d$ed% d&ed' d	eej
 d
eej
 dee d(ee deeejf fd+d,Z  Z S )2DiaGenerationMixinNgeneration_configinput_ids_seq_lengthencoder_input_idsprefix_allowed_tokens_fnlogits_processordevicemodel_kwargsnegative_prompt_idsnegative_prompt_attention_maskreturnc
                    s   |j }
|j}d |_ d |_t }|d ur|dkr|t| |tt| jj| jj	d t
 j|||d |||||	d	}|
d urQ|
dkrQt|
|jd}|d| |t| jj| jj	|j|d |
|_ ||_|S )Ng      ?)num_channelseos_token_id	r   r   r   r   r   r   r   r   r      )guidance_scaleguidance_top_kr   )delay_patternr"   max_generation_lenr   )r%   temperaturer
   appendr   r   lenconfigr'   r"   super_get_logits_processorr   top_kinsertr	   
max_length)selfr   r   r   r   r   r   r   r   r   original_guidance_scaleoriginal_temperaturecustom_processorsmerged_processorscfg_processor	__class__ c/home/ubuntu/veenaModal/venv/lib/python3.10/site-packages/transformers/models/dia/generation_dia.pyr.   ,   sR   

z(DiaGenerationMixin._get_logits_processoruse_model_defaultskwargsc                    sN   t  j||fi |\}}| jt| jj7  _|jd uo!|jdk| _||fS Nr$   )r-   _prepare_generation_configr1   maxr,   r'   r%   	_uses_cfg)r2   r   r<   r=   r   r8   r:   r;   r?   o   s   
z-DiaGenerationMixin._prepare_generation_configinputsbos_token_idc                    sh   t  j|||d\}}}| jr/t|}tj||gdd}|dd d ur/|d dd|d< |||fS )N)rB   rC   r   r   dimattention_mask   r$   )r-   _prepare_model_inputsrA   torch
zeros_likecatgetrepeat)r2   rB   rC   r   
input_nameunconditioned_inputsr8   r:   r;   rH      s   

z(DiaGenerationMixin._prepare_model_inputs
batch_sizemodel_input_namedecoder_start_token_idc                 C   sR  d }}|durd|v r| d}|durd|v r| d}|du s&|du rctd|du d|du d | jjj}| jrB|d n|}	|du rUtj|	d|f|tj	|d	}tj
|	|jd ftj	|d
}|	 }
|jd |dddddf | jjkjdd  }|
ddd|f dd	 }|ddd|f 	 }||d< |
|d< ||fS )zGPrepares `decoder_input_ids` for generation with encoder-decoder modelsNdecoder_input_idsdecoder_attention_maskz[In order to generate with Dia, we need the processed audio input: Got `decoder_input_ids`: z" and got `decoder_attention_mask`=z]. This can be achieved via the [`DiaProcessor`] but now defaulting to non-delayed generation.rG   r$   )dtyper   )sizerU   r   r   rD   decoder_delay_mask)poploggerwarning_oncer,   decoder_configr!   rA   rI   fulllongonesshapepad_token_idsumr@   	transpose)r2   rP   rQ   r   rR   r   rS   rT   r!   real_batch_size
delay_maskvalid_input_sizer:   r:   r;   )_prepare_decoder_input_ids_for_generation   s<   



2 z<DiaGenerationMixin._prepare_decoder_input_ids_for_generationc           	         s   | j r|d jd d n|d jd }||| jjjddd}t j|fd|i|}| 	|| jj
||d< |ddr^|d	 d dkr^|d d d dd d f d d d d d f |d< |d  |d< | j rd
D ]"}||d d urtdgdg|| jd   }|| j| ||< qk|S )Nr   rG   rW   r$   encoder_outputsrS   	use_cacheFcache_position)rS   rT   decoder_position_ids)rA   r`   reshaper,   r\   r!   rc   r-   prepare_inputs_for_generationapply_delay_maskra   rL   
contiguoustuplendimrM   )	r2   	input_idsrh   rX   r=   rP   model_inputskeyrepeat_patternr8   r:   r;   rm      s    &
0z0DiaGenerationMixin.prepare_inputs_for_generationrr   pad_idre   c                 C   s   |d u r| S t | jd |jd }|d d d |d d f }| d d d |d d f }t||k||| d d d |d d f< | S r>   )minr`   rI   where)rr   rv   re   mask_len
valid_maskvalid_inputr:   r:   r;   rn      s   (z#DiaGenerationMixin.apply_delay_maskstopping_criteriasynced_gpusassistant_modelr   streamerr   custom_generatec                 K   s  |  |||||}| j||fi |\}}||}| |  | ||| |d u r9t s3t| o8t	 dk}|d ur?|nt
 }|d urH|nt }|dd d u}| ||j|\}}}|jd }|j}| j|||d d|vrz| ||||}| j||||j|jd\}}|jr| ||d}|d ur||  |jd }|d	d u o|jd u}|d
d u o|jd u}| j||||||d}|  rd|vrd|d< | ||| |jd }|jd |kr|dkr| jjs||jd 7 }|  ||||| | j!||||||j||	|
d	}| j"|||dd}|j#|d< |$d|jd }|t%j&t%j'fv rH|j(dkr9t)d| j*|f|||d||S t)d)Nr$   rF   r   )r   rh   )rP   rQ   r   rR   r   	tokenizerrW   r1   
min_length)r   has_default_max_lengthhas_default_min_lengthrQ   inputs_tensorinput_ids_lengthlogits_to_keepinputs_embedsr#   )r   r|   r   ri   z2`num_return_sequences>1` is incompatible with Dia.)r   r|   r   zGot incompatible mode for generation, should be one of greedy or sampling. Ensure that beam search is de-activated by setting `num_beams=1`.)+_extract_generation_mode_kwargsr?   get_generation_mode_validate_model_kwargscopy_validate_generation_moder   r   distget_world_sizer
   r   rL   rH   rC   r`   r   _prepare_special_tokens._prepare_encoder_decoder_kwargs_for_generationrg   _decoder_start_token_tensortoken_healingheal_tokensputcpur1   r   _prepare_generated_length_supports_logits_to_keep_validate_generated_lengthr,   is_encoder_decoder_prepare_cache_for_generationr.   _get_stopping_criteriari   rl   r   SAMPLEGREEDY_SEARCHnum_return_sequences
ValueError_sample)r2   rB   r   r   r|   r   r}   r~   r   r   r   r<   r   r=   generation_mode_kwargsr   generation_modekwargs_has_attention_maskr   rQ   rP   r   rr   r   r   r   max_cache_lengthprepared_logits_processorprepared_stopping_criteriar:   r:   r;   _main_generate_loop   s   








	z&DiaGenerationMixin._main_generate_loopc                 K   s   | d}|d ur| }| jd|||||||||	|
||d|}t|tj }|r/|j}n|}| jjj	}|j
d | }|||ddd}| || jj|}|rX||_|S |}|S )NrS   )rB   r   r   r|   r   r}   r~   r   r   r   r<   r   r   rW   r$   rG   r:   )rL   cloner   
isinstancerI   Tensor	sequencesr,   r\   r!   r`   rl   rc   rn   ra   )r2   rB   r   r   r|   r   r}   r~   r   r   r   r<   r   r=   re   outputreturn_dict_in_generateoutput_sequencesr!   bszr:   r:   r;   generate  s@   

zDiaGenerationMixin.generate)NNNNNNNN)N)NNN)NN)NNNNNNNNNNNN)!__name__
__module____qualname__rA   r   r   intrI   
LongTensorr   r   listr
   strdictr   r.   boolrp   r?   rH   r   rg   rm   staticmethodrn   r   r   no_gradr   r   r   __classcell__r:   r:   r8   r;   r   (   s,   	
D

5%&	

 	
r   )$typingr   r   r   r   rI   torch.distributeddistributedr   generation.logits_processr   r   r	   r
   r   generation.stopping_criteriar   generation.streamersr   generation.utilsr   r   r   r   integrations.deepspeedr   integrations.fsdpr   modeling_utilsr   utilsr   
get_loggerr   rZ   r   r:   r:   r:   r;   <module>   s   
