o
    㥵i3P                  3   @   s  d dl Z d dlZd dlZd dlZd dlZd dlmZ d dlmZ d dl	m
Z
 d dlmZmZmZmZ d dlZd dlZd dlZd dlZd dlmZ d dlmZ d dlmZ d d	lmZmZmZ d d
lmZ de j d< dej!j"_#dej!j"j$_%e&ej!j"drdej!j"_'d dl(m)Z)m*Z* d dl+m,Z,m-Z-m.Z. dd Z/	dhdej0dej0dej0deej0 dej0f
ddZ1	dhdej0dej0dej0deej0 deej0ej0f f
ddZ2	dhde-dej0dej0dej0dej0dej0dej0d ej0dej0dej0fd!d"Z3e3fde.d#ej0dej0d$e4dej0dej0dej0dej0d ej0fd%d&Z5e6 e7 e3d'd(de,d)ej0d*e4dej0d ej0d+e4fd,d-Z8did/d0Z9eG d1d2 d2Z:d'd d3d4d3d.dd5ddd6
d7e;ej<B d8e=d9e;d+e4d*e4de4de>de>d:e?d;e?d<e4d=ee;e@e; B  d>eej0e@ej0 B  fd?d@ZAeG dAdB dBZBeG dCdD dDZC	.did:e?fdEdFZDeE ejFdGe;dHdIejFdJe;dddKejFdLej
e
ddMdddKejFdNe4d'dIejFdOe4d dIejFdPe>d3dIejFdQe>d4dIejFdRe>d3dIejFdSej
e
ddMdTdIejFdUe;dVdIejFdWd.dXejFdYe4dZdIejFd[d.dXejFd\ddXejFd]e4d^dIejFd_e
d`dId9e;d=ee@e;  d>ee@e
  d+e4d*e4de4de>de>dae
d7e;d:e?dbe4dce?d;e?d<e4dde
ddf"dedfZGeHdgkrZeG  dS dS )j    N)nullcontext)	dataclass)Path)LiteralOptionalTupleUnion)logger)tqdm)AutoTokenizer)ContentSequenceTextPartVQPart)IM_END_TOKENfalseTOKENIZERS_PARALLELISMTfx_graph_cache)
SDPBackendsdpa_kernel)BaseTransformerDualARTransformerNaiveTransformerc                 C   s.   t | d}t j| | dddjt jdS )N   T)dimkeepdim)dtype)torch
empty_likeexponential_argmaxtoint)
probs_sortq r%   ^/home/ubuntu/.local/lib/python3.10/site-packages/fish_speech/models/text2semantic/inference.pymultinomial_sample_one_no_sync,   s   r'   temperaturetop_prepetition_penaltyprevious_tokensreturnc                 C   s   |d ur%|  }tj| d|d}t|dk || || }| jd||d tj| dd\}}tjtjjj	|dddd}||k}	d|	d< |	j
d||	d}
| |
td	 } | tj|d
d } tjjj	| dd}|S )Nr   )r   indexr   )r   r-   srcT)
descendingr   FInfgh㈵>)min)longr   gatherwherescatter_sortcumsumnn
functionalsoftmaxscattermasked_fillfloatclip)logitsr(   r)   r*   r+   scoresorted_logitssorted_indices	cum_probssorted_indices_to_removeindices_to_removeprobsr%   r%   r&   logits_to_probs3   s$   rH   c                 C   s&   t | d ||||d}t|}||fS )N)r   r   )r@   r(   r)   r*   r+   )rH   r'   )r@   r(   r)   r*   r+   rG   idx_nextr%   r%   r&   sampleR   s   rJ   modelx	input_posaudio_masksaudio_partsc	              	   C   sn  | j ||||d}|j}	|j}
t|	||||d ur |d d df nd dd g}| jD ]}|jjjd |jjj	d q*t
jdg|
jt
jd}| |
| |d | jj }d||dk < | |}
|| td| jjD ]?}t
j|g|
jt
jd}| |
|}	|	d d d d d df }t|||||d ur||d  nd dd }| |}
|| qmt
j|dd}|jS )N)rN   rO   r   )r(   r)   r*   r+   devicer   r   i   r0   )forward_generater@   hidden_statesrJ   fast_layers	attentionkv_cachek_cachefill_v_cacher   tensorrQ   r3   forward_generate_fast	tokenizersemantic_begin_idfast_embeddingsappendrangeconfignum_codebooksstackT)rK   rL   rM   r(   r)   r*   rN   rO   r+   r@   rS   	codebookslayeracodebook_idxshort_logitsr%   r%   r&   decode_one_token_ard   s`   




rj   	cur_tokennum_new_tokensc
                 C   s.  t j| jjd | jjft j|jd}
tt|D ]r}d}||k r+|
d d d |f }n|
d d || |f }t	t
j |	| ||||||||d	 }W d    n1 sVw   Y  |d7 }|d| jjd d}|| jjd d|
d d ||d f< |d | jtkr nq|
d d d |d f S )Nr   r   rQ      )	rK   rL   rM   r+   r(   r)   r*   rN   rO   r   )r   r   r   )r   zerosra   rb   max_seq_lenr"   rQ   r
   r`   r   r   MATHcloneviewr\   get_token_idr   )rK   rk   rM   rl   r(   r)   r*   rN   rO   decode_one_tokenr+   iwin_sizewindow
next_tokenr%   r%   r&   decode_n_tokens   sH   
rz   r   )ru   num_samplespromptmax_new_tokensr{   c                 K   s  | d}|d |dd}|| jjkr td| d| jj |r5|| | jjkr0| jj| }|| }	n| jj}	|	| }|j|j}
}t|
 | j|| jjt	| 
 jd W d   n1 scw   Y  d| jj }tjd||
d}tj|| jjf||
d}||ddd|f< |}tj|d	 |
tjd
}tj|d |
tjd
}tj|d |
tjd
}t}|| |d|d||||||}||dd||d f< tj|g|
tjd
}t| |d|d||d ||||||d
}|ddd|d | d f }||dd|d df< |S )zp
    Takes a conditioning sequence (prompt) as input and continues to generate as many tokens as requested.
    r   NzInput sequence length z exceeds max_seq_len max_batch_sizerp   r   r   rQ   rm   r(   rP   r)   r*   r   )r(   r)   r*   rN   rO   ru   )sizerepeatra   rp   
ValueErrorrQ   r   r   setup_cachesnext
parametersrb   arangeemptyrZ   bfloat16rj   rs   r"   rz   )rK   r|   r}   rN   rO   ru   r{   sampling_kwargsrd   T_newrQ   r   codebook_dimrM   r   seqr(   r)   r*   prefill_decodefirst_tokenrL   r%   r%   r&   generate   s|   


"r   Fc                 C   s   t j| dd}|j||d}td t|t r"t}t}td ntdt	| |j
d|jjt| jd W d    n1 sEw   Y  |rhtd	 tj|tj r[d
ndtj rcdnd dd}| |fS )NT)load_weightsrP   zRestored model from checkpointzUsing DualARTransformerzUnsupported model typer   r~   zCompiling function...inductor	aot_eagerzreduce-overhead)backendmode	fullgraph)r   from_pretrainedr!   r	   info
isinstancerj   r   r   rQ   r   ra   rp   r   r   r   compilecudais_availableeval)checkpoint_pathrQ   	precisionr   rK   ru   prefill_n_tokensr%   r%   r&   
init_model=  s0   


r   c                   @   s<   e Zd ZU ed ed< dZeej ed< dZ	ee
 ed< dS )GenerateResponse)rJ   r   actionNcodestext)__name__
__module____qualname__r   __annotations__r   r   r   Tensorr   strr%   r%   r%   r&   r   _  s   
 r   g?g?i   )
r{   r}   r)   r*   r(   r   iterative_promptchunk_lengthprompt_textprompt_tokensrQ   ru   r   r   r   r   r   r   c           "      c   s   d|  k rdksJ d J dd|  k r dk s%J d J dd|  k r2dk s7J d J d|d uo>|d u}|rLt |trL|g}|g}|du s\t|t|ks\J dd	d
 |D }tdd |  D }| j}tdd}| jj}|rt	||D ]\}}|j
t|dt|dgddd q|j
t|dgddd |j|| jjd\}}}|d|d krtd|d d|d  |j|d}td|  tj||tjd}tj||tjd}tj||tjd}t|D ]}tj rtj  g }d}|d}t }t| ||||||||d	}|dkr4|dkr4|	r4tdt | dd tj r?tj  t | }|d| }|| }td| d|d d!|d d" td#|| d$ d d% tj rtd&tj d$ d d' |dd |d(f  } | dk  sJ d)|d d |d f  }!|
|!!  | dk  sJ d*|  t"d+| |d,V  |d7 }t"d-d.V  qd S )/Nr   r   ztop_p must be in (0, 1]   z$repetition_penalty must be in (0, 2)ztemperature must be in (0, 2)Fz0Prompt text and tokens must have the same lengthc                 S   s   g | ]}|  qS r%   )cpu).0rv   r%   r%   r&   
<listcomp>  s    z!generate_long.<locals>.<listcomp>c                 s   s    | ]
}|j r| V  qd S N)requires_gradnumelr   pr%   r%   r&   	<genexpr>  s    z generate_long.<locals>.<genexpr>
interleave)modality)r   )r   T)add_endspeaker)rb   i   zPrompt is too long: z > r   zEncoded text: rP   )	rK   r|   r}   rN   rO   ru   r(   r)   r*   zCompilation time: z.2f secondsz
Generated z tokens in .02fz
 seconds, z tokens/seczBandwidth achieved: g    eAz GB/szGPU Memory used: z GBr   zNegative code foundzNegative code found: rJ   )r   r   r   r   )r   )#r   r   lensumr   r\   r   ra   rp   zipr_   r   r   encode_for_inferencerb   r   r   r!   r	   r   r   rZ   r>   r`   r   r   synchronizetimeperf_counterr   max_memory_reservedrr   allr   r   )"rK   rQ   ru   r   r{   r}   r)   r*   r(   r   r   r   r   r   
use_prompt
model_sizer\   base_content_sequence
max_lengthtcencodedrN   rO   
sample_idxglobal_encodedseg_idxprompt_lengtht0ytokens_generated
tokens_secr   decodedr%   r%   r&   generate_longf  s   $$$




r   c                   @   s.   e Zd ZU ed ed< dZeeeB  ed< dS )WrappedGenerateResponse)successerrorstatusNresponse)	r   r   r   r   r   r   r   r   	Exceptionr%   r%   r%   r&   r     s   
 r   c                   @   s    e Zd ZU eed< ejed< dS )GenerateRequestrequestresponse_queueN)r   r   r   dictr   queueQueuer%   r%   r%   r&   r     s   
 r   c                    sD   t  t  fdd}tj|dd    S )Nc               
      s   t  d\} }t | jd| jjt|  jd W d    n1 s)w   Y  	  	 
 }|d u r=d S |j}|j}ztd	| |d|D ]}|td|d qNW n" ty} } ztt  |td|d W Y d }~nd }~ww q3)
Nr   r   r~   T)rK   ru   r   )r   r   r   r%   )r   r   rQ   r   ra   rp   r   r   r   setgetr   r   r   putr   r   r	   r   	traceback
format_exc)rK   ru   itemkwargsr   chunker   r   rQ   
init_eventinput_queuer   r%   r&   worker  sB   


z(launch_thread_safe_queue.<locals>.workerT)targetdaemon)r   r   	threadingEventThreadstartwait)r   rQ   r   r   r   r%   r   r&   launch_thread_safe_queue  s   r   z--textuQ   你说的对, 但是原神是一款由米哈游自主研发的开放世界手游.)typedefaultz--prompt-text)r   r   multiplez--prompt-tokens)	path_typeexistsz--num-samplesz--max-new-tokensz--top-pz--repetition-penaltyz--temperaturez--checkpoint-pathzcheckpoints/openaudio-s1-miniz--devicer   z--compile/--no-compile)r   z--seed*   z--half/--no-halfz(--iterative-prompt/--no-iterative-promptz--chunk-lengthi,  z--output-dirtempr   seedhalf
output_dirc                 C   s  t j|dd |rtjntj}|d ur*t|t|kr*tdt| dt| dtd t		 }t
||	||
d\}}t|	 |jd|jjt| jd	 W d    n1 s\w   Y  tj rktj  td
t		 | dd |d urdd |D }t| tj rtj| t||	|| ||||||
||||d}d}g }|D ]T}|jdkr||j td|j  q|jdkr|rt j|d| d}t|tj|dd  !  td|  td g }|d7 }qt"d|  qd S )NT)exist_okzNumber of prompt text (z) and prompt tokens (z) should be the samezLoading model ...r   r   r~   zTime to load model: r   r   c                 S   s   g | ]
}t t|qS r%   )r   
from_numpynploadr   r%   r%   r&   r   k  s    zmain.<locals>.<listcomp>)rK   rQ   ru   r   r{   r}   r)   r*   r(   r   r   r   r   r   r   rJ   zSampled text: r   codes_z.npyr0   zSaved codes to zNext samplezError: )#osmakedirsr   r  r   r   r   r	   r   r   r   rQ   r   ra   rp   r   r   r   r   r   r   manual_seedr   r   r_   r   r   pathjoinr  savecatr   numpyr   )r   r   r   r{   r}   r)   r*   r(   r   rQ   r   r  r  r   r   r  r   r   rK   ru   	generatoridxr   r   codes_npy_pathr%   r%   r&   main"  st   0









r  __main__r   )F)Ir  r   r   r   r   
contextlibr   dataclassesr   pathlibr   typingr   r   r   r   clickr  r  r   torch._inductor.configlogurur	   r
   transformersr   fish_speech.content_sequencer   r   r   fish_speech.tokenizerr   environ	_inductorra   coordinate_descent_tuningtritonunique_kernel_nameshasattrr   torch.nn.attentionr   r   &fish_speech.models.text2semantic.llamar   r   r   r'   r   rH   rJ   rj   r"   rz   no_gradinference_moder   r   r   r   rQ   callabler>   boollistr   r   r   r   commandoptionr  r   r%   r%   r%   r&   <module>   s   



$
	

S	
5
Y"	

 	
.

	
,
V
