o
    ‹½µiås  ã                   @  sü  d Z ddlmZ ddlZddlZddlmZ ddlZddlm	  m
Z ddlZddlmZ ddlm	Z	 ddlmZ ddlmZ i Zi Z			
							d>dd„Z			
				d?dd„ZG dd„ de	jƒZG dd„ de	jƒZG dd„ de	jƒZd@dAdd „ZdBd!d"„ZG d#d$„ d$e	jƒZG d%d&„ d&e	jƒZG d'd(„ d(e	jƒZ G d)d*„ d*e	jƒZ!G d+d,„ d,e	jƒZ"G d-d.„ d.e	jƒZ#G d/d0„ d0e	jƒZ$ed1ƒrÓdd2l%m&Z&m'Z' dd3l(m)Z)m*Z* G d4d5„ d5ƒZ+G d6d7„ d7ƒZ,G d8d9„ d9e	jƒZ-G d:d;„ d;e	jƒZ.G d<d=„ d=e	jƒZ/dS )Cz\
ein notation:
b - batch
n - sequence
nt - text sequence
nw - raw wave length
d - dimension
é    )ÚannotationsN)ÚOptional)Úmel)Únn)Úapply_rotary_pos_emb)Úis_package_availableé   éd   éÀ]  é   Fc	                 C  s   | j }	|› d|› d|› d|› d|› d|› d|› d|	› }
|
tvr?t|||||d}t |¡ ¡  |	¡t|
< t |¡ |	¡t|
< t|
 }t|
 }|| d }tj	j
j|  d¡||fdd d¡} tj| |||||ddddd	
}t t |¡ d¡ d
¡d ¡}t ||¡}t tj|dd¡}|S )NÚ_)ÚsrÚn_fftÚn_melsÚfminÚfmaxé   é   Úreflect)ÚmodeFT)Ú
hop_lengthÚ
win_lengthÚwindowÚcenterÚpad_modeÚ
normalizedÚonesidedÚreturn_complexéÿÿÿÿg•Ö&è.>çñhãˆµøä>©Úmin)ÚdeviceÚmel_basis_cacheÚlibrosa_mel_fnÚtorchÚ
from_numpyÚfloatÚtoÚhann_windowÚhann_window_cacher   Ú
functionalÚpadÚ	unsqueezeÚsqueezeÚstftÚsqrtÚview_as_realÚpowÚsumÚmatmulÚlogÚclamp)Úwaveformr   Ún_mel_channelsÚtarget_sample_rater   r   r   r   r   r"   Úkeyr   Ú	mel_basisr)   ÚpaddingÚspecÚmel_spec© r?   úH/home/ubuntu/.local/lib/python3.10/site-packages/f5_tts/model/modules.pyÚget_bigvgan_mel_spectrogram"   s4   2$ö rA   c                 C  sl   t jj|||||dddd d	 | j¡}t| jƒdkr|  d¡} t| jƒdks(J ‚|| ƒ}|jdd 	¡ }|S )	Nr   TF)	Úsample_rater   r   r   r   Úpowerr   r   Únormé   r   r   r    )
Ú
torchaudioÚ
transformsÚMelSpectrogramr(   r"   ÚlenÚshaper.   r6   r5   )r7   r   r8   r9   r   r   Úmel_stftr   r?   r?   r@   Úget_vocos_mel_spectrogramO   s&   ÷
ö
rL   c                      s2   e Zd Z						d
‡ fdd„	Zdd	„ Z‡  ZS )ÚMelSpecr   r   r	   r
   Úvocosc                   st   t ƒ  ¡  |dv sJ tdƒƒ‚|| _|| _|| _|| _|| _|dkr&t| _	n|dkr-t
| _	| jdt d¡dd d S )	N)rN   Úbigvganz9We only support two extract mel backend: vocos or bigvganrN   rO   Údummyr   F)Ú
persistent)ÚsuperÚ__init__Úprintr   r   r   r8   r9   rL   Ú	extractorrA   Úregister_bufferr%   Útensor)Úselfr   r   r   r8   r9   Úmel_spec_type©Ú	__class__r?   r@   rS   m   s   
	zMelSpec.__init__c                 C  s>   | j j|jkr|  |j¡ | j|| j| j| j| j| jd}|S )N)r7   r   r8   r9   r   r   )	rP   r"   r(   rU   r   r8   r9   r   r   )rX   Úwavr   r?   r?   r@   Úforward†   s   ú	zMelSpec.forward)r   r   r   r	   r
   rN   ©Ú__name__Ú
__module__Ú__qualname__rS   r]   Ú__classcell__r?   r?   rZ   r@   rM   l   s    ùrM   c                      ó&   e Zd Z‡ fdd„Zddd„Z‡  ZS )ÚSinusPositionEmbeddingc                   s   t ƒ  ¡  || _d S ©N)rR   rS   Údim©rX   rf   rZ   r?   r@   rS   š   s   

zSinusPositionEmbedding.__init__éè  c                 C  sv   |j }| jd }t d¡|d  }t tj||d ¡ |  ¡}|| d¡ | d¡ }tj	| 
¡ | ¡ fdd}|S )Nr   i'  r   ©r"   r   r   ©rf   )r"   rf   Úmathr5   r%   ÚexpÚaranger'   r-   ÚcatÚsinÚcos)rX   ÚxÚscaler"   Úhalf_dimÚembr?   r?   r@   r]   ž   s   
zSinusPositionEmbedding.forward)rh   r^   r?   r?   rZ   r@   rd   ™   s    rd   c                      s*   e Zd Zd‡ fdd„	Zddd
d„Z‡  ZS )ÚConvPositionEmbeddingé   é   c                   sv   t ƒ  ¡  |d dksJ ‚t tj|||||d dt ¡ tj|||||d dt ¡ ¡| _dd„ t| jƒD ƒ| _d S )Nr   r   )Úgroupsr<   c                 S  s    g | ]\}}t |tjƒr|‘qS r?   )Ú
isinstancer   ÚConv1d)Ú.0ÚiÚlayerr?   r?   r@   Ú
<listcomp>µ   s     z2ConvPositionEmbedding.__init__.<locals>.<listcomp>)	rR   rS   r   Ú
Sequentialrz   ÚMishÚconv1dÚ	enumerateÚlayer_need_mask_idx)rX   rf   Úkernel_sizerx   rZ   r?   r@   rS   ¬   s   
üzConvPositionEmbedding.__init__Nrq   úfloat['b n d']Úmaskúbool['b n'] | Nonec                 C  s„   |d ur	|  d¡}| ddd¡}|d ur| | d¡}t| jƒD ]\}}||ƒ}|d ur8|| jv r8| | d¡}q | ddd¡}|S )Nr   r   r   ç        )r-   ÚpermuteÚmasked_fillr‚   r   rƒ   )rX   rq   r†   r|   Úblockr?   r?   r@   r]   ·   s   
€zConvPositionEmbedding.forward)rv   rw   re   )rq   r…   r†   r‡   r^   r?   r?   rZ   r@   ru   «   s    ru   ç     ˆÃ@ç      ð?rf   ÚintÚendÚthetar'   c                 C  s„   ||| | d   9 }d|t  d| d¡d | d …  ¡ |    }t j||jd}t  ||¡ ¡ }t  |¡}t  |¡}t j||gddS )Nr   r   r   ri   r   rj   )r%   rm   r'   r"   Úouterrp   ro   rn   )rf   r   r   Útheta_rescale_factorÚfreqsÚtÚ	freqs_cosÚ	freqs_sinr?   r?   r@   Úprecompute_freqs_cisË   s   *

r—   c                 C  s`   |t j| t jd }|  d¡t j|| jt jd d¡| d¡  ¡  }t  ||k ||d ¡}|S )N)Údtyper   )r"   r˜   r   )r%   Ú	ones_likeÚfloat32r-   rm   r"   ÚlongÚwhere)ÚstartÚlengthÚmax_posrr   Úposr?   r?   r@   Úget_pos_embed_indicesÙ   s   &ÿÿr¡   c                      ó$   e Zd Z‡ fdd„Zdd„ Z‡  ZS )ÚGRNc                   s:   t ƒ  ¡  t t dd|¡¡| _t t dd|¡¡| _d S )Nr   )rR   rS   r   Ú	Parameterr%   ÚzerosÚgammaÚbetarg   rZ   r?   r@   rS   é   s   
zGRN.__init__c                 C  s@   t j|dddd}||jdddd  }| j||  | j | S )Nr   r   T)Úprf   Úkeepdimr   )rf   r©   çíµ ÷Æ°>)r%   rD   Úmeanr¦   r§   )rX   rq   ÚGxÚNxr?   r?   r@   r]   î   s   zGRN.forwardr^   r?   r?   rZ   r@   r£   è   s    r£   c                      s,   e Zd Z	dd‡ fdd„Zddd„Z‡  ZS )ÚConvNeXtV2Blockr   rf   rŽ   Úintermediate_dimÚdilationc                   sr   t ƒ  ¡  |d d }tj||d|||d| _tj|dd| _t ||¡| _t 	¡ | _
t|ƒ| _t ||¡| _d S )Né   r   é   )r„   r<   rx   r°   rª   ©Úeps)rR   rS   r   rz   ÚdwconvÚ	LayerNormrD   ÚLinearÚpwconv1ÚGELUÚactr£   ÚgrnÚpwconv2)rX   rf   r¯   r°   r<   rZ   r?   r@   rS   ù   s   
ÿ

zConvNeXtV2Block.__init__rq   útorch.TensorÚreturnc                 C  s`   |}|  dd¡}|  |¡}|  dd¡}|  |¡}|  |¡}|  |¡}|  |¡}|  |¡}|| S )Nr   r   )Ú	transposerµ   rD   r¸   rº   r»   r¼   )rX   rq   Úresidualr?   r?   r@   r]   
  s   





zConvNeXtV2Block.forward)r   )rf   rŽ   r¯   rŽ   r°   rŽ   )rq   r½   r¾   r½   r^   r?   r?   rZ   r@   r®   ø   s    ür®   c                      s&   e Zd Zd	‡ fdd„Zdd„ Z‡  ZS )
ÚRMSNormrf   rŽ   r´   r'   c                   s>   t ƒ  ¡  || _t t |¡¡| _ttj	d d… ƒdk| _
d S )NrE   g333333@)rR   rS   r´   r   r¤   r%   ÚonesÚweightr'   Ú__version__Únative_rms_norm)rX   rf   r´   rZ   r?   r@   rS     s   
zRMSNorm.__init__c                 C  sª   | j r%| jjtjtjfv r| | jj¡}tj||j	d f| j| j
d}|S | tj¡ d¡jddd}|t || j
 ¡ }| jjtjtjfv rN| | jj¡}|| j }|S )Nr   )Únormalized_shaperÃ   r´   r   T)r©   )rÅ   rÃ   r˜   r%   Úfloat16Úbfloat16r(   ÚFÚrms_normrJ   r´   rš   r2   r«   Úrsqrt)rX   rq   Úvariancer?   r?   r@   r]   !  s   ú
zRMSNorm.forward)rf   rŽ   r´   r'   r^   r?   r?   rZ   r@   rÁ     s    rÁ   c                      rc   )ÚAdaLayerNormc                   ó<   t ƒ  ¡  t ¡ | _t ||d ¡| _tj|ddd| _d S )Nr±   Frª   ©Úelementwise_affiner´   ©	rR   rS   r   ÚSiLUÚsilur·   Úlinearr¶   rD   rg   rZ   r?   r@   rS   5  ó   

zAdaLayerNorm.__init__Nc           	      C  sh   |   |  |¡¡}tj|ddd\}}}}}}|  |¡d|d d …d f   |d d …d f  }|||||fS )Nr±   r   rj   ©rÔ   rÓ   r%   ÚchunkrD   )	rX   rq   rt   Ú	shift_msaÚ	scale_msaÚgate_msaÚ	shift_mlpÚ	scale_mlpÚgate_mlpr?   r?   r@   r]   =  s   .zAdaLayerNorm.forwardre   r^   r?   r?   rZ   r@   rÍ   4  s    rÍ   c                      r¢   )ÚAdaLayerNorm_Finalc                   rÎ   )Nr   Frª   rÏ   rÑ   rg   rZ   r?   r@   rS   J  rÕ   zAdaLayerNorm_Final.__init__c                 C  sb   |   |  |¡¡}tj|ddd\}}|  |¡d| d d …d d d …f  |d d …d d d …f  }|S )Nr   r   rj   rÖ   )rX   rq   rt   rr   Úshiftr?   r?   r@   r]   R  s   :zAdaLayerNorm_Final.forwardr^   r?   r?   rZ   r@   rÞ   I  s    rÞ   c                      s(   e Zd Zdd‡ fdd„Zd	d
„ Z‡  ZS )ÚFeedForwardNé   rˆ   ÚnoneÚapproximateÚstrc           	        sh   t ƒ  ¡  t|| ƒ}|d ur|n|}tj|d}t t ||¡|¡}t |t |¡t ||¡¡| _d S )N)rã   )	rR   rS   rŽ   r   r¹   r   r·   ÚDropoutÚff)	rX   rf   Údim_outÚmultÚdropoutrã   Ú	inner_dimÚ
activationÚ
project_inrZ   r?   r@   rS   ^  s   
"zFeedForward.__init__c                 C  s
   |   |¡S re   )ræ   )rX   rq   r?   r?   r@   r]   g  s   
zFeedForward.forward)Nrá   rˆ   râ   )rã   rä   r^   r?   r?   rZ   r@   rà   ]  s    	rà   c                      sB   e Zd Z						d!d"‡ fdd„Z					d#d$dd „Z‡  ZS )%Ú	Attentioné   é@   rˆ   NFÚ	processorú"JointAttnProcessor | AttnProcessorrf   rŽ   ÚheadsÚdim_headré   r'   Úcontext_dimúOptional[int]Úcontext_pre_onlyÚboolÚqk_normúOptional[str]c	           	        s˜  t ƒ  ¡  ttdƒstdƒ‚|| _|| _|| _|| | _|| _	|| _
|| _t || j¡| _t || j¡| _t || j¡| _|d u rHd | _d | _n|dkr[t|dd| _t|dd| _ntd|› ƒ‚| j
d urœt || j¡| _t || j¡| _t || j¡| _|d u rŠd | _d | _n|dkrœt|dd| _t|dd| _t g ¡| _| j t | j|¡¡ | j t |¡¡ | j
d urÈ| jsÊt | j|¡| _d S d S d S )NÚscaled_dot_product_attentionzHAttention equires PyTorch 2.0, to use it, please upgrade PyTorch to 2.0.rÊ   rª   r³   zUnimplemented qk_norm: )rR   rS   ÚhasattrrÉ   ÚImportErrorrð   rf   rò   rê   ré   rô   rö   r   r·   Úto_qÚto_kÚto_vÚq_normÚk_normrÁ   Ú
ValueErrorÚto_q_cÚto_k_cÚto_v_cÚc_q_normÚc_k_normÚ
ModuleListÚto_outÚappendrå   Úto_out_c)	rX   rð   rf   rò   ró   ré   rô   rö   rø   rZ   r?   r@   rS   p  sH   



ÿzAttention.__init__rq   r…   Úcr†   r‡   Úc_maskúbool['b nt'] | Noner¾   r½   c              	   C  s2   |d ur| j | ||||||dS | j | |||dS )N)r  r†   ÚropeÚc_roper  )r†   r  )rð   )rX   rq   r  r†   r  r  r  r?   r?   r@   r]   ©  s   	zAttention.forward)rî   rï   rˆ   NFN)rð   rñ   rf   rŽ   rò   rŽ   ró   rŽ   ré   r'   rô   rõ   rö   r÷   rø   rù   ©NNNNN)
rq   r…   r  r…   r†   r‡   r  r  r¾   r½   r^   r?   r?   rZ   r@   rí   o  s    ÷<ùrí   Ú
flash_attn)Úflash_attn_funcÚflash_attn_varlen_func)Ú	pad_inputÚunpad_inputc                   @  s.   e Zd Z			ddd
d„Z		dddd„ZdS )ÚAttnProcessorNr%   TÚpe_attn_headú
int | NoneÚattn_backendrä   Úattn_mask_enabledr÷   c                 C  sF   |dkrt dƒsJ dƒ‚|dkr|rt dt¡ || _|| _|| _d S ©Nr  z Please install flash-attn first.r%   zzattn_mask_enabled=True with attn_backend='torch' can consume large GPU memory. Please switch attn_backend to 'flash_attn'.)r   ÚwarningsÚwarnÚUserWarningr  r  r  )rX   r  r  r  r?   r?   r@   rS   À  s   ý
zAttnProcessor.__init__Úattnrí   rq   r…   r†   r‡   r¾   útorch.FloatTensorc                 C  s@  |j d }| |¡}| |¡}| |¡}|j d }	|	|j }
| |d|j|
¡ dd¡}| |d|j|
¡ dd¡}| |d|j|
¡ dd¡}|jd urO| |¡}|jd urY| |¡}|d urÈ|\}}|d urk||d fnd\}}| j	d ur¼| j	}t
|d d …d |…d d …d d …f ||ƒ|d d …d |…d d …d d …f< t
|d d …d |…d d …d d …f ||ƒ|d d …d |…d d …d d …f< nt
|||ƒ}t
|||ƒ}| jdkr| jrï|d urï|}| d¡ d¡}| ||j|j d |j d ¡}nd }tj||||d	d
d}| dd¡ |d|j|
 ¡}nn| jdkry| dd¡}| dd¡}| dd¡}| jrf|d urft||ƒ\}}}}}t||ƒ\}}}}}t||ƒ\}}}}}t|||||||ƒ}t||||ƒ}| |d|j|
 ¡}nt|||d	d
d}| |d|j|
 ¡}| |j¡}|jd |ƒ}|jd |ƒ}|d urž| d¡}| | d	¡}|S )Nr   r   r   r   ç      ð¿©r   r   r%   éþÿÿÿrˆ   F©Ú	attn_maskÚ	dropout_pÚ	is_causalr  ©r'  Úcausal)rJ   rý   rþ   rÿ   rò   Úviewr¿   r   r  r  r   r  r  r-   ÚexpandrÉ   rú   Úreshaper  r  r  r  r(   r˜   r	  rŠ   )rX   r   rq   r†   r  Ú
batch_sizeÚqueryr:   Úvaluerê   Úhead_dimr“   Ú
xpos_scaleÚq_xpos_scaleÚk_xpos_scaleÚpnr&  ÚindicesÚq_cu_seqlensÚq_max_seqlen_in_batchr   Úk_cu_seqlensÚk_max_seqlen_in_batchr?   r?   r@   Ú__call__Ó  st   










DF ù	

zAttnProcessor.__call__)Nr%   T)r  r  r  rä   r  r÷   ©NN)r   rí   rq   r…   r†   r‡   r¾   r!  ©r_   r`   ra   rS   r;  r?   r?   r?   r@   r  ¿  s    üûr  c                   @  s2   e Zd Z		dddd„Z										dddd„Zd	S )ÚJointAttnProcessorr%   Tr  rä   r  r÷   c                 C  s@   |dkrt dƒsJ dƒ‚|dkr|rt dt¡ || _|| _d S r  )r   r  r  r  r  r  )rX   r  r  r?   r?   r@   rS   0  s   ý
zJointAttnProcessor.__init__Nr   rí   rq   r…   r  úfloat['b nt d']r†   r‡   r  r  r¾   r!  c                 C  sz  |}|}	|j d }
| |¡}| |¡}| |¡}| |¡}| |¡}| |¡}|j d }||j }| |
d|j|¡ 	dd¡}| |
d|j|¡ 	dd¡}| |
d|j|¡ 	dd¡}| |
d|j|¡ 	dd¡}| |
d|j|¡ 	dd¡}| |
d|j|¡ 	dd¡}|j
d ur‰| 
|¡}|jd ur“| |¡}|jd ur| |¡}|jd ur§| |¡}|d urÉ|\}}|d ur¹||d fnd\}}t|||ƒ}t|||ƒ}|d urë|\}}|d urÛ||d fnd\}}t|||ƒ}t|||ƒ}tj||gdd}tj||gdd}tj||gdd}| jr+|d ur+|d urtj||gdd}ntj|d|j d fdd	}| jd
krp| jrT|d urT|}| d¡ d¡}| |
|j|j d |j d ¡}nd }tj||||ddd}| 	dd¡ |
d|j| ¡}ns| jdkrã| 	dd¡}| 	dd¡}| 	dd¡}| jrÐ|d urÐ|j d }t||ƒ\}}}}}t||ƒ\}}}}}t||ƒ\}}}}}t|||||||ƒ}t|||
|ƒ}| |
d|j| ¡}nt|||ddd}| |
d|j| ¡}| |j¡}|d d …d |j d …f |d d …|j d d …f }}|jd |ƒ}|jd |ƒ}|j s| !|¡}|	d ur*| "|	 d¡ d¡}|d ur9| "| d¡ d¡}||fS )Nr   r   r   r   r"  r#  rj   T)r0  r%   r$  rˆ   Fr%  r  r)  )#rJ   rý   rþ   rÿ   r  r  r  rò   r+  r¿   r   r  r  r  r   r%   rn   r  rÉ   r,   r  r-   r,  rú   r-  r  r  r  r  r(   r˜   r	  rö   r  rŠ   )rX   r   rq   r  r†   r  r  r  rÀ   Ú
audio_maskr.  r/  r:   r0  Úc_queryÚc_keyÚc_valuerê   r1  r“   r2  r3  r4  r&  Útotal_seq_lenr6  r7  r8  r   r9  r:  r?   r?   r@   r;  A  sª   


















 
ù	þ


zJointAttnProcessor.__call__)r%   T)r  rä   r  r÷   r  )r   rí   rq   r…   r  r?  r†   r‡   r  r  r¾   r!  r=  r?   r?   r?   r@   r>  /  s    ýør>  c                      s4   e Zd Z						d
‡ fdd„	Zddd	„Z‡  ZS )ÚDiTBlockrá   çš™™™™™¹?Nr%   Tc
           
        s\   t ƒ  ¡  t|ƒ| _tt|||	d|||||d| _tj|ddd| _	t
|||dd| _d S )N)r  r  r  )rð   rf   rò   ró   ré   rø   Frª   rÏ   Útanh©rf   rè   ré   rã   )rR   rS   rÍ   Ú	attn_normrí   r  r   r   r¶   Úff_normrà   ræ   )
rX   rf   rò   ró   Úff_multré   rø   r  r  r  rZ   r?   r@   rS   Ä  s    

ýözDiTBlock.__init__c                 C  sˆ   | j ||d\}}}}}	| j|||d}
|| d¡|
  }|  |¡d|d d …d f   |d d …d f  }|  |¡}||	 d¡|  }|S )N©rt   )rq   r†   r  r   )rI  r   r-   rJ  ræ   )rX   rq   r”   r†   r  rD   rÚ   rÛ   rÜ   rÝ   Úattn_outputÚ	ff_outputr?   r?   r@   r]   ã  s   .
zDiTBlock.forward)rá   rF  NNr%   Tr<  r^   r?   r?   rZ   r@   rE  Ã  s    örE  c                      s<   e Zd ZdZ							d‡ fdd„	Z	dd	d
„Z‡  ZS )Ú
MMDiTBlocka  
    modified from diffusers/src/diffusers/models/attention.py

    notes.
    _c: context related. text, cond, etc. (left part in sd3 fig2.b)
    _x: noised input related. (right part)
    context_pre_only: last layer only do prenorm + modulation cuz no more ffn
    rá   rF  NFr%   c              
     s¼   t ƒ  ¡  |d u r|}|| _|rt|ƒnt|ƒ| _t|ƒ| _tt|	|
d|||||||d| _	|sDt
j|ddd| _t|||dd| _nd | _d | _t
j|ddd| _t|||dd| _d S )N)r  r  )rð   rf   rò   ró   ré   rô   rö   rø   Frª   rÏ   rG  rH  )rR   rS   rö   rÞ   rÍ   Úattn_norm_cÚattn_norm_xrí   r>  r   r   r¶   Ú	ff_norm_crà   Úff_cÚ	ff_norm_xÚff_x)rX   rf   rò   ró   rK  ré   rô   rö   rø   r  r  rZ   r?   r@   rS     s4   

þõzMMDiTBlock.__init__c                 C  s*  | j r
|  ||¡}n| j||d\}}	}
}}| j||d\}}}}}| j||||||d\}}| j r5d }n.||	 d¡|  }|  |¡d|d d …d f   |
d d …d f  }|  |¡}|| d¡|  }|| d¡|  }|  |¡d|d d …d f   |d d …d f  }|  |¡}|| d¡|  }||fS )NrL  )rq   r  r†   r  r  r  r   )	rö   rP  rQ  r   r-   rR  rS  rT  rU  )rX   rq   r  r”   r†   r  r  r  Únorm_cÚ
c_gate_msaÚc_shift_mlpÚc_scale_mlpÚ
c_gate_mlpÚnorm_xÚ
x_gate_msaÚx_shift_mlpÚx_scale_mlpÚ
x_gate_mlpÚx_attn_outputÚc_attn_outputÚc_ff_outputÚx_ff_outputr?   r?   r@   r]   ,  s    .
.
zMMDiTBlock.forward)rá   rF  NFNr%   F)NNNN)r_   r`   ra   Ú__doc__rS   r]   rb   r?   r?   rZ   r@   rO  ÷  s    õ,ÿrO  c                      s(   e Zd Zd‡ fdd„	Zd	dd„Z‡  ZS )
ÚTimestepEmbeddingr   c                   s<   t ƒ  ¡  t|ƒ| _t t ||¡t ¡ t ||¡¡| _d S re   )	rR   rS   rd   Ú
time_embedr   r   r·   rÒ   Útime_mlp)rX   rf   Úfreq_embed_dimrZ   r?   r@   rS   Q  s   

(zTimestepEmbedding.__init__Útimestepú
float['b']c                 C  s$   |   |¡}| |j¡}|  |¡}|S re   )rf  r(   r˜   rg  )rX   ri  Útime_hiddenÚtimer?   r?   r@   r]   V  s   

zTimestepEmbedding.forward)r   )ri  rj  r^   r?   r?   rZ   r@   re  P  s    re  )r   r	   r
   r   r   r   NF)r   r	   r
   r   r   )rŒ   r   )rf   rŽ   r   rŽ   r   r'   )r   )0rd  Ú
__future__r   rk   r  Útypingr   r%   Útorch.nn.functionalr   r+   rÉ   rF   Úlibrosa.filtersr   r$   Úx_transformers.x_transformersr   Úf5_tts.model.utilsr   r#   r*   rA   rL   ÚModulerM   rd   ru   r—   r¡   r£   r®   rÁ   rÍ   rÞ   rà   rí   r  r  r  Úflash_attn.bert_paddingr  r  r  r>  rE  rO  re  r?   r?   r?   r@   Ú<module>   sd    

÷/
ú- 
"Kp 4Y