o
    GÆÏid`  ã                	   @   s  d dl Z d dlZd dlmZ d dlZd dlmZ d dlm  mZ	 d dlm
Z
 ddlmZmZ ddlmZmZ ddlmZ dd	lmZmZ dd
lmZmZ ddlmZ ddlmZ ddlmZ e e ¡Z!d;dd„Z"d<dd„Z#d<dd„Z$d=dd„Z%d=dd„Z&	d>de
de
de
de'fdd „Z(G d!d"„ d"ej)ƒZ*G d#d$„ d$ej)ƒZ+G d%d&„ d&ej)ƒZ,G d'd(„ d(ej)ƒZ-G d)d*„ d*ej)ƒZ.G d+d,„ d,ej)ƒZ/G d-d.„ d.ƒZ0G d/d0„ d0ej)eƒZ1G d1d2„ d2ej)ƒZ2G d3d4„ d4ej)ƒZ3G d5d6„ d6ej)ƒZ4G d7d8„ d8ej)ƒZ5G d9d:„ d:eeeeeeƒZ6dS )?é    N)ÚAny)ÚTensoré   )ÚConfigMixinÚregister_to_config)ÚFromOriginalModelMixinÚPeftAdapterMixin)Úloggingé   )ÚAttentionMixinÚAttentionModuleMixin)Ú_CAN_USE_FLEX_ATTNÚdispatch_attention_fn)Ú
CacheMixin)ÚTransformer2DModelOutput)Ú
ModelMixinç     ˆÃ@c                 C   s,   t  t |¡ t jd| t jd |  ¡}|S )Nr   )ÚstartÚendÚdtype)ÚtorchÚexpÚmathÚlogÚarangeÚfloat32)ÚdimÚ
max_periodÚfreqs© r   úg/home/ubuntu/.local/lib/python3.10/site-packages/diffusers/models/transformers/transformer_kandinsky.pyÚ	get_freqs'   s   (r!   Fc                 C   st   |r*d}t | |d||fdd} t ||d||fdd}|  dd¡} | dd¡}| |fS |  dd¡} | dd¡}| |fS )Né   é   ©r   r
   r   )Úlocal_patchingÚflatten)ÚxÚropeÚshapeÚ
block_maskÚ
pixel_sizer   r   r    Úfractal_flatten,   s   þr,   c                 C   sn   |r&d}| j | jd d|d g| jdd … ¢R Ž } t| |d||fdd} | S | j g |¢| jdd … ¢R Ž } | S )Nr"   r   éÿÿÿÿr
   r#   r$   )Úreshaper)   Úlocal_merge)r'   r)   r*   r+   r   r   r    Úfractal_unflatten9   s   *ÿr0   c                 C   sì   |\}}}}|\}}	}
| j g | jd |… ¢|| ‘|‘||	 ‘|	‘||
 ‘|
‘| j|d d … ¢R Ž } | jg tt| jd |… ƒƒ¢|‘|d ‘|d ‘|d ‘|d ‘|d ‘t|d t| jƒƒ¢R Ž } |  ||d ¡ |d |d ¡} | S )Nr   r
   é   r#   é   é   ©r.   r)   ÚpermuteÚrangeÚlenr&   ©r'   r)   Ú
group_sizer   Ú
batch_sizeÚdurationÚheightÚwidthÚg1Úg2Úg3r   r   r    r%   C   sL   
ÿþýüûúùø
ÿþýüûúùø 
r%   c                 C   sü   |\}}}}|\}}	}
| j g | jd |… ¢|| ‘||	 ‘||
 ‘|‘|	‘|
‘| j|d d … ¢R Ž } | jg tt| jd |… ƒƒ¢|‘|d ‘|d ‘|d ‘|d ‘|d ‘t|d t| jƒƒ¢R Ž } |  ||d ¡ |d |d ¡ |d |d ¡} | S )Nr
   r   r#   r1   r2   r3   r4   r8   r   r   r    r/   ^   sL   
ÿþýüûúùø
ÿþýüûúùø0
r/   çÍÌÌÌÌÌì?ÚqÚkÚstaÚthrc                 C   s2  t r	ddlm} ntdƒ‚|  dd¡ ¡ } | dd¡ ¡ }| j\}}}}|d }	|  |||	d|¡ d¡}
| |||	d|¡ d¡ dd¡}|
| }t	j
|t |¡ dd	}| d¡\}}| d¡}|d| k ¡ }| d| d¡¡}t	 ||¡}| d¡ t	j¡}|jdd
d t	j¡}|jt	 |¡|||dd dS )Nr   )Ú	BlockMaskz=Nabla attention is not supported with this version of PyTorchr#   r
   é@   éþÿÿÿr-   r$   T)r   Ú
descending)Ú
BLOCK_SIZEÚmask_mod)r   Ú!torch.nn.attention.flex_attentionrF   Ú
ValueErrorÚ	transposeÚ
contiguousr)   r.   Úmeanr   Úsoftmaxr   ÚsqrtÚsortÚcumsum_ÚintÚgatherÚargsortÚ
logical_orÚsumÚtoÚint32Úfrom_kv_blocksÚ
zeros_like)rB   rC   rD   rE   rF   ÚBÚhÚSÚDÚs1ÚqaÚkaÚmapÚvalsÚindsÚcvalsÚmaskÚkv_nbÚkv_indsr   r   r    Ú	nablaT_v2y   s&    
rl   c                       s&   e Zd Zd‡ fdd„	Zdd„ Z‡  ZS )ÚKandinsky5TimeEmbeddingsr   c                    sl   t ƒ  ¡  |d dksJ ‚|| _|| _t| jd | jƒ| _tj||dd| _t 	¡ | _
tj||dd| _d S )Nr
   r   T©Úbias)ÚsuperÚ__init__Ú	model_dimr   r!   r   ÚnnÚLinearÚin_layerÚSiLUÚ
activationÚ	out_layer)Úselfrr   Útime_dimr   ©Ú	__class__r   r    rq   ž   s   

z!Kandinsky5TimeEmbeddings.__init__c                 C   sX   t  | t j¡| jj|jd¡}t jt  |¡t  |¡gdd}|  	|  
|  |¡¡¡}|S )N)Údevicer-   r$   )r   ÚouterrZ   r   r   r}   ÚcatÚcosÚsinrx   rw   ru   )ry   ÚtimeÚargsÚ
time_embedr   r   r    Úforward¨   s    z Kandinsky5TimeEmbeddings.forward©r   ©Ú__name__Ú
__module__Ú__qualname__rq   r…   Ú__classcell__r   r   r{   r    rm      s    
rm   c                       ó$   e Zd Z‡ fdd„Zdd„ Z‡  ZS )ÚKandinsky5TextEmbeddingsc                    s0   t ƒ  ¡  tj||dd| _tj|dd| _d S )NTrn   ©Úelementwise_affine)rp   rq   rs   rt   ru   Ú	LayerNormÚnorm)ry   Útext_dimrr   r{   r   r    rq   °   s   
z!Kandinsky5TextEmbeddings.__init__c                 C   s   |   |¡}|  |¡ |¡S ©N)ru   r‘   Útype_as)ry   Ú
text_embedr   r   r    r…   µ   s   
z Kandinsky5TextEmbeddings.forwardr‡   r   r   r{   r    r   ¯   ó    r   c                       rŒ   )ÚKandinsky5VisualEmbeddingsc                    s,   t ƒ  ¡  || _t t |¡| |¡| _d S r“   )rp   rq   Ú
patch_sizers   rt   r   Úprodru   )ry   Ú
visual_dimrr   r˜   r{   r   r    rq   »   s   
z#Kandinsky5VisualEmbeddings.__init__c              
   C   s~   |j \}}}}}| ||| jd  | jd || jd  | jd || jd  | jd |¡ dddddddd¡ dd¡}|  |¡S )	Nr   r#   r
   r   r2   r1   r3   é   )r)   Úviewr˜   r5   r&   ru   )ry   r'   r:   r;   r<   r=   r   r   r   r    r…   À   s   ø
ô
z"Kandinsky5VisualEmbeddings.forwardr‡   r   r   r{   r    r—   º   r–   r—   c                       s&   e Zd Zd‡ fdd„	Zdd„ Z‡  ZS )ÚKandinsky5RoPE1Dé   r   c                    sV   t ƒ  ¡  || _|| _|| _t|d |ƒ}tj||jd}| j	dt 
||¡dd d S )Nr
   ©r   rƒ   F©Ú
persistent)rp   rq   r   r   Úmax_posr!   r   r   r   Úregister_bufferr~   )ry   r   r¢   r   ÚfreqÚposr{   r   r    rq   Ô   s   
zKandinsky5RoPE1D.__init__c                 C   sb   | j | }t |¡}t |¡}tj|| ||gdd}|jg |jd d… ¢d‘d‘R Ž }| d¡S )Nr-   r$   r
   éüÿÿÿ)rƒ   r   r€   r   Ústackrœ   r)   Ú	unsqueeze)ry   r¥   rƒ   ÚcosineÚsiner(   r   r   r    r…   Ý   s   


"
zKandinsky5RoPE1D.forward)rž   r   r‡   r   r   r{   r    r   Ó   s    	r   c                       s(   e Zd Zd‡ fdd„	Zd	dd„Z‡  ZS )
ÚKandinsky5RoPE3D©é€   r­   r­   r   c           	         sx   t ƒ  ¡  || _|| _|| _tt||ƒƒD ]$\}\}}t|d |ƒ}tj	||j
d}| jd|› t ||¡dd qd S )Nr
   rŸ   Úargs_Fr    )rp   rq   Ú	axes_dimsr¢   r   Ú	enumerateÚzipr!   r   r   r   r£   r~   )	ry   r¯   r¢   r   ÚiÚaxes_dimÚ
ax_max_posr¤   r¥   r{   r   r    rq   ç   s   
 ýzKandinsky5RoPE3D.__init__©ç      ð?r¶   r¶   c              
   C   s  |\}}}}| j |d  |d  }| j|d  |d  }	| j|d  |d  }
tj| d|ddd¡ |d||d¡|	 dd|dd¡ ||d|d¡|
 ddd|d¡ |||dd¡gdd}t |¡}t |¡}tj	|| ||gdd}|jg |j
d d… ¢d‘d‘R Ž }| d¡S )Nr   r#   r
   r-   r$   r¦   )Úargs_0Úargs_1Úargs_2r   r   rœ   Úrepeatr€   r   r§   r)   r¨   )ry   r)   r¥   Úscale_factorr:   r;   r<   r=   Úargs_tÚargs_hÚargs_wrƒ   r©   rª   r(   r   r   r    r…   ò   s    ýú

"
zKandinsky5RoPE3D.forward)r¬   r   )rµ   r‡   r   r   r{   r    r«   æ   s    r«   c                       rŒ   )ÚKandinsky5Modulationc                    sF   t ƒ  ¡  t ¡ | _t ||| ¡| _| jjj 	¡  | jj
j 	¡  d S r“   )rp   rq   rs   rv   rw   rt   rx   ÚweightÚdataÚzero_ro   )ry   rz   rr   Ú
num_paramsr{   r   r    rq     s
   

zKandinsky5Modulation.__init__c                 C   s   |   |  |¡¡S r“   )rx   rw   ©ry   r'   r   r   r    r…     s   zKandinsky5Modulation.forwardr‡   r   r   r{   r    r¿     ó    r¿   c                   @   s&   e Zd ZdZdZdd„ Zddd„ZdS )ÚKandinsky5AttnProcessorNc                 C   s    t tdƒst| jj› dƒ‚d S )NÚscaled_dot_product_attentionz; requires PyTorch 2.0. Please upgrade your pytorch version.)ÚhasattrÚFÚImportErrorr|   rˆ   )ry   r   r   r    rq     s   
ÿz Kandinsky5AttnProcessor.__init__c                 C   s´  |  |¡}|d urJ| |¡}| |¡}|jd d… |jd d… }	}
|jg |	¢|j‘d‘R Ž }|jg |
¢|j‘d‘R Ž }|jg |
¢|j‘d‘R Ž }n8| |¡}| |¡}|jd d… }	|jg |	¢|j‘d‘R Ž }|jg |	¢|j‘d‘R Ž }|jg |	¢|j‘d‘R Ž }| | ¡ ¡ |¡}| 	| ¡ ¡ |¡}dd„ }|d ur®|||ƒ |¡}|||ƒ |¡}|d ur¿t
|||d |d d}nd }t||||| j| jd}| dd¡}| |¡}|S )	Nr-   c                 S   sR   | j g | jd d… ¢d‘d‘d‘R Ž  tj¡}|| jdd}|j | jŽ  tj¡S )Nr-   r#   r
   r$   )r.   r)   rZ   r   r   rY   Úbfloat16)r'   r(   Úx_Úx_outr   r   r    Úapply_rotary5  s   .z6Kandinsky5AttnProcessor.__call__.<locals>.apply_rotaryÚsta_maskÚP)rE   )Ú	attn_maskÚbackendÚparallel_configrH   )Úto_queryÚto_keyÚto_valuer)   r.   Ú	num_headsÚ
query_normÚfloatr”   Úkey_normrl   r   Ú_attention_backendÚ_parallel_configr&   rx   )ry   ÚattnÚhidden_statesÚencoder_hidden_statesÚ
rotary_embÚsparse_paramsÚqueryÚkeyÚvaluer)   Ú
cond_shaperÎ   rÑ   Úattn_outr   r   r    Ú__call__  sN   




üú	
z Kandinsky5AttnProcessor.__call__©NNN)rˆ   r‰   rŠ   rÛ   rÜ   rq   rç   r   r   r   r    rÆ     s
    rÆ   c                       sn   e Zd ZeZegZd‡ fdd„	Z			ddejdejdB dejdB de	ejejf dB dejf
d	d
„Z
‡  ZS )ÚKandinsky5AttentionNc                    s¢   t ƒ  ¡  || dksJ ‚|| | _tj||dd| _tj||dd| _tj||dd| _t |¡| _	t |¡| _
tj||dd| _|d u rJ|  ¡ }|  |¡ d S )Nr   Trn   )rp   rq   r×   rs   rt   rÔ   rÕ   rÖ   ÚRMSNormrØ   rÚ   rx   Ú_default_processor_clsÚset_processor)ry   Únum_channelsÚhead_dimÚ	processorr{   r   r    rq   ^  s   

zKandinsky5Attention.__init__rÞ   rß   rá   rà   Úreturnc                    s   t t | jj¡j ¡ ƒ‰ i ‰‡ ‡fdd„| ¡ D ƒ}t|ƒdkr/t	 
d|› d| jjj› d¡ ‡ fdd„| ¡ D ƒ}| j| |f|||d	œ|¤ŽS )
Nc                    s$   g | ]\}}|ˆ vr|ˆvr|‘qS r   r   )Ú.0rC   Ú_©Úattn_parametersÚquiet_attn_parametersr   r    Ú
<listcomp>x  s   $ z/Kandinsky5Attention.forward.<locals>.<listcomp>r   zattention_processor_kwargs z are not expected by z and will be ignored.c                    s   i | ]\}}|ˆ v r||“qS r   r   )rñ   rC   Úw)rô   r   r    Ú
<dictcomp>}  s    z/Kandinsky5Attention.forward.<locals>.<dictcomp>)rß   rá   rà   )ÚsetÚinspectÚ	signaturerï   rç   Ú
parametersÚkeysÚitemsr7   ÚloggerÚwarningr|   rˆ   )ry   rÞ   rß   rá   rà   ÚkwargsÚunused_kwargsr   ró   r    r…   n  s$   ÿþûúzKandinsky5Attention.forwardr“   rè   )rˆ   r‰   rŠ   rÆ   rë   Ú_available_processorsrq   r   r   Útupler…   r‹   r   r   r{   r    ré   X  s&    ÿûþýüûùré   c                       rŒ   )ÚKandinsky5FeedForwardc                    s<   t ƒ  ¡  tj||dd| _t ¡ | _tj||dd| _d S )NFrn   )rp   rq   rs   rt   ru   ÚGELUrw   rx   )ry   r   Úff_dimr{   r   r    rq   Š  s   

zKandinsky5FeedForward.__init__c                 C   s   |   |  |  |¡¡¡S r“   )rx   rw   ru   rÄ   r   r   r    r…     s   zKandinsky5FeedForward.forwardr‡   r   r   r{   r    r  ‰  s    r  c                       rŒ   )ÚKandinsky5OutLayerc                    sN   t ƒ  ¡  || _t||dƒ| _tj|dd| _tj|t	 
|¡| dd| _d S )Nr
   FrŽ   Trn   )rp   rq   r˜   r¿   Ú
modulationrs   r   r‘   rt   r   r™   rx   )ry   rr   rz   rš   r˜   r{   r   r    rq   •  s
   
 zKandinsky5OutLayer.__init__c                 C   sÚ   t j|  |¡jddddd\}}|  | ¡ ¡| ¡ d d …d d f d  | ¡ d d …d d f   |¡}|  |¡}|j\}}}	}
}| 	|||	|
d| j
d | j
d | j
d ¡ dddddd	d
d¡ dd¡ dd	¡ d	d¡}|S )Nr#   r$   r
   r-   r¶   r   r2   r3   r   r›   r1   )r   Úchunkr	  r¨   r‘   rÙ   r”   rx   r)   rœ   r˜   r5   r&   )ry   Úvisual_embedr•   r„   ÚshiftÚscaler'   r:   r;   r<   r=   rò   r   r   r    r…   œ  s,   "<þ
ø
òzKandinsky5OutLayer.forwardr‡   r   r   r{   r    r  ”  rÅ   r  c                       rŒ   )Ú!Kandinsky5TransformerEncoderBlockc                    sZ   t ƒ  ¡  t||dƒ| _tj|dd| _t||tƒ d| _	tj|dd| _
t||ƒ| _d S )Nr3   FrŽ   ©rï   )rp   rq   r¿   Útext_modulationrs   r   Úself_attention_normré   rÆ   Úself_attentionÚfeed_forward_normr  Úfeed_forward©ry   rr   rz   r  rî   r{   r   r    rq   º  s   
z*Kandinsky5TransformerEncoderBlock.__init__c           
      C   sö   t j|  |¡jddddd\}}t j|ddd\}}}|  | ¡ ¡| ¡ d  | ¡   |¡}	| j|	|d}	| ¡ | ¡ |	 ¡    |¡}t j|ddd\}}}|  | ¡ ¡| ¡ d  | ¡   |¡}	|  	|	¡}	| ¡ | ¡ |	 ¡    |¡}|S )Nr#   r$   r
   r-   r   r¶   )rà   )
r   r
  r  r¨   r  rÙ   r”   r  r  r  )
ry   r'   r„   r(   Úself_attn_paramsÚ	ff_paramsr  r  ÚgateÚoutr   r   r    r…   Ä  s   "((
z)Kandinsky5TransformerEncoderBlock.forwardr‡   r   r   r{   r    r  ¹  s    
r  c                       rŒ   )Ú!Kandinsky5TransformerDecoderBlockc                    s|   t ƒ  ¡  t||dƒ| _tj|dd| _t||tƒ d| _	tj|dd| _
t||tƒ d| _tj|dd| _t||ƒ| _d S )Né	   FrŽ   r  )rp   rq   r¿   Úvisual_modulationrs   r   r  ré   rÆ   r  Úcross_attention_normÚcross_attentionr  r  r  r  r{   r   r    rq   Ô  s   
z*Kandinsky5TransformerDecoderBlock.__init__c                 C   sd  t j|  |¡jddddd\}}}t j|ddd\}	}
}|  | ¡ ¡|
 ¡ d  |	 ¡   |¡}| j|||d}| ¡ | ¡ | ¡    |¡}t j|ddd\}	}
}|  | ¡ ¡|
 ¡ d  |	 ¡   |¡}| j	||d}| ¡ | ¡ | ¡    |¡}t j|ddd\}	}
}|  
| ¡ ¡|
 ¡ d  |	 ¡   |¡}|  |¡}| ¡ | ¡ | ¡    |¡}|S )Nr#   r$   r   r-   r¶   )rà   rá   )rß   )r   r
  r  r¨   r  rÙ   r”   r  r  r  r  r  )ry   r  r•   r„   r(   rá   r  Úcross_attn_paramsr  r  r  r  Ú
visual_outr   r   r    r…   á  s,   ÿ"ÿ"ÿ"ÿ
z)Kandinsky5TransformerDecoderBlock.forwardr‡   r   r   r{   r    r  Ó  s    r  c                       s   e Zd ZdZddgZg d¢ZdZe							
																	d-dede	de	de	de
dede
de
de
de	def‡ fdd„ƒZ	 		d.d!ejd"ejd#ejd$ejd%ee
e
e
f d&ejd'eeeef d(eeef dB d)e	d*eejB fd+d,„Z‡  ZS )/ÚKandinsky5Transformer3DModelz?
    A 3D Diffusion Transformer model for video-like data.
    r  r  )Útime_embeddingsr	  r  r  Tr1   é   é   é   ©r#   r
   r
   é   é   r
   é    ©é   é   r,  FÚregularNÚattention_typeÚattention_causalÚattention_localÚattention_globÚattention_windowÚattention_PÚattention_wTÚattention_wWÚattention_wHÚattention_add_staÚattention_methodc                    sì   t ƒ  ¡  t|ƒ‰|| _ˆ| _|| _|| _|| _|r d| d n|}tˆˆƒ| _	t
|ˆƒ| _t
|ˆƒ| _t|ˆ|ƒ| _tˆƒ| _t|ƒ| _t ‡ ‡‡‡fdd„t|	ƒD ƒ¡| _t ‡ ‡‡‡fdd„t|
ƒD ƒ¡| _tˆˆ||ƒ| _d| _d S )Nr
   r#   c                    ó   g | ]	}t ˆˆˆ ˆƒ‘qS r   )r  ©rñ   rò   ©r  rî   rr   rz   r   r    rö   C  s    z9Kandinsky5Transformer3DModel.__init__.<locals>.<listcomp>c                    r9  r   )r  r:  r;  r   r    rö   G  s    ÿÿF)rp   rq   rY   Úin_visual_dimrr   r˜   Úvisual_condr.  rm   r"  r   Útext_embeddingsÚpooled_text_embeddingsr—   Úvisual_embeddingsr   Útext_rope_embeddingsr«   Úvisual_rope_embeddingsrs   Ú
ModuleListr6   Útext_transformer_blocksÚvisual_transformer_blocksr  rx   Úgradient_checkpointing)ry   r<  Úin_text_dimÚin_text_dim2rz   Úout_visual_dimr˜   rr   r  Únum_text_blocksÚnum_visual_blocksr¯   r=  r.  r/  r0  r1  r2  r3  r4  r5  r6  r7  r8  Úvisual_embed_dimr{   r;  r    rq     s0   


ÿþÿ
z%Kandinsky5Transformer3DModel.__init__rµ   rÞ   rß   ÚtimestepÚpooled_projectionsÚvisual_rope_posÚtext_rope_posr»   rá   Úreturn_dictrð   c
              	   C   s<  |}
|}|}|}|   |¡}|  |¡}||  |¡ }|  |
¡}|  |¡}|jdd}| jD ]}t ¡ r>| j	r>|  
||||¡}q,||||ƒ}q,|jdd… }|  |||¡}|dur[|d nd}t||||d\}}| jD ]}t ¡ r~| j	r~|  
||||||¡}qj||||||ƒ}qjt|||d}|  |||¡}
|	s™|
S t|
dS )	a  
        Forward pass of the Kandinsky5 3D Transformer.

        Args:
            hidden_states (`torch.FloatTensor`): Input visual states
            encoder_hidden_states (`torch.FloatTensor`): Text embeddings
            timestep (`torch.Tensor` or `float` or `int`): Current timestep
            pooled_projections (`torch.FloatTensor`): Pooled text embeddings
            visual_rope_pos (`tuple[int, int, int]`): Position for visual RoPE
            text_rope_pos (`torch.LongTensor`): Position for text RoPE
            scale_factor (`tuple[float, float, float]`, optional): Scale factor for RoPE
            sparse_params (`dict[str, Any]`, optional): Parameters for sparse attention
            return_dict (`bool`, optional): Whether to return a dictionary

        Returns:
            [`~models.transformer_2d.Transformer2DModelOutput`] or `torch.FloatTensor`: The output of the transformer
        r   r$   Nr-   Ú
to_fractalF)r*   )Úsample)r>  r"  r?  r@  rA  r¨   rD  r   Úis_grad_enabledrF  Ú_gradient_checkpointing_funcr)   rB  r,   rE  r0   rx   r   )ry   rÞ   rß   rM  rN  rO  rP  r»   rá   rQ  r'   r•   r‚   Úpooled_text_embedr„   r  Ú	text_ropeÚtext_transformer_blockÚvisual_shapeÚvisual_roperR  Úvisual_transformer_blockr   r   r    r…   Q  sL   




ÿ
ú	
ÿ
z$Kandinsky5Transformer3DModel.forward)r1   r#  r$  r%  r1   r&  r'  r(  r
   r)  r*  Fr-  NNNNNNNNNN)rµ   NT)rˆ   r‰   rŠ   Ú__doc__Ú_repeated_blocksÚ_keep_in_fp32_modulesÚ _supports_gradient_checkpointingr   ÚstrÚboolrU   rÙ   rq   r   r   r  Ú
LongTensorÚdictr   r   ÚFloatTensorr…   r‹   r   r   r{   r    r!  þ  sœ    þèòñðïîíìëêéèGöþýüûúùø	÷
öõr!  r†   )F)r   )rA   )7rú   r   Útypingr   r   Útorch.nnrs   Útorch.nn.functionalÚ
functionalrÉ   r   Úconfiguration_utilsr   r   Úloadersr   r   Úutilsr	   Ú	attentionr   r   Úattention_dispatchr   r   Úcache_utilsr   Úmodeling_outputsr   Úmodeling_utilsr   Ú
get_loggerrˆ   rÿ   r!   r,   r0   r%   r/   rÙ   rl   ÚModulerm   r   r—   r   r«   r¿   rÆ   ré   r  r  r  r  r!  r   r   r   r    Ú<module>   sf   






üÿþý
ü$!E1%
+ú