o
    پi@                     @   s   d dl Z d dlZd dlZd dl mZ d dlmZ dd Zdd
dZdddZd ddZ	dd Z
			d!dejdedededef
ddZ		d"dejdededefddZdS )#    N)nn)_calculate_fan_in_and_fan_outc                 C   s   dd }||d|  k s||d|  krt jddd ||| | }||| | }| d| d d| d  |   | |td  | | | j||d | S )	Nc                 S   s   dt | t d  d S )N      ?       @)matherfsqrt)x r
   K/home/ubuntu/.local/lib/python3.10/site-packages/timm/layers/weight_init.pynorm_cdf   s   z _trunc_normal_.<locals>.norm_cdf   zjmean is more than 2 std from [a, b] in nn.init.trunc_normal_. The distribution of values may be incorrect.)
stacklevel   r   )minmax)	warningswarnuniform_erfinv_mul_r   r   add_clamp_)tensormeanstdabr   lur
   r
   r   _trunc_normal_   s    
r            r          r   c                 C   s<   t   t| ||||W  d   S 1 sw   Y  dS )a  Fills the input Tensor with values drawn from a truncated
    normal distribution. The values are effectively drawn from the
    normal distribution :math:`\mathcal{N}(\text{mean}, \text{std}^2)`
    with values outside :math:`[a, b]` redrawn until they are within
    the bounds. The method used for generating the random values works
    best when :math:`a \leq \text{mean} \leq b`.

    NOTE: this impl is similar to the PyTorch trunc_normal_, the bounds [a, b] are
    applied while sampling the normal with mean/std applied, therefore a, b args
    should be adjusted to match the range of mean, std args.

    Args:
        tensor: an n-dimensional `torch.Tensor`
        mean: the mean of the normal distribution
        std: the standard deviation of the normal distribution
        a: the minimum cutoff value
        b: the maximum cutoff value
    Examples:
        >>> w = torch.empty(3, 5)
        >>> nn.init.trunc_normal_(w)
    N)torchno_gradr    r   r   r   r   r   r
   r
   r   trunc_normal_+   s   
$r&   c                 C   sN   t   t| dd|| | || W d   | S 1 s w   Y  | S )a  Fills the input Tensor with values drawn from a truncated
    normal distribution. The values are effectively drawn from the
    normal distribution :math:`\mathcal{N}(\text{mean}, \text{std}^2)`
    with values outside :math:`[a, b]` redrawn until they are within
    the bounds. The method used for generating the random values works
    best when :math:`a \leq \text{mean} \leq b`.

    NOTE: this 'tf' variant behaves closer to Tensorflow / JAX impl where the
    bounds [a, b] are applied when sampling the normal distribution with mean=0, std=1.0
    and the result is subsequently scaled and shifted by the mean and std args.

    Args:
        tensor: an n-dimensional `torch.Tensor`
        mean: the mean of the normal distribution
        std: the standard deviation of the normal distribution
        a: the minimum cutoff value
        b: the maximum cutoff value
    Examples:
        >>> w = torch.empty(3, 5)
        >>> nn.init.trunc_normal_(w)
    r   r   N)r#   r$   r    r   r   r%   r
   r
   r   trunc_normal_tf_F   s   

r'   fan_innormalc           	      C   s  t | \}}|dkr|}n|dkr|}n
|dkr|| d }|| }|dkr3t| t|d d d S |dkrWt  | jt|d W d    d S 1 sPw   Y  d S |d	krtd
| }t  | | | W d    d S 1 syw   Y  d S td| )Nr(   fan_outfan_avgr   truncated_normalg۶%?r   r)   uniform   zinvalid distribution )	r   r'   r   r   r#   r$   normal_r   
ValueError)	r   scalemodedistributionr(   r*   denomvarianceboundr
   r
   r   variance_scaling_c   s(   
"
"r8   c                 C   s   t | ddd d S )Nr(   r,   )r3   r4   )r8   )r   r
   r
   r   lecun_normal_|   s   r9   {Gz?headmodulename	init_bias	head_biasclassifier_namec                 C   s   t | tjtjtjtjfrE||r#tj| j	 tj
| j| d S tjj| j	dd t | tjrA| jd urCtj
| j| d S d S d S t| drP|   d S d S )Nr:   r-   init_weights)
isinstancer   LinearConv1dConv2dConv3d
startswithinitzeros_weight	constant_biasr&   hasattrrA   )r<   r=   r>   r?   r@   r
   r
   r   init_weight_vit   s   

rN   c                 C   s   t | tjr@||rtj| j tj| j| d S tj	| j | jd ur>d|v r5tjj
| jddntj| j d S d S t | tjr[t| j | jd urYtj| j d S d S t| drf|   d S d S )Nmlpgư>r-   rA   )rB   r   rC   rG   rH   rI   rJ   rK   rL   xavier_uniform_r0   rE   r9   rM   rA   )r<   r=   r?   r@   r
   r
   r   init_weight_jax   s$   

& 


rQ   )r!   r   r"   r   )r   r(   r)   )r:   r!   r;   )r!   r;   )r#   r   r   r   torch.nn.initr   r    r&   r'   r8   r9   ModulestrfloatrN   rQ   r
   r
   r
   r   <module>   sF    
#


