o
    iC+                     @   s   d dl Z d dlZd dlmZmZmZmZ d dlmZ d dl	m
Z
 d dlmZmZ dedefdd	ZG d
d dZG dd deZdS )    N)AnyDictListOptional)EinopsError)_product)ParsedExpression	_ellipsisaxesreport_messagec                 C   s   t | dkrt|| d S )Nr   )lenr   format)r
   r    r   R/home/ubuntu/veenaModal/venv/lib/python3.10/site-packages/einops/layers/_einmix.py_report_axes
   s   r   c                	       s   e Zd Zddededee def fddZdededee defdd	Zd
ee dee	 dee dee	 fddZ
dd Zdd Z  ZS )_EinmixMixinNpatternweight_shape
bias_shapeaxes_lengthsc                    s8   t    || _|| _|| _|| _| j||||d dS )a	  
        EinMix - Einstein summation with automated tensor management and axis packing/unpacking.

        EinMix is a combination of einops and MLP, see tutorial:
        https://github.com/arogozhnikov/einops/blob/main/docs/3-einmix-layer.ipynb

        Imagine taking einsum with two arguments, one of each input, and one - tensor with weights
        >>> einsum('time batch channel_in, channel_in channel_out -> time batch channel_out', input, weight)

        This layer manages weights for you, syntax highlights a special role of weight matrix
        >>> EinMix('time batch channel_in -> time batch channel_out', weight_shape='channel_in channel_out')
        But otherwise it is the same einsum under the hood. Plus einops-rearrange.

        Simple linear layer with a bias term (you have one like that in your framework)
        >>> EinMix('t b cin -> t b cout', weight_shape='cin cout', bias_shape='cout', cin=10, cout=20)
        There is no restriction to mix the last axis. Let's mix along height
        >>> EinMix('h w c-> hout w c', weight_shape='h hout', bias_shape='hout', h=32, hout=32)
        Example of channel-wise multiplication (like one used in normalizations)
        >>> EinMix('t b c -> t b c', weight_shape='c', c=128)
        Multi-head linear layer (each head is own linear layer):
        >>> EinMix('t b (head cin) -> t b (head cout)', weight_shape='head cin cout', ...)

        ... and yes, you need to specify all dimensions of weight shape/bias shape in parameters.

        Use cases:
        - when channel dimension is not last, use EinMix, not transposition
        - patch/segment embeddings
        - when need only within-group connections to reduce number of weights and computations
        - next-gen MLPs (follow tutorial link above to learn more!)
        - in general, any time you want to combine linear layer and einops.rearrange

        Uniform He initialization is applied to weight tensor.
        This accounts for the number of elements mixed and produced.

        Parameters
        :param pattern: transformation pattern, left side - dimensions of input, right side - dimensions of output
        :param weight_shape: axes of weight. A tensor of this shape is created, stored, and optimized in a layer
               If bias_shape is not specified, bias is not created.
        :param bias_shape: axes of bias added to output. Weights of this shape are created and stored. If `None` (the default), no bias is added.
        :param axes_lengths: dimensions of weight tensor
        )r   r   r   r   N)super__init__r   r   r   r   initialize_einmix)selfr   r   r   r   	__class__r   r   r      s   
*
z_EinmixMixin.__init__c                    s  | d\}}t|}t|t|}ttjh |j|jd |jr*td|js0jrG|jr6js=td| |jrGtd| t	dd ||fD rWtdd	|v s_d
|v rftd| d }	d }
d }t	dd |j
D rg |j
D ]}|7 q{dd D d}| d| }	fdd  D }
t	dd j
D sjrg j
D ]}|7 qdd D d}| d| }| |	|
|i  |jD ]}| vrtd| dqttt h |j|jd tt|jh |jjd t|jdkrtjddd  fdd|j
D }t fdd|j
D }|d urt|ts<td t|}tt|jjd! tt|jt d" g }d#}j
D ];}|tkrn|rltd$q^|D ]'}|tkr|r~td$qp||jv r| |  d%}qp|d& qpq^nd }d'| d( }d&| d( }| |||| h |jj|j}t|v r|t t|}d)d ttj|D d*t< d+tffd,d-}|| d.|| d| | _d S )/Nz->z7Unrecognized identifiers on the right side of EinMix {}zKEllipsis is not supported in weight, as its shape should be fully specifiedz,Ellipsis in EinMix should be on both sides, z3Ellipsis on left side can't be in parenthesis, got c                 s   s    | ]}|j V  qd S N)has_non_unitary_anonymous_axes).0xr   r   r   	<genexpr>S   s    z1_EinmixMixin.initialize_einmix.<locals>.<genexpr>z2Anonymous axes (numbers) are not allowed in EinMix()z,Parenthesis is not allowed in weight shape: c                 s       | ]	}t |d kV  qdS    Nr   r   groupr   r   r   r    [       c                 S      g | ]
}|t kr
|nd qS ...r	   r   namer   r   r   
<listcomp>_       z2_EinmixMixin.initialize_einmix.<locals>.<listcomp> z-> c                    s   i | ]\}}| v r||qS r   r   )r   r/   length)namesr   r   
<dictcomp>b   s    z2_EinmixMixin.initialize_einmix.<locals>.<dictcomp>c                 s   r#   r$   r&   r'   r   r   r   r    d   r)   c                 S   r*   r+   r-   r.   r   r   r   r0   h   r1   z ->z
Dimension z of weight should be specifiedzAxes {} are not used in patternzWeight axes {} are redundantr   zCEinMix: weight has no dimensions (means multiplication by a number)   )
stacklevelc                    s   g | ]\} | qS r   r   r   axis)r   r   r   r0   {       c                    s    g | ]\}|j vr | qS r   )identifiersr8   )r   rightr   r   r0   }   s     zAbias shape should be string specifying which axes bias depends onz"Bias axes {} not present in outputz#Sizes not provided for bias axes {}Fz:all bias dimensions should go after ellipsis in the outputTr%      g      ?c                 S   s   i | ]\}}||qS r   r   )r   letterkr   r   r   r5      r:   r,   r
   c                    sT   g }| j D ]}t|tr| fdd|D  q|tksJ |d qd|S )Nc                    s   g | ]} | qS r   r   r8   mapping2lettersr   r   r0      s    zO_EinmixMixin.initialize_einmix.<locals>.write_flat_remapped.<locals>.<listcomp>r,    )composition
isinstancelistextendr	   appendjoin)r
   resultcomposed_axisr@   r   r   write_flat_remapped   s   


z;_EinmixMixin.initialize_einmix.<locals>.write_flat_remapped,)splitr   r   set
differencer;   has_ellipsisr   has_ellipsis_parenthesizedanyrC   rH   items_create_rearrange_layersr   warningswarnr   rD   strr	   rG   _create_parametersremovesortedzipstringascii_lowercaseeinsum_pattern)r   r   r   r   r   left_patternright_patternleftweightpre_reshape_patternpre_reshape_lengthspost_reshape_patternr(   rC   r9   _weight_shape_fan_inbias_bias_shapeused_non_trivial_sizer
   weight_bound
bias_boundmapped_identifiersrK   r   )r   rA   r4   r<   r   r   C   s   












z_EinmixMixin.initialize_einmixrc   rd   re   post_reshape_lengthsc                 C      t d)N.Should be defined in framework implementationsNotImplementedErrorr   rc   rd   re   rn   r   r   r   rT      s   z%_EinmixMixin._create_rearrange_layersc                 C   ro   )zShape and implementationsrp   rq   r   r   rk   r   rl   r   r   r   rX      s   z_EinmixMixin._create_parametersc                 C   st   t | j}|d| j d7 }| jd ur|d| j d7 }| j D ]\}}|d| d| 7 }q!| jj d| dS )Nz, ''z, =r!   r"   )reprr   r   r   r   rS   r   __name__)r   paramsr9   r3   r   r   r   __repr__   s   

z_EinmixMixin.__repr__r   )rx   
__module____qualname__rW   r   r   r   dictr   r   rT   rX   rz   __classcell__r   r   r   r   r      s    $3x
	r   c                	   @   sB   e Zd ZdZdee dee dee dee fddZdd	 Zd
S )_EinmixDebuggerzUsed only to test mixinrc   rd   re   rn   c                 C   s   || _ || _|| _|| _d S r   )rc   rd   re   rn   rs   r   r   r   rT      s   
z(_EinmixDebugger._create_rearrange_layersc                 C   s   || _ || _d S r   )saved_weight_shapesaved_bias_shapert   r   r   r   rX      s   
z"_EinmixDebugger._create_parametersN)	rx   r{   r|   __doc__r   rW   r   rT   rX   r   r   r   r   r      s    
r   )r\   rU   typingr   r   r   r   einopsr   einops.einopsr   einops.parsingr   r	   rN   rW   r   r   r   r   r   r   r   <module>   s     D