o
    wiu+                     @   s   d dl mZmZmZmZ d dlmZ d dlmZm	Z	 d dl
Z
d dlZddlmZ dedefd	d
ZG dd dZG dd deZdS )    )AnyListOptionalDict)EinopsError)ParsedExpression	_ellipsisN   )_productaxesreport_messagec                 C   s   t | dkrt|| d S )Nr   )lenr   format)r   r    r   R/home/ubuntu/sommelier/.venv/lib/python3.10/site-packages/einops/layers/_einmix.py_report_axes
   s   r   c                	       s   e Zd Zddededee def fddZdededee defdd	Zd
ee dee	 dee dee	 fddZ
dd Zdd Z  ZS )_EinmixMixinNpatternweight_shape
bias_shapeaxes_lengthsc                    s8   t    || _|| _|| _|| _| j||||d dS )a	  
        EinMix - Einstein summation with automated tensor management and axis packing/unpacking.

        EinMix is a combination of einops and MLP, see tutorial:
        https://github.com/arogozhnikov/einops/blob/main/docs/3-einmix-layer.ipynb

        Imagine taking einsum with two arguments, one of each input, and one - tensor with weights
        >>> einsum('time batch channel_in, channel_in channel_out -> time batch channel_out', input, weight)

        This layer manages weights for you, syntax highlights a special role of weight matrix
        >>> EinMix('time batch channel_in -> time batch channel_out', weight_shape='channel_in channel_out')
        But otherwise it is the same einsum under the hood. Plus einops-rearrange.

        Simple linear layer with a bias term (you have one like that in your framework)
        >>> EinMix('t b cin -> t b cout', weight_shape='cin cout', bias_shape='cout', cin=10, cout=20)
        There is no restriction to mix the last axis. Let's mix along height
        >>> EinMix('h w c-> hout w c', weight_shape='h hout', bias_shape='hout', h=32, hout=32)
        Example of channel-wise multiplication (like one used in normalizations)
        >>> EinMix('t b c -> t b c', weight_shape='c', c=128)
        Multi-head linear layer (each head is own linear layer):
        >>> EinMix('t b (head cin) -> t b (head cout)', weight_shape='head cin cout', ...)

        ... and yes, you need to specify all dimensions of weight shape/bias shape in parameters.

        Use cases:
        - when channel dimension is not last, use EinMix, not transposition
        - patch/segment embeddings
        - when need only within-group connections to reduce number of weights and computations
        - next-gen MLPs (follow tutorial link above to learn more!)
        - in general, any time you want to combine linear layer and einops.rearrange

        Uniform He initialization is applied to weight tensor.
        This accounts for the number of elements mixed and produced.

        Parameters
        :param pattern: transformation pattern, left side - dimensions of input, right side - dimensions of output
        :param weight_shape: axes of weight. A tensor of this shape is created, stored, and optimized in a layer
               If bias_shape is not specified, bias is not created.
        :param bias_shape: axes of bias added to output. Weights of this shape are created and stored. If `None` (the default), no bias is added.
        :param axes_lengths: dimensions of weight tensor
        )r   r   r   r   N)super__init__r   r   r   r   initialize_einmix)selfr   r   r   r   	__class__r   r   r      s   
*
z_EinmixMixin.__init__c                    s  | d\}}t|}t|t|}ttjh |j|jd |jr*td|js0jrG|jr6js=td| |jrGtd| t	dd ||fD rWtdd	|v s_d
|v rftd| d }	d }
d }t	dd |j
D rg |j
D ]}|7 q{dd D d}| d| }	fdd  D }
t	dd j
D sjrg j
D ]}|7 qdd D d}| d| }| |	|
|i  |jD ]}| vrtd|qttt h |j|jd tt|jh |jjd t|jdkrtd  fdd|j
D }t fdd|j
D }|d urt|ts9tdt|}tt|jjd tt|jt d g }d }j
D ];}|tkrk|ritd!q[|D ]'}|tkr}|r{td!qm||jv r| |  d"}qm|d# qmq[nd }d$| d% }d#| d% }| |||| h |jj|j}t|v r|t tt|}d&d ttj|D d't< d(tffd)d*}d+|||||| _d S ),Nz->z7Unrecognized identifiers on the right side of EinMix {}zKEllipsis is not supported in weight, as its shape should be fully specifiedz,Ellipsis in EinMix should be on both sides, z3Ellipsis on left side can't be in parenthesis, got c                 s   s    | ]}|j V  qd S N)has_non_unitary_anonymous_axes).0xr   r   r   	<genexpr>S   s    z1_EinmixMixin.initialize_einmix.<locals>.<genexpr>z2Anonymous axes (numbers) are not allowed in EinMix()z,Parenthesis is not allowed in weight shape: c                 s       | ]	}t |d kV  qdS    Nr   r   groupr   r   r   r!   [       c                 S      g | ]
}|t kr
|nd qS ...r   r   namer   r   r   
<listcomp>_       z2_EinmixMixin.initialize_einmix.<locals>.<listcomp> z-> c                    s   i | ]\}}| v r||qS r   r   )r   r0   length)namesr   r   
<dictcomp>b   s    z2_EinmixMixin.initialize_einmix.<locals>.<dictcomp>c                 s   r$   r%   r'   r(   r   r   r   r!   d   r*   c                 S   r+   r,   r.   r/   r   r   r   r1   h   r2   z ->z*Dimension {} of weight should be specifiedzAxes {} are not used in patternzWeight axes {} are redundantr   zCEinMix: weight has no dimensions (means multiplication by a number)c                    s   g | ]\} | qS r   r   r   axis)r   r   r   r1   {       c                    s    g | ]\}|j vr | qS r   )identifiersr7   )r   rightr   r   r1   }   s     zAbias shape should be string specifying which axes bias depends onz"Bias axes {} not present in outputz#Sizes not provided for bias axes {}Fz:all bias dimensions should go after ellipsis in the outputTr&      g      ?c                 S   s   i | ]\}}||qS r   r   )r   letterkr   r   r   r6      r9   r-   r   c                    sT   g }| j D ]}t|tr| fdd|D  q|tksJ |d qd|S )Nc                    s   g | ]} | qS r   r   r7   mapping2lettersr   r   r1      s    zO_EinmixMixin.initialize_einmix.<locals>.write_flat_remapped.<locals>.<listcomp>r-    )composition
isinstancelistextendr   appendjoin)r   resultcomposed_axisr?   r   r   write_flat_remapped   s   


z;_EinmixMixin.initialize_einmix.<locals>.write_flat_remappedz	{},{}->{})splitr   r   set
differencer:   has_ellipsisr   has_ellipsis_parenthesizedanyrB   rG   items_create_rearrange_layersr   r   warningswarnr
   rC   strr   rF   _create_parametersremoverD   sortedzipstringascii_lowercaseeinsum_pattern)r   r   r   r   r   left_patternright_patternleftweightpre_reshape_patternpre_reshape_lengthspost_reshape_patternr)   rB   r8   _weight_shape_fan_inbias_bias_shapeused_non_trivial_sizer   weight_bound
bias_boundmapped_identifiersrJ   r   )r   r@   r5   r;   r   r   C   s   















z_EinmixMixin.initialize_einmixra   rb   rc   post_reshape_lengthsc                 C      t d)N.Should be defined in framework implementationsNotImplementedErrorr   ra   rb   rc   rl   r   r   r   rR      s   z%_EinmixMixin._create_rearrange_layersc                 C   rm   )zShape and implementationsrn   ro   r   r   ri   r   rj   r   r   r   rV      s   z_EinmixMixin._create_parametersc                 C   sl   t | j}|d| j d7 }| jd ur|d| j d7 }| j D ]\}}|d||7 }q!d| jj|S )Nz, ''z, {}={}z{}({}))	reprr   r   r   r   rQ   r   r   __name__)r   paramsr8   r4   r   r   r   __repr__   s   

z_EinmixMixin.__repr__r   )ru   
__module____qualname__rU   r   r   r   dictr   r   rR   rV   rw   __classcell__r   r   r   r   r      s    $3z
	r   c                	   @   sB   e Zd ZdZdee dee dee dee fddZdd	 Zd
S )_EinmixDebuggerzUsed only to test mixinra   rb   rc   rl   c                 C   s   || _ || _|| _|| _d S r   )ra   rb   rc   rl   rq   r   r   r   rR      s   
z(_EinmixDebugger._create_rearrange_layersc                 C   s   || _ || _d S r   )saved_weight_shapesaved_bias_shaperr   r   r   r   rV      s   
z"_EinmixDebugger._create_parametersN)	ru   rx   ry   __doc__r   rU   r   rR   rV   r   r   r   r   r|      s    
r|   )typingr   r   r   r   einopsr   einops.parsingr   r   rS   rZ   r
   rL   rU   r   r   r|   r   r   r   r   <module>   s     F