o
    X۷iw]                     @  s  d dl mZ d dlZd dlZd dlZd dlZd dlZd dlZd dlm	Z	 d dlm
Z
 d dlmZ d dlmZ d dlmZ ddd	Zejej Zd
d Zdd Zdd Zdd Zd&ddZdd Zdd Zdd Zdd Zdd Zdd Zd d! Zd"d# Z d$d% Z!dS )'    )annotationsN)_accelerator)_util)_greedy_path)_optimal_path)_try_use_cutensornetF)sum_ellipsisbroadcast_diagonalc                   sj   g }g }|D ] }| |r j|d  nd t fdd|D }| | q    ||dd  S )zTranspose and diagonal

    Args:
        a
        axeses (sequence of sequences of ints)

    Returns:
        ndarray: a with its axes permutated. A writeable view is returned
        whenever possible.
    r      c                 3  s    | ]} j | V  qd S N)strides.0axisa I/home/ubuntu/vllm_env/lib/python3.10/site-packages/cupy/linalg/_einsum.py	<genexpr>*       z _transpose_ex.<locals>.<genexpr>T)appendshapesumview_set_shape_and_strides)r   axesesr   r   axesstrider   r   r   _transpose_ex   s   r   c                 C  sb   d}| D ]*}|t u r|d7 }qzt|}W n ty' } ztd|d }~ww |t| 7 }q|S )N @z=For this input type lists must contain either int or Ellipsis)Ellipsisoperatorindex	TypeErroreinsum_symbols)list_subscriptstr_subscriptser   r   r   _parse_int_subscript2   s    
r*   c                 C  s  t | dkr
tdt| d tr| d }t| dd }|D ]}|dv r&q|tvr0td| q|dd}d	|v r?td
d|v sGd|v rq|ddkpT|ddk}|d}|sbt |dkrftd|\}}|dd}n|}d}|ddd}t |t |krt |t |krdnd}t|d dd |D }n8t| } g }g }t | dkr|	| 
d |	t| 
d t | dks| rt| d }nd}dd |D }|||fS )a  Parse einsum operands.

    This function is based on `numpy.core.einsumfunc._parse_einsum_input`
    function in NumPy 1.14.

    Parameters
    ----------
    args : tuple
        The non-keyword arguments to einsum

    Returns
    -------
    input_strings : str
        Parsed input strings
    output_string : str
        Parsed output string
    operands : list of array_like
        The operands to use in the contraction

    Examples
    --------
    The operand list is simplified to reduce printing:

    >>> a = np.random.rand(4, 4)
    >>> b = np.random.rand(4, 4, 4)
    >>> _parse_einsum_input(('...a,...a->...', a, b))
    (['@a, @a'], 'xz', [a, b])

    >>> _parse_einsum_input((a, [Ellipsis, 0], b, [Ellipsis, 0]))
    (['@a, @a'], 'xz', [a, b])
    r   zmust specify the einstein sum subscripts string and at least one operand, or at least one operand and its corresponding subscripts listr
   Nz.,-> zTinvalid subscript '%s' in einstein sum subscripts string, subscripts must be lettersz...r    .zUeinstein sum subscripts string contains a '.' that is not part of an ellipsis ('...')->z->   zKeinstein sum subscript string does not contain proper '->' output specified r   ,morefewerzS operands provided to einstein sum function than specified in the subscripts stringc                 S     g | ]}t |qS r   cupyasarrayr   opr   r   r   
<listcomp>       z'_parse_einsum_input.<locals>.<listcomp>c                 S  r3   r   r4   r7   r   r   r   r9      r:   )len
ValueError
isinstancestrlistr%   replacecountsplitr   popr*   )args
subscriptsoperandsr(   invalidinput_subscriptsoutput_subscriptmsgr   r   r   _parse_einsum_inputB   sh   !

rK   c                 C  s   | dk rd|  S t | S )Nr   z...[%d])chr)labelr   r   r   _chr   s   rN   c           	      C  s  |  d}t|dkr5|\}|dur.t||kr.t||kr&td||f td||f dd |D S t|dkr{|\}}|durM|t|t|  }|d	k rZtd
|||f g }|dd |D  |t| d	 |dd |D  |S td|du rd d|  )a  Parse a subscript that may contain ellipsis

    Args:
        subscript (str): An einsum subscript of an operand or an output. '...'
            should be replaced by '@'.
        idx (int or None): For error messages, give int idx for the idx-th
            operand or None for the output.
        ndim (int, optional): ndim of the operand
        ellipsis_len (int, optional): number of broadcast dimensions of the
            output.

    Returns:
        list of ints: The parsed subscript

    r    r
   NzMeinstein sum subscripts string %s contains too many subscripts for operand %dzoperand %d has more dimensions than subscripts string %s given in einstein sum, but no '...' ellipsis provided to broadcast the extra dimensions.c                 S     g | ]}t |qS r   ordr   rM   r   r   r   r9          z-_parse_ellipsis_subscript.<locals>.<listcomp>r.   r   zReinstein sum subscripts string %s...%s contains too many subscripts for operand %dc                 s      | ]}t |V  qd S r   rP   rR   r   r   r   r          z,_parse_ellipsis_subscript.<locals>.<genexpr>c                 s  rT   r   rP   rR   r   r   r   r      rU   zVeinstein sum subscripts string contains a '.' that is not part of an ellipsis ('...') zin the outputzfor operand %d)rB   r;   r<   extendrange)		subscriptidxndimellipsis_lensubssubleft_sub	right_subretr   r   r   _parse_ellipsis_subscript   sN   

ra   c                   s  t t| D ]x}| | }||  tt|t|k r~i }t|D ]\}}||g | q t| }|D ]4\}}td rG fdd|D } fdd|D }t|dkrj|	 }	|	 }
t
d|t||	|
f q6t| \}}t|| |< t |||< qdS )	zGCompute diagonal for each operand

    This function mutates args.
    r	   c                   s   g | ]} j | d kr|qS )r
   r   r   arrr   r   r9      s    z%_einsum_diagonals.<locals>.<listcomp>c                   s   h | ]} j | qS r   rb   r   rc   r   r   	<setcomp>   r:   z$_einsum_diagonals.<locals>.<setcomp>r.   zIdimensions in operand %d for collapsing index '%s' don't match (%d != %d)N)rW   r;   set	enumerate
setdefaultr   r?   itemsoptionsrC   r<   rN   zipr   )rH   rF   rY   r]   r   r   rM   r   dimsdim0dim1r   rc   r   _einsum_diagonals   s4   	ro   c                 c  sl    | D ]0}t dd |D sJ t|dkr3t|dd}|d |d fV  |dd D ]}d	|fV  q+qdS )
zDecompose path into binary path

    Args:
        path (sequence of tuples of ints)

    Yields:
        tuple of ints: pair (idx0, idx1) that represents the operation
            {pop(idx0); pop(idx1); append();}
    c                 s  s    | ]}|d kV  qdS r   Nr   )r   rY   r   r   r   r     rU   z#_iter_path_pairs.<locals>.<genexpr>r.   T)reverser   r
   N)allr;   sorted)pathindicesrY   r   r   r   _iter_path_pairs  s   rw   c                   sV   g }g }|D ]}| | | fdd|D  q |tdd |D |fS )zTranspose and flatten each

    Args:
        a
        axeses (sequence of sequences of ints)

    Returns:
        aT: a with its axes permutated and flatten
        shapes: flattened shapes
    c                   s   g | ]} j | qS r   rb   r   r   r   r   r9   (  r:   z&_flatten_transpose.<locals>.<listcomp>c                 S  s   g | ]	}t jj|qS r   )r5   _coreinternalprod)r   r   r   r   r   r9   +  s    )rV   r   	transposereshapetuple)r   r   transpose_axesshapesr   r   r   r   _flatten_transpose  s   

r   c                   s>   i  t | | || D ]\}}| |< q fdd|D }|S )Nc                      g | ]} | qS r   r   )r   iextentr   r   r9   4  rS   z"_get_out_shape.<locals>.<listcomp>)rk   )shape0sub0shape1sub1sub_outsizer   	out_shaper   r   r   _get_out_shape0  s
   
r   c                 C  s^   t |}t | j}g }|D ]}||vr|| |d ||| qt| ||S )a  Return a reshaped and transposed array.

    The input array ``arr`` having ``mode`` as its modes is reshaped and
    transposed so that modes of the output becomes ``mode_out``.

    Example
        >>> import cupy
        >>> a = cupy.zeros((10, 20))
        >>> mode_a = ('A', 'B')
        >>> mode_out = ('B', 'C', 'A')
        >>> out = cupy.linalg.einsum._expand_dims_transpose(a, mode_a,
        ...                                                 mode_out)
        >>> out.shape
        (20, 1, 10)

    Args:
        arr (cupy.ndarray):
        mode (tuple or list): The modes of input array.
        mode_out (tuple or list): The modes of output array.

    Returns:
        cupy.ndarray: The reshaped and transposed array.

    r
   )r?   r   r   r#   r5   r{   r|   )rd   modemode_outr   r   r   r   r   r   _expand_dims_transpose8  s   


r   c                   s  t  }t }t|t ksJ dt|tks J dt dks,tdkr4| |   fS t |}||@ }||@ }	||	 }
t |	|
\}}}t|	|
\}}} fdd|D }|fdd|D kslJ  fdd|D }fdd|D }|| | }t ||ksJ d	t|
dkrt|t|kr|}t|  |} t||}| | |fS t D ]h}|tjkrqzdd l}dd
lm	} W n	 t
y   Y qw | j|jks| jtjtjtjtjfvrqt|t|kr|}t| j |j|}t|| j}t| } t|}|d|  |d||}||f  S t| |||g\}}t||||g\}}|d |d  |d  }|d |d ksHJ t|||}||fS )Nz%operand 0 should be reduced: diagonalz%operand 1 should be reduced: diagonalr   c                   r   r   r   r   r   r   r   r9   m  rS   z)reduced_binary_einsum.<locals>.<listcomp>c                   r   r   r   r   r   r   r   r9   n  rS   c                   r   r   r   r   r   r   r   r9   o  rS   c                   r   r   r   r   r   r   r   r9   p  rS   z%operands should be reduced: unary sum)cutensorg      ?g        r
   r.   )rf   r;   _make_transpose_axesr   r   get_routine_acceleratorsACCELERATOR_CUTENSOR cupy_backends.cuda.libs.cutensorcupyxr   ImportErrordtyper5   float32float64	complex64
complex128r   r   emptyascontiguousarraycontractionr   matmulr|   )arr0r   arr1r   
sub_othersset0set1
set_othersshared
batch_dimscontract_dimsbs0cs0ts0bs1cs1ts1sub_bsub_lsub_rr   acceleratorcupy_backendsr   r   arr_outtmp0shapes0tmp1shapes1
shapes_outr   )r   r   r   reduced_binary_einsum\  sx   


r   c                 C  sr   g }g }g }t | D ]#\}}||v r|||f q
||v r&|||f q
|||f q
t|t|t|fS r   )rg   r   _tuple_sorted_by_0)r]   b_dimsc_dimsbscstsr   rM   r   r   r   r     s   r   c                 C  s   t dd t| D S )Nc                 s  s    | ]\}}|V  qd S r   r   )r   _r   r   r   r   r     rU   z%_tuple_sorted_by_0.<locals>.<genexpr>)r}   rt   )zsr   r   r   r     s   r   c            !   	     s  t | i |}|dur|S t| \}}} t|tsJ t| ts"J |dd}i  |dd}|du r6d}|rBtdt|  |du rKtj|  n|dd	 | D } d
d	 t	t
|| D }i t	|D ]D\}}| | j}	t	|D ]6\}
}|	|
 }| v r| dkr||< qs|d| fvr| }tdt||||f qs||< qsqf|du rttj|fdd	ttD }nZtd sd|vrdv rtdt|dtdd  D d}ttj||D ]}|vrtdt| qt|tt|kr|D ]}||dkrtdt| qt||  t| dkrtdd | D rEtjtfdd|D dS tt| D ]O}| | }d|jv rg }g }t	|| D ]\}
}|j|
 dkrt||
 qa|| qa|||< tj|t|d| |< | | jt|| ksJ ~qKt| dk}t	|D ]@\}}t  |||< ttjtfddt	|D rd}fdd	t	|D ||< | | jd| |< q|rdd	 | D } n
 fd d	| D } t!t"d!}|du rttt| g}not|r"|d" d#kr"|dd }n\z&t|dkrAt|d t#t$frA||d"  }t#|d }n|| }d$}W n tt%fyZ   td%t&| w d&d	 |D }t|}||||}td'd |D r~t'(d(t)j* t+|D ];\}}| |}||}| |}||}tt|tj|}t,|||||\}}| | || ~~q| \}|\}g } |D ]}||v r| |-| q|.| /fd)d	|D }|s|j0ksJ |S )*a  einsum(subscripts, *operands, dtype=None, optimize=False)

    Evaluates the Einstein summation convention on the operands.
    Using the Einstein summation convention, many common multi-dimensional
    array operations can be represented in a simple fashion. This function
    provides a way to compute such summations.

    .. note::

       - Memory contiguity of the returned array is not always compatible with
         that of :func:`numpy.einsum`.
       - ``out``, ``order``, and ``casting`` options are not supported.
       - If :envvar:`CUPY_ACCELERATORS` includes ``cutensornet``, the `einsum`
         calculation will be performed by the cuTensorNet backend if possible.

           - The support of the ``optimize`` option is limited (currently, only
             `False`, 'cutensornet', or a custom path for pairwise contraction
             is supported, and the maximum intermediate size is ignored). If
             you need finer control for path optimization, consider replacing
             :func:`cupy.einsum` by :func:`cuquantum.contract` instead.
           - Requires `cuQuantum Python`_ (v22.03+).

       - If :envvar:`CUPY_ACCELERATORS` includes ``cutensor``, `einsum` will be
         accelerated by the cuTENSOR backend whenever possible.

    Args:
        subscripts (str): Specifies the subscripts for summation.
        operands (sequence of arrays): These are the arrays for the operation.
        dtype: If provided, forces the calculation to use the data type
            specified. Default is None.
        optimize: Valid options include {`False`, `True`, 'greedy', 'optimal'}.
            Controls if intermediate optimization should occur. No optimization
            will occur if `False`, and `True` will default to the 'greedy'
            algorithm. Also accepts an explicit contraction list from
            :func:`numpy.einsum_path`. Defaults to `False`. If a pair is
            supplied, the second argument is assumed to be the maximum
            intermediate size created.

    Returns:
        cupy.ndarray:
            The calculation based on the Einstein summation convention.

    .. seealso:: :func:`numpy.einsum`
    .. _cuQuantum Python: https://docs.nvidia.com/cuda/cuquantum/python/
    Nr   optimizeFTgreedyz+Did not understand the following kwargs: %sc                 S  r3   r   )r5   
asanyarrayr   rd   r   r   r   r9     s    zeinsum.<locals>.<listcomp>c                 S  s$   g | ]\}\}}t |||jd qS ))rZ   )ra   rZ   )r   rY   r]   rd   r   r   r   r9     s    
r
   zJSize of label '%s' for operand %d (%d) does not match previous terms (%d).c                   s&   g | ]}|d k s  |dkr|qS )r   r
   )rA   rR   )tmp_subscriptsr   r   r9     s
    r   r    rr   zoutput has more dimensions than subscripts given in einstein sum, but no '...' ellipsis provided to broadcast the extra dimensions.c                 s  s    | ]}|d k V  qdS rp   r   rR   r   r   r   r   -  rU   zeinsum.<locals>.<genexpr>)r[   z^einstein sum subscripts string included output subscript '%s' which never appeared in an inputr.   zLeinstein sum subscripts string includes output subscript '%s' multiple timesc                 s  s    | ]}|j d kV  qdS rp   )r   r   r   r   r   r   C  r   c                 3  s    | ]} | V  qd S r   r   rR   dimension_dictr   r   r   E  rU   )r   )r   c                 3  s     | ]\}}| vr|V  qd S r   r   r   r   rM   )other_subscriptsr   r   r   b  s    c                   s   g | ]
\}}| vr|qS r   r   r   )sum_axesr   r   r9   i  s
    )r   r   c                 S  s   g | ]}|  qS r   )r   r   r   r   r   r   r9   s  rS   c                   s"   g | ]}|j fd di qS )copyF)astyper   )casting_kwargsresult_dtyper   r   r9   u  s    )r   optimalr   einsum_pathl        z*Did not understand the path (optimize): %sc                 S  rO   r   )rf   )r   r]   r   r   r   r9     rS   c                 s  s    | ]	}t |d kV  qdS )r.   N)r;   )r   rv   r   r   r   r     s    z,memory efficient einsum is not supported yetc                   r   r   r   rR   r   r   r   r9     s    )1r   rK   r=   r?   rC   r$   keysr5   result_typerg   rk   r   r<   rN   	itertoolschainfrom_iterablert   rf   rj   ra   r   r;   rA   ro   anyzerosr}   rW   r   squeezerZ   r   r   r   intfloatKeyErrorr>   warningswarnr   PerformanceWarningrw   r   r#   r{   r|   r   )!rF   kwargsoutrH   rI   r   r   rY   r]   shr   rM   dimdim_oldrd   squeeze_indicesreturns_viewoptimize_algorithmsru   algomemory_limit
input_sets
output_setidx0idx1r   r   r   r   r   r   r   r~   r   )r   r   r   r   r   r   r   einsum  sR  .











"








r   )NN)"
__future__r   r   r   r"   stringr   r5   
cupy._corer   r   cupy.linalg._einsum_optr   r   cupy.linalg._einsum_cutnr   rj   ascii_uppercaseascii_lowercaser%   r   r*   rK   rN   ra   ro   rw   r   r   r   r   r   r   r   r   r   r   r   <module>   s<    f
2"$I