o
    װi]                     @   s0  d dl Z d dlZd dlZd dlZd dlZd dlZd dlmZ d dlmZ d dl	m
Z
 d dl	mZ d dlmZ zd dlZd dlmZ W n eyM   dZY nw ddd	Zejej Zd
d Zdd Zdd Zdd Zd(ddZdd Zdd Zdd Zdd Zdd Z dd Z!d d! Z"d"d# Z#d$d% Z$d&d' Z%dS ))    N)_accelerator)_util)_greedy_path)_optimal_path)_try_use_cutensornet)cutensorF)sum_ellipsisbroadcast_diagonalc                    sj   g }g }|D ] }| |r j|d  nd t fdd|D }| | q    ||dd  S )zTranspose and diagonal

    Args:
        a
        axeses (sequence of sequences of ints)

    Returns:
        ndarray: a with its axes permutated. A writeable view is returned
        whenever possible.
    r      c                 3   s    | ]} j | V  qd S N)strides.0axisa G/home/ubuntu/.local/lib/python3.10/site-packages/cupy/linalg/_einsum.py	<genexpr>/       z _transpose_ex.<locals>.<genexpr>T)appendshapesumview_set_shape_and_strides)r   axesesr   r   axesstrider   r   r   _transpose_ex   s   r   c                 C   sb   d}| D ]*}|t u r|d7 }qzt|}W n ty' } ztd|d }~ww |t| 7 }q|S )N @z=For this input type lists must contain either int or Ellipsis)Ellipsisoperatorindex	TypeErroreinsum_symbols)list_subscriptstr_subscriptser   r   r   _parse_int_subscript7   s    
r*   c                 C   s  t | dkr
tdt| d tr| d }t| dd }|D ]}|dv r&q|tvr0td| q|dd}d	|v r?td
d|v sGd|v rq|ddkpT|ddk}|d}|sbt |dkrftd|\}}|dd}n|}d}|ddd}t |t |krt |t |krdnd}t|d n1t| } g }g }t | dkr|	| 
d |	t| 
d t | dks| rt| d }nd}|||fS )a  Parse einsum operands.

    This function is based on `numpy.core.einsumfunc._parse_einsum_input`
    function in NumPy 1.14.

    Parameters
    ----------
    args : tuple
        The non-keyword arguments to einsum

    Returns
    -------
    input_strings : str
        Parsed input strings
    output_string : str
        Parsed output string
    operands : list of array_like
        The operands to use in the contraction

    Examples
    --------
    The operand list is simplified to reduce printing:

    >>> a = np.random.rand(4, 4)
    >>> b = np.random.rand(4, 4, 4)
    >>> _parse_einsum_input(('...a,...a->...', a, b))
    (['@a, @a'], 'xz', [a, b])

    >>> _parse_einsum_input((a, [Ellipsis, 0], b, [Ellipsis, 0]))
    (['@a, @a'], 'xz', [a, b])
    r   zmust specify the einstein sum subscripts string and at least one operand, or at least one operand and its corresponding subscripts listr
   Nz.,-> zTinvalid subscript '%s' in einstein sum subscripts string, subscripts must be lettersz...r    .zUeinstein sum subscripts string contains a '.' that is not part of an ellipsis ('...')->z->   zKeinstein sum subscript string does not contain proper '->' output specified r   ,morefewerzS operands provided to einstein sum function than specified in the subscripts string)len
ValueError
isinstancestrlistr%   replacecountsplitr   popr*   )args
subscriptsoperandsr(   invalidinput_subscriptsoutput_subscriptmsgr   r   r   _parse_einsum_inputG   sf   !

rC   c                 C   s   | dk rd|  S t | S )Nr   z...[%d])chr)labelr   r   r   _chr   s   rF   c           	      C   s  |  d}t|dkr5|\}|dur.t||kr.t||kr&td||f td||f dd |D S t|dkr{|\}}|durM|t|t|  }|d	k rZtd
|||f g }|dd |D  |t| d	 |dd |D  |S td|du rd d|  )a  Parse a subscript that may contain ellipsis

    Args:
        subscript (str): An einsum subscript of an operand or an output. '...'
            should be replaced by '@'.
        idx (int or None): For error messages, give int idx for the idx-th
            operand or None for the output.
        ndim (int, optional): ndim of the operand
        ellipsis_len (int, optional): number of broadcast dimensions of the
            output.

    Returns:
        list of ints: The parsed subscript

    r    r
   NzMeinstein sum subscripts string %s contains too many subscripts for operand %dzoperand %d has more dimensions than subscripts string %s given in einstein sum, but no '...' ellipsis provided to broadcast the extra dimensions.c                 S      g | ]}t |qS r   ordr   rE   r   r   r   
<listcomp>       z-_parse_ellipsis_subscript.<locals>.<listcomp>r.   r   zReinstein sum subscripts string %s...%s contains too many subscripts for operand %dc                 s       | ]}t |V  qd S r   rH   rJ   r   r   r   r          z,_parse_ellipsis_subscript.<locals>.<genexpr>c                 s   rM   r   rH   rJ   r   r   r   r      rN   zVeinstein sum subscripts string contains a '.' that is not part of an ellipsis ('...') zin the outputzfor operand %d)r:   r3   r4   extendrange)		subscriptidxndimellipsis_lensubssubleft_sub	right_subretr   r   r   _parse_ellipsis_subscript   sN   

rZ   c                    s  t t| D ]x}| | }||  tt|t|k r~i }t|D ]\}}||g | q t| }|D ]4\}}td rG fdd|D } fdd|D }t|dkrj|	 }	|	 }
t
d|t||	|
f q6t| \}}t|| |< t |||< qdS )	zGCompute diagonal for each operand

    This function mutates args.
    r	   c                    s   g | ]} j | d kr|qS )r
   r   r   arrr   r   rK      s    z%_einsum_diagonals.<locals>.<listcomp>c                    s   h | ]} j | qS r   r[   r   r\   r   r   	<setcomp>       z$_einsum_diagonals.<locals>.<setcomp>r.   zIdimensions in operand %d for collapsing index '%s' don't match (%d != %d)N)rP   r3   set	enumerate
setdefaultr   r7   itemsoptionsr;   r4   rF   zipr   )r@   r>   rR   rV   r   r   rE   r   dimsdim0dim1r   r\   r   _einsum_diagonals   s4   	ri   c                 c   sl    | D ]0}t dd |D sJ t|dkr3t|dd}|d |d fV  |dd D ]}d	|fV  q+qdS )
zDecompose path into binary path

    Args:
        path (sequence of tuples of ints)

    Yields:
        tuple of ints: pair (idx0, idx1) that represents the operation
            {pop(idx0); pop(idx1); append();}
    c                 s   s    | ]}|d kV  qdS r   Nr   )r   rR   r   r   r   r     rN   z#_iter_path_pairs.<locals>.<genexpr>r.   T)reverser   r
   N)allr3   sorted)pathindicesrR   r   r   r   _iter_path_pairs  s   rq   c                    sV   g }g }|D ]}| | | fdd|D  q |tdd |D |fS )zTranspose and flatten each

    Args:
        a
        axeses (sequence of sequences of ints)

    Returns:
        aT: a with its axes permutated and flatten
        shapes: flattened shapes
    c                    s   g | ]} j | qS r   r[   r   r   r   r   rK   (  r_   z&_flatten_transpose.<locals>.<listcomp>c                 S   s   g | ]	}t jj|qS r   )cupy_coreinternalprod)r   r   r   r   r   rK   +  s    )rO   r   	transposereshapetuple)r   r   transpose_axesshapesr   r   r   r   _flatten_transpose  s   

r{   c                 C   s:   t dsdS | |krdS | tjtjtjtjfvrdS dS )NcontractionFT)r   check_availabilityrr   float32float64	complex64
complex128)dtype0sub0dtype1sub1
batch_dimscontract_dimsr   r   r   _use_cutensor0  s   

r   c                    s>   i  t | | || D ]\}}| |< q fdd|D }|S )Nc                       g | ]} | qS r   r   )r   iextentr   r   rK   ?  rL   z"_get_out_shape.<locals>.<listcomp>)re   )shape0r   shape1r   sub_outsizer   	out_shaper   r   r   _get_out_shape;  s
   
r   c                 C   s^   t |}t | j}g }|D ]}||vr|| |d ||| qt| ||S )a  Return a reshaped and transposed array.

    The input array ``arr`` having ``mode`` as its modes is reshaped and
    transposed so that modes of the output becomes ``mode_out``.

    Example
        >>> import cupy
        >>> a = cupy.zeros((10, 20))
        >>> mode_a = ('A', 'B')
        >>> mode_out = ('B', 'C', 'A')
        >>> out = cupy.linalg.einsum._expand_dims_transpose(a, mode_a,
        ...                                                 mode_out)
        >>> out.shape
        (20, 1, 10)

    Args:
        arr (cupy.ndarray):
        mode (tuple or list): The modes of input array.
        mode_out (tuple or list): The modes of output array.

    Returns:
        cupy.ndarray: The reshaped and transposed array.

    r
   )r7   r   r   r#   rr   rv   rw   )r]   modemode_outr   r   r   r   r   r   _expand_dims_transposeC  s   


r   c                    sz  t  }t }t|t ksJ dt|tks J dt dks,tdkr4| |   fS t |}||@ }||@ }	||	 }
t |	|
\}}}t|	|
\}}} fdd|D }|fdd|D kslJ  fdd|D }fdd|D }|| | }t ||ksJ d	t|
dkrt|t|kr|}t|  |} t||}| | |fS t D ]P}|tjkrtd urt| j	 |j	|	|
rt|t|kr|}t
| j |j|}t|| j	}t| } t|}td
|  |d||}||f  S qt| |||g\}}t||||g\}}|d |d  |d  }|d |d ks0J t|||}||fS )Nz%operand 0 should be reduced: diagonalz%operand 1 should be reduced: diagonalr   c                    r   r   r   r   r   r   r   rK   x  rL   z)reduced_binary_einsum.<locals>.<listcomp>c                    r   r   r   r   r   r   r   rK   y  rL   c                    r   r   r   r   r   r   r   rK   z  rL   c                    r   r   r   r   r   r   r   rK   {  rL   z%operands should be reduced: unary sumg      ?g        r
   r.   )r`   r3   _make_transpose_axesr   r   get_routine_acceleratorsACCELERATOR_CUTENSORr   r   dtyper   r   rr   emptyascontiguousarrayr|   r{   matmulrw   )arr0r   arr1r   
sub_othersset0set1
set_otherssharedr   r   bs0cs0ts0bs1cs1ts1sub_bsub_lsub_rr   acceleratorr   arr_outtmp0shapes0tmp1shapes1
shapes_outr   )r   r   r   reduced_binary_einsumg  sj   


r   c                 C   sr   g }g }g }t | D ]#\}}||v r|||f q
||v r&|||f q
|||f q
t|t|t|fS r   )ra   r   _tuple_sorted_by_0)rV   b_dimsc_dimsbscstsr   rE   r   r   r   r     s   r   c                 C   s   t dd t| D S )Nc                 s   s    | ]\}}|V  qd S r   r   )r   _r   r   r   r   r     rN   z%_tuple_sorted_by_0.<locals>.<genexpr>)rx   rn   )zsr   r   r   r     s   r   c            !   	      s  t | i |}|dur|S t| \}}} t|tsJ t| ts"J |dd}i  |dd}|du r6d}|rBtdt|  |du rKtj|  n|dd	 | D } d
d	 t	t
|| D }i t	|D ]D\}}| | j}	t	|D ]6\}
}|	|
 }| v r| dkr||< qs|d| fvr| }tdt||||f qs||< qsqf|du rttj|fdd	ttD }nZtd sd|vrdv rtdt|dtdd  D d}ttj||D ]}|vrtdt| qt|tt|kr|D ]}||dkrtdt| qt||  t| dkrtdd | D rEtjtfdd|D dS tt| D ]O}| | }d|jv rg }g }t	|| D ]\}
}|j|
 dkrt||
 qa|| qa|||< tj|t|d| |< | | jt|| ksJ ~qKt| dk}t	|D ]@\}}t  |||< ttjtfddt	|D rd}fdd	t	|D ||< | | jd| |< q|rdd	 | D } n
 fd d	| D } t!t"d!}|du rttt| g}not|r"|d" d#kr"|dd }n\z&t|dkrAt|d t#t$frA||d"  }t#|d }n|| }d$}W n tt%fyZ   td%t&| w d&d	 |D }t|}||||}td'd |D r~t'(d(t)j* t+|D ];\}}| |}||}| |}||}tt|tj|}t,|||||\}}| | || ~~q| \}|\}g } |D ]}||v r| |-| q|.| /fd)d	|D }|s|j0ksJ |S )*a  einsum(subscripts, *operands, dtype=None, optimize=False)

    Evaluates the Einstein summation convention on the operands.
    Using the Einstein summation convention, many common multi-dimensional
    array operations can be represented in a simple fashion. This function
    provides a way to compute such summations.

    .. note::

       - Memory contiguity of the returned array is not always compatible with
         that of :func:`numpy.einsum`.
       - ``out``, ``order``, and ``casting`` options are not supported.
       - If :envvar:`CUPY_ACCELERATORS` includes ``cutensornet``, the `einsum`
         calculation will be performed by the cuTensorNet backend if possible.

           - The support of the ``optimize`` option is limited (currently, only
             `False`, 'cutensornet', or a custom path for pairwise contraction
             is supported, and the maximum intermediate size is ignored). If
             you need finer control for path optimization, consider replacing
             :func:`cupy.einsum` by :func:`cuquantum.contract` instead.
           - Requires `cuQuantum Python`_ (v22.03+).

       - If :envvar:`CUPY_ACCELERATORS` includes ``cutensor``, `einsum` will be
         accelerated by the cuTENSOR backend whenever possible.

    Args:
        subscripts (str): Specifies the subscripts for summation.
        operands (sequence of arrays): These are the arrays for the operation.
        dtype: If provided, forces the calculation to use the data type
            specified. Default is None.
        optimize: Valid options include {`False`, `True`, 'greedy', 'optimal'}.
            Controls if intermediate optimization should occur. No optimization
            will occur if `False`, and `True` will default to the 'greedy'
            algorithm. Also accepts an explicit contraction list from
            :func:`numpy.einsum_path`. Defaults to `False`. If a pair is
            supplied, the second argument is assumed to be the maximum
            intermediate size created.

    Returns:
        cupy.ndarray:
            The calculation based on the Einstein summation convention.

    .. seealso:: :func:`numpy.einsum`
    .. _cuQuantum Python: https://docs.nvidia.com/cuda/cuquantum/python/
    Nr   optimizeFTgreedyz+Did not understand the following kwargs: %sc                 S   s   g | ]}t |qS r   )rr   
asanyarrayr   r]   r   r   r   rK      s    zeinsum.<locals>.<listcomp>c                 S   s$   g | ]\}\}}t |||jd qS ))rS   )rZ   rS   )r   rR   rV   r]   r   r   r   rK     s    
r
   zJSize of label '%s' for operand %d (%d) does not match previous terms (%d).c                    s&   g | ]}|d k s  |dkr|qS )r   r
   )r9   rJ   )tmp_subscriptsr   r   rK      s
    r   r    rl   zoutput has more dimensions than subscripts given in einstein sum, but no '...' ellipsis provided to broadcast the extra dimensions.c                 s   s    | ]}|d k V  qdS rj   r   rJ   r   r   r   r   .  rN   zeinsum.<locals>.<genexpr>)rT   z^einstein sum subscripts string included output subscript '%s' which never appeared in an inputr.   zLeinstein sum subscripts string includes output subscript '%s' multiple timesc                 s   s    | ]}|j d kV  qdS rj   )r   r   r   r   r   r   D  r   c                 3   s    | ]} | V  qd S r   r   rJ   dimension_dictr   r   r   F  rN   )r   )r   c                 3   s     | ]\}}| vr|V  qd S r   r   r   r   rE   )other_subscriptsr   r   r   c  s    c                    s   g | ]
\}}| vr|qS r   r   r   )sum_axesr   r   rK   j  s
    )r   r   c                 S   s   g | ]}|  qS r   )r   r   r   r   r   r   rK   t  rL   c                    s"   g | ]}|j fd di qS )copyF)astyper   )casting_kwargsresult_dtyper   r   rK   v  s    )r   optimalr   einsum_pathl        z*Did not understand the path (optimize): %sc                 S   rG   r   )r`   )r   rV   r   r   r   rK     rL   c                 s   s    | ]	}t |d kV  qdS )r.   N)r3   )r   rp   r   r   r   r     s    z,memory efficient einsum is not supported yetc                    r   r   r   rJ   r   r   r   rK     s    )1r   rC   r5   r7   r;   r$   keysrr   result_typera   re   r   r4   rF   	itertoolschainfrom_iterablern   r`   rd   rZ   r   r3   r9   ri   anyzerosrx   rP   r   squeezerS   r   r   r   intfloatKeyErrorr6   warningswarnr   PerformanceWarningrq   r   r#   rv   rw   r   )!r>   kwargsoutr@   rA   r   r   rR   rV   shr   rE   dimdim_oldr]   squeeze_indicesreturns_viewoptimize_algorithmsro   algomemory_limit
input_sets
output_setidx0idx1r   r   r   r   r   r   r   ry   r   )r   r   r   r   r   r   r   einsum  sR  .











"








r   )NN)&r   r   r"   stringr   rr   
cupy._corer   r   cupy.linalg._einsum_optr   r   cupy.linalg._einsum_cutnr    cupy_backends.cuda.libs.cutensorcupy_backendscupyxr   ImportErrorrd   ascii_uppercaseascii_lowercaser%   r   r*   rC   rF   rZ   ri   rq   r{   r   r   r   r   r   r   r   r   r   r   r   <module>   sH    a
2"$?