o
    ir2                     @   sD  d dl Z d dlZd dlmZmZ d dlZd dlmZ d dlmZ d dlmZ e	e
e
e
f Ze	eef Zeeee
e
f f Zee	eee
f  Ze	edf Ze jG dd dZd	ed
edefddZded	ed
edefddZde	edf de	e
df de	edf fddZde	edf de	edf fddZde	e
df dee
ee	edf  f deeef fddZdddee	e
df ge	e
df f dee	edf ge	edf f ddfdd Zd3d!d"Zd3d#d$Zd3d%d&Zd3d'd(Z	d4d)dd*dd+ed ddfd,d-Zd.ee
 d/ee
 d0eee e
   dee
ee	edf  f fd1d2Z!dS )5    N)CallableOptional)_array)_chunk)_modes.c                   @   s2   e Zd ZU ee ed< ee ed< ee ed< dS )	_Blockingi_partitionsj_partitionsk_partitionsN)__name__
__module____qualname__listint__annotations__ r   r   \/home/ubuntu/veenaModal/venv/lib/python3.10/site-packages/cupyx/distributed/array/_linalg.pyr      s   
 	r   location_map_alocation_map_breturnc                 C   s   g }g }g }dd }|   D ]\}}||| ||| q|  D ]\}}||| ||| q!dd }	|	|}|	|}|	|}dd }
|   D ]\}}|
|| |
|| qH|  D ]\}}|
|| |
|| q[t|||S )Nc                 S   s2   | \}}}|dkrt d|| || d S )N   z"Step other than 1 is not supported)RuntimeErrorappend)indices
partitionsstartstopstepr   r   r   add_to_partitions6   s
   

z)_find_blocking.<locals>.add_to_partitionsc                 S   sX   t | dkr
td|   | d g}t| | dd  D ]\}}||kr)|| q|S )Nr   zArray has no chunkr   )lenr   sortzipr   )r   resxyr   r   r   to_unique_sortedG   s   

z(_find_blocking.<locals>.to_unique_sortedc                 S   s.   | \}}}| |d | |krtdd S )Nr   zInconsistent index mapping)indexr   )r   r   r   r   _r   r   r   check_indicesX   s   
z%_find_blocking.<locals>.check_indices)keysr   )r   r   r   r	   r
   r   	i_indices	k_indices	j_indicesr%   r(   r   r   r   _find_blocking-   s,   	



r-   blockingc                 C   s   | j }| j}| j}g }t||dd  D ]V}t||dd  D ]J}t||dd  D ]>}	|d |	d f}
|	d |d f}t||
  }t||  }||@ }|r_| }||
||f q*td|
 d| qq|S )Nr   r   zAThere is no device that can perform multiplication between block z and )	r   r	   r
   r!   setr)   popr   r   )r.   r   r   r   r	   r
   plani_rangej_rangek_rangeblock_ablock_b	devices_a	devices_bintersectiondevr   r   r   _make_execution_planh   s2   r<   slicesshapec                 C   s,   t | t |ks
J tdd t| |D S )Nc                 s   s    | ]
\}}| |V  qd S N)r   ).0slengthr   r   r   	<genexpr>   s    z%_convert_to_tuples.<locals>.<genexpr>)r   tupler!   )r=   r>   r   r   r   _convert_to_tuples   s   rE   tuplesc                 C   s   t dd | D S )Nc                 s   s    | ]}t | V  qd S r?   slice)r@   tr   r   r   rC      s    z%_convert_to_slices.<locals>.<genexpr>)rD   )rF   r   r   r   _convert_to_slices   s   rJ   	index_mapc                 C   s|   i }|  D ]5\}}t|D ],\}}t|| }|d d |dd  }}	tt|	}	||i }
|
|	i }|||< qq|S )N)items	enumeraterE   typingcast	_BlockIdx
setdefault)r>   rK   location_mapsr;   idxschunk_iidx
idx_tuples	batch_idx	block_idxlocation_maplocationr   r   r   _group_by_batch   s   

	r\   arr_array.DistributedArrayf_shapef_idxc                    sl   dt jdt jf fddi }| j D ]\}}fdd|D ||< q| j}t|| j|| j| j	S )Nchunkr   c                    sH   | j | j j} | j} fdd| jD }t|| j||| jS )Nc                    s   g | ]
\}}| |fqS r   r   )r@   datarV   )r`   r   r   
<listcomp>   s    z>_reshape_array_with.<locals>.reshape_chunk.<locals>.<listcomp>)	arrayreshaper>   r&   updatesr   _Chunkready
prevent_gc)ra   rb   r&   rf   )r`   r_   r   r   reshape_chunk   s   
z*_reshape_array_with.<locals>.reshape_chunkc                    s   g | ]} |qS r   r   )r@   ra   )rj   r   r   rc      s    z'_reshape_array_with.<locals>.<listcomp>)
r   rg   _chunks_maprM   r>   r   DistributedArraydtype_mode_comms)r]   r_   r`   
chunks_mapr;   chunksr>   r   )r`   r_   rj   r   _reshape_array_with   s   
rr   c                 C      t | dd dd S )Nc                 S   s   d|  S Nr/   r   r>   r   r   r   <lambda>       z'_prepend_one_to_shape.<locals>.<lambda>c                 S   s   t d f|  S r?   rG   rV   r   r   r   rv          rr   r]   r   r   r   _prepend_one_to_shape   
   r|   c                 C   rs   )Nc                 S   s   | d S rt   r   ru   r   r   r   rv      rw   z&_append_one_to_shape.<locals>.<lambda>c                 S   s   | t d f S r?   rG   rx   r   r   r   rv      ry   rz   r{   r   r   r   _append_one_to_shape   r}   r~   c                 C   &   | j d dks	J t| dd dd S )Nr   c                 S      | d d S Nr   r   ru   r   r   r   rv          z!_pop_from_shape.<locals>.<lambda>c                 S   r   r   r   rx   r   r   r   rv      r   r>   rr   r{   r   r   r   _pop_from_shape      r   c                 C   r   )Nr   r   c                 S      | dd  S Nr   r   ru   r   r   r   rv      r   z'_pop_front_from_shape.<locals>.<lambda>c                 S   r   r   r   rx   r   r   r   rv      r   r   r{   r   r   r   _pop_front_from_shape   r   r   aboutc           "   
   K   s  |durt ddD ]}||v rt d| dq
t| tjr%t|tjs)t d| tj} |tj}d }}| jdkrDd	}t| } |jdkrOd	}t	|}| j
d
d \}}|j
d
d \}	}
||	kss| j
dd
 |j
dd
 krwtdt| j
| j}t|j
|j}| | krt ddd | jD }d}| D ]}|| }|| }t||}t|||}t|}|D ]y\}}}|| }|| }| j| ||  }|j| ||  }|tj |tj |t|d  t|d  f }| 3}||j tjjj|j|jfi |}tj||  |||fd}|| !| |j"}W d   n	1 s.w   Y  qq| j
dd
 ||
f } t| ||tj#| j$}!|rRt%|!}!|rYt&|!}!|!S )ak  Matrix multiplication between distributed arrays.

    The arguments must have compatible :attr:`~DistributedArray.shape` and
    :attr:`~DistributedArray.index_map`.

    This operation converts its operands into the replica mode, and compute
    their product in the sum mode.

    Args:
        a, b: Input distributed arrays.
        out (optional): A location into which the result is stored. This option
            is currently not supported.
    Returns:
        The matrix product of the inputs.

    Example:
        >>> A = distributed_array(
        ...     cupy.arange(6).reshape(2, 3),
        ...     make_2d_index_map([0, 2], [0, 1, 3],
        ...                       [[{0}, {1, 2}]]))
        >>> B = distributed_array(
        ...     cupy.arange(12).reshape(3, 4),
        ...     make_2d_index_map([0, 1, 3], [0, 2, 4],
        ...                       [[{0}, {0}],
        ...                        [{1}, {2}]]))
        >>> C = A @ B
        >>> C.mode
        'sum'
        >>> C.all_chunks()
        {0: [array([[0, 0],
                    [0, 3]]),
             array([[0, 0],
                    [6, 9]])],
         1: [array([[20, 23],
                    [56, 65]])],
         2: [array([[26, 29],
                    [74, 83]])]}
        >>> C
        array([[20, 23, 26, 29],
               [56, 68, 80, 92]])

    .. seealso:: :obj:`numpy.matmul`
    NzArgument `out` is not supported)subokaxesaxisz
Argument `z` is not supportedzHMixing a distributed array with a non-distributed array is not supportedFr   TrL   zShapes are incompatiblezMismatched batch shapesc                 S   s   i | ]}|g qS r   r   )r@   r;   r   r   r   
<dictcomp>,  s    zmatmul.<locals>.<dictcomp>r   )ri   )'r   
isinstancer   rl   _to_op_moder   REPLICAndimr|   r~   r>   
ValueErrorr\   rK   r)   devicesr-   r<   rJ   rk   flushrH   on_ready
wait_eventrh   cupylinalg_productmatmulrd   r   rg   recordr   rm   SUMro   r   r   )"r   r   r   kwargsparamone_prependedone_appendednmm2plocation_maps_alocation_maps_brp   rm   rX   r   r   r.   r2   index_prefixr6   r7   r;   loc_aloc_bchunk_achunk_br&   streamchunk_ab_arraychunk_abr>   r"   r   r   r   r      s   /


$


r   r   r	   r   c                 C   s  | d dksJ t t| | ksJ |d dksJ t t||ks$J i }t|t| d ks2J tt|D ]K}t|| t|d ksHJ tt|| D ]2}| | }| |d  }|| }||d  }	t||t||	f}
|| | D ]}||g |
 qvqPq8|S )a  Create an ``index_map`` for a 2D matrix with a specified blocking.

    Args:
        i_partitions (list of ints): boundaries of blocks on the `i` axis
        j_partitions (list of ints): boundaries of blocks on the `j` axis
        devices (2D list of sets of ints): devices owning each block

    Returns:
        dict from int to array indices: index_map
            Indices for the chunks that devices with designated IDs are going
            to own.

    Example:
        >>> index_map = make_2d_index_map(
        ...     [0, 2, 4], [0, 3, 5],
        ...     [[{0}, {1}],
        ...      [{2}, {0, 1}]])
        >>> pprint(index_map)
        {0: [(slice(0, 2, None), slice(0, 3, None)),
             (slice(2, 4, None), slice(3, 5, None))],
         1: [(slice(0, 2, None), slice(3, 5, None)),
             (slice(2, 4, None), slice(3, 5, None))],
         2: [(slice(2, 4, None), slice(0, 3, None))]}
    r   r   )sortedr0   r   rangerH   rR   r   )r   r	   r   rK   iji_starti_stopj_startj_stoprV   r;   r   r   r   make_2d_index_mapX  s&   r   )r   r^   r?   )"dataclassesrO   r   r   r   cupyx.distributed.arrayr   r   r   rD   r   _SliceIndicesrQ   dict_BlockLocationMapr   _ExecutionPlan	_BatchIdx	dataclassr   r-   r<   rH   rE   rJ   r\   rr   r|   r~   r   r   r   r0   r   r   r   r   r   <module>   s    
;
#
















{