o
    X۷i~2                     @  s4  d dl mZ d dlmZ d dlZd dlZd dlZd dlmZ d dlm	Z	 d dlm
Z
 eeeef Zeeef Zeeeeef f Zeeeeef  Zeedf ZejG dd	 d	Zd@ddZdAddZdBddZdCddZdDd!d"ZdEd)d*ZdFd+d,ZdFd-d.ZdFd/d0ZdFd1d2Z	dGdHd7d8Z dId>d?Z!dS )J    )annotations)CallableN)_array)_chunk)_modes.c                   @  s&   e Zd ZU ded< ded< ded< dS )	_Blocking	list[int]i_partitionsj_partitionsk_partitionsN)__name__
__module____qualname____annotations__ r   r   U/home/ubuntu/vllm_env/lib/python3.10/site-packages/cupyx/distributed/array/_linalg.pyr   !   s   
 	r   location_map_a_BlockLocationMaplocation_map_breturnc                 C  s   g }g }g }dd }|   D ]\}}||| ||| q|  D ]\}}||| ||| q!dd }	|	|}|	|}|	|}dd }
|   D ]\}}|
|| |
|| qH|  D ]\}}|
|| |
|| q[t|||S )Nc                 S  s2   | \}}}|dkrt d|| || d S )N   z"Step other than 1 is not supported)RuntimeErrorappend)indices
partitionsstartstopstepr   r   r   add_to_partitions8   s
   

z)_find_blocking.<locals>.add_to_partitionsc                 S  sX   t | dkr
td|   | d g}t| | dd  D ]\}}||kr)|| q|S )Nr   zArray has no chunkr   )lenr   sortzipr   )r   resxyr   r   r   to_unique_sortedI   s   

z(_find_blocking.<locals>.to_unique_sortedc                 S  s.   | \}}}| |d | |krtdd S )Nr   zInconsistent index mapping)indexr   )r   r   r   r   _r   r   r   check_indicesZ   s   
z%_find_blocking.<locals>.check_indices)keysr   )r   r   r	   r
   r   r   	i_indices	k_indices	j_indicesr%   r(   r   r   r   _find_blocking/   s,   	



r-   blocking_ExecutionPlanc                 C  s   | j }| j}| j}g }t||dd  D ]V}t||dd  D ]J}t||dd  D ]>}	|d |	d f}
|	d |d f}t||
  }t||  }||@ }|r_| }||
||f q*td|
 d| qq|S )Nr   r   zAThere is no device that can perform multiplication between block z and )	r	   r
   r   r!   setr)   popr   r   )r.   r   r   r	   r
   r   plani_rangej_rangek_rangeblock_ablock_b	devices_a	devices_bintersectiondevr   r   r   _make_execution_planj   s2   r=   slicestuple[slice, ...]shapetuple[int, ...]tuple[_SliceIndices, ...]c                 C  s,   t | t |ks
J tdd t| |D S )Nc                 s  s    | ]
\}}| |V  qd S N)r   ).0slengthr   r   r   	<genexpr>   s    z%_convert_to_tuples.<locals>.<genexpr>)r   tupler!   )r>   r@   r   r   r   _convert_to_tuples   s   rI   tuplesc                 C  s   t dd | D S )Nc                 s  s    | ]}t | V  qd S rC   slice)rD   tr   r   r   rG      s    z%_convert_to_slices.<locals>.<genexpr>)rH   )rJ   r   r   r   _convert_to_slices   s   rN   	index_map"dict[int, list[tuple[slice, ...]]]"dict[_BatchIdx, _BlockLocationMap]c                 C  s|   i }|  D ]5\}}t|D ],\}}t|| }|d d |dd  }}	tt|	}	||i }
|
|	i }|||< qq|S )N)items	enumeraterI   typingcast	_BlockIdx
setdefault)r@   rO   location_mapsr<   idxschunk_iidx
idx_tuples	batch_idx	block_idxlocation_maplocationr   r   r   _group_by_batch   s   

	rb   arr_array.DistributedArrayf_shape,Callable[[tuple[int, ...]], tuple[int, ...]]f_idx0Callable[[tuple[slice, ...]], tuple[slice, ...]]c                   s`   d fddi }| j  D ]\}}fdd|D ||< q| j}t|| j|| j| jS )	Nchunk_chunk._Chunkr   c                   sH   | j | j j} | j} fdd| jD }t|| j||| jS )Nc                   s   g | ]
\}}| |fqS r   r   )rD   datar\   )rg   r   r   
<listcomp>   s    z>_reshape_array_with.<locals>.reshape_chunk.<locals>.<listcomp>)	arrayreshaper@   r&   updatesr   _Chunkready
prevent_gc)ri   rk   r&   ro   )rg   re   r   r   reshape_chunk   s   
z*_reshape_array_with.<locals>.reshape_chunkc                   s   g | ]} |qS r   r   )rD   ri   )rs   r   r   rl      s    z'_reshape_array_with.<locals>.<listcomp>)ri   rj   r   rj   )_chunks_maprS   r@   r   DistributedArraydtype_mode_comms)rc   re   rg   
chunks_mapr<   chunksr@   r   )rg   re   rs   r   _reshape_array_with   s   
r{   c                 C     t | dd dd S )Nc                 S  s   d|  S Nr0   r   r@   r   r   r   <lambda>       z'_prepend_one_to_shape.<locals>.<lambda>c                 S  s   t d f|  S rC   rK   r\   r   r   r   r          r{   rc   r   r   r   _prepend_one_to_shape   
   r   c                 C  r|   )Nc                 S  s   | d S r}   r   r~   r   r   r   r      r   z&_append_one_to_shape.<locals>.<lambda>c                 S  s   | t d f S rC   rK   r   r   r   r   r      r   r   r   r   r   r   _append_one_to_shape   r   r   c                 C  &   | j d dks	J t| dd dd S )Nr   c                 S     | d d S Nr   r   r~   r   r   r   r          z!_pop_from_shape.<locals>.<lambda>c                 S  r   r   r   r   r   r   r   r      r   r@   r{   r   r   r   r   _pop_from_shape      r   c                 C  r   )Nr   r   c                 S     | dd  S Nr   r   r~   r   r   r   r      r   z'_pop_front_from_shape.<locals>.<lambda>c                 S  r   r   r   r   r   r   r   r      r   r   r   r   r   r   _pop_front_from_shape   r   r   about_array.DistributedArray | Nonec           "   
   K  s  |durt ddD ]}||v rt d| dq
t| tjr%t|tjs)t d| tj} |tj}d }}| jdkrDd	}t| } |jdkrOd	}t	|}| j
d
d \}}|j
d
d \}	}
||	kss| j
dd
 |j
dd
 krwtdt| j
| j}t|j
|j}| | krt ddd | jD }d}| D ]}|| }|| }t||}t|||}t|}|D ]y\}}}|| }|| }| j| ||  }|j| ||  }|tj |tj |t|d  t|d  f }| 3}||j tjjj|j|jfi |}tj||  |||fd}|| !| |j"}W d   n	1 s.w   Y  qq| j
dd
 ||
f } t| ||tj#| j$}!|rRt%|!}!|rYt&|!}!|!S )ak  Matrix multiplication between distributed arrays.

    The arguments must have compatible :attr:`~DistributedArray.shape` and
    :attr:`~DistributedArray.index_map`.

    This operation converts its operands into the replica mode, and compute
    their product in the sum mode.

    Args:
        a, b: Input distributed arrays.
        out (optional): A location into which the result is stored. This option
            is currently not supported.
    Returns:
        The matrix product of the inputs.

    Example:
        >>> A = distributed_array(
        ...     cupy.arange(6).reshape(2, 3),
        ...     make_2d_index_map([0, 2], [0, 1, 3],
        ...                       [[{0}, {1, 2}]]))
        >>> B = distributed_array(
        ...     cupy.arange(12).reshape(3, 4),
        ...     make_2d_index_map([0, 1, 3], [0, 2, 4],
        ...                       [[{0}, {0}],
        ...                        [{1}, {2}]]))
        >>> C = A @ B
        >>> C.mode
        'sum'
        >>> C.all_chunks()
        {0: [array([[0, 0],
                    [0, 3]]),
             array([[0, 0],
                    [6, 9]])],
         1: [array([[20, 23],
                    [56, 65]])],
         2: [array([[26, 29],
                    [74, 83]])]}
        >>> C
        array([[20, 23, 26, 29],
               [56, 68, 80, 92]])

    .. seealso:: :obj:`numpy.matmul`
    NzArgument `out` is not supported)subokaxesaxisz
Argument `z` is not supportedzHMixing a distributed array with a non-distributed array is not supportedFr   TrR   zShapes are incompatiblezMismatched batch shapesc                 S  s   i | ]}|g qS r   r   )rD   r<   r   r   r   
<dictcomp>.  s    zmatmul.<locals>.<dictcomp>r   )rr   )'r   
isinstancer   ru   _to_op_moder   REPLICAndimr   r   r@   
ValueErrorrb   rO   r)   devicesr-   r=   rN   rt   flushrL   on_ready
wait_eventrq   cupylinalg_productmatmulrm   r   rp   recordr   rv   SUMrx   r   r   )"r   r   r   kwargsparamone_prependedone_appendednmm2plocation_maps_alocation_maps_bry   rv   r^   r   r   r.   r3   index_prefixr7   r8   r<   loc_aloc_bchunk_achunk_br&   streamchunk_ab_arraychunk_abr@   r"   r   r   r   r      s   /


$


r   r	   r   r
   r   list[list[set[int]]]c                 C  s  | d dksJ t t| | ksJ |d dksJ t t||ks$J i }t|t| d ks2J tt|D ]K}t|| t|d ksHJ tt|| D ]2}| | }| |d  }|| }||d  }	t||t||	f}
|| | D ]}||g |
 qvqPq8|S )a  Create an ``index_map`` for a 2D matrix with a specified blocking.

    Args:
        i_partitions (list of ints): boundaries of blocks on the `i` axis
        j_partitions (list of ints): boundaries of blocks on the `j` axis
        devices (2D list of sets of ints): devices owning each block

    Returns:
        dict from int to array indices: index_map
            Indices for the chunks that devices with designated IDs are going
            to own.

    Example:
        >>> index_map = make_2d_index_map(
        ...     [0, 2, 4], [0, 3, 5],
        ...     [[{0}, {1}],
        ...      [{2}, {0, 1}]])
        >>> pprint(index_map)
        {0: [(slice(0, 2, None), slice(0, 3, None)),
             (slice(2, 4, None), slice(3, 5, None))],
         1: [(slice(0, 2, None), slice(3, 5, None)),
             (slice(2, 4, None), slice(3, 5, None))],
         2: [(slice(2, 4, None), slice(0, 3, None))]}
    r   r   )sortedr1   r   rangerL   rX   r   )r	   r
   r   rO   iji_starti_stopj_startj_stopr\   r<   r   r   r   make_2d_index_mapZ  s&   r   )r   r   r   r   r   r   )r.   r   r   r   r   r   r   r/   )r>   r?   r@   rA   r   rB   )rJ   rB   r   r?   )r@   rA   rO   rP   r   rQ   )rc   rd   re   rf   rg   rh   r   rd   )r   rd   rC   )r   rd   r   rd   r   r   r   rd   )r	   r   r
   r   r   r   r   rP   )"
__future__r   collections.abcr   dataclassesrU   r   cupyx.distributed.arrayr   r   r   rH   int_SliceIndicesrW   dictr   listr/   	_BatchIdx	dataclassr   r-   r=   rI   rN   rb   r{   r   r   r   r   r   r   r   r   r   r   <module>   s8    

;
#







{