o
    i2                     @   s   d dl Z ddlmZ ddlmZmZmZ ddlmZ ddl	m
Z
mZmZmZmZmZmZ ddlmZ dd	lmZ dd
lmZ edG dd deZedurXeddddZndZdd ZdS )    N   )registry)cublascupycupyx)DeviceTypes)is_cupy_arrayis_mxnet_gpu_arrayis_tensorflow_gpu_arrayis_torch_cuda_arraymxnet2xptensorflow2xptorch2xp   )_custom_kernels)NumpyOps)OpsCupyOpsc                       s,  e Zd ZdZeZeZ	d[dede	ddfdd	Z
dd
ddZ fddZd\ fdd	Zd\ fdd	Zd\ fdd	Zd\ fdd	Zd]ddZd^ddZd_ fdd	Z fdd Z fd!d"Zd\d#d$Zd\d%d&Z	'	(	(	'	d`d)ed*ed+ed,ed-ef
 fd.d/Z	'	(	(	'	d`d)ed*ed+ed,ed-ef
 fd0d1Zd\d-ef fd2d3Zd\d-ef fd4d5Zda fd7d8	Zda fd9d:	Zd\ fd;d<	Z d\ fd=d>	Z!d?d@ Z"ddA fdBdC
Z#ddA fdDdE
Z$ fdFdGZ% fdHdIZ& fdJdKZ' fdLdMZ( fdNdOZ) fdPdQZ*dRdS Z+dTdU Z,	'dbdVdWZ-dcdYdZZ.  Z/S )dr   r   gpur   device_type	device_idreturnNc                 K   s   || _ || _d S N)r   r   )selfr   r   kwargs r   K/home/ubuntu/.local/lib/python3.10/site-packages/thinc/backends/cupy_ops.py__init__   s   
zCupyOps.__init__)
byte_orderc                C   s6   t |tjs
| }|r|j|}tj||d}|S )Ndtype)
isinstancenumpyndarraygetr    newbyteorderasarray)r   datar   r    r   r   r   to_numpy    s   zCupyOps.to_numpyc                    s$   |j dv rt||S t ||S Nfloat32float64)r    r   
gather_addsuper)r   tableindices	__class__r   r   r-   (   s   
zCupyOps.gather_addFc                    s(   |j dv rtj||dS t j||dS Nr*   inplace)r    r   dishr.   r   Xr5   r1   r   r   r6   .   s   
zCupyOps.dishc                    8   |j |j kr|j dv rtj|||dS t j|||dS r3   )r    r   backprop_dishr.   r   dYr8   r5   r1   r   r   r:   4      zCupyOps.backprop_dishc                    *   |j dv rtj||ddS t j||dS Nr*   g      @r5   	thresholdr4   )r    r   gelur.   r7   r1   r   r   rB   :      
zCupyOps.geluc                    s:   |j |j kr|j dv rtj|||ddS t j|||dS r?   )r    r   backprop_gelur.   r;   r1   r   r   rD   @   s   zCupyOps.backprop_geluc                 C   s`   t |tjst |tjrtd|r|j}|r|j}|d u r%| j||S | jj|||d |S )NzaEncountered a numpy array when processing with cupy. Did you call model.ops.asarray on your data?)out)r!   r"   r#   
ValueErrorTxpdot)r   xyrE   trans1trans2r   r   r   gemmF   s   zCupyOps.gemmc                 C   sz   t |r| jj||d}n#t|rt|}nt|rt|}nt|r(t|}n| jj	||d}|d ur;|j
|dd}|S )Nr   F)r    copy)r   rH   r&   r   r   r
   r   r	   r   arrayastype)r   r'   r    rP   r   r   r   r&   V   s   


zCupyOps.asarrayr   c                    s   |st dttdd |D dkrt dttdd |D dkr(t dttdd |D dkr9t d	td
d |D rI|d jdvrPt ||S t||S )zPerform padding on a list of arrays so that they each have the same
        length, by taking the maximum dimension across each axis. This only
        works on non-empty sequences with the same `ndim` and `dtype`.
        zCannot pad empty sequencec                 s       | ]}|j V  qd S r   )ndim.0seqr   r   r   	<genexpr>p       zCupyOps.pad.<locals>.<genexpr>r   z)Cannot pad sequences with different ndimsc                 s   rR   r   r   rT   r   r   r   rW   r   rX   z*Cannot pad sequences with different dtypesc                 s   s    | ]
}|j d d V  qdS )r   N)shaperT   r   r   r   rW   t   s    z4Cannot pad sequences that differ on other dimensionsc                 s   s    | ]}|j d  V  qdS )C_CONTIGUOUSN)flagsrT   r   r   r   rW   x   s    r   )r+   r,   int32int64)rF   lensetallr    r.   padr   )r   seqsround_tor1   r   r   ra   h   s    zCupyOps.padc                    s    |j dv r
t|S t |S r)   )r    r   maxoutr.   )r   r8   r1   r   r   rd      s   

zCupyOps.maxoutc                    s2   |j dv r|j dkrt|||S t |||S Nr*   r\   )r    r   backprop_maxoutr.   )r   r<   whichPr1   r   r   rf      s   zCupyOps.backprop_maxoutc                 C   s    |s||dk S ||dk9 }|S Nr   r   r7   r   r   r   relu   s   zCupyOps.reluc                 C   s    |s||dk S ||dk9 }|S ri   r   )r   r<   Yr5   r   r   r   backprop_relu   s   zCupyOps.backprop_relu      ?        slopeoffsetmin_valmax_valr5   c                    s8   |j dv rtj||||||dS t j||||||dS )Nr*   )r5   ro   rp   rq   rr   )r    r   clipped_linearr.   )r   r8   ro   rp   rq   rr   r5   r1   r   r   rs      s"   
		zCupyOps.clipped_linearc              	      sH   |j |j kr|j dv rtj|||||||dS t j|||||||dS )Nr*   )ro   rp   rq   rr   r5   )r<   r8   ro   rp   rq   rr   r5   )r    r   backprop_clipped_linearr.   )r   r<   r8   ro   rp   rq   rr   r5   r1   r   r   rt      s&   

zCupyOps.backprop_clipped_linearc                    r9   r3   )r    r   backprop_hard_swishr.   r;   r1   r   r   ru      r=   zCupyOps.backprop_hard_swishc                    r9   r3   )r    r   backprop_hard_swish_mobilenetr.   r;   r1   r   r   rv      r=   z%CupyOps.backprop_hard_swish_mobilenet      4@c                    s*   |j dv rtj|||dS t |||S Nr*   r@   )r    r   mishr.   )r   r8   rA   r5   r1   r   r   ry      rC   zCupyOps.mishc                    s:   |j |j kr|j dv rtj||||dS t ||||S rx   )r    r   backprop_mishr.   )r   r<   r8   rA   r5   r1   r   r   rz      s
   zCupyOps.backprop_mishc                    r>   Nr*   g      1@r@   r4   )r    r   swishr.   r7   r1   r   r   r|      rC   zCupyOps.swishc                    sP   |j |j   kr|j krn n|j dv rtj||||ddS t j||||dS r{   )r    r   backprop_swishr.   )r   r<   r8   rk   r5   r1   r   r   r}      s
   (
zCupyOps.backprop_swishc                 C   s0   dd }t ||d}|t ||| 9 }|S )Nc                 S   s   |  d}t|S )N)reshaper   nrm2)r8   X_vecr   r   r   frobenius_norm  s   

z-CupyOps.clip_gradient.<locals>.frobenius_normg-q=)r   maximumminimum)r   gradientrA   r   	grad_normr   r   r   clip_gradient   s   zCupyOps.clip_gradientlengthsc                   s>   |j dv r|du s|j dkrtj|||dS t j|||dS )zGiven an (M, N) sequence of vectors, return an (M, N*(nW*2+1)) sequence.
        The new sequence is constructed by concatenating nW preceding and succeeding
        vectors onto each column in the sequence, to extract a window of features.
        r*   Nr\   r   )r    r   seq2colr.   )r   rV   nWr   r1   r   r   r   
  s   
zCupyOps.seq2colc                   s>   |j dv r|d u s|j dkrtj|||dS t j|||dS Nr*   r\   r   )r    r   backprop_seq2colr.   )r   r<   r   r   r1   r   r   r     s   
zCupyOps.backprop_seq2colc                    s4   |j dv r|j dkrtj||dS t || d S r   )r    r   reduce_meanr.   r   r8   r   r1   r   r   r     s   zCupyOps.reduce_meanc                    2   |j dv r|j dkrt||S t || d S re   )r    r   backprop_reduce_meanr.   )r   d_meansr   r1   r   r   r   $     zCupyOps.backprop_reduce_meanc                    r   re   )r    r   
reduce_maxr.   r   r1   r   r   r   *  r   zCupyOps.reduce_maxc                    s@   |j dv r|j dkr|j dkrt|||S t ||| d S re   )r    r   backprop_reduce_maxr.   )r   d_maxesrg   r   r1   r   r   r   0  s
   


zCupyOps.backprop_reduce_maxc                    .   |j dv r|j dkrt||S t ||S re   )r    r   
reduce_sumr.   r   r1   r   r   r   :     zCupyOps.reduce_sumc                    r   re   )r    r   backprop_reduce_sumr.   )r   d_sumsr   r1   r   r   r   @  r   zCupyOps.backprop_reduce_sumc                 C   s   t ||S r   )r   hash)r   idsseedr   r   r   r   F  s   zCupyOps.hashc                 C   s   | j ||| d S r   )_xp2scatter_add)r   r/   r0   valuesr   r   r   r   I  s   zCupyOps.scatter_addc
           
   	   C   sR   t || t || t || t||d| d| |||| |d ||||fS )Nr   r   )_check_compatible_shapeadam_kernelfill)
r   weightsr   mom1mom2beta1beta2eps
learn_ratemod_rater   r   r   adamL  s   



zCupyOps.adam'  c                 C   s   t  j||||d}| |S )N)periodrE   )r   position_encoder&   )r   NDr   rE   	positionsr   r   r   r   Y  s   
zCupyOps.position_encode)r   r   )F)NFFr   )r   )rm   rn   rn   rm   F)rw   F)rm   )r   N)0__name__
__module____qualname__namer   rH   r   r   r   intr   r(   r-   r6   r:   rB   rD   rN   r&   ra   rd   rf   rj   rl   floatboolrs   rt   ru   rv   ry   rz   r|   r}   r   r   r   r   r   r   r   r   r   r   r   r   r   __classcell__r   r   r1   r   r      s    




	 

z9T grad, T lr, T one_minus_beta1, T one_minus_beta2, T epszT param, T m, T vzm += one_minus_beta1 * (grad - m);
        v += one_minus_beta2 * (grad * grad - v);
        param -= lr * m / (sqrt(v) + eps);r   c                 C   s,   | j |j krd| j  d|j  }t|d S )Nz!arrays have incompatible shapes: z and )rY   rF   )uvmsgr   r   r   r   k  s   r   )r"    r   compatr   r   r   typesr   utilr   r	   r
   r   r   r   r   r   	numpy_opsr   opsr   r   ElementwiseKernelr   r   r   r   r   r   <module>   s*    $	  K	