o
    iN                     @   sN  d dl Z d dlZd dlmZmZmZmZmZmZm	Z	m
Z
mZmZmZ d dlZddlmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZm Z m!Z!m"Z"m#Z#m$Z$m%Z%m&Z&m'Z'm(Z(m)Z)m*Z*m+Z+m,Z, ddl-m.Z.m/Z/m0Z0 ddl1m2Z2 e
ded	Z3e
d
e,d	Z4e5dej6 Z7de5d Z8de5dej6  Z9G dd dZ:	 dedededede!deeef fddZ;dede!dedefddZ<dede=de=de=de=f
dd Z>d!d" Z?d#d$ Z@d%ed&ed'ed(ed)edeeef fd*d+ZAd<d,d-ZBd.e3de3fd/d0ZCd.e3de3fd1d2ZDd3e:dedefd4d5ZEd3e:dedefd6d7ZFd8ed9efd:d;ZGdS )=    N)AnyIteratorListOptionalSequenceTupleTypeTypeVarUnioncastoverload   )Array1dArray2dArray3dArray4dArrayXd	BatchableDeviceTypesDTypesDTypesFloat	DTypesIntFloats1dFloats2dFloats3dFloats4dFloatsXd	FloatsXdT	GeneratorInts1dInts2dInts3dInts4dIntsXdList2dListXdPaddedShapeSizedGeneratorXp_Floats)get_array_moduleis_xp_arrayto_numpy   )CBlasArrayT)boundFloatsT       @      ?c                   @   sp  e Zd ZU dZeed< eZeed< 	dede	de
dd	fd
dZdefddZd	dddZddddee
ef dedede
def
ddZddddee
ef dededede
defddZdd Zde
d ee
 fd!d"Zd	d#d$ed%e
d&ee defd'd(Zd	d#d)ed%e
d&ee defd*d+Z				dfd,ed-ed.ee d/ed0edefd1d2Zd3ed4e
defd5d6Zd3ed7ed8e defd9d:Z!e"			;	<dgd3e#e d=ee$ d>e
d?e
def
d@dAZ%e"			;	<dgd3e#e d=ee$ d>e
d?e
def
dBdAZ%e"			;	<dgd3e&d=ee$ d>e
d?e
de'f
dCdAZ%e"			;	<dgd3e(d=ee$ d>e
d?e
de)f
dDdAZ%e"			;	<dgd3e*e) d=ee$ d>e
d?e
de)f
dEdAZ%			;	<dgd3e*e) d=ee$ d>e
d?e
de)f
dFdAZ%e"dhd3ed&ed>e
de#e fdGdHZ+e"dhd3ed&ed>e
de#e fdIdHZ+e"dhd3e'd&ed>e
de&fdJdHZ+e"dhd3e)d&ed>e
de(fdKdHZ+dhd3e)d&ed>e
de(fdLdHZ+e"didMe#e, de-fdNdOZ.e"didMe#e de/fdPdOZ.	didMee#e, e#e f de0fdQdOZ.dRe0d&e#e
 de&fdSdTZ1dMe&de2fdUdVZ3dRe2de&fdWdXZ4dYe5dZee6 de7fd[d\Z8d]d^d_d`e
d=ee9 daede fdbdcZ:d]d^d_d`e
dde
d=ee9 daedef
dedfZ;d]d^d_d`e
dde
dge
d=ee9 daede/fdhdiZ<d]d^d_d`e
dde
dge
dje
d=ee9 daede=fdkdlZ>d]d^d_dYe5d=ee9 daede7fdmdnZ?dod^d_d`e
d=ee@ daedefdpdqZAdod^d_d`e
dde
d=ee@ daede,f
drdsZBdod^d_d`e
dde
dge
d=ee@ daede-fdtduZCdod^d_d`e
dde
dge
dje
d=ee@ daedeDfdvdwZEdod^d_dYe5d=ee@ daedeFfdxdyZGd]d^d_dYe5d=ee$ daedeHfdzd{ZId|e)d`e
deJfd}d~ZKd|e)d`e
dde
de'fddZLd|e)d`e
dde
dge
de0f
ddZMd|e)d`e
dde
dge
dje
deNfddZOd|e7d`e
de fddZPd|e7d`e
dde
defddZQd|e7d`e
dde
dge
de/f
ddZRd|e7d`e
dde
dge
dje
de=fddZSd|e7dYe5de7fddZTd|eFd`e
defddZUd|eFd`e
dde
de,fddZVd|eFd`e
dde
dge
de-f
ddZWd|eFd`e
dde
dge
dje
deDfddZXd|eFdYe5deFfddZYd|eZdYe5deZfddZ[d]ddee=e*e*e*e*e6    f d=ee$ de=fddZ\d]ddee/e*e*e*e6   f d=ee$ de/fddZ]d]ddeee*e*e6  f d=ee$ defddZ^d]ddee e*e6 f d=ee$ de fddZ_d]ddee7e*eH f d=ee$ de7fddZ`doddeee*e
 f d=ee$ defddZadoddee,e*e*e
  f d=ee$ de,fddZbdoddee-e*e*e*e
   f d=ee$ de-fddZcdoddeeDe*e*e*e*e
    f d=ee$ deDfddZddoddeeFe*eH f d=ee$ deFfddZed	ddee)e*e) e*eH f d=ee$ de)fddZfdjdeZd=ee$ deZfddZgddd3ehdedehfddZiddd)ehdehdedehfddZjdddehdedehfddZkdddeldedelfddZmddddd,eldede
de6delf
ddńZndddƜded&edede
def
ddɄZodddʜdeld)elde
de6delf
dd̄Zpd)eded&edefdd΄Zqde de/de/d3ededereerf fddԄZsde de/de/d3ededefddքZtd)ed&ede derderee f f
ddلZud3e/deree,f fddۄZvd)ede,de
de/fdd߄Zwdkd3ededefddZx	dkd)edededefddZy					dld3ehde6de6de6de6dedehfddZz					dld)ehd3ehde6de6de6de6dedehfddZ{dmd3ehde6dedehfddZ|	dmd)ehd3ehde6dedehf
ddZ}dkd3ehdedehfddZ~	dkd)ehd3ehdedehfddZdkd3ehdedehfddZ	dkd)ehd3ehdedehfddZdkd3ehdedehfddZ	dkd)ehd3ehdehdedehf
ddZdkd3ehdedehfdd Z	dkd)ehd3ehdedehfddZdkd3ehdedehfddZ	dkd)ehd3ehdedehfddZdkd3ehdedehfddZ	dkd)ehd3ehdedehfd	d
Zd3ehdehfddZd3ehdehfddZdkd3ehdedehfddZ	dkd)ehd3ehdedehfddZdkd3ehdedehfddZ	dkd)ehd3ehdedehfddZ	dnd3ehde6dedehfddZ		dnd)ehd3ede6dedehf
ddZ	dodeldeld e
d!e6dd	f
d"d#Z	dpde d$e d%e d&e d'e6d(e6d)e6d*e6d+e6dere e e e f fd,d-Zd$elde6delfd.d/Zd0eld1elde6fd2d3Zd3ed&edefd4d5Zd3ed&edereef fd6d7Zd3ed&edereef fd8d9Zd3ed&edefd:d;Zd3ed&ederee,f fd<d=Zd>ed?edefd@dAZdBedCedefdDdEZdFed&edefdGdHZdIed&edefdJdKZdLede,d&edefdMdNZdOedPe
de,fdQdRZde
dSedefdTdUZ		dqdWe
dXe
dYe
d.ee def
dZd[Zd\ed]e,defd^d_Zd\e7d]eFd`e7de7fdadbZdcdd Zd	S (r  Opsbasenamexpcpudevice_type	device_idreturnNc                 K   s   || _ || _d S N)r;   r<   )selfr;   r<   kwargs rA   F/home/ubuntu/.local/lib/python3.10/site-packages/thinc/backends/ops.py__init__@   s   
zOps.__init__c                 C   s   t | j d}t|)zReturn C BLAS function table.z" does not provide C BLAS functions)type__name__NotImplementedError)r?   errrA   rA   rB   cblasF   s   z	Ops.cblas)
byte_orderc                C   s6   t |tjr|r|j|}tj||d}|S td)Ndtypez,Cannot convert non-numpy from base Ops class)
isinstancenumpyndarrayrK   newbyteorderasarray
ValueError)r?   datarI   rK   rA   rA   rB   r-   K   s   zOps.to_numpyFr.   )shufflebuffersizesequencerS   rT   c                   sv   t dsdt }t|tt|trt|n|t	
t fdd}t|tS )a  Iterate slices from a sequence, optionally shuffled. Slices
        may be either views or copies of the underlying data.

        The `size` argument may be either an integer, or a sequence of integers.
        If a sequence, a new size is drawn before every output.

        If shuffle is True, shuffled batches are produced by first generating
        an index array, shuffling it, and then using it to slice into the
        sequence.

        An internal queue of `buffer` items is accumulated before being each
        output. Buffering is useful for some devices, to allow the
        network to run asynchronously without blocking on every batch.
        __len__z-Can't minibatch data. Expected sequence, got c               
   3   sz    r	t j g } d}D ]&}t|}| |||   t|  kr1| E d H  g } ||7 }q| E d H  d S Nr   rM   randomrS   intappend
_get_batchlen)queueirU   rT   indicesr?   rV   rS   sizesrA   rB   _iter_itemsu   s   

z"Ops.minibatch.<locals>._iter_items)hasattrrD   rQ   _get_batch_sizesr^   rL   r[   	itertoolsrepeatrM   aranger(   )r?   rU   rV   rS   rT   rG   rd   rA   ra   rB   	minibatchT   s   
zOps.minibatchothersc          	         s   |ft | tdd D s#ddd D }d| }t|t|t|tr2t	|n|t
t| fdd}t|tS )	zyMinibatch one or more sequences of data, and yield
        lists with one batch per sequence. See ops.minibatch.
        c                 s   s    | ]}t |d V  qdS )rW   N)re   .0seqrA   rA   rB   	<genexpr>   s    z!Ops.multibatch.<locals>.<genexpr>z, c                 S   s   g | ]}t | qS rA   )rD   rl   rA   rA   rB   
<listcomp>       z"Ops.multibatch.<locals>.<listcomp>z/Can't multibatch data. Expected sequences, got c                  3   s    r	t j g } d}D ]4}t|}|||  }| g  D ]}| d || q$t|  kr?| E d H  g } ||7 }q| E d H  d S Nr   r:   rY   )r_   r`   rU   	idx_batchrV   rT   rb   r?   	sequencesrS   rc   rA   rB   rd      s    


z#Ops.multibatch.<locals>._iter_items)tuplealljoinrQ   rf   r^   rL   r[   rg   rh   rM   ri   r(   )	r?   rU   rV   rS   rT   rk   valuesrG   rd   rA   rt   rB   
multibatch   s   
zOps.multibatchc                    sf   t  tr fdd|D }nt  tr t fdd|D }n | }t|r1| | j|}|S )Nc                       g | ]} | qS rA   rA   rm   r`   rV   rA   rB   rp          z"Ops._get_batch.<locals>.<listcomp>c                 3   s    | ]} | V  qd S r>   rA   r|   r}   rA   rB   ro          z!Ops._get_batch.<locals>.<genexpr>)rL   listrv   r,   	as_contigr8   rP   )r?   rV   rb   subseqrA   r}   rB   r]      s   

zOps._get_batchlengthrc   c                 C   s6   g }d}||k r| t| ||d 7 }||k s|S rr   )r\   next)r?   r   rc   outputr`   rA   rA   rB   rf      s   zOps._get_batch_sizes)lengthsrn   nWr   c                C   s   |dksJ |dksJ |j d }|j d }| ||d d |}| |d|  d||||dd|f< ||dd|f< | ||d d|||d| |d df< | |||d| d  S )a  Given an (M, N) sequence of vectors, return an (M, N*(nW*2+1))
        sequence. The new sequence is constructed by concatenating nW preceding
        and succeeding vectors onto each column in the sequence, to extract a
        window of features.
        r.   Nr   r   r:   )shapealloc3f	reshape3f	reshape2f)r?   rn   r   r   BIcolsrA   rA   rB   seq2col   s   	

*.zOps.seq2coldYc          	   	   C   s   |dksJ |dksJ |d d }|j d }|j d | }| ||}| ||||}|d|   | ||dd|f d|7  < ||dd|f 7 }||d  | |d| |d df d|7  < |S )zThe reverse/backward operation of the `seq2col` function: calculate
        the gradient of the original `(M, N)` sequence, as a function of the
        gradient of the output `(M, N*(nW*2+1))` sequence.
        r.   Nr   r   r:   )r   alloc2fr   r   )	r?   r   r   r   nFr   r   dXdY3drA   rA   rB   backprop_seq2col   s   
04zOps.backprop_seq2colxyouttrans1trans2c                 C   s@   |r|j }|r
|j }|du r| j||S | jj|||d |S )zPerform General Matrix Multiplication (GeMM) and optionally store
        the result in the specified output variable.
        Nr   )Tr8   dot)r?   r   r   r   r   r   rA   rA   rB   gemm   s   zOps.gemmXrepsc                 C   s   | j ||S r>   )r8   tile)r?   r   r   rA   rA   rB   r      s   zOps.tileWbc                 C   s   | j ||dd}||7 }|S )zVApply a weights layer and a bias to some inputs, i.e.
        Y = X @ W.T + b
        T)r   )r   )r?   r   r   r   YrA   rA   rB   affine  s   z
Ops.affiner   r   rK   padndim_if_emptyc                 C      d S r>   rA   r?   r   rK   r   r   rA   rA   rB   flatten     zOps.flattenc                 C   r   r>   rA   r   rA   rA   rB   r     r   c                 C   r   r>   rA   r   rA   rA   rB   r     r   c                 C   r   r>   rA   r   rA   rA   rB   r   +  r   c                 C   r   r>   rA   r   rA   rA   rB   r   5  r   c           
      C   s  |du s
t |dkr| jd| |pddS t|d }|d j}dd |D }t |dkr6| j||p3ddS t|dkrpg }|D ]}||j|f|jdd  |jd || q@||j|f|jdd  |jd |}||}	|dur|j	|	|d}	|	S )	z.Flatten a list of arrays into one large array.Nr   r   frJ   c                 S   s   g | ]	}|j d kr|qS r   )rU   )rm   r   rA   rA   rB   rp   K  s    zOps.flatten.<locals>.<listcomp>r.   )
r^   allocr+   r   r[   r\   zerosrK   concatenaterP   )
r?   r   rK   r   r   r8   shape_if_emptypaddedr   resultrA   rA   rB   r   ?  s$   
&&
c                 C   r   r>   rA   r?   r   r   r   rA   rA   rB   	unflattenZ     zOps.unflattenc                 C   r   r>   rA   r   rA   rA   rB   r   ^  r   c                 C   r   r>   rA   r   rA   rA   rB   r   b  r   c                 C   r   r>   rA   r   rA   rA   rB   r   h  r   c                    st   t |} dkrt|dk|  d}| j|t|dd } dkr. fdd|D }t|t|ks8J |S )zThe reverse/backward operation of the `flatten` function: unflatten
        a large array into a list of arrays according to the given lengths.
        r   Nr:   c                    s   g | ]}| d  qS r>   rA   )rm   ar   rA   rB   rp   w  s    z!Ops.unflatten.<locals>.<listcomp>)r-   rM   wherer8   splitcumsumr^   )r?   r   r   r   unflatrA   r   rB   r   l  s   seqsc                 C   r   r>   rA   r?   r   round_torA   rA   rB   r   }  r   zOps.padc                 C   r   r>   rA   r   rA   rA   rB   r     r   c                 C   s  |dk rt d| |st dttdd |D dkr"t dttdd |D dkr3t dttd	d |D dkrDt d
tdd |D }|| | 7 }t||f|d jdd  }tt| j||d jd}t	|D ]\}}|||d|jd f< qt|S )zPerform padding on a list of arrays so that they each have the same
        length, by taking the maximum dimension across each axis. This only
        works on non-empty sequences with the same `ndim` and `dtype`.
        r.   z.Rounding for padding must at least be 1, was: zCannot pad empty sequencec                 s       | ]}|j V  qd S r>   )ndimrl   rA   rA   rB   ro         zOps.pad.<locals>.<genexpr>z)Cannot pad sequences with different ndimsc                 s   r   r>   rJ   rl   rA   rA   rB   ro     r   z*Cannot pad sequences with different dtypesc                 s   s    | ]
}|j d d V  qdS )r.   Nr   rl   rA   rA   rB   ro     s    z4Cannot pad sequences that differ on other dimensionsc                 s   s    | ]}t |V  qd S r>   r^   rl   rA   rA   rB   ro     r   r   NrJ   )
rQ   r^   setmaxr   r   r   r   rK   	enumerate)r?   r   r   max_seq_lenfinal_shaper   r`   arrrA   rA   rB   r     s&   r   c                 C   s6   g }t |D ]\}}|||d|f  qtt|S )zThe reverse/backward operation of the `pad` function: transform an
        array back into a list of arrays, each with their original length.
        N)r   r\   r   r$   )r?   r   r   r   r`   r   rA   rA   rB   unpad  s   
z	Ops.unpadc                    s   st | ddd| d| d| dS t dkrR|  d  d jd d d jd }| dg|jd  }| |jd g}| dg}t ||||S dd t D }|jdd dd |D }dd |D }t	d	d  D }	t }
 d jd }t
t fd
d|D  |  }|j|
|	|fksJ |
|	|f| |d}|j|	|
|fksJ dd t|	D }t|}t|	D ]}|r|||d  kr|d8 }|r|||d  ks|||< qt|t|ksJ t || || || |S )z4Pack a sequence of 2d arrays into a Padded datatype.r   r.   c                 S   s   g | ]
\}}t ||fqS rA   r   )rm   r`   rn   rA   rA   rB   rp     s    z#Ops.list2padded.<locals>.<listcomp>T)reversec                 S   s   g | ]\}}|qS rA   rA   rm   r   r`   rA   rA   rB   rp     r~   c                 S   s   g | ]\}}|qS rA   rA   r   rA   rA   rB   rp     r~   c                 S   s   g | ]}|j d  qS r   r   rl   rA   rA   rB   rp     rq   c                    r{   rA   rA   r|   r   rA   rB   rp     r~   r.   r   r   c                 S   s   g | ]}d qS r   rA   )rm   _rA   rA   rB   rp     s    )r&   r   alloc1ir^   reshape3r   	asarray1ir   sortr   r   r$   r   r   	transposerangesum)r?   r   rR   	size_at_tr   rb   lengths_indicesindices_lengths_nSnBnOr   batch_size_at_t_current_sizetrA   r   rB   list2padded  sH   $(

zOps.list2paddedc                 C   sv   |j }t|j}t|j}dgt| }| |d}t|jd D ]}||dt	|| f ||| < q#t
t|S )z;Unpack a Padded datatype to a list of 2-dimensional arrays.Nr   r   )rR   r-   rb   r   r^   r   r   r   r   r[   r   r$   )r?   r   rR   rb   r   unpaddedr`   rA   rA   rB   padded2list  s   

"
zOps.padded2listr   dropc                 C   sh   |du s|dkr| j j|ddS |dkr| |S | j jdd|}||kd|  }tt| j|ddS )a  Create a random mask for applying dropout, with a certain percent of
        the mask (defined by `drop`) will contain zeros. The neurons at those
        positions will be deactivated during training, resulting in a more
        robust network and less overfitting.
        Nr   r   rJ   r4           float32)r8   onesalloc_frZ   uniformr   r   rP   )r?   r   r   	coinflipsmaskrA   rA   rB   get_dropout_mask  s   
zOps.get_dropout_maskr   TrK   r   d0r   c                C      t t| j|f||dS Nr   )r   r   r   r?   r   rK   r   rA   rA   rB   alloc1f     zOps.alloc1fd1c                C      t t| j||f||dS r   )r   r   r   r?   r   r   rK   r   rA   rA   rB   r        zOps.alloc2fd2c                C      t t| j|||f||dS r   )r   r   r   r?   r   r   r   rK   r   rA   rA   rB   r        	zOps.alloc3fd3c                C      t t| j||||f||dS r   )r   r   r   r?   r   r   r   r   rK   r   rA   rA   rB   alloc4f     
zOps.alloc4fc                C      t t| j|||dS r   )r   r   r   r?   r   rK   r   rA   rA   rB   r        zOps.alloc_fint32c                C   r   r   )r   r   r   r   rA   rA   rB   r   &  r   zOps.alloc1ic                C   r   r   )r   r    r   r   rA   rA   rB   alloc2i/  r   zOps.alloc2ic                C   r   r   )r   r!   r   r   rA   rA   rB   alloc3i9  r   zOps.alloc3ic                C   r   r   )r   r"   r   r   rA   rA   rB   alloc4iD  r  zOps.alloc4ic                C   r  r   )r   r#   r   r  rA   rA   rB   alloc_iP  r  zOps.alloc_ic                C   s4   t |tr|f}|r| jj||dS | jj||dS )z%Allocate an array of a certain shape.rJ   )rL   r[   r8   r   emptyr  rA   rA   rB   r   Y  s
   
z	Ops.allocarrayc                 C      t t| ||fS r>   )r   r   reshaper?   r  r   rA   rA   rB   reshape1i     zOps.reshape1c                 C      t t| |||fS r>   )r   r   r  r?   r  r   r   rA   rA   rB   reshape2l     zOps.reshape2c                 C      t t| ||||fS r>   )r   r   r  r?   r  r   r   r   rA   rA   rB   r   o     zOps.reshape3c              	   C      t t| |||||fS r>   )r   r   r  r?   r  r   r   r   r   rA   rA   rB   reshape4r     zOps.reshape4c                 C   r  r>   )r   r   r  r  rA   rA   rB   	reshape1fu  r  zOps.reshape1fc                 C   r  r>   )r   r   r  r  rA   rA   rB   r   x  r  zOps.reshape2fc                 C   r  r>   )r   r   r  r  rA   rA   rB   r   {  r  zOps.reshape3fc              	   C   r  r>   )r   r   r  r  rA   rA   rB   	reshape4f~  s   zOps.reshape4fc                 C      |  ||S r>   r  r?   r  r   rA   rA   rB   	reshape_f     zOps.reshape_fc                 C   r  r>   )r   r   r  r  rA   rA   rB   	reshape1i  r  zOps.reshape1ic                 C   r  r>   )r   r    r  r  rA   rA   rB   	reshape2i  r  zOps.reshape2ic                 C   r  r>   )r   r!   r  r  rA   rA   rB   	reshape3i  r  zOps.reshape3ic              	   C   r  r>   )r   r"   r  r  rA   rA   rB   	reshape4i  r  zOps.reshape4ic                 C   r  r>   r  r   rA   rA   rB   	reshape_i  r"  zOps.reshape_ic                 C   s    t |tr|f}tt||S )zReshape an array.)rL   r[   r   r0   r  r   rA   rA   rB   r    s   
zOps.reshaperJ   rR   c                C      t t| j||dS NrJ   )r   r   rP   r?   rR   rK   rA   rA   rB   	asarray4f     zOps.asarray4fc                C   r(  r)  )r   r   rP   r*  rA   rA   rB   	asarray3f  r,  zOps.asarray3fc                C   r(  r)  )r   r   rP   r*  rA   rA   rB   	asarray2f  r,  zOps.asarray2fc                C   r(  r)  )r   r   rP   r*  rA   rA   rB   	asarray1f  r,  zOps.asarray1fc                C   r(  r)  )r   r   rP   r*  rA   rA   rB   	asarray_f  r,  zOps.asarray_fc                C   r(  r)  )r   r   rP   r*  rA   rA   rB   r        zOps.asarray1ic                C   r(  r)  )r   r    rP   r*  rA   rA   rB   	asarray2i  r,  zOps.asarray2ic                C   r(  r)  )r   r!   rP   r*  rA   rA   rB   	asarray3i  r,  zOps.asarray3ic                C   r(  r)  )r   r"   rP   r*  rA   rA   rB   	asarray4i  r,  zOps.asarray4ic                C   r(  r)  )r   r#   rP   r*  rA   rA   rB   	asarray_i  r1  zOps.asarray_ic                C   sn   t || jjr|du r|S |j|kr|S | jj||dS t|dr%| S |dur1| jj||dS | j|S )z,Ensure a given array is of the correct type.NrJ   rM   )rL   r8   rN   rK   rP   re   rM   r  r*  rA   rA   rB   rP     s   

zOps.asarrayc                 C   sD   |j d r|d|jfv r|S |durd|ini }| jj|fi |S )zAllow the backend to make a contiguous copy of an array.
        Implementations of `Ops` do not have to make a copy or make it
        contiguous if that would not improve efficiency for the execution engine.
        C_CONTIGUOUSNrK   )flagsrK   r8   ascontiguousarray)r?   rR   rK   r@   rA   rA   rB   r     s   zOps.as_contiginplacer:  c                C   sd   |r| j j|dd|d}| j j| |d |d7 }|dC }|S | j |dd}dd| j |   S )N      4      4@r   r4         )r8   clipexpr?   r   r:  rA   rA   rB   sigmoid  s   zOps.sigmoidr   c                C   s0   |r| j |dd ||9 }|S || j ||d S )NTr9  )dsigmoidr?   r   r   r:  rA   rA   rB   backprop_sigmoid  s
   zOps.backprop_sigmoidc                C   s    |r
|d| 9 }|S |d|  S )Nr.   r4   rA   r?   r   r:  rA   rA   rB   rB    s   zOps.dsigmoidc                C   s,   |r|dC }|d9 }|d7 }|S d|d  S )Nr   r=  r4   r.   rA   rE  rA   rA   rB   dtanh"  s   z	Ops.dtanhr4   )r:  axistemperaturerG  rH  c                C   sL   |dkr|| }| j j||dd}|| }| j |}||j|dd }|S Nr4   T)rG  keepdims)r8   r   r?  r   )r?   r   r:  rG  rH  maxesshiftednew_xrA   rA   rB   softmax+  s   zOps.softmax)r:  rG  Xsc                C   sZ   |j dkrd|j  }t|| j|dd}| j|}| | |||}|| }|S )N   z)Softmax currently only supports 2d. Got: r;  r<  )r   rF   r8   r>  r?  backprop_reduce_sum
reduce_sum)r?   rO  r   r:  rG  rG   rM  summedrA   rA   rB   softmax_sequences;  s   
zOps.softmax_sequences)rG  rH  c                C   s2   |dkr|| }|| }|||j |dd 8 }|S rI  r   )r?   r   r   rG  rH  r   rA   rA   rB   backprop_softmaxH  s
   zOps.backprop_softmaxc                 C   s,   || }|  | |||}||| 8 }|S r>   )rQ  rR  )r?   r   r   r   r   sum_dXrA   rA   rB   backprop_softmax_sequencesR  s   zOps.backprop_softmax_sequencesparamsH0C0r   c                 C   sD   |j |j ksJ |j d |j d ksJ t|||||\}}||fS Nr.   )r   lstm_forward_training)r?   rY  rZ  r[  r   r   r   	fwd_staterA   rA   rB   r]  Z  s   zOps.lstm_forward_trainingc                 C   s   t |||||\}}|S r>   )r]  )r?   rY  rZ  r[  r   r   r   r   rA   rA   rB   lstm_forward_inferenceg  s   zOps.lstm_forward_inferencer^  c                 C   s   t ||||\}}||fS r>   )backprop_lstm)r?   r   r   rY  r^  r   d_paramsrA   rA   rB   r`  r  s   zOps.backprop_lstmc                 C   s   |j dd}|jdd|fS )Nr:   rG  )argmaxr   )r?   r   whichrA   rA   rB   maxoutx  s   z
Ops.maxoutrd  Pc              	   C   sj   | j |jd |jd ||jd}t|jd D ]}t|jd D ]}|||f ||||||f f< q q|S )Nr   r.   rJ   )r   r   rK   r   )r?   r   rd  rf  r   r   orA   rA   rB   backprop_maxout|  s     zOps.backprop_maxoutc                 C   s    |s||dk S ||dk9 }|S rX   rA   r@  rA   rA   rB   relu  s   zOps.reluc                 C   s    |s||dk S ||dk9 }|S rX   rA   rC  rA   rA   rB   backprop_relu  s   zOps.backprop_relur   slopeoffsetmin_valmax_valc                 C   sD   |r||9 }||7 }| j j||||dS || | }| j |||S )Nr   )r8   r>  )r?   r   rk  rl  rm  rn  r:  r   rA   rA   rB   clipped_linear  s   	zOps.clipped_linearc                 C   st   || | }|| | }	| j ||j}| j d|j}
| j ||k ||	k @ ||
}|r6||9 }|S || S )Nr   )r8   float64astyperK   r   )r?   r   r   rk  rl  rm  rn  r:  lowhighzeror   rA   rA   rB   backprop_clipped_linear  s   
zOps.backprop_clipped_linear      @nc                 C   s   | j |||dS N)rn  r:  ro  )r?   r   rw  r:  rA   rA   rB   relu_k     z
Ops.relu_kc                 C   s   | j ||||dS rx  ru  )r?   r   r   rw  r:  rA   rA   rB   backprop_relu_k     zOps.backprop_relu_kc                 C      | j |dd|dS )N皙?      ?)rk  rl  r:  ry  r@  rA   rA   rB   hard_sigmoid     zOps.hard_sigmoidc                 C      | j ||dddS )Nr  r  )rk  rl  r|  r?   r   r   r:  rA   rA   rB   backprop_hard_sigmoid  r~  zOps.backprop_hard_sigmoidc                 C   r  )Nr=  r4   )rm  rn  r:  ry  r@  rA   rA   rB   	hard_tanh  r  zOps.hard_tanhc                 C   r  )Nr=  r4   )rm  rn  r|  r  rA   rA   rB   backprop_hard_tanh  r~  zOps.backprop_hard_tanhc                 C   (   |r||  |9 }|S ||  | }|S r>   rA  r?   r   r:  r   rA   rA   rB   swish  
   z	Ops.swishc                 C   s2   ||  |d|   }|r||9 }|S || }|S r\  r  )r?   r   r   r   r:  r   rA   rA   rB   backprop_swish  s   zOps.backprop_swishc                 C   r  r>   )r  r  rA   rA   rB   
hard_swish  r  zOps.hard_swishc                 C   s<   |d d }d||dk< d||dk < |r||9 }|S || S )Ng?r  r4   g      @r   g      rA   r?   r   r   r:  r   rA   rA   rB   backprop_hard_swish  s   zOps.backprop_hard_swishc                 C   s4   |r||  |d d 9 }|S ||  |d d  S )NrP     )rz  r@  rA   rA   rB   hard_swish_mobilenet  s   zOps.hard_swish_mobilenetc                 C   s@   d|d d  }d||dk< d||dk < |r||9 }|S || S )NgUUUUUU?r3         @r4   r   g      rA   r  rA   rA   rB   backprop_hard_swish_mobilenet  s   z!Ops.backprop_hard_swish_mobilenetc                 C   sT   | j |}|d7 }| j j||d || }|d7 }|d9 }|r&||9 }|S || S )Nr4   r   r.   r  r8   squaresqrt)r?   r   r:  tmprA   rA   rB   dish  s   zOps.dishc                 C   sd   | j |}|d }|| j | }d| | }||d  }||8 }|d7 }|r.||9 }|S || S )Nr4   r  g      ?r  )r?   r   r   r:  x_sqx_sq_plus_onederivsecondrA   rA   rB   backprop_dish  s   zOps.backprop_dishc                 C   s   | j |}| j |}d}d}d}d}d}d}dd||   }	d||	 | |	 | |	 | |	 | |	 | j | |   }
||
 }||j}|S )Ng~Z O?gi<15ҿgWU?g9LW@g-UB?g{=@?r4   )r8   signabsr?  rq  rK   )r?   r   r  a1a2a3a4a5pr   r   r   rA   rA   rB   erf#  s   .zOps.erfc                 C   s$   | j |dd}d| j | d S )Nr;  r<  r.   r   )r8   r>  cosh)r?   r   rA   rA   rB   sechsq7  s   z
Ops.sechsqc              
   C   sb   d| j t|d| j |d    }|d9 }||j}|r%||9 }|S | j |}||9 }|S )Nr4   gHm?rP  r  )r8   tanhSQRT2PIpowerrq  rK   r  )r?   r   r:  r  r   rA   rA   rB   gelu_approx<  s   &zOps.gelu_approxc                 C   s   t t| |j}| j|d}d| jd| d|   }|d| d|  | d| d|   7 }|d7 }||7 }|rC||9 }|S || S )NrP  r  giND?gF?gFrVvf?gD?)r   r   r   r   r8   r  r  r  )r?   r   r   r:  r   Xp3r  rA   rA   rB   backprop_gelu_approxH  s   zOps.backprop_gelu_approxc                 C   s"   t | |}|r||9 }|S || S r>   )gaussian_cdf)r?   r   r:  cdfrA   rA   rB   geluX  s
   
zOps.geluc                 C   s0   t | ||t| |  }|r||9 }|S || S r>   )r  gaussian_pdfr  rA   rA   rB   backprop_gelu`  s
   zOps.backprop_gelur<  	thresholdc              	   C   sP   || j | j d| j |  }| j ||k||}|r&||d d < |S |S Nr4   )r8   r  logr?  r   )r?   r   r  r:  r  r   rA   rA   rB   mishj  s   $zOps.mishc                 C   s   |j |j krd|j  d|j  }t|t|}||k }|| }|| }	d|d  }
|
d|d|  7 }
|
|d| 7 }
|
||d| d  7 }
||d }||9 }|d7 }|	|||
 |d   }|rl|}n||}|||< |S )	N!arrays have incompatible shapes:  and g      @r4   r3   r  rv  r   )r   rQ   r+   r?  copy)r?   r   r   r  r:  msgr8   rb   XsubdYsubomegadeltadXsubr   rA   rA   rB   backprop_mishu  s(   
zOps.backprop_mishH.?emaweightsr   	max_decayc                 C   s4   d| d|  }||kr|}|d| ||  8 }d S )Nr4   g      $@r.   rA   )r?   r  r  r   r  decayrA   rA   rB   update_averages  s   zOps.update_averagesgradientmom1mom2beta1beta2eps
learn_ratemod_ratec
           
      C   s~   t || t || t || ||9 }||9 }||d|  7 }||| d|  7 }||||	| j| |   8 }||||fS r  )_check_compatible_shaper8   r  )
r?   r  r  r  r  r  r  r  r  r  rA   rA   rB   adam  s   


 zOps.adamc                 C   s,   t |}|j|}||kr||| 9 }|S r>   )r+   linalgnorm)r?   r  r  r8   	grad_normrA   rA   rB   clip_gradient  s
   zOps.clip_gradienty_truey_predc                 C   s:   | j |d }|| d| | j d| d   }| S )Ng:0yE>r.   )r8   r  )r?   r  r  log_yplossrA   rA   rB   logloss  s   $zOps.loglossc                 C   s   | j |jd |jd dd}d}t|D ]5\}}|dk r#td| || |jd kr0td|rE||||  jdd||< ||7 }qd||< q|S 	Nr   r.   Fr   'all sequence lengths must be >= 0, got )lengths must sum up to the number of rowsrb  r   )r   r   r   rQ   
IndexErrorr   r?   r   r   r   startr`   r   rA   rA   rB   rR    s   

zOps.reduce_sumc                 C   s   |j dkr| d|jd |fS | j|dkstd| j|jd d dd}d|d< | |dd < |d |jd krAtd||d d  |fS )Nr   r.    all sequence lengths must be > 0Fr  r:   r  )	rU   r   r   r8   rw   rQ   r   r   r  )r?   r   r   starts_endsrA   rA   rB   reduce_first  s   
zOps.reduce_firstc                 C   sn   |j dkr| d|jd |fS | j|dkstd| d }|d d |jd kr1td|| |fS )Nr   r.   r  r:   r  )rU   r   r   r8   rw   rQ   r   r  )r?   r   r   lastsrA   rA   rB   reduce_last  s   
zOps.reduce_lastc                 C   s   | j |jd |jd dd}d}t|D ]5\}}|dk r#td| || |jd kr0td|rA||||  jdd||< nd||< ||7 }q|S r  )r   r   r   rQ   r  meanr  rA   rA   rB   reduce_mean  s   
zOps.reduce_meanc                 C   s   | j |jd |jd |jdd}| j|jd |jd dd}d}t|D ]>\}}|dkr3td| || |jd kr@td|r^||||  jdd||< ||||  jdd||< ||7 }q$||fS )	Nr   r.   Fr   r  &all sequence lengths must be > 0, got r  rb  )	r   r   rK   r  r   rQ   r  rc  r   )r?   r   r   r   rd  r  r`   r   rA   rA   rB   
reduce_max  s    
zOps.reduce_maxd_firstsr  c                 C   sl   |j dkr| jd|jd |jddS |j dkrtd| jt|d |jd |jdd}|||d d < |S )Nr   r.   Tr   z starts_ends must not have size 1r:   )rU   r   r   rK   rQ   r[   )r?   r  r  r   rA   rA   rB   backprop_reduce_first	  s   

zOps.backprop_reduce_firstd_lastsr  c                 C   sV   |j dkr| jd|jd |jddS | jt|d d |jd |jdd}|||< |S )Nr   r.   Tr   r:   )rU   r   r   rK   r[   )r?   r  r  r   rA   rA   rB   backprop_reduce_last  s   
zOps.backprop_reduce_lastd_sumsc                 C   sj   | j | |jd |jdd}d}t|D ]\}}|dk r$td| || |||| < ||7 }q|S Nr.   Fr   r   r  r   r   r   rK   r   rQ   )r?   r  r   r   r  r`   r   rA   rA   rB   rQ    s   
zOps.backprop_reduce_sumd_meansc                 C   sn   | j | |jd |jdd}d}t|D ]\}}|dk r$td| || | |||| < ||7 }q|S r  r  )r?   r  r   r   r  r`   r   rA   rA   rB   backprop_reduce_mean+  s   
zOps.backprop_reduce_meand_maxesc                 C   s   | j | |jd |jd}d}t|D ])\}}|dkr#td| | j||||  || d|| d ||7 }q|S )Nr.   rJ   r   r  )r.   r:   )	r   r   r   rK   r   rQ   r8   put_along_axisr  )r?   r  rd  r   r   r  r`   r   rA   rA   rB   backprop_reduce_max7  s   "
zOps.backprop_reduce_maxidsseedc                 C   s.   ddl m} | }| ||j|dd|S )zcHash a sequence of 64-bit keys into a table with 4 32-bit keys, using
        murmurhash3.
        r.   NumpyOpsuint64rJ   )	numpy_opsr  r2  hashrP   )r?   r  r  r  r  rA   rA   rB   r  F  s
   zOps.hashkeysc              	   C   s.   ddl m} | }| |||j|ddS )Nr.   r  r  rJ   )r  r  r   ngramsrP   )r?   rw  r   r  r  rA   rA   rB   r  Q  s
   z
Ops.ngrams'  NDperiodc                 C   s(   ddl m} | }| |||||S )Nr.   r  )r  r  r.  position_encode)r?   r  r  r  r   r  r  rA   rA   rB   r  Y  s   zOps.position_encodetablerb   c                 C   s   || j ddS )Nr.   rb  rU  )r?   r  rb   rA   rA   rB   
gather_addb  r{  zOps.gather_addry   c                 C   s   | j j|||S r>   )r8   addat)r?   r  rb   ry   rA   rA   rB   scatter_adde  r~  zOps.scatter_addc                 C   s@   | j ||d jd}t|D ]\}}|||d|jd f< q|S )z8Maybe don't need this? Just a quicky to get Jax working.r   rJ   N)r   rK   r   r   )r?   r   rO  r   r`   r   rA   rA   rB   insert_intoj  s   zOps.insert_into)r9   r:   )NFF)Nr   r   r   )r.   r>   )F)r4   r   r   r4   F)rv  F)r<  F)r  )r4   )r  N)rE   
__module____qualname__r7   str__annotations__rM   r8   r)   r   r[   rC   r/   rH   r-   r
   r   r   boolr(   rj   rz   r]   r   rf   r   r   r   r   r   r   r   r   r   r   r   r   r   r$   r   r%   r   r   r   r    r!   r   r   r   r   r&   r   r   r'   floatr   r   r   r   r   r   r   r   r   r   r   r  r  r"   r  r#   r	  r   r   r   r  r  r   r   r  r  r   r   r  r!  r#  r$  r%  r&  r'  r0   r  r+  r-  r.  r/  r0  r   r2  r3  r4  r5  rP   r   r   rA  rD  rB  r2   rF  rN  rT  rV  rX  r   r]  r_  r`  re  rh  ri  rj  ro  ru  rz  r}  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  rR  r  r  r  r  r  r  rQ  r  r  r  r  r  r  r  r  rA   rA   rA   rB   r5   <   sf  
 


6

*	


			
$$  
!	*


	




	


"
"





	
	


	
	















	




	 	
""
"			r5   rY  c_inith_initr   r   r=   c           $   
   C   s  t | }|j\}}}|j\}	}
|d }tt|j|||jd |d fdd}tt|j|||jd |fdd}tt|j|||jd |fdd}tt|j||fdd}tt|j||fdd}g }d}|D ]}|||| f ||7 }qfd}|}t|D ]<}|jd }
t|D ]}|||f d|f}|||f d|f}t	| |||
|\}}t
|\}}}|||f  |||j7  < |||f  |7  < |dkr|nt|D ]\}}|d ||  }|d ||  }|||||f }|d |jd  }||||j7 }tt|d|df}t|d d d d df }t|d d d d df }t|d d d d df } ||d d d d df }!|| }"|"||! 7 }"|||| |!fdd|fd	d|d f}t||| jd  }||"|  |||||f< ||||||f< |"|||||f< |"}|||||f }qqtt|| d
|	df}#|dkr||#}#|#}q~|#||||ffS )Nr      r   rJ   r.   r:   r   rP  )r   r   r.   r   )r+   r   r   r   r   r   r\   r   r  _split_weights_transpose_weightsr   r   reversedr   rA  r  hstackr   minr8  )$rY  r  r  r   r   r8   depthdirsr   r  nI
batch_sizeGr   CYt2Ct2rb   r  params_iorig_Xr`   dlayer_paramsWxWhbiasendGt3Gt3_hfhihohcCt3HrA   rA   rB   r]    sn   
&""

	 

r]  r   r^  c           .         s  t ||\}}}|j\}} }	|jd }
|d }ttj |	f|jd}ttj |	d f|jd}ttj|jd f|jd}d}g }t|D ]/}|g  |dkrZ|
n|	| }t|D ]}t	|||	||\}}t
|}|d ||f qbqMd}g }t|D ]/}|g  |dkr|
n|	| }t|D ]}t	|||	||\}}t
|}|d ||f qqg }d}|D ]}|||| f ||7 }q|g fddt|d D  }fdd|D }tt|D ]}tt|  ||	fd	}|| }|| }|d
kr|}t|D ]}|| | d \}}}|| | d \} }!}"|dkr?|d \}#}$|d d }%|%  n|d \}#}$|dd  }%|%D ]]\}&}'t|'|& |$|# }(t|||#|#|( f ||#|#|(  ||||#|#|( f ||||#|#|( f ||||&|&|( f \})}*|||&|&|( f  |)| 7  < |*||&|&|( < |&}#|'}$qM| |j| 7 } |!|j||f  7 }!|"|jdd7 }"||| 7 }q|} q|jd |jd ksJ g }+|D ]}|D ]\},}-|+t|, qq||+fS )Nr.   r   rJ   r  r:   c                    s*   g | ]}t t| d  dfqS )r   r:   )r   r   r   r  r|   )r  r   rA   rB   rp     s    z!backprop_lstm.<locals>.<listcomp>c                    s,   g | ]} j |jd  |jd f|jdqS )r   r.   rJ   )r   r   rK   )rm   r   )r8   rA   rB   rp     s   , r   r   rb  )r+   r   r   r   r   rK   r   r   r\   r  r  r  r   r  r   r8  r   r  backprop_lstm_gatesr   r   _untranspose_unsplit_weightsr   ).r   r   rY  r^  r  r   r   r  r  r   r  r  dCdGra  r#  all_layer_paramsr`   n_inputsr%  r&  all_layer_gradslayer_gradsrb   r  rO  dXsr   r   r'  r(  r)  dWxdWhd_biasstart_t3end_t3layer_indicesstart_t2end_t2rU   dGt3dCt2
grad_parts	dir_gradsr   rA   )r  r   r8   rB   r`    s   








 r`  r`   r   r  r#  c                 C   s   d| | }d| }d| | }d| }| |||   d| |f}	||7 }| |||   d| f}
||7 }| |||   d| |f}||7 }| |||   d| f}||7 }|	|
f||ff|fS )Nr  r  )rY  r`   r   r  r#  Wx_sizebx_sizeWh_sizebh_sizer'  bxr(  bhrA   rA   rB   r  E  s   r  c                 C   s   | \\}}\}}t |}|dd|jd f}|dd|jd f}|ddd}|dd|jd f}|dd|jd f}|ddd}|j}||}||}||| }|||fS )Nr  r:   r   )r  r:   r.   r   r:   )r+   r  r   r   r8  )rY  r'  rL  r(  rM  r8   ascontigr)  rA   rA   rB   r  U  s   
r  c                 C   s   | \}}}t |}|jd }|jd }|dd|fdd|f}|dd|fdd|f}|ddd}|j|jdd	}|| || |fS )
Nr.   r:   r  r   )r:   r  rN  rO  r   rJ   )r+   r   r  r   r   r   ravel)rY  r'  r(  r)  r8   r   r  r   rA   rA   rB   r4  g  s   


  r4  dYt3dCt3r+  r1  r"  c                 C   sD  t | }|j|ddd\}}}}	|jd |jd   kr,|jd   kr,|	jd ks/J  J |jd | jd   krS|jd   krS|jd   krS|jd ksVJ  J ||}
| |
 }| | }||t|
 7 }||	 }|| }|| }|| }|t|	 }|t| }|t| }|t| }|j||||fdd}||fS )Nr  r:   rb  r   )r+   r   r   r  rF  rB  r   )rR  rS  r+  r1  r"  r8   r-  r.  r/  r0  tanhCt3d_ho	d_tanhCt3d_hid_hcd_hfrE  d_At3_hcd_At3_hod_At3_hid_At3_hfdAt3rA   rA   rB   r3  s  s$   >N
r3  c                 C   s*   t | }|| dd} dd||    S )Nr;  r<  r4   )r+   r>  r?  )r   r   r8   rA   rA   rB   rA    s   rA  r   c                 C   s   | d|   S r  rA   r   rA   rA   rB   rB    r"  rB  c                 C   s   d| d  S )Nr.   r   rA   r_  rA   rA   rB   rF    r"  rF  opsc                 C   s   dd|  t|   S )z6Gaussian CDF for distribution with mean 0 and stdev 1.r  r4   )r  	INV_SQRT2r`  r   rA   rA   rB   r    s   r  c                 C   s   t | jd| |  S )z6Gaussian PDF for distribution with mean 0 and stdev 1.g      )INV_SQRT_2PIr8   r?  rb  rA   rA   rB   r    s   r  uvc                 C   s,   | j |j krd| j  d|j  }t|d S )Nr  r  )r   rQ   )rd  re  r  rA   rA   rB   r    s   r  r>   )Hrg   mathtypingr   r   r   r   r   r   r   r	   r
   r   r   rM   typesr   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r    r!   r"   r#   r$   r%   r&   r'   r(   r)   r*   utilr+   r,   r-   rH   r/   r0   r2   r  pir  ra  rc  r5   r]  r`  r[   r  r  r4  r3  rA  rB  rF  r  r  r  rA   rA   rA   rB   <module>   sx    4|          @&

Gf


