o
    Z۷iCj                     @   s&  d dl mZ d dlZd dlmZmZ d dlmZ d dl	m
Z
mZ d dlmZ d dlmZmZ edg d	Zd
d Zdd Zdd Zdd Zdd Zdd ZG dd deZedZG dd deZG dd deZdd Zdd  ZG d!d" d"eZG d#d$ d$eZ G d%d& d&eZ!G d'd( d(e!Z"dS ))    )
namedtupleN)Constant	IRBuilder)ir)typescgutils)global_compiler_lock)make_library_cache	NullCache_wrapper_infolibraryenvnamec              	   C   s  |  }|j |||	j|	j|\}}|j|jddC\}}| || W d    n1 s-w   Y  | |
 }|j ||
| |
| W d    n1 sPw   Y  W d    n1 s_w   Y  t	||D ]\}}|
||||j| qi|
||||j| |jS NTlikely)	call_convcall_functionreturn_typeargsif_elseis_ok
gil_ensureraise_errorgil_releasezipstoreaddloadstepcode)r   r   contextfuncbuilderarraysoutoffsetsstore_offset	signaturepyapir   elemsstatusretvalif_okif_errorgiloffary r3   M/home/ubuntu/vllm_env/lib/python3.10/site-packages/numba/np/ufunc/wrappers.py_build_ufunc_loop_body   s,   

	r5   c                 C   s   |  }t jgt|	j }|jdd |j||t j||\}}|D ]}|| q"W d    n1 s4w   Y  || t||D ]\}}|	|
|||j| qB|	|
|||j| |jS )NT)keep_new)r   pyobjectlenr   err_pushr   r   decrefr   r   r   r   r    r!   )r   r   r"   r#   r$   r%   r&   r'   r(   r)   r   r*   r+   _objargsr,   r-   elemr1   r2   r3   r3   r4   _build_ufunc_loop_body_objmode,   s$   

r=   c
                    s@    fdd}
fdd}t |
|| | |||	dS )Nc                     s   fddt  D } | S )Nc                        g | ]\}}|  |qS r3   load_directr   .0r1   r2   r$   r3   r4   
<listcomp>O       z6build_slow_loop_body.<locals>.load.<locals>.<listcomp>)r   r+   )r%   r$   r'   r3   r4   r   N   s   
z"build_slow_loop_body.<locals>.loadc                    s    |   d S N)store_directr   r-   )r$   r&   r(   r3   r4   r   S   s   z#build_slow_loop_body.<locals>.storer   r5   )r"   r#   r$   r%   r&   r'   r(   r)   r*   r   r   r   r3   )r%   r$   r'   r&   r(   r4   build_slow_loop_bodyL   s   
rL   c                    sb   |  |	}|
||	 fdd}fdd}t||| | |	S )Nc                     s8   fddt  D } fddt | jD } | S )Nc                    r>   r3   r?   rA   rC   r3   r4   rD   b   rE   z5build_obj_loop_body.<locals>.load.<locals>.<listcomp>c                    s   g | ]\}} || qS r3   )from_native_value)rB   vt)env_managerr*   r3   r4   rD   e   s    )r   r   rF   )r%   r$   rP   r'   r*   r)   r3   r4   r   `   s   

z!build_obj_loop_body.<locals>.loadc                    sz   t  | } j|dd& j| }|jd u sJ |j  	|  W d    d S 1 s6w   Y  d S r   )
r   is_not_nullif_thento_native_valuer   cleanuprH   valuer   r:   )r-   r   native)r$   r&   r*   r)   r(   r3   r4   r   i   s   "z"build_obj_loop_body.<locals>.store)get_env_bodyget_env_managerr=   )r"   r#   r$   r%   r&   r'   r(   r)   r*   envptrr   env_bodyr   r   r3   )r%   r$   rP   r'   r&   r*   r)   r(   r4   build_obj_loop_body[   s   	r[   c                    s<    fdd}fdd}t ||| || ||||	|
dS )Nc                     s   fdd D } | S )Nc                    s   g | ]}|  qS r3   )load_aligned)rB   r2   )indr3   r4   rD   ~   s    z6build_fast_loop_body.<locals>.load.<locals>.<listcomp>r3   rF   )r%   r]   r3   r4   r   }   s   
z"build_fast_loop_body.<locals>.loadc                    s    |   d S rG   )store_alignedrI   )r]   r&   r3   r4   r      s   z#build_fast_loop_body.<locals>.storerJ   rK   )r"   r#   r$   r%   r&   r'   r(   r)   r]   r*   r   r   r   r3   )r%   r]   r&   r4   build_fast_loop_body{   s   
r_   c           *      C   s  t |tsJ td}t|}t|}|tj}	t|	}
tt	 ||
|
|g}|
 d}|d}|rK|jtjtjgt|j }n	|j|j|j}tj|||d}|jd t||d|j }|j\}}}}d|_d|_d	|_d
|_t|d}||j}|j}|||j|}|j|dd}g }t|jD ]\}}| t!|||||| qt!||||t||j}g }|"tjd}|D ]} t#$||	}!| |! |%||! qt#$||	}"|%||" t#j&}#|D ]	}$|'|#|$j(}#q|)|}%|r;|%* }&t#j+|||	d t,|||||||"||%|| W d   n	1 s,w   Y  |%-|& |.  n|/|#\}'}(|'1 t#j+|||	d})t0|||||||"||)j1|%|d W d   n	1 sjw   Y  W d   n	1 szw   Y  |(/ t#j+|||	d t2|||||||"||%|d
 W d   n	1 sw   Y  W d   n	1 sw   Y  W d   n	1 sw   Y  |.  ~|3| |4|  t5|||jdS )z
    Wrap the scalar function with a loop that iterates over the arguments

    Returns
    -------
    (library, env, name)
       ufunc_wrapper r   alwaysinlinez
__ufunc__.r   dimsstepsdataentry	loopcountr   intpNrJ   r   )6
isinstancestrr   IntTypePointerTypeget_value_typer   rk   FunctionTypeVoidTypecodegencreate_librarycreate_ir_moduler   get_function_typer7   r8   r   r   Function
attributesr   r   r   append_basic_blockget_env_namefndescenvironmentr   declare_env_globalmodule	enumerateappend	UArrayArgget_constantr   alloca_oncer   true_bitand_is_unit_stridedget_python_apir   	for_ranger[   r   ret_voidr   r_   indexrL   add_ir_moduleadd_linking_libraryr   )*r   r"   fnamer)   objmodecresbyte_t
byte_ptr_tbyte_ptr_ptr_tintp_t
intp_ptr_tfnty
wrapperlibwrapper_module	func_typer#   wrapperarg_argsarg_dims	arg_stepsarg_datar$   envnamer   rY   ri   r%   itypr&   r'   zero_pr(   unit_stridedr2   r*   r0   r   
is_stridedloopr3   r3   r4   build_ufunc_wrapper   s   












r   c                   @   s4   e Zd Zdd Zdd Zdd Zdd Zd	d
 ZdS )r   c                 C   s   || _ || _|| _| j tj|}| j|||g}||}	| j	||	
 | _| j |	}
| j tj|
| _| j||g}| j|| _|d| j| j| _|| _d S )Nz==)r"   r$   fe_typer   r   rk   r   gepget_data_typebitcast
as_pointerdataptrget_abi_sizeofabisizer    icmp_unsignedr   )selfr"   r$   r   rf   r   r   offsetoffseted_args	data_typesizeofoffseted_stepr3   r3   r4   __init__   s"   

zUArrayArg.__init__c                 C   s&   t | j| j|}| j| j| j|S )zm
        Generic load from the given *byteoffset*.  load_aligned() is
        preferred if possible.
        )r   pointer_addr$   r   r"   unpack_valuer   )r   
byteoffsetptrr3   r3   r4   r@     s   zUArrayArg.load_directc                 C   s&   | j | j|g}| j| j | j|S rG   )r$   r   r   r"   r   r   )r   r]   r   r3   r3   r4   r\     s   zUArrayArg.load_alignedc                 C   s,   t | j| j|}| j| j| j|| d S rG   )r   r   r$   r   r"   
pack_valuer   )r   rU   r   r   r3   r3   r4   rH        zUArrayArg.store_directc                 C   s,   | j | j|g}| j| j | j|| d S rG   )r$   r   r   r"   r   r   )r   rU   r]   r   r3   r3   r4   r^     r   zUArrayArg.store_alignedN)__name__
__module____qualname__r   r@   r\   rH   r^   r3   r3   r3   r4   r      s    r   gufc                   @   s   e Zd Zdd Zedd Zedd Zedd Zed	d
 Zedd Z	edd Z
dd Zdd Zdd Zedd Zdd Zdd Zdd ZdS )_GufuncWrapperc                 C   sN   || _ || _|| _|| _| jjtjk| _|rt	| j dnt
 | _t|| _dS )a,  
        The *is_parfors* argument is a boolean that indicates if the GUfunc
        being built is to be used as a ParFors kernel. If True, it disables
        the caching on the wrapper as a separate unit because it will be linked
        into the caller function and cached along with it.
        )py_funcN)r   r   sinsoutr)   r   r   r7   is_objectmodeGufWrapperCacher
   cachebool
is_parfors)r   r   r   r   r   r   r   r3   r3   r4   r   &  s   z_GufuncWrapper.__init__c                 C      | j jS rG   )r   r   r   r3   r3   r4   r   6     z_GufuncWrapper.libraryc                 C   r   rG   )r   target_contextr   r3   r3   r4   r"   :  r   z_GufuncWrapper.contextc                 C   r   rG   )r"   r   r   r3   r3   r4   r   >  r   z_GufuncWrapper.call_convc                 C   r   rG   )r   r)   r   r3   r3   r4   r)   B  r   z_GufuncWrapper.signaturec                 C   r   rG   )r   r{   r   r3   r3   r4   r{   F  r   z_GufuncWrapper.fndescc                 C   r   rG   )r   r|   r   r3   r3   r4   r   J  r   z_GufuncWrapper.envc                 C   sR   t d}t |}t |}| jtj}t |}t t  ||||g}|S )Nr`   )	r   rn   ro   r"   rp   r   rk   rq   rr   )r   r   r   r   r   r   r   r3   r3   r4   _wrapper_function_typeN  s   



z%_GufuncWrapper._wrapper_function_typec           !         s  | j tj}|  }|d}| j| jj	| jj
}| jj}tj|||d}|jd t|||}	d|	_|	j\}
}}}d|
_d|_d|_d|_t|	d	}|j|d
d}| j |}t }| j| jfD ]}|D ]}|t|O }qkqgi }| jD ]}|D ]}||vrt|||< q~qzi }| D ]\}}|||| j tj|d g||< qg }t| jt| j }tt| j j| j| j D ]\}\}}t!| j ||
||||||	}|t|7 }|"| q|d}| #|| t$j%|||d"  fdd|D }| &||||\}} t$'|| | W d   n	1 sw   Y  |(| |)| | *|| |+  |,| |-| j. dS )z
        The LLVM IRBuilder code to create the gufunc wrapper.
        The *library* arg is the CodeLibrary to which the wrapper should
        be added.  The *name* arg is the name of the wrapper function being
        created.
        _gufunc_wrapperrc   rd   weak_odrr   re   rf   rg   rh   ri      z.returnrj   c                    s   g | ]}|  jqS r3   )get_array_at_offsetr   )rB   ar   r3   r4   rD     s    z1_GufuncWrapper._build_wrapper.<locals>.<listcomp>N)/r"   rp   r   rk   r   ru   r   rv   r{   restypeargtypesllvm_func_namer   rw   rx   r   linkager   r   r   ry   r   r   setr   r   r8   itemsr   r   r   r   r)   
GUArrayArgr   gen_prologuer   r   gen_loop_bodycbranch_or_continuebranchposition_at_endgen_epiloguer   r   r   r   )!r   r   r   r   r   r   r   r   r#   r   r   r   r   r   r$   ri   r*   unique_symsgrpsymssym_mapssym_dimr   r%   step_offsetr   symr2   bbreturnr   	innercallerrorr3   r   r4   _build_wrapperY  s|   









z_GufuncWrapper._build_wrapperc                 C   s   | j r| j t| }| || |S | j| jj	| jj
}|d u rA| j t| }|  | || | j| jj	| |S rG   )r   r"   rs   rt   rm   r   r   load_overloadr   r)   r   enable_object_cachingsave_overloadr   wrapper_namer   r3   r3   r4   _compile_wrapper  s   z_GufuncWrapper._compile_wrapperc                 C   s&   d| j j }| |}t|| j|dS )Nz__gufunc__.r   )r{   mangled_namer   r   r   r   r3   r3   r4   build  s
   
z_GufuncWrapper.buildc                 C   s   | j ||| jj| jj|\}}|j|jdd | }| jj 	||| |
| W d    n1 s5w   Y  |j|jfS )NFr   )r   r   r)   r   r   rR   is_errorr   r"   r   r   r!   )r   r$   r*   r#   r   r,   r-   r0   r3   r3   r4   r     s   z_GufuncWrapper.gen_loop_bodyc                 C      d S rG   r3   r   r$   r*   r3   r3   r4   r        z_GufuncWrapper.gen_prologuec                 C   r   rG   r3   r   r3   r3   r4   r     r   z_GufuncWrapper.gen_epilogueN)r   r   r   r   propertyr   r"   r   r)   r{   r   r   r   r   r   r   r   r   r   r3   r3   r3   r4   r   %  s,    





T
r   c                   @   s$   e Zd Zdd Zdd Zdd ZdS )_GufuncObjectWrapperc                 C   s"   t | j|||| j|\}}||fS rG   )_prepare_call_to_object_moder"   r)   )r   r$   r*   r#   r   r   r   r3   r3   r4   r     s   z"_GufuncObjectWrapper.gen_loop_bodyc                 C   s   |  | _d S rG   )r   r0   r   r3   r3   r4   r     s   z!_GufuncObjectWrapper.gen_prologuec                 C   s   | | j d S rG   )r   r0   r   r3   r3   r4   r     s   z!_GufuncObjectWrapper.gen_epilogueN)r   r   r   r   r   r   r3   r3   r3   r4   r     s    r   c                 C   s2   |j }|jtjkrtnt}|| |||||d S )N)r   )r)   r   r   r7   r   r   r   )r   r   r   r   r   r   r)   wrapclsr3   r3   r4   build_gufunc_wrapper  s   r   c           &   
   C   sX  |j }|d}| tj}| tj}	t|	}
| tj}| tj	}t
|||
|
|||g}t||d}tj|tddd}|tj| g }g }tt||jD ]\}\}}tj||dd}|| t|tjr| |}|| ||d}t|d	}t||j}||d
||g}||d||g}||j|}tt |j}t||j!}t||j"}|#|||||||g} n|$||} || | ||  t%|| }!||!| t&||!| qVtj	gt'| }"| j()||tj	|"|\}#}$||#j*| |+|$ |,| |-| |D ]}|+|.| q|#j/}%|%|.|fS )Nzufunc.core.returnnumba_ndarray_newr   r   rc   T)zfill)rU   r   shapestrides)0r~   ry   rp   r   int32rk   r   ro   voidptrr7   rq   r   get_or_insert_functionr   rn   r   r   r   r   r   r   rl   Array
make_arrayr   ndimr   _get_ptr_by_namer   rg   npdtyperm   numitemsizecallrM   is_nullr   r8   r   r   r   r:   r   r   r   r!   )&r"   r$   r*   r#   r)   r   modbb_core_returnll_intll_intpll_intp_ptr
ll_voidptrll_pyobjr   fn_array_newerror_pointerobject_argsobject_pointersr   argargtyobjptraryclsarrayr   ndre   r  rg   r  type_numr  objobj_is_null
object_sigr,   r-   r   r3   r3   r4   r     sp   











r   c                   @   s   e Zd Zdd Zdd ZdS )r   c
                    sR  || _ || _|tj|}
|j|j||
gdddd}|| _|j||
gdd}||}t|tj	r| }t
||jkrQt
|dkrH|jdkrHn	td|d |j} fdd	|D }g }t|D ]}|j||tj|| gd
d}||}|| qc|rtnt}||j|||||d| _d S |rtd||d t||d| _d S )Nzdata.ptrrc   rg   zcore.step.ptrr   r   z.type and shape signature mismatch for arg #{0}c                    s   g | ]} | qS r3   r3   )rB   r   r   r3   r4   rD   {  s    z'GUArrayArg.__init__.<locals>.<listcomp>zstep.ptrr  r	  	core_step	as_scalarr  r  z2scalar type {0} given for non scalar argument #{1}r  stride)r"   r$   r   r   rk   r   r   rg   rl   r  r8   r	  	TypeErrorformatranger   _ArrayAsScalarArgLoader_ArrayArgLoaderr  _loader_ScalarArgLoader)r   r"   r$   r   rf   r   r   r   r   r   r   rg   core_step_ptrr(  r)  r	  r  r  jstepptrr    ldclsr3   r&  r4   r   \  sZ   


zGUArrayArg.__init__c                 C   s   | j j| j| j| j|dS )N)r"   r$   rg   r]   )r1  r   r"   r$   rg   )r   r]   r3   r3   r4   r     s   zGUArrayArg.get_array_at_offsetN)r   r   r   r   r   r3   r3   r3   r4   r   [  s    ;r   c                   @   s    e Zd ZdZdd Zdd ZdS )r2  z
    Handle GFunc argument loading where a scalar type is used in the core
    function.
    Note: It still has a stride because the input to the gufunc can be an array
          for this argument.
    c                 C   s   || _ || _d S rG   r*  )r   r  r+  r3   r3   r4   r     s   
z_ScalarArgLoader.__init__c                 C   s:   | |||| jg}|||| j }||S rG   )r   mulr+  r   r   r  r   r   )r   r"   r$   rg   r]   dptrr3   r3   r4   r     s
   
z_ScalarArgLoader.loadN)r   r   r   __doc__r   r   r3   r3   r3   r4   r2    s    r2  c                   @   s(   e Zd ZdZdd Zdd Zdd ZdS )	r0  zD
    Handle GUFunc argument loading where an array is expected.
    c                 C   s(   || _ || _|| _|| _|| _|| _d S rG   r'  )r   r  r	  r(  r)  r  r  r3   r3   r4   r     s   
z_ArrayArgLoader.__init__c              	   C   s   t j| j| jdd}||}|||}t|||| j|}| 	||\}	}
|
|| j}|j||||jj|	|
|t j|d d | S )NA)r  r	  layout)rg   r  r  r  meminfo)r   r  r  r	  r  r   r   r7  r(  _shape_and_stridesr   r   populate_arrayr   rg   typer   rk   	_getvalue)r   r"   r$   rg   r]   arytypr  r   offseted_datar  r  r  r3   r3   r4   r     s.   

	z_ArrayArgLoader.loadc                 C   s$   t || j}t || j}||fS rG   )r   
pack_arrayr  r  )r   r"   r$   r  r  r3   r3   r4   r=    s   z"_ArrayArgLoader._shape_and_stridesN)r   r   r   r9  r   r   r=  r3   r3   r3   r4   r0    s
    r0  c                   @   s   e Zd ZdZdd ZdS )r/  z
    Handle GUFunc argument loading where the shape signature specifies
    a scalar "()" but a 1D array is used for the type of the core function.
    c                 C   s@   | tjd}| tjd}t||g}t||g}||fS )Nr   r   )r   r   rk   r   rC  )r   r"   r$   oner   r  r  r3   r3   r4   r=    s
   z*_ArrayAsScalarArgLoader._shape_and_stridesN)r   r   r   r9  r=  r3   r3   r3   r4   r/    s    r/  )#collectionsr   numpyr  llvmlite.irr   r   llvmliter   
numba.corer   r   numba.core.compiler_lockr   numba.core.cachingr	   r
   r   r5   r=   rL   r[   r_   r   objectr   r   r   r   r   r   r   r2  r0  r/  r3   r3   r3   r4   <module>   s2      o) <
`A+