o
    ԰i,j                  	   @   sx  d Z ddlmZ ddlZddlZddlZddlZddlmZm	Z	m
Z
mZmZmZ ddlZddlZddlmZmZ zddlZW n eyI   dZY nw g dZeZdaeeZdejdd	d
ejfddZdeeejd	f de	eejf de	eejf d
efddZdedejde d
ed	 fddZ!d"dejded
d	fddZ"d#ded d
efddZ#dd Z$e%e$ G d d! d!Z&dS )$a  Wrapper for cuDNN frontend to improve user experience.

This wrapper provides a more user-friendly interface for cuDNN frontend.
It allows users to create a graph, add operations to the graph, and then
compile the graph to a cuDNN plan. This wrapper is designed to avoid
boilerplate code.

Key Features:
    - Automatic graph validation and compilation
    - Simplified tensor management
    - Support for both named and positional tensor inputs
    - Automatic workspace management
    - PyTorch integration with DLPack support

Example:
    >>> x = torch.randn(8, 56, 56, 64, device=device, dtype=torch.float16).permute(0, 3, 1, 2)
    >>> w = torch.randn(32, 3, 3, 64, device=device, dtype=torch.float16).permute(0, 3, 1, 2)
    >>> with Graph() as graph:
    ...     y = graph.conv_fprop(
    ...         image=x, weight=w,
    ...         padding=[1,1], stride=[1,1], dilation=[1,1],
    ...         compute_data_type=data_type.FLOAT,
    ...         name="conv2d",
    ...     )
    ...     y.set_output(True).set_data_type(data_type.HALF)
    ...     # Graph is automatically validated and compiled on exit
    >>> graph.set_io_tuples(["conv2d::image", "conv2d::weight"], ["conv2d::Y"])
    >>> # Execute the graph
    >>> output = graph(x, w)
    )OrderedDictN)AnyDictListOptionalTupleUnion)	data_type	heur_mode)Graphr	   r
   cudnngraphtensortorch.Tensorreturnc                 C   s(   t |dr|jr| | S | |S )a  Create a tensor in the graph object.

    Args:
        graph: The cuDNN graph object to create the tensor in
        tensor: The dlpack tensor to create a graph tensor from

    Returns:
        A cuDNN tensor object representing the input tensor in the graph

    Note:
        If the input tensor has requires_grad=True, it will be detached
        before creating the graph tensor to avoid gradient tracking issues.
    requires_grad)hasattrr   tensor_likedetach)r   r    r   A/home/ubuntu/.local/lib/python3.10/site-packages/cudnn/wrapper.py_graph_tensor:   s   
r   
tensor_map
dlpack_mapc                 C   s  t | tr"| |v r| S | D ]\}}| | kr|  S qtdt | tr>| D ]\}}| | kr9|  S q+tdt | tjrY| D ]\}}| |u rT|  S qHtdt| drt |t	rt
| |v r|t
|  } | D ]\}}|| kr|  S qstd)a7  Find the mapping name for a tensor used in a graph.

    This function searches for a tensor in the tensor map and returns its
    corresponding name. The tensor can be specified in multiple ways:
    - As a string (either the assigned tensor name or the node::input_name)
    - As a cuDNN tensor object
    - As a DLPack-compatible tensor (e.g., PyTorch tensor) that was used in creating the graph

    Args:
        tensor: The tensor to find, can be a string name, cuDNN tensor, or DLPack tensor
        tensor_map: Dictionary mapping tensor names to cuDNN tensor objects
        dlpack_map: Dictionary mapping DLPack tensor IDs to cuDNN tensor objects

    Returns:
        The key in tensor_map that the provided tensor is mapped to

    Raises:
        ValueError: If the tensor cannot be found in the tensor map
    
__dlpack__zInput not found in tensor map)
isinstancestritemsget_nameintget_uidr   r   r   dictid
ValueError)r   r   r   tensor_nametensor_valuer   r   r   _find_tensorO   s6   

 r&   namearg_dictc                 C   sf   | |v r||  S ||v r|| S z||   W S  ty    Y nw z||  W S  ty2   Y dS w )a3  Extract a dlpack tensor from the arg_dict that matches the provided name or cudnn tensor

    Args:
        name: The name of the tensor to extract
        tensor: The cudnn tensor to extract
        arg_dict: The dictionary of arguments to extract the tensor from

    Returns:
        A dlpack tensor
    N)r   KeyErrorr    )r'   r   r(   r   r   r   _extract_tensor   s   r*   pytcudnn_tensortensor_typec                 C   sz   |dkrt dtj stdtj|  }|du r&td|   tj	| 
 d|d}t|| 
 |  }|S )a0  Create a tensor like the provided cudnn tensor

    Args:
        cudnn_tensor: The cuDNN tensor to create a dlpack tensor from
        tensor_type: The type of tensor to create, currently only "pyt" is supported

    Returns:
        A dlpack tensor allocated that is like the provided cuDNN tensor
    r+   z(Only PyTorch tensor is supported for nowPyTorch is not availableNz0cuDNN uses an unsupported data type in PyTorch: cudadevicedtype)NotImplementedErrorr   	datatypesis_torch_availableRuntimeError_cudnn_to_torch_data_typeget_data_type	TypeErrortorchemptyget_dim
as_strided
get_stride)r,   r-   r2   r   r   r   r   _tensor_like   s   

r?   streamztorch.cuda.Streamc                 C   sF   t du rtdtdu rt a| du rt j j} tjt| d tS )z>Get the default cuDNN handle and set to torch's current streamNr.   )handler@   )	r:   r6   _default_cudnn_handler   create_handler/   current_streamcuda_stream
set_stream)r@   r   r   r   get_default_handle   s   rG   c                   C   s   t d urtt  d S d S N)rB   r   destroy_handler   r   r   r   destroy_default_handle   s   rJ   c                   @   s*  e Zd ZU dZdZee ed< dddddddee deee	e
dejf   d	eee	e
dejf   d
eee  deddfddZdd Zdd Zdd Zde
defddZdd Zde	ded f fddZdee
df dee
df fddZdee	e
dejf  d	ee	e
dejf  ddfd d!ZdS )"r   a  Wrapper object for cuDNN computation graph

    This class simplifies the process of creating, compiling, and executing
    cuDNN computation graphs. It handles common boilerplate code and provides
    a more Pythonic interface to the cuDNN frontend API.

    Key features:
    - Automatic graph validation and compilation
    - Simplified tensor management with PyTorch integration
    - Support for both named and positional tensor inputs
    - Automatic workspace management

    Note:
        The graph is automatically validated and compiled when exiting the
        context manager. Any errors in graph construction will be raised
        at that point.
    N_Graph__handleT)rA   inputsoutputs
heuristicsworkspace_allocrA   rL   r   rM   rN   rO   r   c                K   s   t d u rtd|rt|ttfstd|r"t|ttfs"td|r/t|ttfs/tdt dk r9td|| _d | _	i | _
t | _t | _g | _i | _t | _d | _d | _|p]g | _|pbg | _|pktjtjg| _|srd| _|rw|| _dD ]}||v rtj|| p|| ||< qyd S )	Nr.   inputs must be a list or tupleoutputs must be a list or tuplez"heuristics must be a list or tuplei@d z*cuDNN version 9.12.0 or higher is requiredF)io_data_typeintermediate_data_typecompute_data_type)r:   r6   r   listtupler#   r   backend_version_Graph__kwargs_Graph__graph_Graph__tensor_mapr   _Graph__tensor_in_Graph__tensor_out_Graph__tensor_unknown_Graph__node_countset_Graph__node_names_Graph__input_tuples_Graph__output_tuples_Graph__inputs_Graph__outputsr
   AFALLBACK_Graph__heuristics_Graph__workspacerK   r4   _torch_to_cudnn_data_type)selfrA   rL   rM   rN   rO   kwargskeyr   r   r   __init__   s@   


zGraph.__init__c                 C   s   d S rH   r   rj   r   r   r   __del__  s   zGraph.__del__c                 C   sB   | j d ur	tdtjdi | jdvrd| jini | j| _ | S )NzGraph already created)autoNrA   r   )rY   r6   r   pygraphrK   rX   rn   r   r   r   	__enter__  s   
zGraph.__enter__c                 C   s   |durt d| d| _ | j  | j  | j| j | j  | j  t	| ds;t
j| j dt
jd| _| jsA| jrI| | j| j | `| `t d| j t d| j t d| j | jS )	a%  Exit the context manager, validating and compiling the graph.

        This method performs the following steps:
        1. Validates the graph structure
        2. Builds the operation graph
        3. Creates execution plans
        4. Checks hardware support
        5. Builds the final plans
        6. Allocates workspace memory

        Raises:
            ValidationError: If graph validation fails
            GraphStateError: If graph operations are performed in invalid order
            CudnnError: For other cuDNN-related errors
        Nz'Exception during graph construction: %s__workspacer/   r0   z
Inputs: %szOutputs: %szNode count: %s)loggererrorrY   validatebuild_operation_graphcreate_execution_plansrg   check_supportbuild_plansr   r:   r;   get_workspace_sizeuint8rh   rc   rd   set_io_tuplesdebugr[   r\   r^   )rj   exc_type	exc_valuetbr   r   r   __exit__  s,   




zGraph.__exit__r'   c                    s|   t j g d}|v r S g d}|v r td dt s' S  fdd}dv r4|S  fdd	}|S )
N)r{    get_workspace_size_plan_at_index	serializedeserializequery_tensor_attributes_of_uid)buildrw   build_plan_at_indexrz   ry   create_execution_planrx   deselect_behavior_notesdeselect_enginesdeselect_numeric_notesdeselect_workspace_greater_thanexecuteexecute_plan_at_indexget_behavior_notes$get_behavior_notes_for_plan_at_indexget_engine_countget_execution_plan_countget_knobs_for_engineget_plan_name_at_indexrl   populate_cuda_graphr   select_behavior_notesselect_numeric_notesupdate_cuda_graphrv   zCalling z via wrapper is not allowedc                     s    | i |}j | |S rH   )r]   append)argsrk   output)attrrj   r   r   tensor_capturem  s   z)Graph.__getattr__.<locals>.tensor_capture)r   r   c            
         s  t | } jvrdj< j  d7  < d|v r |d }n dj d  }||d< |jv r=td| dj| t| D ]4\}}t|drkt|}|jvrbt	j
|j|< j|  }| |< t|tjr{|j| d| < qG| D ]4\}}t|drt|}|jvrt	j
|j|< j|  }||< t|tjr|j| d| < q | i |}t|tjr|g}n	t|t tfr|}t|D ]$\}}t|tjrt|d	r| r| }	n| d| }	|j|	< q|S )
Nr      r'   .z
Node name z already usedr   z::r   )rU   r^   r`   r#   add	enumerater   r"   rZ   r   rY   r   r   r   r[   r   rV   r   r\   )
r   rk   	node_nameiobjobj_idrl   r   output_listr$   r   r'   rj   r   r   wrapperv  sT   









z"Graph.__getattr__.<locals>.wrapper)getattrrY   r6   inspectismethod)rj   r'   pass_throughblocked_methodsr   r   r   r   r   __getattr__;  s   
/zGraph.__getattr__c                 O   s   | j du r	td| j  stdt|dkr*t|d tr*| j|d fi |S t|dkr7| js7tdt|t| jkrPtdt| dt| j d	| j	|i |S )
z"Execute the graph with tensor dictNzGraph not createdz8You should not invoke the graph before the context exitsr   r   zXYou should not invoke the graph with positional arguments before running set_io_tuples()zNumber of arguments (z#) does not match number of inputs ())
rY   r6   r   lenr   r!   _Graph__call_with_tensor_dictra   r#   !_Graph__call_with_positional_args)rj   r   rk   r   r   r   __call__  s   

 zGraph.__call__)r   .c                 O   s   i }t | j|D ]
\}}||| < qdd | jD }t | j|D ]
\}}||| < q!t|}d|vrN| jdkr?t |d< n| jdurJ| j|d< ntdd|vr`| jdu r[td	| j|d< | j	j
|fi | t|d
krt|d S |S )a?  Execute the graph with positional arguments.

        Args:
            *args: Positional arguments to pass to the graph
            **kwargs: Additional keyword arguments to pass to the graph execution

        Returns:
            A single tensor or a tuple of tensors

        Note:
            This method is called by __call__() when the graph is executed with positional arguments.
            It is not intended to be called directly by the user. The `args` should be a list of dlpack tensors
            that matches the input order of `self.__input_tuples`.
        c                 S   s   g | ]}t |d qS r+   )r?   ).0r,   r   r   r   
<listcomp>  s    z5Graph.__call_with_positional_args.<locals>.<listcomp>rA   rp   N-Need to specify cudnn handle to execute graph	workspaceF*Need to specify workspace to execute graphr   r   )zipra   r    rb   r!   rK   rG   r6   rh   rY   r   r   )rj   r   rk   variant_packr,   user_tensoroutput_tupler   r   r   __call_with_positional_args  s*   



z!Graph.__call_with_positional_argstensor_dictc           
      K   s  	 i }i }t | j | j D ]1\}}| |v s| r qt|||}|du r/|||< qt|ds<t	d| d||| < qg }g }	| D ](\}}| |v rVqK|| jv rnt
|d || < ||< |	| qK|| qK|r}t	d| |	rtd|	 t|}d|vr| jd	krt |d< n| jdur| j|d< nt	d
d|vr| jdu rt	d| j|d< | jj|fi | |S )a  Execute the graph with a dictionary of tensors.

        Args:
            tensor_dict: Dictionary of tensor names to tensors
            **kwargs: Additional keyword arguments to pass to the graph execution

        Returns:
            Dictionary of tensor names to tensors

        Raises:
            RuntimeError: If a non-virtual tensor in the graph is not found in
            `tensor_dict`, or the tensor in `tensor_dict` is not a dlpack tensor
        Nr   zTensor z# is not provided as a dlpack tensorr+   z5Non-virtual input tensors not found in variant pack: zAdded output tensors: %srA   rp   r   r   Fr   )	itertoolschainr[   r   r\   r    get_is_virtualr*   r   r6   r?   r   rt   r~   r!   rK   rG   rh   rY   r   )
rj   r   rk   r   missing_tensorsr'   r   r   missing_inputsmissing_outputsr   r   r   __call_with_tensor_dict  sN    





zGraph.__call_with_tensor_dictc           	   
   C   s\  t |ttfstdt |ttfstd| j s#|| _|| _dS g }t }t	|D ]O\}}z8|| j
v r=|| W q,t|| j| j}| j| }t||v r[td| d| d|t| || W q, ty{   td| d| ddw g }t	|D ]O\}}z8|| j
v r|| W qt|| j| j}| j| }t||v rtd| d| d|t| || W q ty   td| d| ddw t	|D ]\}}| rtd| d	q| j D ]\}}| s||vrtd
| dq| j D ]\}}| s t||vr td| dqt|| _t|| _dS )a4  Set order of input and output tensors to allow graph to be executed with positional arguments.

        Args:
            inputs: List of input tensors or names
            outputs: List of output tensors or names

        Raises:
            ValueError: If inputs or outputs are not lists or tuples
        rP   rQ   NzInput at index z (z) is a duplicatez) not found in tensor mapzOutput at index z is a virtual tensorzNode output z4 is a non-virtual tensor but not specified as outputzNode input z= is a non-virtual tensor but not specified as input or output)r   rU   rV   r#   rY   r   rc   rd   r_   r   r]   r   r&   r[   rZ   r"   r   r\   r   r   ra   rb   )	rj   rL   rM   input_tensorstensors_foundr   r'   r   output_tensorsr   r   r   r}   +  sj   







zGraph.set_io_tuples)__name__
__module____qualname____doc__rK   r   CudnnHandle__annotations__r   r   r   r   r   r
   boolrm   ro   rr   r   r   r   r   r   r   r   r   r}   r   r   r   r   r      sN   
 
	
*
,l-


Hr   r   rH   )'r   collectionsr   atexitr   r   loggingtypingr   r   r   r   r   r   r   cudnn.datatypesr	   r
   r:   ImportError__all__r   r   rB   	getLoggerr   rt   rq   r   r   r   r&   r!   r*   r?   rG   rJ   registerr   r   r   r   r   <module>   sF     

 0
