o
    i *                     @   sV  U d Z ddlZddlZddlZddlmZ ddlmZ ddlm	Z	 ddl
mZ ddl
mZ ddl
mZ dd	lmZ ddlZdd
lmZ eejedk rRedddlZddlmZ ddlmZ ddlmZ ddlmZ ddlm Z  dddddZ!e"e#ef e$d< eeejj%edkre!d= de!d< dZ&dZ'ej(dd Z)G dd  d Z*G d!d" d"Z+	d@d#ej,j-d$e"e#ef dB fd%d&Z.	'dAd(e+d)ej/j0d*e#fd+d,Z1d)ej/j0fd-d.Z2ej(	'dAd)ej/j0ed/ef B d*e#fd0d1Z3G d2d3 d3Z4G d4d5 d5Z5d)ej/j0d6ej/j0fd7d8Z6d@d)ej/j0d9e#d:e#dB fd;d<Z7d=e#d:e#dB fd>d?Z8dS )B
    N)
ContextVar)BytesIO)Path)Any)Callable)cast)patch)versionz2.8z"ZeroGPU AoTI reuqires PyTorch 2.8+)!unwrap_tensor_subclass_parameters)package_aoti)AOTICompiledModel)Weights   )register_cleanupFT)z$aot_inductor.package_constants_in_so&aot_inductor.package_constants_on_diskzaot_inductor.packagealways_keep_tensor_constantsINDUCTOR_CONFIGS_OVERRIDESz2.10r   pickle_weightsz-aot_inductor.package_constants_on_disk_formatz2/tmp/*/archive/data/aotinductor/model/*.wrapper.sozpackage.pt2c                  c   s    t  } td|  d}dd | D }dV  | D ] }|j|vr=|  }tr=t|jdd  }t	| |  S qdS )z
    PyTorch already cleans-up extracted archives in /tmp
    But the GPU worker never terminates gracefully in ZeroGPU so cleanup must be done manually
    z/proc/z
/map_filesc                 S   s   h | ]}|j qS  )name).0fr   r   J/home/ubuntu/.local/lib/python3.10/site-packages/spaces/zero/torch/aoti.py	<setcomp>4   s    z)_register_aoti_cleanup.<locals>.<setcomp>N   )
osgetpidr   iterdirr   readlinkmatchARCHIVE_SO_PATTERNpartsr   )pid	map_filesmaps_beforemap_filemappedpackage_pathr   r   r   _register_aoti_cleanup,   s   
r)   c                   @   s2   e Zd Zd	deeejf defddZdd Z	dS )
ZeroGPUWeightsFconstants_mapto_cudac                 C   s&   |rdd |  D | _d S || _d S )Nc                 S   s   i | ]
\}}|| d qS )cuda)tor   r   tensorr   r   r   
<dictcomp>@       z+ZeroGPUWeights.__init__.<locals>.<dictcomp>)itemsr+   )selfr+   r,   r   r   r   __init__>   s   
zZeroGPUWeights.__init__c                 C   sL   i }| j  D ]\}}tj|dd }||  ||< qt|dffS )NcpudeviceT)	r+   r3   torch
empty_like
pin_memorycopy_detachshare_memory_r*   )r4   r+   r   r0   tensor_r   r   r   
__reduce__C   s
   zZeroGPUWeights.__reduce__N)F)
__name__
__module____qualname__dictstrr9   Tensorboolr5   r@   r   r   r   r   r*   =   s    r*   c                   @   s2   e Zd ZdejjdefddZdd Zdd Z	d	S )
ZeroGPUCompiledModelarchive_fileweightsc                 C   s   || _ || _tdd d| _d S )Ncompiled_modeldefault)rI   rJ   r   rK   )r4   rI   rJ   r   r   r   r5   L   s   zZeroGPUCompiledModel.__init__c                    s    j   }d u rAt  tj j}W d    n1 sw   Y  tt|} fdd|	 D }|j
|ddd  j | ||i |S )Nc                    s   i | ]	}| j j| qS r   )rJ   r+   r   r   r4   r   r   r1   U   s    z1ZeroGPUCompiledModel.__call__.<locals>.<dictcomp>Tcheck_full_updateuser_managed)rK   getr)   r9   	_inductoraoti_load_packagerI   r   r   get_constant_fqnsload_constantsset)r4   argskwargsrK   constant_mapr   rO   r   __call__P   s   
zZeroGPUCompiledModel.__call__c                 C   s   t | j| jffS N)rH   rI   rJ   rO   r   r   r   r@   Y   s   zZeroGPUCompiledModel.__reduce__N)
rA   rB   rC   r9   typesFileLiker*   r5   r\   r@   r   r   r   r   rH   K   s    	rH   exported_programinductor_configsc           	         s   i |pi t }ttjj|  }| jd usJ | j\}}tjj||||d}tt	t
tB  |}t }dd |D }t|t	| dd |D \ tt  t fdd D }t||S )N)optionsc                 s       | ]
}t |tr|V  qd S r]   )
isinstancerE   )r   filer   r   r   	<genexpr>h       zaoti_compile.<locals>.<genexpr>c                 s   rc   r]   )rd   r   )r   artifactr   r   r   rf   j   rg   c                    s   i | ]
}|  |d  qS )r   )
get_weightrN   rJ   r   r   r1   l   r2   z aoti_compile.<locals>.<dictcomp>)r   r   r9   fxGraphModulemoduleexample_inputsrT   aot_compilelistrE   r   r   r   r*   rH   )	r`   ra   gmrY   rZ   	artifactsrI   fileszerogpu_weightsr   rj   r   aoti_compile]   s   


ru   forwardcompiledrm   call_methodc                 C   s   t |||  t| d S r]   )setattrdrain_module_parameters)rw   rm   rx   r   r   r   
aoti_applyp   s   r{   c                 C   sp   dd |    D }dd |    D }| j|dd | D ]\}}|| }tg jdi ||_q!d S )Nc                 S   s    i | ]\}}||j |jd qS )r8   dtyper|   r/   r   r   r   r1   z   s     z+drain_module_parameters.<locals>.<dictcomp>c              	   S   s(   i | ]\}}|t jt j|d dqS )r6   r7   )r9   nn	Parameterr:   r/   r   r   r   r1   {   s   ( T)assignr   )
state_dictr3   load_state_dictr9   rF   r.   data)rm   state_dict_metar   r   parammetar   r   r   rz   y   s   rz   .c                 #   s    G dd dt  G dd d}| } fdd}tj| ||d1 z|V  W n  yA } z|j|_|j|_W Y d }~nd }~ww W d    d S W d    d S 1 sUw   Y  d S )Nc                       s   e Zd Z fddZ  ZS )z+aoti_capture.<locals>.CapturedCallExceptionc                    s   t    || _|| _d S r]   )superr5   rY   rZ   )r4   rY   rZ   	__class__r   r   r5      s   

z4aoti_capture.<locals>.CapturedCallException.__init__)rA   rB   rC   r5   __classcell__r   r   r   r   CapturedCallException   s    r   c                   @   s   e Zd Zdd ZdS )z"aoti_capture.<locals>.CapturedCallc                 S   s   d| _ i | _d S )Nr   rY   rZ   rO   r   r   r   r5      s   
z+aoti_capture.<locals>.CapturedCall.__init__N)rA   rB   rC   r5   r   r   r   r   CapturedCall   s    r   c                     s    | i |r]   r   r   r   r   r   capture_call   s   z"aoti_capture.<locals>.capture_call)new)	Exceptionr	   objectrY   rZ   )rm   rx   r   captured_callr   er   r   r   aoti_capture   s"   
"r   c                   @   sR   e Zd ZdejjfddZdeeej	f de
fddZdeeej	f fdd	Zd
S )LazyAOTIModelrI   c                 C   s&   || _ tdd d| _tdd d| _d S )NrK   rL   loaded_weights)rI   r   rK   r   )r4   rI   r   r   r   r5      s   zLazyAOTIModel.__init__rJ   rQ   c                 O   s   | j   }d u r.t  tj| j}W d    n1 sw   Y  tt|}| j 	| | j
  }d u s;||urI|j||dd | j
	| ||i |S )NTrP   )rK   rS   r)   r9   rT   rU   rI   r   r   rX   r   rW   )r4   rJ   rQ   rY   rZ   rK   r   r   r   r   r\      s   
zLazyAOTIModel.__call__c                 C   s
   t | |S r]   )LazyAOTIModelWithWeights)r4   rJ   r   r   r   with_weights   s   
zLazyAOTIModel.with_weightsN)rA   rB   rC   r9   r^   r_   r5   rD   rE   rF   rG   r\   r   r   r   r   r   r      s    
r   c                   @   s0   e Zd Zdedeeejf fddZdd Z	dS )r   modelrJ   c                 C   s   || _ || _d| _d S )NT)r   rJ   
first_call)r4   r   rJ   r   r   r   r5      s   
z!LazyAOTIModelWithWeights.__init__c                 O   s(   | j }d| _ | j| j|g|R i |S )NF)r   r   rJ   )r4   rY   rZ   rQ   r   r   r   r\      s   z!LazyAOTIModelWithWeights.__call__N)
rA   rB   rC   r   rD   rE   r9   rF   r5   r\   r   r   r   r   r      s    r   returnc                 C   sJ   t | j}| j |_| j |_| j |_dd | j D |_|S )Nc                 S   s"   i | ]\}}|d ur|t |qS r]   )_shallow_clone_module)r   kvr   r   r   r1      s   " z)_shallow_clone_module.<locals>.<dictcomp>)	r   __new__r   __dict__copy_parameters_buffers_modulesr3   )rm   cloner   r   r   r      s   r   repo_idvariantc                    s0  ddl m  ddlm} t| dd }du rtdz td d}W n |y2   d}Y nw t|dur?t	|
 nd	}|d
d }dur^|D ]}ddlm}	 |	di | qNttt |} fdd|D }
|
 D ]!\}}|  D ]}|jj|krt|}t| || |_q|qtdS )aq  
    Loads AOTI-compiled blocks for a given module from the Hugging Face Hub.
    This function expects the module to expose a `_repeated_blocks` attribute.
    This attribute is present on most models from the diffusers library.

    Args:
        module (torch.nn.Module): The module containing repeated blocks to be replaced.
        repo_id (str): The Hugging Face Hub repository ID where the compiled blocks are stored.
        variant (str | None, optional): Variant suffix to append to block names. Defaults to None.

    Returns:
        None: The function mutates the given module in place.

    Example:
        >>> import spaces
        >>> import torch
        >>> from diffusers import FluxPipeline
        >>> pipeline = FluxPipeline.from_pretrained('black-forest-labs/FLUX.1-dev')
        >>> spaces.aoti_blocks_load(pipeline.transformer, 'zerogpu-aoti/FLUX.1')
    r   )hf_hub_download)EntryNotFoundError_repeated_blocksNzEaoti_blocks_load only works with modules that expose _repeated_blocksconfigz.jsonz{}kernels)
get_kernelc              
      s&   i | ]}|t  tt|d qS ))r   filename	subfolder)r   PACKAGE_FILENAME_variantrN   r   r   r   r   r   r1      s    z$aoti_blocks_load.<locals>.<dictcomp>r   )huggingface_hubr   huggingface_hub.errorsr   getattrRuntimeErrorr   jsonloadsr   	read_textrS   r   r   r   rp   rE   r3   modulesr   rA   r   r   r   r   rv   )rm   r   r   r   repeated_blocksconfig_pathr   kernels_configkernels_kwargsr   aoti_models
block_name
aoti_modelblockblock_r   r   r   aoti_blocks_load   s6   r   r   c                 C   s   |r	|  d| S | S )N.r   )r   r   r   r   r   r      s   r   r]   )rv   )9__doc__
contextlibr   r   contextvarsr   ior   pathlibr   typingr   r   r   unittest.mockr	   r9   	packagingr
   parse__version__r   torch._inductor.codecache7torch._functorch._aot_autograd.subclass_parametrizationr   torch._inductor.package.packager   !torch.export.pt2_archive._packager   )torch.export.pt2_archive._package_weightsr   utilsr   r   rD   rE   __annotations__base_versionr!   r   contextmanagerr)   r*   rH   exportExportedProgramru   r~   Moduler{   rz   r   r   r   r   r   r   r   r   r   r   <module>   s|    


		 	7