o
    Ti                     @   s   d dl mZmZ d dlZd dlZd dlmZmZmZ zd dl	m
Z
mZmZmZmZmZmZmZmZ W n	 ey;   Y nw ddlmZ ejjjjhZdeded	ee fd
dZdd Zdeeeeejf  fddZdeeeeejf  fddZ dS )    )TupleListN)GraphModuleGraphNode)	is_sym_node
_is_primal_is_fwd_seed_offset_extract_fwd_bwd_outputs"_extract_graph_with_inputs_outputs_extract_fwd_bwd_moduleshas_recomputable_ops#min_cut_rematerialization_partitionchoose_saved_values_set   )get_no_copy_opsgraphds_param_nodereturnc                    s\   t  }t  | jD ]"}|j|v r+||jv r | t fdd|jD r+ | q	 S )z
    Given a graph and a node that represents a parameter that was allgathered,
    find all nodes that use the parameter and require recomputation.
    c                 3   s    | ]}| v V  qd S N .0arecompute_nodesr   Q/home/ubuntu/.local/lib/python3.10/site-packages/deepspeed/compile/partitioner.py	<genexpr>#       z(_find_recompute_nodes.<locals>.<genexpr>)r   setnodestargetargsaddany)r   r   no_copy_opsnoder   r   r   _find_recompute_nodes   s   




r'   c                    s   t tt| jfdd|D  t }t  i | jD ]-}|j|v rJt fdd|jD rJ|jD ]}| v r?||< q4|v rI| |< q4qS )Nc                       g | ]	\}}} | qS r   r   r   arg_idx_)primal_inputsr   r   
<listcomp>+       z._get_values_from_ds_params.<locals>.<listcomp>c                 3   s     | ]}| v p|v V  qd S r   r   r   )ds_param_inputsds_param_usersr   r   r   3   s    z-_get_values_from_ds_params.<locals>.<genexpr>)	listfilterr   r    r   r   r!   r$   r"   )joint_graphparam_indicesr%   r&   r   r   )r/   r0   r,   r   _get_values_from_ds_params)   s   
$

r5   r4   c                    s$   ddt jjdtt f fdd}|S )Nr   r3   r   c                    sV   t | ||}t|  }g }|D ]}||v r#|| }||vr"|| q|| q|S r   )r   r5   append)r3   	node_infomemory_budgetsaved_valuesr0   new_saved_valuesvds_valr4   r   r   ds_choose_saved_values_set?   s   

zGget_wrapped_choose_saved_values_set.<locals>.ds_choose_saved_values_set)r   )torchfxr   r   r   )r4   r>   r   r=   r   #get_wrapped_choose_saved_values_set=   s    rA   c                    s"   dt dtt t f f fdd}|S )Njoint_moduler   c                   s  t | rt| ||dS ttt| jj}ttt| jj}|| }t| |d\}}t	| j||d}dd |jD  g }	g }
ttt|jfddD }dd |D }t
 }| jjD ]}|j vrdq\t|rn|
| q\d|jvr|jd	kr|j}td
d |D sJ |	| q\ fdd|jD }d|jv rtdd |D r|
| |j|v r|	| t| j|} fdd|D }|D ]}|| qt|dkr|	| q\||vr|	| q\tt|	 }	tt|
 }
t| |	|
|d\}}||fS )z
        This is basically the same as the default_partition function, but
        it doesn't save the gathered params and values computed from them.
        )num_fwd_outputsforwardc                 S   s   h | ]
}|j d kr|jqS )output)opnamer   r&   r   r   r   	<setcomp>a   s    zQget_wrapped_partitioner.<locals>.partition_recompute_ds_params.<locals>.<setcomp>c                    r(   r   r   r)   )
fwd_inputsr   r   r-   f   r.   zRget_wrapped_partitioner.<locals>.partition_recompute_ds_params.<locals>.<listcomp>c                 S   s   h | ]}|j qS r   rG   rH   r   r   r   rI   g   s    tensor_metacall_functionc                 s   s    | ]	}|j tjkV  qd S r   )r!   operatorgetitem)r   userr   r   r   r   v   s    zQget_wrapped_partitioner.<locals>.partition_recompute_ds_params.<locals>.<genexpr>c                    s   g | ]	}|j  vr|qS r   rK   r   nforward_node_namesr   r   r-   y   r.   c                 s   s    | ]}t |V  qd S r   )r   rQ   r   r   r   r   {   r   c                    s   g | ]	}|j  v r|qS r   rK   rQ   rS   r   r   r-      r.   r   )saved_sym_nodesrC   )r   r   r1   r2   r   r   r    r	   r
   r   r   rG   r   r6   metarF   usersallextendr'   r#   lendictfromkeyskeysr   )rB   _joint_inputsrC   r,   fwd_seed_offset_inputsinputsfwd_outputsbwd_outputsforward_only_graphr9   rU   r/   ds_param_input_namesds_param_recompute_nodesr&   rW   backward_usagesr   recompute_nodef_gmb_gmr=   )rT   rJ   r   partition_recompute_ds_paramsS   s^   

	




z>get_wrapped_partitioner.<locals>.partition_recompute_ds_params)r   r   )r4   rj   r   r=   r   get_wrapped_partitionerQ   s   
Krk   )!typingr   r   rN   r?   torch.fxr   r   r   torch._functorch.partitionersr   r   r	   r
   r   r   r   r   r   ImportErrorutilr   opsatentdefault_recompute_opsr'   r5   intSizerA   rk   r   r   r   r   <module>   s   0"