o
    xi;                     @  s  d Z ddlmZ ddlZddlZddlmZmZ 	 ddl	Z	ddl
mZ ddlmZ ddlmZ d	d
lmZmZ d	dlmZ d	dlmZmZmZmZmZ ddlmZmZmZ ee Z!G dd deZ"G dd deZ#d8ddZ$	d9d:d*d+Z%	d9d;d1d2Z&d<d6d7Z'dS )=z,Implementation of the SageMakerRunner class.    )annotationsN)AnycastF)Api)AwsEnvironmentLaunchError   )
EntryPointLaunchProject)AbstractRegistry)
LOG_PREFIXMAX_ENV_LENGTHSPROJECT_SYNCHRONOUSevent_loop_thread_execto_camel_case   )AbstractRunAbstractRunnerStatusc                      s\   e Zd ZdZ	dd fddZedddZdddZdddZdddZ	d ddZ
  ZS )!SagemakerSubmittedRunzrInstance of ``AbstractRun`` corresponding to a subprocess launched to run an entry point command on aws sagemaker.Ntraining_job_namestrclientboto3.Client
log_clientboto3.Client | NonereturnNonec                   s*   t    || _|| _|| _td| _d S )Nrunning)super__init__r   r   r   r   _status)selfr   r   r   	__class__ \/home/ubuntu/.local/lib/python3.10/site-packages/wandb/sdk/launch/runner/sagemaker_runner.pyr!   "   s
   
zSagemakerSubmittedRun.__init__c                 C  s   d| j  S )Nz
sagemaker-)r   )r#   r&   r&   r'   id.   s   zSagemakerSubmittedRun.id
str | Nonec              
     s  | j d u rd S zNt| j j}|d| jdI d H }t|d dkr-td| j  W d S |d d d }t| j j}|d|dI d H }d|v sJJ d	d
d |d D W S  | j j	j
yl   td| j  Y d S  ty } ztd| j dt|  W Y d }~d S d }~ww )Nz/aws/sagemaker/TrainingJobs)logGroupNamelogStreamNamePrefix
logStreamsr   z%Failed to get logs for training job: logStreamName)r*   r-   events
c                 S  s"   g | ]}|d   d|d  qS )	timestamp:messager&   ).0eventr&   r&   r'   
<listcomp>J   s   " z2SagemakerSubmittedRun.get_logs.<locals>.<listcomp>z(Failed to handle logs for training job: z with error )r   r   describe_log_streamsr   lenwandbtermwarnget_log_eventsjoin
exceptionsResourceNotFoundException	Exceptionr   )r#   r6   describe_reslog_namer:   reser&   r&   r'   get_logs2   sL   


zSagemakerSubmittedRun.get_logsboolc                   sT   	 |   I d H j}tt d| j d|  |dv r!	 |dkS tdI d H  q)NTzTraining job z	 status: )stoppedfailedfinished   rG   )
get_statusstater8   termlogr   r   asynciosleep)r#   status_stater&   r&   r'   waitW   s   zSagemakerSubmittedRun.waitc                   s@   |   I d H }|jdkr| jj| jd |  I d H  d S d S )Nr   TrainingJobName)rI   rJ   r   stop_training_jobr   rO   )r#   statusr&   r&   r'   cancelb   s   
zSagemakerSubmittedRun.cancelr   c                   s   t | jj}|| jdI d H d }|dks|dkr"td| _| jS |dkr.td| _| jS |dkr:td	| _| jS |d
krCtd| _| jS )NrP   TrainingJobStatus	CompletedStoppedrG   FailedrF   Stoppingstopping
InProgressr   )r   r   describe_training_jobr   r   r"   )r#   r\   
job_statusr&   r&   r'   rI   i   s&   



z SagemakerSubmittedRun.get_statusN)r   r   r   r   r   r   r   r   )r   r   )r   r)   )r   rD   )r   r   )r   r   )__name__
__module____qualname____doc__r!   propertyr(   rC   rO   rT   rI   __classcell__r&   r&   r$   r'   r      s    

%
r   c                      s,   e Zd ZdZd fddZdddZ  ZS )SageMakerRunnerz?Runner class, uses a project to create a SagemakerSubmittedRun.apir   backend_configdict[str, Any]environmentr   registryr   r   r   c                   s   t  || || _|| _dS )a;  Initialize the SagemakerRunner.

        Arguments:
            api (Api): The API instance.
            backend_config (Dict[str, Any]): The backend configuration.
            environment (AwsEnvironment): The AWS environment.

        Raises:
            LaunchError: If the runner cannot be initialized.
        N)r    r!   ri   rj   )r#   rf   rg   ri   rj   r$   r&   r'   r!   ~   s   
zSageMakerRunner.__init__launch_projectr   	image_urir   AbstractRun | Nonec                   sl  t d |jd}|du rtd| jdi d}|dur,|ds,d| }| j I dH }t	|j
dI dH }| }|d	 }t d
|  | j I dH }	t|| j||	}
|
d}d}z|
d}W n ty } ztdt| d W Y d}~nd}~ww |di ddurt|| j|
|j|jt| jj |di d|}t d|  t||||I dH }| jt r| I dH  |S t d |jp| }g }|dur||j7 }||j7 }|rd|}tt  d|  ntt  d t|| j|
||jt| jj ||}t d|  t||||I dH }| jt r4| I dH  |S )a  Run a project on Amazon Sagemaker.

        Arguments:
            launch_project (LaunchProject): The project to run.

        Returns:
            Optional[AbstractRun]: The run instance.

        Raises:
            LaunchError: If the launch is unsuccessful.
        zusing AWSSagemakerRunner	sagemakerNDNo sagemaker args specified. Specify sagemaker args in resource_argsrunners3_output_pathzs3://stsAccountzUsing account ID logsz0Failed to connect to cloudwatch logs with error z, logs will not be availableAlgorithmSpecificationTrainingImagez:Launching sagemaker job on user supplied image with args: zConnecting to sagemaker client z,Launching run on sagemaker with entrypoint: zALaunching run on sagemaker with user-provided entrypoint in imagez#Launching sagemaker job with args: )!_loggerinforesource_argsgetr   rg   
startswithri   get_sessionr   r   get_caller_identityget_partitionget_role_arnr>   r8   r9   r   build_sagemaker_args_apioverride_entrypointoverride_argsr   r%   r_   launch_sagemaker_jobr   rO   get_job_entry_pointcommandr;   rK   r   )r#   rk   rl   given_sagemaker_argsdefault_output_pathsessionr   	caller_id
account_id	partitionrole_arnsagemaker_clientr   rB   sagemaker_argsrunentry_pointcommand_argscommand_strr&   r&   r'   r      s   













zSageMakerRunner.run)
rf   r   rg   rh   ri   r   rj   r   r   r   )rk   r   rl   r   r   rm   )r_   r`   ra   rb   r!   r   rd   r&   r&   r$   r'   re   {   s    re   algorithm_specificationdict[str, Any] | Nonerl   r)   entrypoint_command	list[str]argslist[str] | Noner   rh   c                 C  sP   | du r
|dd} n|r|| d< |r|| d< |r|| d< | d du r&t d| S )aL  Create an AWS AlgorithmSpecification.

    AWS Sagemaker algorithms require a training image and an input mode. If the user
    does not specify the specification themselves, define the spec minimally using these
    two fields. Otherwise, if they specify the AlgorithmSpecification set the training
    image if it is not set.
    NFile)rv   TrainingInputModerv   ContainerEntrypointContainerArgumentsz'Failed determine tag for training imager   )r   rl   r   r   r&   r&   r'   ,merge_image_uri_with_algorithm_specification  s   r   rk   r   rf   r   r   r   r   EntryPoint | Nonemax_env_lengthintr   c                 C  s  i }|  |}	|	d}
|
d u rtd|
dd u r&|d ur&d|i|d< n|
d|d< |dd u r8tdtt|
dpA| j}||d< |rL|jng }t|
d|
d||||d< ||d	< d
d |
 D }i ||}|dd u r|td|dd u rtd|
d|di }| 	||}i ||}||d< |dg }|
d| jd ||d< |dd  |dd  |dd  dd | D }|S )Nrn   ro   OutputDataConfigS3OutputPathzKSagemaker launcher requires an OutputDataConfig Sagemaker resource argumentrQ   ru   r   RoleArnc                 S  s   i | ]	\}}t ||qS r&   )r   )r3   keyitemr&   r&   r'   
<dictcomp>Y  s    z(build_sagemaker_args.<locals>.<dictcomp>ResourceConfigz>Sagemaker launcher requires a ResourceConfig resource argumentStoppingConditionzASagemaker launcher requires a StoppingCondition resource argumentEnvironmentri   Tags
WandbRunId)KeyValueEcrRepoNameregionprofilec                 S  s   i | ]\}}|d ur||qS r^   r&   )r3   kvr&   r&   r'   r   }  s    )fill_macrosr{   r   r   r   run_idr   r   itemsget_env_vars_dictappendpop)rk   rf   r   r   r   r   rl   r   r   rz   r   r   	entry_cmdcamel_case_args	given_env
calced_env	total_envtagsfiltered_argsr&   r&   r'   r   %  s~   


r   r   r   r   r   r   c           	        s   | dp| j}t|j}|d	i |I d H }| dd u r#tdt|||}tt d| d  d|j	j
 d|j	j
 d| }tt d|  |S )
NrQ   TrainingJobArnz:Failed to create training job when submitting to SageMakerzRun job submitted with arn: zhttps://z..console.aws.amazon.com/sagemaker/home?region=z#/jobs/zSee training job status at: r&   )r{   r   r   create_training_jobr   r   r8   rK   r   metaregion_name)	rk   r   r   r   r   r   respr   urlr&   r&   r'   r     s   
r   rg   r   r   c                 C  st   |  dp	|  d}|du r| di  d}|du s t|ts$td|d| dr/|S d| d| d| S )	z?Get the role arn from the sagemaker args or the backend config.r   r   Nrp   zpAWS sagemaker require a string RoleArn set this by adding a `RoleArn` key to the sagemakerfield of resource_argszarn:z:iam::z:role/)r{   
isinstancer   r   r|   )r   rg   r   r   r   r&   r&   r'   r     s   r   )
r   r   rl   r)   r   r   r   r   r   rh   r^   )rk   r   rf   r   r   r   r   r   r   r   r   r   rl   r   r   r)   r   rh   )
rk   r   r   rh   r   r   r   r   r   r   )
r   rh   rg   rh   r   r   r   r   r   r   )(rb   
__future__r   rL   loggingtypingr   r   boto3r8   wandb.apis.internalr   ,wandb.sdk.launch.environment.aws_environmentr   wandb.sdk.launch.errorsr   _project_specr
   r   registry.abstractr   utilsr   r   r   r   r   abstractr   r   r   	getLoggerr_   rx   r   re   r   r   r   r   r&   r&   r&   r'   <module>   s0    
\ 
'a