o
    ॵi?                      @   sP  d dl Z d dlZd dlmZmZmZmZmZ d dlZd dl	Z
d dlZd dlmZ d dlmZ d dlmZ d dlmZ d dlmZmZ d dlmZmZ d d	lmZ d d
lmZmZ d dlm Z  d dl!m"Z"m#Z# d dl$m%Z%m&Z&m'Z' d dl(m)Z) d dl*m+Z+ d dl,m-Z- e- Z.dgZ/dd Z0	 			dddZ1e j2e'j3ej3dG dd deZ4dS )    N)AnyDictListOptionalUnion)tensor_tree_map)	Pipelines)Model)model_config)proteinresidue_constants)UnifoldDatasetload_and_process)
OutputKeys)PipelineTensor)	PIPELINES)Preprocessorbuild_preprocessor)Fields
FrameworksTasks)device_placement)read_config)
get_loggerProteinStructurePipelinec                 C   sH   | dk rd}|S | dk rd}|S | dk rd}|S | dk r d}|S d	}|S )
Ni      i      i       i          )seq_len
chunk_sizer!   r!   k/home/ubuntu/.local/lib/python3.10/site-packages/modelscope/pipelines/science/protein_structure_pipeline.pyautomatic_chunk_size    s   	r%   Fc           
      C   s   |sd }dg}|r|}n|}t tj|ddd  }|d u r2t| jd|d dd|||d	\}}	nt| jd|d dd|||d	\}}	t	|g}|S )	NAz
chains.txtzutf-8)encodingpredictr   F)	configmodeseed	batch_idxdata_idxis_distillationsequence_idsmonomer_feature_diruniprot_msa_dir)
openospathjoinreadlinesplitr   datar   collater)
r)   data_folderr+   is_multimeruse_uniprotsymmetry_groupr1   r/   batch_r!   r!   r$   load_feature_for_one_target.   sL   
r@   )module_namec                       s   e Zd Z	ddeeef dee f fddZdd Z	de
eef fd	d
Zde
eef de
eef fddZde
eef fddZ  ZS )r   Nmodelpreprocessorc                    sv   t  jd||d| t| jj| _t| jd d | _| jdd| _	|du r4| jj
}t|tj| _
| j  dS )a  Use `model` and `preprocessor` to create a protein structure pipeline for prediction.

        Args:
            model (str or Model): Supply either a local model dir which supported the protein structure task,
            or a model id from the model hub, or a torch model instance.
            preprocessor (Preprocessor): An optional preprocessor instance, please make sure the preprocessor fits for
            the model if supplied.

        Examples:
            >>> from modelscope.pipelines import pipeline
            >>> pipeline_ins = pipeline(task='protein-structure',
            >>>    model='DPTech/uni-fold-monomer')
            >>> protein = 'LILNLRGGAFVSNTQITMADKQKKFINEIQEGDLVRSYSITDETFQQNAVTSIVKHEADQLCQINFGKQHVVC'
            >>> print(pipeline_ins(protein))

        )rB   rC   pipeline
model_namepostprocessorNr!   )super__init__r   rB   	model_dircfgr
   r)   poprF   rC   r   r   scienceeval)selfrB   rC   kwargspreprocessor_cfg	__class__r!   r$   rH   d   s   z!ProteinStructurePipeline.__init__c                 K   s
   |||fS Nr!   )rN   pipeline_parametersr!   r!   r$   _sanitize_parameters   s   
z-ProteinStructurePipeline._sanitize_parametersreturnc              	   O   s   | di }| di }| di }| j|fi |}t| j| j% t  | j|fi |}W d    n1 s;w   Y  W d    n1 sJw   Y  | j|fi |}|S )Npreprocess_paramsforward_paramspostprocess_params)	get
preprocessr   	frameworkdevice_nametorchno_gradforwardpostprocess)rN   inputargsrO   rW   rX   rY   outr!   r!   r$   _process_single   s   
z(ProteinStructurePipeline._process_singleinputsc              	   K   s  i }i }t j| jj|d }g }t| jd d D ]}td|fd }t| j	|||d |d | jj
d}	|	d jd	 }
t|
| jjj_t  d
d |	 D }	| |	}W d    n1 saw   Y  dd }tdd |	}	t||	}	tdd |d }t||}tdd |	}	tdd |}|d }t|}tj|d tjd	d}tj|	||d}| }t|||< |d r| jj
d u rtt|d ||< tt j||d d}|t| |t| W d    n1 sw   Y  qt !dt|  | jd d }| }|d }tt j||d}t"j#||dd W d    n	1 s.w   Y  |rgt !d t|  |d! }tt j||d}t"j#||dd W d    |S 1 sbw   Y  |S )"N	target_idrD   times*   i r;   )r;   r<   r=   aatypec                 S   s    i | ]\}}|t j|d dqS )zcuda:0)device)r^   	as_tensor).0kvr!   r!   r$   
<dictcomp>   s    z4ProteinStructurePipeline.forward.<locals>.<dictcomp>c                 S   s$   | j tjks| j tjkr|  S | S rS   )dtyper^   bfloat16halffloatxr!   r!   r$   to_float   s   z2ProteinStructurePipeline.forward.<locals>.to_floatc                 S      | d S )N)rk   r   .r!   tr!   r!   r$   <lambda>       z2ProteinStructurePipeline.forward.<locals>.<lambda>c                 S   ry   )N)r   .r!   rz   r!   r!   r$   r|      r}   r   c                 S      t |  S rS   nparraycpurv   r!   r!   r$   r|          c                 S   r~   rS   r   rv   r!   r!   r$   r|      r   plddt).N)axis)featuresresult	b_factorsziptm+ptmz.pdbwzplddts:rE   z_plddt.json   )indentptmsz	_ptm.json)$r3   r4   r5   rC   output_dir_baserangerJ   hashr@   r)   r=   shaper%   rB   globalsr#   r^   r_   itemsr   r   meanrepeatr   atom_type_numr   from_predictionstrr2   writeto_pdbappendloggerinfojsondump)rN   rf   rX   plddtsr   
output_dirpdbsr+   cur_seedr>   r"   rd   rx   r   
mean_plddtplddt_b_factorscur_proteincur_save_namefrE   
score_nameplddt_fname	ptm_fnamer!   r!   r$   r`      s   




z ProteinStructurePipeline.forwardc                 K   s   |S rS   r!   )rN   rf   rY   r!   r!   r$   ra      s   z$ProteinStructurePipeline.postprocessrS   )__name__
__module____qualname__r   r	   r   r   r   rH   rU   r   r   re   r`   r   ra   __classcell__r!   r!   rQ   r$   r   `   s    


K)r   FFN)5r3   timetypingr   r   r   r   r   r   numpyr   r^   unicore.utilsr   modelscope.metainfor   modelscope.models.baser	   (modelscope.models.science.unifold.configr
   &modelscope.models.science.unifold.datar   r   )modelscope.models.science.unifold.datasetr   r   modelscope.outputsr   modelscope.pipelines.baser   r   modelscope.pipelines.builderr   modelscope.preprocessorsr   r   modelscope.utils.constantr   r   r   modelscope.utils.devicer   modelscope.utils.hubr   modelscope.utils.loggerr   r   __all__r%   r@   register_moduleprotein_structurer   r!   r!   r!   r$   <module>   s@   
2