o
    ߥi,                  	   @   s   d Z ddlZddlZddlmZmZmZ ddlZddl	m
Z
 ddlmZ eeejf Zeeef ZdZeeZejddG d	d
 d
Zddedee defddZdefddZdedefddZdedejfddZ	ddededeej defddZ	ddedeej defddZdS )zProtein data type.    N)AnyMappingOptional)	PDBParser)residue_constants>ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789T)frozenc                   @   sV   e Zd ZU dZejed< ejed< ejed< ejed< ejed< ejed< dd	 Zd
S )Proteinz!Protein structure representation.atom_positionsaatype	atom_maskresidue_indexchain_index	b_factorsc                 C   s(   t t| jtkrtdt dd S )Nz(Cannot build an instance with more than z6 chains because these cannot be written to PDB format.)lennpuniquer   PDB_MAX_CHAINS
ValueError)self r   b/home/ubuntu/.local/lib/python3.10/site-packages/modelscope/models/science/unifold/data/protein.py__post_init__<   s
   
zProtein.__post_init__N)__name__
__module____qualname____doc__r   ndarray__annotations__r   r   r   r   r   r	       s   
 





r	   pdb_strchain_idreturnc              	      s  t | }tdd}|d|}t| }t|dkr&tdt| d|d }g }g }g }	g }
g }g }|D ]}|durD|j|krDq8|D ]}|jd	 d
kr^td|j d|jd  dt	j
|jd}t	j|t	j}tt	jdf}tt	jf}tt	jf}|D ]#}|jt	jvrq|j|t	j|j < d|t	j|j < |j|t	j|j < qt|dk rqF|| || |	| |
|jd  ||j || qFq8t|}dd t|D  t fdd|D }tt|t|	t|t|
|t|dS )a  Takes a PDB string and constructs a Protein object.

    WARNING: All non-standard residue types will be converted into UNK. All
      non-standard atoms will be ignored.

    Args:
      pdb_str: The contents of the pdb file
      chain_id: If chain_id is specified (e.g. A), then only that chain
        is parsed. Otherwise all chains are parsed.

    Returns:
      A new `Protein` parsed from the pdb contents.
    T)QUIETnone   z,Only single model PDBs are supported. Found z models.r   N    z(PDB contains an insertion code at chain z and residue index z. These are not supported.X         ?      ?c                 S   s   i | ]\}}||qS r   r   ).0ncidr   r   r   
<dictcomp>       z#from_pdb_string.<locals>.<dictcomp>c                    s   g | ]} | qS r   r   )r+   r-   chain_id_mappingr   r   
<listcomp>   s    z#from_pdb_string.<locals>.<listcomp>)r
   r   r   r   r   r   )ioStringIOr   get_structurelist
get_modelsr   r   idr   restype_3to1getresnamerestype_orderrestype_numr   zerosatom_type_numname
atom_typescoord
atom_orderbfactorsumappendr   	enumeratearrayr	   )r   r    pdb_fhparser	structuremodelsmodelr
   r   r   r   	chain_idsr   chainresres_shortnamerestype_idxposmaskres_b_factorsatomunique_chain_idsr   r   r0   r   from_pdb_stringC   s~   






rX   c                 C   s*   d}|d| dd|dd|d|dS )	NTER<6>5z      >3r&   >1>4r   )
atom_indexend_resname
chain_namer   	chain_endr   r   r   
_chain_end   s   rc   protc                    s|  t jdg   fdd}t j}g }| j}| j}| j}| jtj	}| j
tj	}| j}	t|t jkr7tdi }
t|D ]}|tkrLtdt dt| |
|< q>|d d}|d	 }t|jd	 D ]}||| kr|t||||d  |
||d   ||d   || }|d7 }||| }t||| || |	| D ]n\}}}}|d
k rqd}t|dkr|nd| }d}d}d}|d	 }d}|d|dd|d|d|dd|
||  d|| d|dd|d	 d|d d|d d|d|dd|d|d}|| |d7 }qqe|t|||d |
|d  |d  |d |d dd  |D }d!|d! S )"zConverts a `Protein` instance to a PDB string.

    Args:
      prot: The protein to convert to PDB.

    Returns:
      PDB string.
    r'   c                    s   t j |  dS )NUNK)r   restype_1to3r:   )rrestypesr   r   res_1to3   s   zto_pdb.<locals>.res_1to3zInvalid aatypes.z The PDB format supports at most z chains.zMODEL     1r$   r   r*   ATOM   r&    r)   rZ   r[   z<4r]   r\   r^   z   z>8.3fr%   z>6.2fz
          z>2ENDMDLENDc                 S   s   g | ]}| d qS )P   )ljust)r+   liner   r   r   r2      r/   zto_pdb.<locals>.<listcomp>
)r   ri   rA   r   r   r
   r   astyper   int32r   r   anyr=   r   r   r   PDB_CHAIN_IDSrF   rangeshaperc   zipr   join)rd   rj   rA   	pdb_linesr   r   r
   r   r   r   rN   ir_   last_chain_index
res_name_3	atom_namerS   rT   b_factorrecord_typer@   alt_locinsertion_code	occupancyelementcharge	atom_liner   rh   r   to_pdb   s   	








r   c                 C   s   t j| j S )as  Computes an ideal atom mask.

    `Protein.atom_mask` typically is defined according to the atoms that are
    reported in the PDB. This function computes a mask according to heavy atoms
    that should be present in the given sequence of amino acids.

    Args:
      prot: `Protein` whose fields are `numpy.ndarray` objects.

    Returns:
      An ideal atom mask.
    )r   STANDARD_ATOM_MASKr   )rd   r   r   r   ideal_atom_mask   s   r   featuresresultr   c                 C   sb   d| v r| d d }nt | d }|du rt |d }t| d |d |d | d d ||dS )	a  Assembles a protein from a prediction.

    Args:
      features: Dictionary holding model inputs.
      fold_output: Dictionary holding model outputs.
      b_factors: (Optional) B-factors to use for the protein.

    Returns:
      A protein instance.
    asym_idr$   r   Nfinal_atom_maskfinal_atom_positionsr   r   r
   r   r   r   r   r   
zeros_liker	   )r   r   r   r   r   r   r   from_prediction  s   
r   c                 C   sb   d| v r| d d }nt | d }|du rt | d }t| d | d | d | d d ||dS )	zAssembles a standard pdb from input atom positions & mask.

    Args:
      features: Dictionary holding model inputs.
      b_factors: (Optional) B-factors to use for the protein.

    Returns:
      A protein instance.
    r   r$   r   Nall_atom_maskall_atom_positionsr   r   r   )r   r   r   r   r   r   from_feature'  s   
r   )N)r   dataclassesr3   typingr   r   r   numpyr   Bio.PDBr   &modelscope.models.science.unifold.datar   strr   FeatureDictModelOutputrx   r   r   	dataclassr	   rX   rc   r   r   r   r   r   r   r   r   <module>   s>   
"N`
!