o
    i,                     @   sP  d Z ddlZddlZddlZddlmZmZmZ ddlm	Z	m
Z
 ddlZddlmZ eeejf Zeee	f ZdZejdd	G d
d dZdedefddZd$dededee fddZdededefddZdedefddZdedejfddZ					d%dedede
ej de
ej de
e d e
ee  d!e
ee  defd"d#ZdS )&zProtein data type.    N)IteratorMappingSequence)AnyOptional   )residue_constantsg{Gz?T)frozenc                   @   s   e Zd ZU dZejed< ejed< ejed< ejed< ejed< dZeej ed< dZ	ee
 ed	< dZeee
  ed
< dZeee  ed< dS )Proteinz!Protein structure representation.atom_positionsaatype	atom_maskresidue_index	b_factorsNchain_indexremarkparentsparents_chain_index)__name__
__module____qualname____doc__npndarray__annotations__r   r   r   strr   r   r   int r   r   b/home/ubuntu/.local/lib/python3.10/site-packages/transformers/models/esm/openfold_utils/protein.pyr
   "   s   
 




r
   proteinnet_strreturnc              	   C   s&  d}dd t || D }t|dd d dd |dd d D }g d}d }d }d }|D ]}d	|d kr\|d d  }	tt|	D ]}
|	|
 tjvrPd
|	|
< qCt	dd |	D }q-d|d krg }tdD ]}|
ttt|d |   qht	|}tt|d d tjdftj}t|D ]\}
}t|d d |
d df |d d tj| d d f< q|t9 }q-d|d krt	ttdddj|d d  }tt|tjftj}t|D ]\}
}d|d d tj| f< q||d 9 }q-|d usJ t|||tt|d dS )Nz(\[[A-Z]+\]\n)c                 S   s    g | ]}t |d kr| qS r   )lenstrip).0tagr   r   r   
<listcomp>J   s     z*from_proteinnet_string.<locals>.<listcomp>r      c                 S   s   g | ]}| d qS )
)split)r$   lr   r   r   r&   K   s    r   )NCACz	[PRIMARY]Xc                 S   s   g | ]
}t j|t jqS r   )r   restype_ordergetrestype_num)r$   
res_symbolr   r   r   r&   X       z
[TERTIARY]   z[MASK])-+).N)r   r   r   r   r   )rer)   zipr#   ranger"   r   restypesr   arrayappendlistmapfloatzerosatom_type_numastypefloat32	enumerate	transpose
atom_orderPICO_TO_ANGSTROMr0   r
   arange)r   tag_retagsgroupsatomsr   r   r   gseqitertiaryaxistertiary_npatommaskr   r   r   from_proteinnet_stringH   s^   ("
&6
(rU   protchain_idc                    s   g }| j }|d ur|d|  | j}| j}|d ur+|d ur+ fddt||D }|d u s5t|dkr8dg}|dd|  |S )NREMARK c                    s   g | ]
\}}| kr|qS r   r   )r$   rO   prW   r   r   r&      r3   z#get_pdb_headers.<locals>.<listcomp>r   N/APARENT  )r   r<   r   r   r8   r"   join)rV   rW   pdb_headersr   r   r   r   rZ   r   get_pdb_headersz   s   r`   pdb_strc                 C   s  g }| d}| j}|dur|d|  | jdurrt| jdkrrg }| jdurii }t| j| jD ]\}}|t|g  |t| | q2t	dd |D }	t
|	d D ]}|t|dg}
||
 qWn|t| j ndgg}d	tt d
tfdd}|||d  d}t|D ]6\}}d|vrd|vr|| d|v rd||d  vr|d7 }|t|ks|| }
ndg}
|||
 qd|S )zWAdd pdb headers to an existing PDB string. Useful during multi-chain
    recycling
    r(   NrX   r   c                 s   s    | ]}t |V  qd S )N)r   )r$   	chain_idxr   r   r   	<genexpr>   s    z"add_pdb_headers.<locals>.<genexpr>r   r[   rY   r    c                 S   s   dd |  S )Nr\   r]   )r^   )rY   r   r   r   make_parent_line   s   z)add_pdb_headers.<locals>.make_parent_linePARENTREMARKTEREND)r)   r   r<   r   r"   r   r8   
setdefaultr   maxr9   r0   r=   r   rD   r^   )rV   ra   out_pdb_lineslinesr   parents_per_chainparent_dictrY   rO   max_idxchain_parentsrd   chain_counterr*   r   r   r   add_pdb_headers   sB   




rr   c           !         s  t jdg  dtdtf fdd}t j}g }| j}| j}| j}| j	t
j}| j}| j}	t
|t jkr8tdt| }
t|
dkrG||
 |jd }d}d}tj}d	}t|D ]}||| }t||| || || D ]v\}}}}|d
k ryqnd}t|dkr|nd| }d}d}d}|d }d}d}|	d	ur||	|  }|d|dd|d|d|dd|d|| d|dd|d d|d d|d d|d|dd|d|d}|| |d7 }qn||d k}|	d	ur||d kr|	|d  |krd}|	|d  }|r>d}|d|dd||| dd|d|| d} ||  |d7 }||d kr>|t| | qY|d  |d d!|S )"zConverts a `Protein` instance to a PDB string.

    Args:
      prot: The protein to convert to PDB.

    Returns:
      PDB string.
    r.   rr    c                    s   t j |  dS )NUNK)r   restype_1to3r0   )rs   r:   r   r   res_1to3   s   zto_pdb.<locals>.res_1to3zInvalid aatypes.r   r   Ng      ?ATOM   r]    g      ?Az<6z>5z<4z>1z>3z>4z   z>8.3fr'   z>6.2fz
          z>2Trg   z      rh   r(   )r   r:   r   r   
atom_typesr   r   r   r   rB   r   int32r   r   anyr1   
ValueErrorr`   r"   extendshapestringascii_uppercaser9   r8   r<   r^   )!rV   rw   r|   	pdb_linesr   r   r   r   r   r   headersn
atom_indexprev_chain_index
chain_tags	chain_tagrO   
res_name_3	atom_nameposrT   b_factorrecord_typenamealt_locinsertion_code	occupancyelementcharge	atom_lineshould_terminate	chain_endchain_termination_liner   rv   r   to_pdb   s   	

&


 0



r   c                 C   s   t j| j S )ao  Computes an ideal atom mask.

    `Protein.atom_mask` typically is defined according to the atoms that are reported in the PDB. This function
    computes a mask according to heavy atoms that should be present in the given sequence of amino acids.

    Args:
      prot: `Protein` whose fields are `numpy.ndarray` objects.

    Returns:
      An ideal atom mask.
    )r   STANDARD_ATOM_MASKr   )rV   r   r   r   ideal_atom_mask  s   r   featuresresultr   r   r   r   r   c                 C   sD   t | d |d |d | d d |dur|nt|d ||||d	S )a  Assembles a protein from a prediction.

    Args:
      features: Dictionary holding model inputs.
      result: Dictionary holding model outputs.
      b_factors: (Optional) B-factors to use for the protein.
      chain_index: (Optional) Chain indices for multi-chain predictions
      remark: (Optional) Remark about the prediction
      parents: (Optional) List of template names
    Returns:
      A protein instance.
    r   final_atom_positionsfinal_atom_maskr   r   N)	r   r   r   r   r   r   r   r   r   )r
   r   
zeros_like)r   r   r   r   r   r   r   r   r   r   from_prediction,  s   
r   r!   )NNNNN)r   dataclassesr7   r   collections.abcr   r   r   typingr   r   numpyr   rz   r   r   r   FeatureDictModelOutputrG   	dataclassr
   rU   r   r=   r`   rr   r   r   r   r   r   r   r   <module>   sP   
%22]

