o
    ॵi!                     @   sz   d Z ddlZddlZddlZddlmZ ddlmZmZm	Z	m
Z
mZ ddlmZ ddlmZ ddlmZ G d	d
 d
ZdS )z%Library to run Jackhmmer from Python.    N)futures)AnyCallableMappingOptionalSequence)request)logging   )utilsc                    @   s   e Zd ZdZdddddddd	ddddd
dedededededee dededededee dee dee dee	egdf  fddZ
dededeeef fddZdedeeeef  fdd ZdS )!	Jackhmmerz'Python wrapper of the Jackhmmer binary.   r
   g-C6?NFgMb@?g-C6
?gƠ>)n_cpun_itere_valuez_value
get_tblout	filter_f1	filter_f2	filter_f3incdom_edom_enum_streamed_chunksstreaming_callbackbinary_pathdatabase_pathr   r   r   r   r   r   r   r   r   r   r   r   c                C   s   || _ || _|| _tj| js!|du r!td| td| || _	|| _
|| _|| _|| _|	| _|
| _|| _|| _|| _|| _dS )ad  Initializes the Python Jackhmmer wrapper.

        Args:
            binary_path: The path to the jackhmmer executable.
            database_path: The path to the jackhmmer database (FASTA format).
            n_cpu: The number of CPUs to give Jackhmmer.
            n_iter: The number of Jackhmmer iterations.
            e_value: The E-value, see Jackhmmer docs for more details.
            z_value: The Z-value, see Jackhmmer docs for more details.
            get_tblout: Whether to save tblout string.
            filter_f1: MSV and biased composition pre-filter, set to >1.0 to turn off.
            filter_f2: Viterbi pre-filter, set to >1.0 to turn off.
            filter_f3: Forward pre-filter, set to >1.0 to turn off.
            incdom_e: Domain e-value criteria for inclusion of domains in MSA/next
                round.
            dom_e: Domain e-value criteria for inclusion in tblout.
            num_streamed_chunks: Number of database chunks to stream over.
            streaming_callback: Callback function run after each chunk iteration with
                the iteration number as argument.
        Nz$Could not find Jackhmmer database %sz"Could not find Jackhmmer database )r   r   r   ospathexistsr	   error
ValueErrorr   r   r   r   r   r   r   r   r   r   r   )selfr   r   r   r   r   r   r   r   r   r   r   r   r   r    r"   i/home/ubuntu/.local/lib/python3.10/site-packages/modelscope/models/science/unifold/msa/tools/jackhmmer.py__init__   s0   &
zJackhmmer.__init__input_fasta_pathreturnc                 C   s&  t  }tj|d}ddd|ddt| jdt| jdt| jd	t| j	d
t| j	dt| j
dt| jg}| jrGtj|d}|d|g | jrT|dt| jg | jdurc|dt| jg | jdurr|dt| jg | jg| ||g }tdd| tj|tjtjd}t dtj| d | \}	}
| }W d   n1 sw   Y  |rtd|
d d}| jrt|}| }W d   n1 sw   Y  t|}| }W d   n1 sw   Y  W d   n	1 sw   Y  t|||
| j| j	d}|S )z+Queries the database chunk using Jackhmmer.z
output.stoz-oz	/dev/nullz-Az--noaliz--F1z--F2z--F3z--incEz-Ez--cpuz-Nz
tblout.txtz--tbloutz-ZNz--domEz	--incdomEzLaunching subprocess "%s" )stdoutstderrzJackhmmer (z) queryzJackhmmer failed
stderr:
%s
zutf-8 )stotblr)   r   r   ) r   tmpdir_managerr   r   joinstrr   r   r   r   r   r   r   extendr   r   r   r   r	   info
subprocessPopenPIPEtimingbasenamecommunicatewaitRuntimeErrordecodeopenreaddict)r!   r%   r   query_tmp_dirsto_path	cmd_flagstblout_pathcmdprocess_r)   retcoder,   fr+   
raw_outputr"   r"   r#   _query_chunk\   s   
	







DzJackhmmer._query_chunkc           
   	      sb  j du r|jgS tjj fdd} fdd}t|dD ]}zt| W q' ty@   t	d|  Y q'w t
jdd	`}g }td
j d
 D ]L}|d
krd|tj||||}|j k ry|tj||d
 ||d
 }	|  |||| t|| |j k r|	}jr| qRW d   |S 1 sw   Y  |S )z%Queries the database using Jackhmmer.Nc                    s    j  d|  S )N.)r   db_idx)r!   r"   r#   db_remote_chunk      z(Jackhmmer.query.<locals>.db_remote_chunkc                    s   d  d|  S )Nz/tmp/ramdisk/rI   r"   rJ   )db_basenamer"   r#   db_local_chunk   rM   z'Jackhmmer.query.<locals>.db_local_chunkz[0-9]*zOSError while deleting    )max_workersr
   )r   rH   r   r   r   r6   globremoveOSErrorprintr   ThreadPoolExecutorrangesubmitr   urlretrieveresultappendr   )
r!   r%   rL   rO   rF   executorchunked_outputifuturenext_futurer"   )rN   r!   r#   query   sP   






zJackhmmer.query)__name__
__module____qualname____doc__r/   intfloatr   boolr   r$   r   r   rH   r   ra   r"   r"   r"   r#   r      sb    	

=

"Pr   )re   rR   r   r2   
concurrentr   typingr   r   r   r   r   urllibr   abslr	   r*   r   r   r"   r"   r"   r#   <module>   s   