o
    Jεi#(                     @   s   d dl Z d dlZd dlZd dlZd dlZd dlmZ ddlmZ ddlm	Z	 ddlm
Z
 ejddd	d
 dddZee j_dd Zdd Zdd Zdd Zdd Zdd Zdd Zedkrbe  dS dS )    N)Path   )
Dictionary)__version__)	sudachipy{z>{levelname} {asctime} [{module}:{funcName}:{lineno}] {message}z%m-%d-%Y %H:%M:%S)styleformatdatefmtc                 C   s   d}t jdd D ]	}|dv r dS q	| jjD ]}t|tjs q|j D ]}|t jdd v r2d}q%q|sK|du rCt j	d| dS |	d| dS dS )zM
    copy and modify code from https://bitbucket.org/ruamel/std.argparse
    Fr   N)z-hz--helpTr   )
sysargv_subparsers_actions
isinstanceargparse_SubParsersAction_name_parser_mapkeysinsert)selfnameargssubparser_foundargxsp_name r   J/home/ubuntu/.local/lib/python3.10/site-packages/sudachipy/command_line.py_set_default_subparser"   s$   r   c           
      C   s   |  d}|D ]^}|d}| j ||dD ]D}| ||  | g}	|rL|	| | t| d	d
dd | D g7 }	| rL|	d |d	
|	 |d q|d
 |re|  qd S )N 
)outz[{}],c                 S   s   g | ]}t |qS r   )str).0synonym_group_idr   r   r   
<listcomp>M   s    zrun.<locals>.<listcomp>z(OOV)	zEOS
)tokenizerstripsurfacepart_of_speech_idnormalized_formdictionary_formreading_formr#   dictionary_idr	   joinsynonym_group_idsis_oovappendwriteflush)
	tokenizerinput_output	print_allpos_list	is_stdoutmlistlinem	list_infor   r   r   run=   s0   





r@   c                 C   s@   | j D ]}tj|s|  tdt|tjd t	d qd S )Nz{}: error: {} doesn't existfiler   )
in_filesospathexistsprintr	   __name__r   stderrexit)r   print_usagerB   r   r   r   _input_files_checkerW   s   
rL   c                 C   s   | j rt  d S t| | tj}| jrt| jddd}tt	}|
tj | j}| j}|r3|d z@t| j| jd}|dg}dd |D }|j| jd	}	tj| jtdd
}
t|	|
|||| jd u d W | jrr|  d S d S | jr||  w w )Nwutf-8)encodingz'-d option is not implemented in python.)config_path	dict_typer   c                 S   s   g | ]}d  |qS )r"   )r0   )r$   msr   r   r   r&   y   s    z%_command_tokenize.<locals>.<listcomp>)mode)openhook)r;   )versionprint_versionrL   r   stdout	fpath_outopenlogging	getLoggerrH   setLevelDEBUGadwarningr   fpath_settingsystem_dict_typepos_matchercreaterS   	fileinputinputrC   hook_encodedr@   close)r   rK   r8   loggerr9   debugdict_all_pos_matcherr:   tokenizer_objr7   r   r   r   _command_tokenize`   s>   






rn   c                 C      t | j}| std|dtjd | S g }| jD ]}t |}| s2td|dtjd |   S || qt | j}| rLtd|dtjd d S | j	pPd}t
|d	d
kr`td d S tj||||d}|D ]\}}	}
td||	|
 qkd S )NzMatrix filedoes not existrA   
Input filedoes not existsFile(already exists, refusing to overwrite itr   rN      CDescription is longer than 255 bytes in utf-8, it will be truncated)matrixlexr8   description{} -> {} in {:.2F} sec)r   matrix_filerF   rG   r   rI   rC   r3   out_filery   lenencoder   build_system_dicr	   )r   rK   rw   rC   rB   r|   ry   statsr   sizetimer   r   r   _command_build   s<   




r   c                 C   ro   )NzSystem dictionary filerp   rA   rq   rr   rs   rt   r   rN   ru   rv   )systemrx   r8   ry   rz   )r   
system_dicrF   rG   r   rI   rC   r3   r|   ry   r}   r~   r   build_user_dicr	   )r   rK   r   rC   rB   r|   ry   r   r   r   r   r   r   r   _command_user_build   s@   




r   c                   C   s   t dt d S )Nzsudachipy {})rG   r	   r   r   r   r   r   rV      s   rV   c                  C   s  t jdd} | jdd}|jdddd}|jdd	d
dd |jdddddd |jddd
dd |jdddg ddd |jdddd |jd dd!d |jd"d#dd$d%d& |jd'd
t jd(d) |jt|jd* |jd+d,d-d}|jdd.d
d/d0d1 |jd d2ddd3d4d5 |	d6}|jdd7d
d8d9d: |jd'd
t j
d;d) |jt|jd* |jd<d=d>d}|jdd.d
d?d@d1 |jd d2ddd3d4d5 |	d6}|jddAd
d8dBd: |jd'd
t j
dCd) |jt|jd* | d |  }t|dDr|||j d S |   d S )ENzJapanese Morphological Analyzer)ry   r   r(   z(default) see `tokenize -h`zTokenize Text)helpry   z-rra   rB   zthe setting file in JSON format)destmetavarr   z-mrS   AaBbCcCzthe mode of splitting)r   choicesdefaultr   z-orX   zthe output filez-srb   string)smallcorefullzsudachidict type)r   r   r   r   z-a
store_truezprint all of the fields)actionr   z-dz1print the debug information (not implemented yet)z-vz	--versionrU   zprint sudachipy version)r   r   r   rC   ztext written in utf-8)r   nargsr   )handlerrK   buildzsee `build -h`zBuild Sudachi Dictionaryr|   z
system.dicz!output file (default: system.dic))r   r   r   r   ry   Fz0description comment to be embedded on dictionary)r   r   r   requiredr   zrequired named argumentsr{   Tz5connection matrix file with MeCab's matrix.def format)r   r   r   r   z*source files with CSV format (one of more)ubuildzsee `ubuild -h`zBuild User Dictionaryzuser.diczoutput file (default: user.dic)r   zsystem dictionary pathz*source files with CSV format (one or more)r   )r   ArgumentParseradd_subparsers
add_parseradd_argumentZERO_OR_MOREset_defaultsrn   rK   add_argument_groupONE_OR_MOREr   r   set_default_subparser
parse_argshasattrr   
print_help)parser
subparsers	parser_tk	parser_bdrequired_named_bd
parser_ubdrequired_named_ubdr   r   r   r   main   s   



r   __main__)N)r   re   rZ   rD   r   pathlibr   r   r   r   r   basicConfigr   r   r   r@   rL   rn   r   r   rV   r   rH   r   r   r   r   <module>   s4   
	%$%F
