o
    
i5A                  
   @  s6  U d Z ddlmZ ddlZddlZddlZddlZddlZddlZddl	Z	ddl
Z
ddlZddlZddlmZmZmZmZmZmZ ddlmZ ddlZddlmZmZmZ ddlmZ dgZd	Zi Zd
e d< dJddZ!dd Z"dKddZ#ej$fddZ%dLdMddZ&G dd  d Z'G d!d" d"e'Z(G d#d$ d$e'Z)G d%d& d&e)Z*G d'd( d(e)Z+G d)d* d*e)Z,G d+d, d,e'Z-G d-d. d.e)Z.G d/d0 d0e'Z/e' Z0d1d2 Z1dNd3d4Z2d5d6 Z3dLd7d8Z4d9d: Z5dLd;d<Z6dLd=d>Z7d?d@ Z8dNdAdBZ9e1e2e3e4e6e8e9e5e7dC	Z:dDdE Z;G dFd dZ<dGdH Z=e>dIkre=  dS dS )Oz
A commandline tool for semi-automatically converting CSV to RDF.

See also https://github.com/RDFLib/pyTARQL in the RDFlib family of tools

try: `csv2rdf --help`
    )annotationsN)AnyDictListOptionalTupleUnion)quote)RDFRDFS	split_uri)URIRefCSV2RDFa^  
csv2rdf.py     -b <instance-base>     -p <property-base>     [-D <default>]     [-c <classname>]     [-i <identity column(s)>]     [-l <label columns>]     [-s <N>] [-o <output>]     [-f configfile]     [--col<N> <colspec>]     [--prop<N> <property>]     <[-d <delim>]     [-C] [files...]"

Reads csv files from stdin or given files
if -d is given, use this delimiter
if -s is given, skips N lines at the start
Creates a URI from the columns given to -i, or automatically by numbering if
none is given
Outputs RDFS labels from the columns given to -l
if -c is given adds a type triple with the given classname
if -C is given, the class is defined as rdfs:Class
Outputs one RDF triple per column in each row.
Output is in n3 format.
Output is stdout, unless -o is specified

Long options also supported:     --base,     --propbase,     --ident,     --class,     --label,     --out,     --defineclass

Long options --col0, --col1, ...
can be used to specify conversion for columns.
Conversions can be:
    ignore, float(), int(), split(sep, [more]), uri(base, [class]), date(format)

Long options --prop0, --prop1, ...
can be used to use specific properties, rather than ones auto-generated
from the headers

-D sets the default conversion for columns not listed

-f says to read config from a .ini/config file - the file must contain one
section called csv2rdf, with keys like the long options, i.e.:

[csv2rdf]
out=output.n3
base=http://example.org/
col0=split(";")
col1=split(";", uri("http://example.org/things/",
                    "http://xmlns.com/foaf/0.1/Person"))
col2=float()
col3=int()
col4=date("%Y-%b-%d %H:%M:%S")

z*Dict[Any, Tuple[URIRef, Optional[URIRef]]]urislabelstrc                 C  sP   t dd| } t dd| } | d} d| d  gdd | d	d
 D  S )zc
    CamelCase + lowercase initial a string


    FIRST_NM => firstNm

    firstNm => firstNm

    z[^\w] z([a-z])([A-Z])z\1 \2 r   c                 S  s   g | ]}|  qS  )
capitalize.0xr   r   H/home/ubuntu/.local/lib/python3.10/site-packages/rdflib/tools/csv2rdf.py
<listcomp>n       ztoProperty.<locals>.<listcomp>   N)resubsplitjoinlowerr   r   r   r   
toProperty`   s   

*r#   c                 C  s0   | dd   s| dd  | dd   S | S )Nr      r   )isupperr!   r"   r   r   r   toPropertyLabelq   s   r&   l_	List[int]iTuple[int, ...]returnc                   s   t  fdd|D S )zPreturn a set of indexes from a list
    >>> index([1,2,3],(0,2))
    (1, 3)
    c                   s   g | ]} | qS r   r   r   r'   r   r   r   |   r   zindex.<locals>.<listcomp>)tuple)r'   r)   r   r,   r   indexw   s   r.   c                 k  s,    t j| fd|i|}|D ]}|V  qd S )Ndialect)csvreader)csv_datar/   kwargs
csv_readerrowr   r   r   r4      s
   r4   class_Optional[URIRef]c                 C  sD   |rt |t| ddddd }nt | }||ft| < |S )Nutf8r   _r   safe)rdflibr   r	   encodereplacer   )r   prefixr6   rr   r   r   	prefixuri   s
   &
rA   c                   @  s   e Zd Zdd ZdddZdS )		NodeMakerc                 C     t jjS N)r<   r   Literalselfr   r   r   range      zNodeMaker.ranger   r   c                 C  s
   t |S rD   )r<   rE   rG   r   r   r   r   __call__      
zNodeMaker.__call__N)r   r   )__name__
__module____qualname__rH   rK   r   r   r   r   rB      s    rB   c                   @  $   e Zd Zdd Zdd Zdd ZdS )NodeUric                 C  s*   d | _ || _|rt|| _ d S d | _ d S rD   )r6   r?   r<   r   )rG   r?   r6   r   r   r   __init__   s
   
zNodeUri.__init__c                 C  s   t || j| jS rD   )rA   r?   r6   rJ   r   r   r   rK         zNodeUri.__call__c                 C  s   | j ptjjS rD   )r6   r<   r
   ResourcerF   r   r   r   rH      s   zNodeUri.rangeNrM   rN   rO   rR   rK   rH   r   r   r   r   rQ      s    rQ   c                   @  s   e Zd ZdddZdS )NodeLiteralNc                 C  s
   || _ d S rD   f)rG   rX   r   r   r   rR      rL   zNodeLiteral.__init__rD   )rM   rN   rO   rR   r   r   r   r   rV      s    rV   c                   @     e Zd Zdd Zdd ZdS )	NodeFloatc                 C  :   | j s
tt|S t| j rtt|  |S td)Nz(Function passed to float is not callable)rX   r<   rE   floatcallable	ExceptionrJ   r   r   r   rK      
   
zNodeFloat.__call__c                 C  rC   rD   )r<   XSDdoublerF   r   r   r   rH      rI   zNodeFloat.rangeNrM   rN   rO   rK   rH   r   r   r   r   rZ          rZ   c                   @  rY   )NodeIntc                 C  r[   )Nz&Function passed to int is not callable)rX   r<   rE   intr]   r^   rJ   r   r   r   rK      r_   zNodeInt.__call__c                 C  rC   rD   )r<   r`   re   rF   r   r   r   rH      rI   zNodeInt.rangeNrb   r   r   r   r   rd      rc   rd   c                   @  rY   )NodeBoolc                 C  r[   )Nz'Function passed to bool is not callable)rX   r<   rE   boolr]   r^   rJ   r   r   r   rK      r_   zNodeBool.__call__c                 C  rC   rD   )r<   r`   rg   rF   r   r   r   rH      rI   zNodeBool.rangeNrb   r   r   r   r   rf      rc   rf   c                   @  rY   )NodeReplacec                 C     || _ || _d S rD   ab)rG   rk   rl   r   r   r   rR         
zNodeReplace.__init__c                 C  s   | | j| jS rD   )r>   rk   rl   rJ   r   r   r   rK      rS   zNodeReplace.__call__N)rM   rN   rO   rR   rK   r   r   r   r   rh      s    rh   c                   @  rY   )NodeDatec                 C  s   t tj|| jS rD   )r<   rE   datetimestrptimerX   rJ   r   r   r   rK      s   zNodeDate.__call__c                 C  rC   rD   )r<   r`   dateTimerF   r   r   r   rH      rI   zNodeDate.rangeNrb   r   r   r   r   rn      s    rn   c                   @  rP   )	NodeSplitc                 C  ri   rD   seprX   )rG   rt   rX   r   r   r   rR      rm   zNodeSplit.__init__c                   s:    j stj _ t j std fdd| jD S )Nz)Function passed to split is not callable!c                   s&   g | ]}|  d kr |  qS )r   )striprX   )r   yrF   r   r   r      s   & z&NodeSplit.__call__.<locals>.<listcomp>)rX   r<   rE   r]   r^   r   rt   rJ   r   rF   r   rK      s
   
zNodeSplit.__call__c                 C  s&   | j rt| j tr| j  S t| S rD   )rX   
isinstancerB   rH   rF   r   r   r   rH      s   

zNodeSplit.rangeNrU   r   r   r   r   rr      s    rr   c                  O  s   dS )Nignorer   )argsr3   r   r   r   _config_ignore   s   rz   c                 C  
   t | |S rD   )rQ   )r?   r6   r   r   r   _config_uri   rL   r|   c                   C  s   t  S rD   )rV   r   r   r   r   _config_literal   s   r}   c                 C     t | S rD   )rZ   rW   r   r   r   _config_float  rI   r   c                 C  r{   rD   )rh   rj   r   r   r   _config_replace  rL   r   c                 C  r~   rD   )rd   rW   r   r   r   _config_int  rI   r   c                 C  r~   rD   )rf   rW   r   r   r   _config_bool  rI   r   c                 C  r~   rD   )rn   )format_r   r   r   _config_date  rI   r   c                 C  r{   rD   )rr   rs   r   r   r   _config_split  rL   r   )	rx   uriliteralr\   re   dater   r>   rg   c                 C  s
   t | tS )z$Return a function for column mapping)evalconfig_functions)vr   r   r   column)  s   
r   c                   @  rP   )r   c                 C  sT   d | _ d | _d | _d| _d | _d| _d| _d| _d | _i | _	i | _
tj| _d| _d S )NautoFr   ,)CLASSBASEPROPBASEIDENTLABELDEFINECLASSSKIPDELIMDEFAULTCOLUMNSPROPSsysstdoutOUTtriplesrF   r   r   r   rR   0  s   
zCSV2RDF.__init__c                 C  s4   | j d| | | f  |  jd7  _d S )Nz%s %s %s .
r   )r   writen3r   )rG   spor   r   r   tripleB  s   "zCSV2RDF.triplec                   s0  t   } jrtjd jj   jdkr!t jts! jf _ j	s/t
d td _	 js=t
d td _t jD ]}t| qBtt|}tt fdd|D } j D ]\}}|||< t|d	 ||< qa jrӈ  jtjtj tt|D ]M}|| || }	}
|	d
ks|
d
krq j !| j"dkrq |	tjtj#  |	tj$t%t&|
  |	tj' j  |	tj j !|t(  qd}|D ]}
zψ jdkr j	d|  }n j	d)dd t*|
 jD  } j+r |tj$t%d)t*|
 j+  jr |tj j t|
D ]l\}}|, }|d
kr j !| j"dkr9qz, j !|tj%|}t|trZ|D ]} ||| | qLn	 ||| | W q t-y } zt
dd||| ||j.f   W Y d }~qd }~ww q|d	7 }|d dkrtjd| j/t   | f  W q t-y   tjd|   w t0 }t1 D ])\}
}|\}} |tj$t%|
 |rt2|}|3|  |tj| q|D ]} |tjtj q j4  tjd| j/f  tjdt   |   d S )NzOutput to %s
r   z2No base given, using http://example.org/instances/zhttp://example.org/instances/z:No property base given, using http://example.org/property/zhttp://example.org/props/c                   s   g | ]	} j t| qS r   )r   r#   r   rF   r   r   r   ]  s    z#CSV2RDF.convert.<locals>.<listcomp>r   r   rx   r   z%dr9   c                 S  s&   g | ]}t |d ddddqS )r8   r   r9   r   r:   )r	   r=   r>   r   r   r   r   r   {  s    r   z#Could not process value for column z%d:%s in row %d, ignoring: %s i z$%d rows, %d triples, elapsed %.2fs.
zError processing line: %d
z#Converted %d rows into %d triples.
zTook %.2f seconds.
)5timer   r   stderrr   namer   rw   r-   r   warningswarnr<   	Namespacer   rH   r   nextlistdict	enumerater   itemsr   r   r   r   r
   typer   Classlenr   getr   Propertyr   rE   r&   domaindefault_node_maker    r.   r   ru   r^   messager   setr   r   addclose)rG   	csvreaderstartr   header_labelsheaderskr   r)   hr'   rowsr   r   _oeclassesucr   rF   r   convertF  s   









zCSV2RDF.convertN)rM   rN   rO   rR   r   r   r   r   r   r   r   /  s    c                  C  s  t  } ttjdd  dg d\}}t|}d|v sd|v r(tt td d|v rt	 }|
t|d  |dD ]\}}|d	krOt|d
d| _q>|dkrZt|| _q>|dkret|| _q>|dkrpt|| _q>|dkrzt|| _q>|dkrt|| _q>|dkrt|| _q>|dkr|| _q>|dkrt|| _q>|dkrt|| _q>|drt|| j t|dd  < q>|drt|| j!t|dd  < q>d|v rt|d d
d| _d|v rt|d d
d| _d|v rt|d | _d|v rt|d | _d|v r|d | _d|v r|d | _d|v r&t|d | _d |v r2t|d  | _d!|v r?t|d! | _d"|v rLt|d" | _d#|v rXt|d# | _d$|v rdt|d$ | _d%|v rpt|d% | _d&|v r|t|d& | _d'|v rt|d' | _d(|v rt|d( | _d)|v rt|d) | _d*|v rt|d* | _| D ].\}}|d+rt|| j t|d,d  < q|d-rt|| j!t|d.d  < q| jrd/|v sd0|v rd1| _| "t#t$%|| jd2 d S )3Nr   zhc:b:p:i:o:Cf:l:s:d:D:)
zout=zbase=zdelim=z	propbase=zclass=zdefault=ident=zlabel=zskip=defineclasshelpz-hz--helpz-fcsv2rdfoutwzutf-8basepropbaseclassr   identr   delimskipdefaultcol   prop   z-oz--outz-bz--basez-dz--delimz-Dz	--defaultz-pz
--propbasez-lz--labelz-iz--identz-sz--skipz-cz--classz--col   z--prop   z-Cz--defineclassT)	delimiter)&r   getoptr   argvr   printHELPexitconfigparserConfigParser	read_fileopenr   codecsr   r<   r   r   r   r   r   rg   r   r   r   r   r   re   r   r   r   
startswithr   r   r   r4   	fileinputinput)r   optsfilesconfigr   r   r   r   r   main  s   



















r   __main__)r   r   )r'   r(   r)   r*   r+   r*   rD   )r6   r7   )NN)?__doc__
__future__r   r   r   r0   ro   r   r   r   r   r   r   typingr   r   r   r   r   r   urllib.parser	   r<   rdflib.namespacer
   r   r   rdflib.termr   __all__r   r   __annotations__r#   r&   r.   excelr4   rA   rB   rQ   rV   rZ   rd   rf   rh   rn   rr   r   rz   r|   r}   r   r   r   r   r   r   r   r   r   r   rM   r   r   r   r   <module>   sz     ?

	




 
p
