o
    Ni                     @   s  d Z ddlmZ ddlmZ ddlmZ ddlZddlZddlZddl	m
  mZ ddlmZ dZdZeg dZeg d	Zd
ddZdd Zdd Zdd Zdd Zdd Zedejje ddd ffdejeffdejjddgdeffdejeffd ej effd!ej effd"ejeffd#ejeffd$ejeffd%ejje dd&d ffd'ejeffd(ej effd)ejeffgZ!d*Z"G d+d, d,ej#j$Z%dS )-Titanic dataset.    )absolute_import)division)print_functionNz@ONLINE {titanic,
author = "Frank E. Harrell Jr., Thomas Cason",
title  = "Titanic dataset",
month  = "oct",
year   = "2017",
url    = "https://www.openml.org/d/40945"
}
zDataset describing the survival status of individual passengers on the Titanic. Missing values in the original dataset are represented using ?. Float and int missing values are replaced with -1, string missing values are replaced with 'Unknown'.))C	Cherbourg)Q
Queenstown)SSouthampton)?Unknown))1	1st_class)2	2nd_class)3	3rd_classsurviveddied)r   0c                 C      | dkrdS t | S )Nr   g      )npfloat32d r   Z/home/ubuntu/.local/lib/python3.10/site-packages/tensorflow_datasets/structured/titanic.pyconvert_to_float9      r   c                 C   r   )Nr   )r   int32r   r   r   r   convert_to_int=   r   r"   c                 C   s   | dkrdS | S )Nr   r   r   r   r   r   r   convert_to_stringA   s   r#   c                 C   s   ||  S Nr   )r   
dictionaryr   r   r   convert_to_labelE   s   r&   c                 C   s   | S r$   r   r   r   r   r   return_sameI   s   r'   pclassnamesc                 C   
   t | tS r$   )r&   _PCLASS_DICTr   r   r   r   <lambda>O      
 r-   namesexmalefemaleagesibspparchticketfarecabinembarkedc                 C   r+   r$   )r&   _EMBARKED_DICTr   r   r   r   r-   Y   r.   boatbodyz	home.destz6https://www.openml.org/data/get_csv/16826755/phpMYEkMlc                   @   s6   e Zd ZdZejddZdd Zdd Z	dd	 Z
d
S )Titanicr   z2.0.0z6New split API (https://tensorflow.org/datasets/splits)c              	   C   sB   t jj| tt jt jjddgddd t D dddt	d	S )
Nr   r   r)   c                 S   s   i | ]	\}\}}||qS r   r   ).0r/   dtypefuncr   r   r   
<dictcomp>m   s    

z!Titanic._info.<locals>.<dictcomp>r   features)rC   r   zhttps://www.openml.org/d/40945)builderdescriptionrC   supervised_keyshomepagecitation)
tfdscoreDatasetInfo_DESCRIPTIONrC   FeaturesDict
ClassLabelFEATURE_DICTitems	_CITATION)selfr   r   r   _infog   s   zTitanic._infoc                 C   s$   | t}tjjtjjd|idgS )N	file_path)r/   
gen_kwargs)download_URLrI   rJ   SplitGeneratorSplitTRAIN)rR   
dl_managerpathr   r   r   _split_generatorsu   s   
zTitanic._split_generatorsc                 c   s    t jj|.}t|}t|D ]\}}|d}|t|t	dd |
 D dfV  qW d   dS 1 s9w   Y  dS )zGenerate features and target given the directory path.

    Args:
      file_path: path where the csv file is stored

    Yields:
      The features and the target
    r   c                 S   s"   i | ]\}}|t | d  |qS )   )rO   )r>   r/   valuer   r   r   rA      s    z.Titanic._generate_examples.<locals>.<dictcomp>rB   N)tfiogfileGFilecsv
DictReader	enumeratepopr&   _SURVIVED_DICTrP   )rR   rT   fraw_datairowsurvive_valr   r   r   _generate_examples   s   


"zTitanic._generate_examplesN)__name__
__module____qualname____doc__rI   rJ   VersionVERSIONrS   r]   rn   r   r   r   r   r=   b   s    r=   )&rr   
__future__r   r   r   collectionsrd   numpyr   tensorflow.compat.v2compatv2r`   tensorflow_datasets.public_api
public_apirI   rQ   rL   OrderedDictr:   r,   rh   r   r"   r#   r&   r'   rC   rN   valuesstringr   r!   rO   rW   rJ   GeneratorBasedBuilderr=   r   r   r   r   <module>   sP   

