o
    Ni                     @   s  d Z ddlmZ ddlmZ ddlmZ ddlZddlZddlm  m	Z
 ddlmZ dZdZg dZg d	Zd
Zede
jfde
jfdejjedfdejjedfde
jfde
jfde
jfde
jfde
jfde
jfde
jfde
jfgZG dd dejjZdS )zForest fires dataset.    )absolute_import)division)print_functionNa  
@misc{Dua:2019 ,
author = "Dua, Dheeru and Graff, Casey",
year = "2017",
title = "{UCI} Machine Learning Repository",
url = "http://archive.ics.uci.edu/ml",
institution = "University of California, Irvine, School of Information and Computer Sciences" }

@article{cortez2007data,
  title={A data mining approach to predict forest fires using meteorological data},
  author={Cortez, Paulo and Morais, Anibal de Jesus Raimundo},
  year={2007},
  publisher={Associa{\c{c}}{\~a}o Portuguesa para a Intelig{\^e}ncia Artificial (APPIA)}
}
a  

This is a regression task, where the aim is to predict the burned area of
forest fires, in the northeast region of Portugal,
by using meteorological and other data.


Data Set Information:

In [Cortez and Morais, 2007], the output 'area' was first transformed
with a ln(x+1) function.
Then, several Data Mining methods were applied. After fitting the models,
the outputs were
post-processed with the inverse of the ln(x+1) transform. Four different
input setups were
used. The experiments were conducted using a 10-fold (cross-validation)
x 30 runs. Two
regression metrics were measured: MAD and RMSE. A Gaussian support vector
machine (SVM) fed
with only 4 direct weather conditions (temp, RH, wind and rain) obtained
the best MAD value:
12.71 +- 0.01 (mean and confidence interval within 95% using a t-student
distribution). The
best RMSE was attained by the naive mean predictor. An analysis to the
regression error curve
(REC) shows that the SVM model predicts more examples within a lower
admitted error. In effect,
the SVM model predicts better small fires, which are the majority.

Attribute Information:

For more information, read [Cortez and Morais, 2007].

1. X - x-axis spatial coordinate within the Montesinho park map: 1 to 9
2. Y - y-axis spatial coordinate within the Montesinho park map: 2 to 9
3. month - month of the year: 'jan' to 'dec'
4. day - day of the week: 'mon' to 'sun'
5. FFMC - FFMC index from the FWI system: 18.7 to 96.20
6. DMC - DMC index from the FWI system: 1.1 to 291.3
7. DC - DC index from the FWI system: 7.9 to 860.6
8. ISI - ISI index from the FWI system: 0.0 to 56.10
9. temp - temperature in Celsius degrees: 2.2 to 33.30
10. RH - relative humidity in %: 15.0 to 100
11. wind - wind speed in km/h: 0.40 to 9.40
12. rain - outside rain in mm/m2 : 0.0 to 6.4
13. area - the burned area of the forest (in ha): 0.00 to 1090.84
(this output variable is very skewed towards 0.0, thus it may make
sense to model with the logarithm transform).

)janfebmaraprmayjunjulaugsepoctnovdec)montuewedthufrisatsunzVhttps://archive.ics.uci.edu/ml/machine-learning-databases/forest-fires/forestfires.csvXYmonth)namesdayFFMCDMCDCISItempRHwindrainc                   @   s4   e Zd ZdZejdZdd Zdd Z	dd Z
d	S )
ForestFireszARegression task aimed to predict the burned area of forest fires.z0.0.1c              	   C   s6   t jj| tt jtjdd t	 D dddt
dS )Nc                 S      i | ]\}}||qS  r'   ).0namedtyper'   r'   _/home/ubuntu/.local/lib/python3.10/site-packages/tensorflow_datasets/structured/forest_fires.py
<dictcomp>       z%ForestFires._info.<locals>.<dictcomp>areafeaturesz4https://archive.ics.uci.edu/ml/datasets/Forest+Fires)builderdescriptionr0   supervised_keyshomepagecitation)tfdscoreDatasetInfo_DESCRIPTIONr0   FeaturesDicttffloat32FEATURESitems	_CITATION)selfr'   r'   r+   _info}   s   zForestFires._infoc                 C   s$   | t}tjjtjjd|idgS )zReturns SplitGenerators.	file_path)r)   
gen_kwargs)download_URLr6   r7   SplitGeneratorSplitTRAIN)r@   
dl_managerdatar'   r'   r+   _split_generators   s   
zForestFires._split_generatorsc                 c   sv    t jj|)}t|}t|D ]\}}||ddd | D dfV  qW d   dS 1 s4w   Y  dS )zYields examples.r/   c                 S   r&   r'   r'   )r(   r)   valuer'   r'   r+   r,      r-   z2ForestFires._generate_examples.<locals>.<dictcomp>r.   N)	r;   iogfileGFilecsv
DictReader	enumeratepopr>   )r@   rB   fraw_datairowr'   r'   r+   _generate_examples   s   
"zForestFires._generate_examplesN)__name__
__module____qualname____doc__r6   r7   VersionVERSIONrA   rK   rX   r'   r'   r'   r+   r%   x   s    r%   )r\   
__future__r   r   r   collectionsrP   tensorflow.compat.v2compatv2r;   tensorflow_datasets.public_api
public_apir6   r?   r9   _MONTHS_DAYSrE   OrderedDictuint8r0   
ClassLabelr<   r=   r7   GeneratorBasedBuilderr%   r'   r'   r'   r+   <module>   s8   3