Adding a New Dataset to the NeuralHydrology library¶

Motivation¶

  • The Chilean landscape has many mountainous regions with geological properties similar to those of the drainage area of the Lupa spring in Italy.
  • The CAMELS-CL dataset includes the fraction of carbonate rocks and the fraction of forest cover among its catchment attributes, which can be used as filter arguments to select comparable basins.



Before we start¶

This tutorial is rendered from a Jupyter notebook that is hosted on GitHub. If you want to run the code yourself, you can find the notebook here.

There are two options for using a different dataset within the NeuralHydrology library:

  1. Preprocess your data to use the GenericDataset in neuralhydrology.datasetzoo.genericdataset.
  2. Implement a new dataset class, inheriting from BaseDataset in neuralhydrology.datasetzoo.basedataset.

Using the GenericDataset is recommended and does not require you to add or change a single line of code, while writing a new dataset class gives you more freedom to do whatever you want.

Using the GenericDataset¶

With the release of version 0.9.6-beta, we added a GenericDataset. This class can be used with any data, as long as the data is preprocessed in the following way:

  • The data directory (config argument data_dir) must contain a folder 'time_series' and (if static attributes are used) a folder 'attributes'.
  • The folder 'time_series' contains one netcdf file (.nc or .nc4) per basin, named '<basin_id>.nc/nc4'. The netcdf file has to have one coordinate called date, containing the datetime index.
  • The folder 'attributes' contains one or more comma-separated files (.csv) with static attributes, indexed by basin id. Attributes files can be divided into groups of basins or groups of features (but not both).

If you prepare your data set following these guidelines, you can simply set the config argument dataset to generic and set the data_dir to the path of your preprocessed data directory.

Note: Make sure to mark invalid data points as NaN (e.g. using NumPy's np.nan) instead of a sentinel value like -999, which is often used (for whatever reason) for invalid discharge in hydrology. NeuralHydrology can then correctly identify these values as NaN: samples with NaN in the inputs are excluded from model training (they would otherwise lead to NaN loss and thus NaN weights), and timesteps where the target value is NaN are ignored when computing the loss.
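For illustration, here is a minimal sketch of such a preprocessing step with toy data (the basin id 44444444, the attribute, and all values are made up):

import numpy as np
import pandas as pd
from pathlib import Path

data_dir = Path("my_generic_dataset")  # becomes the config argument `data_dir`
(data_dir / "time_series").mkdir(parents=True, exist_ok=True)
(data_dir / "attributes").mkdir(exist_ok=True)

# toy time series for one basin; a real dataset would load these from the raw files
dates = pd.date_range("2000-01-01", "2000-12-31", freq="D")
df = pd.DataFrame({"precip": np.random.rand(len(dates)),
                   "streamflow": np.random.rand(len(dates))},
                  index=pd.Index(dates, name="date"))  # coordinate must be called 'date'
df = df.replace(-999, np.nan)  # mark invalid values as NaN, not with sentinel values

# one netCDF file per basin, named <basin_id>.nc
df.to_xarray().to_netcdf(data_dir / "time_series" / "44444444.nc")

# static attributes: one or more csv files indexed by basin id
attributes = pd.DataFrame({"area": [463.1]}, index=pd.Index(["44444444"], name="basin_id"))
attributes.to_csv(data_dir / "attributes" / "attributes.csv")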

Adding a Dataset Class¶

The rest of this tutorial will show you how to add a new dataset class to the neuralhydrology.datasetzoo. As an example, we will use the CAMELS-CL dataset.

In [1]:
from pathlib import Path
from typing import List, Dict, Union

import pandas as pd
import xarray

from neuralhydrology.datasetzoo.basedataset import BaseDataset
from neuralhydrology.utils.config import Config

Template¶

Every dataset has its own file in neuralhydrology.datasetzoo and follows a common template. The template can be found here.

The most important points are:

  • All dataset classes have to inherit from BaseDataset implemented in neuralhydrology.datasetzoo.basedataset.
  • All dataset classes have to accept the same inputs upon initialization (see below).
  • Within each dataset class, you have to implement two methods:
    • _load_basin_data(): This method loads the time series data for a single basin of the dataset (e.g. meteorological forcing data and streamflow) into a time-indexed pd.DataFrame.
    • _load_attributes(): This method loads the catchment attributes for all basins in the dataset and returns a basin-indexed pd.DataFrame with attributes as columns.

BaseDataset is a map-style PyTorch Dataset that implements the core logic for all datasets. It takes care of multiple temporal resolutions, data fusion, normalization, sanity checks, etc., and also implements the required methods __len__ (returns the total number of training samples) and __getitem__ (returns a single training sample for a given index), which PyTorch data loaders use, e.g., to create mini-batches for training. However, none of this is important if you just want to add another dataset to the NeuralHydrology library.
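Because it is a map-style dataset, instances can be consumed like any other PyTorch Dataset. A minimal sketch (assuming cfg is a valid Config loaded from a run configuration):

from torch.utils.data import DataLoader
from neuralhydrology.datasetzoo import get_dataset

dataset = get_dataset(cfg=cfg, is_train=True, period="train")
print(len(dataset))   # __len__: total number of training samples
sample = dataset[0]   # __getitem__: dict of tensors for one training sample

# map-style datasets plug directly into a PyTorch DataLoader for mini-batching
loader = DataLoader(dataset, batch_size=cfg.batch_size, shuffle=True)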

Preprocessing CAMELS-CL¶

Because the CAMELS-CL dataset comes in a rather unusual file structure, we added a function to create per-basin csv files with all timeseries features. You can find the function preprocess_camels_cl_dataset in neuralhydrology.datasetzoo.camelscl, which will create a subfolder called preprocessed containing the per-basin files. For the remainder of this tutorial, we assume that this folder and the per-basin csv files exist.

In [2]:
from neuralhydrology.datasetzoo.camelscl import preprocess_camels_cl_dataset
In [ ]:
chili_dir = Path(r"C:\Users\VanOp\Documents\Notebooks\NeuralHydrology\data\CAMELS-CL_dataset")
preprocess_camels_cl_dataset(chili_dir)

C:\Users\VanOp\Documents\Notebooks\NeuralHydrology\data\CAMELS-CL_dataset\preprocessed
Loading txt files into memory: 100%|██████████| 11/11 [00:17<00:00, 1.55s/it]
Creating per-basin dataframes and saving to disk: 100%|██████████| 516/516 [02:23<00:00, 3.60it/s]
Finished processing the CAMELS CL data set. Resulting per-basin csv files have been stored at C:\Users\VanOp\Documents\Notebooks\NeuralHydrology\data\CAMELS-CL_dataset\preprocessed

Class skeleton¶

For the sake of this tutorial, we will omit doc-strings. However, when adding your dataset class, we highly encourage you to add extensive doc-strings, as we did for all dataset classes in this package. We use Python type annotations everywhere, which facilitates code development with any modern IDE and makes it easier to understand what is happening inside a function or class.

The class skeleton looks like this:

In [2]:
class CamelsCL(BaseDataset):
    
    def __init__(self,
                 cfg: Config,
                 is_train: bool,
                 period: str,
                 basin: str = None,
                 additional_features: List[Dict[str, pd.DataFrame]] = [],
                 id_to_int: Dict[str, int] = {},
                 scaler: Dict[str, Union[pd.Series, xarray.DataArray]] = {}):
        
        # Initialize `BaseDataset` class
        super(CamelsCL, self).__init__(cfg=cfg,
                                       is_train=is_train,
                                       period=period,
                                       basin=basin,
                                       additional_features=additional_features,
                                       id_to_int=id_to_int,
                                       scaler=scaler)

    def _load_basin_data(self, basin: str) -> pd.DataFrame:
        """Load timeseries data of one specific basin"""
        raise NotImplementedError

    def _load_attributes(self) -> pd.DataFrame:
        """Load catchment attributes"""
        raise NotImplementedError

Data loading functions¶

For all datasets, we implemented the actual data loading (e.g., from the txt or csv files) in separate functions outside of the class so that these functions are usable everywhere. This is useful, for example, when you want to inspect or visualize the discharge of a particular basin or do anything else with the basin data. These functions are implemented within the same file (since they are specific to each data set), and we use them from within the class methods.

So let's start by implementing a function that reads the time series data of a single basin, given its basin identifier.

In [3]:
def load_camels_cl_timeseries(data_dir: Path, basin: str) -> pd.DataFrame:
    preprocessed_dir = data_dir / "preprocessed"
    
    # make sure the CAMELS-CL data was already preprocessed and per-basin files exist.
    if not preprocessed_dir.is_dir():
        msg = [
            f"No preprocessed data directory found at {preprocessed_dir}. Use preprocess_camels_cl_dataset ",
            "in neuralhydrology.datasetzoo.camelscl to preprocess the CAMELS CL data set once into ",
            "per-basin files."
        ]
        raise FileNotFoundError("".join(msg))
        
    # load the data for the specific basin into a time-indexed dataframe
    basin_file = preprocessed_dir / f"{basin}.csv"
    df = pd.read_csv(basin_file, index_col='date', parse_dates=['date'])
    return df

Most of this should be easy to follow. First, we check that the data was already preprocessed, and if it wasn't, we raise an error with an appropriate message. Then we load the data into a pd.DataFrame and make sure that the index is converted into a datetime format.
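For example, we can now inspect the streamflow of a single basin directly (a sketch using the chili_dir defined above and one of the basin ids selected later in this notebook):

import matplotlib.pyplot as plt

df = load_camels_cl_timeseries(data_dir=chili_dir, basin="4515002")
df["streamflow_m3s"].plot()
plt.ylabel("streamflow [m³/s]")
plt.show()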

Next, we need a function to load the attributes, which are stored in a file called 1_CAMELScl_attributes.txt. We assume that this file exists in the root directory of the dataset (such information is useful to add to the docstring!). The dataframe that this function returns must be indexed by basin id, with the attributes as columns. Furthermore, we accept an optional argument basins, a list of strings specifying basins of interest; if passed, we only return the attributes of those basins.

In [4]:
def load_camels_cl_attributes(data_dir: Path, basins: List[str] = []) -> pd.DataFrame:
    
    # load attributes into basin-indexed dataframe
    attributes_file = data_dir / '1_CAMELScl_attributes.txt'
    df = pd.read_csv(attributes_file, sep="\t", index_col="gauge_id").transpose()

    # convert all columns, where possible, to numeric
    df = df.apply(pd.to_numeric, errors='ignore')

    # convert the two columns specifying record period start and end to datetime format
    df["record_period_start"] = pd.to_datetime(df["record_period_start"])
    df["record_period_end"] = pd.to_datetime(df["record_period_end"])

    if basins:
        if any(b not in df.index for b in basins):
            raise ValueError('Some basins are missing static attributes.')
        df = df.loc[basins]

    return df
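As a quick stand-alone check (a sketch; the column names are taken from the attribute list shown further below):

df_attr = load_camels_cl_attributes(chili_dir)
print(df_attr[["area", "forest_frac", "carb_rocks_frac"]].head())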

Putting everything together¶

Now we have all required pieces and can finish the dataset class. Notice that in all methods we have access to all class attributes of the parent class (such as the config, which is stored in self.cfg). In the _load_attributes method, we simply defer to the attribute loading function we implemented above. The BaseDataset takes care of removing all attributes that are not specified as input features, and it also checks for missing attributes, so you don't have to handle this here.

In [5]:
class CamelsCL(BaseDataset):
    
    def __init__(self,
                 cfg: Config,
                 is_train: bool,
                 period: str,
                 basin: str = None,
                 additional_features: List[Dict[str, pd.DataFrame]] = [],
                 id_to_int: Dict[str, int] = {},
                 scaler: Dict[str, Union[pd.Series, xarray.DataArray]] = {}):
        
        # Initialize `BaseDataset` class
        super(CamelsCL, self).__init__(cfg=cfg,
                                       is_train=is_train,
                                       period=period,
                                       basin=basin,
                                       additional_features=additional_features,
                                       id_to_int=id_to_int,
                                       scaler=scaler)

    def _load_basin_data(self, basin: str) -> pd.DataFrame:
        """Load timeseries data of one specific basin"""
        return load_camels_cl_timeseries(data_dir=self.cfg.data_dir, basin=basin)

    def _load_attributes(self) -> pd.DataFrame:
        """Load catchment attributes"""
        return load_camels_cl_attributes(self.cfg.data_dir, basins=self.basins)

Integrating the dataset class into NeuralHydrology¶

With these few lines of code, you are ready to use the new dataset within the NeuralHydrology framework. The only thing missing is to link the new dataset in the get_dataset() function, implemented in neuralhydrology.datasetzoo.__init__.py. Apart from the doc-string (the rendered documentation can be found here), the code of this function is as simple as this:

In [7]:
from neuralhydrology.datasetzoo.basedataset import BaseDataset
from neuralhydrology.datasetzoo.camelscl import CamelsCL
from neuralhydrology.datasetzoo.camelsgb import CamelsGB
from neuralhydrology.datasetzoo.camelsus import CamelsUS
from neuralhydrology.datasetzoo.hourlycamelsus import HourlyCamelsUS
from neuralhydrology.utils.config import Config

def get_dataset(cfg: Config,
                is_train: bool,
                period: str,
                basin: str = None,
                additional_features: list = [],
                id_to_int: dict = {},
                scaler: dict = {}) -> BaseDataset:
    """Get data set instance, depending on the run configuration.

    Currently implemented datasets are 'camels_cl', 'camels_gb', 'camels_us' and 'hourly_camels_us'.

    Parameters
    ----------
    cfg : Config
        The run configuration.
    is_train : bool
        Defines if the dataset is used for training or evaluating. If True (training), means/stds for each feature
        are computed and stored to the run directory. If one-hot encoding is used, the mapping for the one-hot
        encoding is created and also stored to disk. If False, a `scaler` input is expected, and similarly the
        `id_to_int` input if one-hot encoding is used.
    period : {'train', 'validation', 'test'}
        Defines the period for which the data will be loaded.
    basin : str, optional
        If passed, the data for only this basin will be loaded. Otherwise, the basin(s) are read from the
        appropriate basin file, corresponding to the `period`.
    additional_features : List[Dict[str, pd.DataFrame]], optional
        List of dictionaries, mapping from a basin id to a pandas DataFrame. This DataFrame will be added to the
        data loaded from the dataset, and all columns are available as 'dynamic_inputs', 'evolving_attributes'
        and 'target_variables'.
    id_to_int : Dict[str, int], optional
        If the config argument 'use_basin_id_encoding' is True and period is either 'validation' or 'test', this
        input is required. It is a dictionary, mapping from basin id to an integer (the one-hot encoding).
    scaler : Dict[str, Union[pd.Series, xarray.DataArray]], optional
        If period is either 'validation' or 'test', this input is required. It contains the centering and scaling
        for each feature and is stored to the run directory during training (train_data/train_data_scaler.yml).
    """
    # check config argument and select appropriate data set class
    if cfg.dataset == "camels_us":
        Dataset = CamelsUS
    elif cfg.dataset == "camels_gb":
        Dataset = CamelsGB
    elif cfg.dataset == "hourly_camels_us":
        Dataset = HourlyCamelsUS
    elif cfg.dataset == "camels_cl":
        Dataset = CamelsCL
    else:
        raise NotImplementedError(f"No dataset class implemented for dataset {cfg.dataset}")
    # initialize dataset
    ds = Dataset(cfg=cfg,
                 is_train=is_train,
                 period=period,
                 basin=basin,
                 additional_features=additional_features,
                 id_to_int=id_to_int,
                 scaler=scaler)
    return ds
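For example, a sketch of how a run configuration would select the new class (assuming a hypothetical .yml file that sets dataset: camels_cl):

cfg = Config(Path("some_run_config.yml"))  # hypothetical config with `dataset: camels_cl`
dataset = get_dataset(cfg=cfg, is_train=True, period="train")
print(type(dataset).__name__)  # -> CamelsCL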
In [36]:
import neuralhydrology
print(neuralhydrology.__file__)
c:\Users\VanOp\mambaforge\envs\neuralhydrology_cuda11_8\Lib\site-packages\neuralhydrology\__init__.py

Now, by setting dataset: camels_cl in the config file, you are able to train a model on the CAMELS-CL data set.
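The relevant config entries, sketched here as a Python dict (Config also accepts a dict instead of a .yml path; the feature names are examples from the list below):

from pathlib import Path
from neuralhydrology.utils.config import Config

cfg = Config({
    "dataset": "camels_cl",
    "data_dir": Path("data/CAMELS-CL_dataset"),
    "dynamic_inputs": ["precip_cr2met", "tmean_cr2met", "pet_hargreaves"],
    "target_variables": ["streamflow_m3s"],
})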

The available time series features are:

  • tmax_cr2met
  • precip_mswep
  • streamflow_m3s
  • tmin_cr2met
  • pet_8d_modis
  • precip_chirps
  • pet_hargreaves
  • streamflow_mm
  • precip_cr2met
  • swe
  • tmean_cr2met
  • precip_tmpa

For a list of available attributes, look at the 1_CAMELScl_attributes.txt file or make use of the function implemented above to load the attributes into a pd.DataFrame.

In [43]:
import pandas as pd
Chili_att = pd.read_csv(r".\CAMELS-CL_dataset\1_CAMELScl_attributes.txt", sep="\t", index_col=0, decimal=".")
Chili_att
Out[43]:
1001001 1001002 1001003 1020002 1020003 1021001 1021002 1041002 1044001 1050002 ... 12820001 12825002 12861001 12863002 12865001 12872001 12876001 12876004 12878001 12930001
gauge_id
gauge_name Rio Caquena En Nacimiento Rio Caquena En Vertedero Rio Colpacagua En Desembocadura Rio Desaguadero Cotacotani Rio Lauca En Estancia El Lago Rio Lauca En Japu (O En El Limite) Rio Guallatire En Guallatire Rio Isluga En Bocatoma Rio Cancosa En El Tambo Rio Piga En Collacagua ... Rio Caleta En Tierra Del Fuego Rio Azopardo En Desembocadura Rio Cullen En Frontera Rio San Martin En San Sebastian Rio Chico En Ruta Y-895 Rio Herminita En Ruta Y-895 Rio Grande En Tierra Del Fuego Rio Catalina En Pampa Guanacos Rio Rasmussen En Frontera (Estancia VicuÑA) Rio Robalo En Puerto Williams
gauge_lat -18.0769 -17.9942 -18.0156 -18.1936 -18.2325 -18.5833 -18.4931 -19.2711 -19.8586 -20.0344 ... -53.8586 -54.5028 -52.8453 -53.3164 -53.5436 -53.8056 -53.8928 -54.0411 -54.0181 -54.9469
gauge_lon -69.1961 -69.2550 -69.2308 -69.2458 -69.3319 -69.0467 -69.1494 -68.6797 -68.5858 -68.8311 ... -69.9989 -68.8244 -68.6317 -68.6511 -68.6908 -68.6725 -68.8844 -68.7975 -68.6528 -67.6392
record_period_start 1976-07-21 1969-11-27 1988-06-16 1964-12-07 1937-02-06 1963-04-11 1971-05-26 1995-05-25 1994-08-10 1959-11-15 ... 2006-01-01 2006-02-14 2005-01-14 2006-04-25 2005-01-11 2005-01-12 1981-05-12 2007-09-26 2004-01-21 2003-01-01
record_period_end 2004-05-25 2017-07-31 2017-05-18 2017-07-31 2017-07-31 2017-09-30 2018-03-09 2017-07-31 2017-07-31 2017-07-31 ... 2018-03-09 2017-01-01 2017-05-31 2016-12-07 2017-04-30 2016-02-16 2018-03-09 2016-08-31 2017-05-31 2018-03-09
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
sur_rights_flow 0.9513300 1.0508300 0.9708300 0.0000000 0.4782283 0.6897583 0.0390000 0.1277000 0.0400000 0.0000000 ... 0.0000000 0.1000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0166667 0.0000000 0.0000000 0.0000000
interv_degree 2.142517940859 0.939702024815 4.595532913399 0.000000000000 1.402388897726 0.271734844754 0.107030393551 0.254187459852 0.185065510535 0.000000000000 ... 0.000000000000 0.002005982793 0.000000000000 0.000000000000 0.000000000000 0.000000000000 0.000572426625 0.000000000000 0.000000000000 0.000000000000
gw_rights_n 0 0 0 0 0 4 0 0 0 4 ... 0 0 8 0 0 1 1 0 0 0
gw_rights_flow 0.0000000 0.0000000 0.0000000 0.0000000 0.0000000 0.0760000 0.0000000 0.0000000 0.0000000 0.3000000 ... 0.0000000 0.0000000 0.0327000 0.0000000 0.0000000 0.0007200 0.0009100 0.0000000 0.0000000 0.0000000
big_dam 0 0 0 1 1 1 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0

104 rows × 516 columns

In [44]:
Chili_attT = Chili_att.T
list(Chili_attT.columns)
Out[44]:
['gauge_name',
 'gauge_lat',
 'gauge_lon',
 'record_period_start',
 'record_period_end',
 'n_obs',
 'area',
 'elev_gauge',
 'elev_mean',
 'elev_med',
 'elev_max',
 'elev_min',
 'slope_mean',
 'nested_inner',
 'nested_outer',
 'location_type',
 'geol_class_1st',
 'geol_class_1st_frac',
 'geol_class_2nd',
 'geol_class_2nd_frac',
 'carb_rocks_frac',
 'crop_frac',
 'nf_frac',
 'fp_frac',
 'grass_frac',
 'shrub_frac',
 'wet_frac',
 'imp_frac',
 'lc_barren',
 'snow_frac',
 'lc_glacier',
 'fp_nf_index',
 'forest_frac',
 'dom_land_cover',
 'dom_land_cover_frac',
 'land_cover_missing',
 'p_mean_cr2met',
 'p_mean_chirps',
 'p_mean_mswep',
 'p_mean_tmpa',
 'pet_mean',
 'aridity_cr2met',
 'aridity_chirps',
 'aridity_mswep',
 'aridity_tmpa',
 'p_seasonality_cr2met',
 'p_seasonality_chirps',
 'p_seasonality_mswep',
 'p_seasonality_tmpa',
 'frac_snow_cr2met',
 'frac_snow_chirps',
 'frac_snow_mswep',
 'frac_snow_tmpa',
 'high_prec_freq_cr2met',
 'high_prec_freq_chirps',
 'high_prec_freq_mswep',
 'high_prec_freq_tmpa',
 'high_prec_dur_cr2met',
 'high_prec_dur_chirps',
 'high_prec_dur_mswep',
 'high_prec_dur_tmpa',
 'high_prec_timing_cr2met',
 'high_prec_timing_chirps',
 'high_prec_timing_mswep',
 'high_prec_timing_tmpa',
 'low_prec_freq_cr2met',
 'low_prec_freq_chirps',
 'low_prec_freq_mswep',
 'low_prec_freq_tmpa',
 'low_prec_dur_cr2met',
 'low_prec_dur_chirps',
 'low_prec_dur_mswep',
 'low_prec_dur_tmpa',
 'low_prec_timing_cr2met',
 'low_prec_timing_chirps',
 'low_prec_timing_mswep',
 'low_prec_timing_tmpa',
 'p_mean_spread',
 'q_mean',
 'runoff_ratio_cr2met',
 'runoff_ratio_chirps',
 'runoff_ratio_mswep',
 'runoff_ratio_tmpa',
 'stream_elas_cr2met',
 'stream_elas_chirps',
 'stream_elas_mswep',
 'stream_elas_tmpa',
 'slope_fdc',
 'baseflow_index',
 'hfd_mean',
 'Q95',
 'Q5',
 'high_q_freq',
 'high_q_dur',
 'low_q_freq',
 'low_q_dur',
 'zero_q_freq',
 'swe_ratio',
 'sur_rights_n',
 'sur_rights_flow',
 'interv_degree',
 'gw_rights_n',
 'gw_rights_flow',
 'big_dam']
In [57]:
Chili_attT["carb_rocks_frac"]= pd.to_numeric(Chili_attT.carb_rocks_frac, errors='raise',downcast='float',) ; Chili_attT["forest_frac"]= pd.to_numeric(Chili_attT.forest_frac, errors='raise',downcast='float',)
Chili_attT.carb_rocks_frac
Out[57]:
 1001001    0.000
 1001002    0.000
 1001003    0.000
 1020002    0.000
 1020003    0.000
            ...  
12872001    0.000
12876001    0.017
12876004    0.258
12878001    0.106
12930001    0.998
Name: carb_rocks_frac, Length: 516, dtype: float32
In [58]:
Chili_carbonicRockFr = Chili_attT[Chili_attT.carb_rocks_frac > 0.05]
Chili_carbonicRockFr = Chili_carbonicRockFr[Chili_carbonicRockFr.forest_frac > 0.25]
Chili_carbonicRockFr
Out[58]:
gauge_id gauge_name gauge_lat gauge_lon record_period_start record_period_end n_obs area elev_gauge elev_mean elev_med ... low_q_freq low_q_dur zero_q_freq swe_ratio sur_rights_n sur_rights_flow interv_degree gw_rights_n gw_rights_flow big_dam
4515001 Rio Mostazal Antes Junta Rio Tulahuencito -30.8456 -70.7139 1959-05-01 1967-10-31 3045 463.09412 975 2825.0618 3030 ... NaN NaN NaN NaN 904 0.0020000 0.001362104377 0 0.0000000 0
4515002 Rio Mostazal En Caren -30.8422 -70.7694 1972-07-24 2017-07-31 13538 640.15443 739 2588.8570 2609 ... 135.63377193 32.173333 0.0000000000 0.706802388323 979 0.0380000 0.028564754408 3 0.0058100 0
4516001 Rio Grande En Coipo -30.7828 -70.8222 1942-12-01 1978-04-26 9143 2134.15486 585 2548.1074 2619 ... NaN NaN NaN NaN 1099 0.1209600 0.019323654359 9 0.1318600 0
4522001 Rio Rapel En Paloma -30.7333 -70.6167 1941-10-02 1983-03-23 4702 510.53423 1492 3221.4487 3426 ... NaN NaN NaN NaN 14 0.0077000 0.007385110330 0 0.0000000 0
4522002 Rio Rapel En Junta -30.7081 -70.8728 1959-04-01 2017-07-31 18595 820.55412 564 2661.3161 2786 ... 151.50912757 80.828571 0.0000000000 0.849858914678 23 0.0484500 0.031221368731 7 0.0599400 0
4523001 Rio Grande En Agua Chica -30.7022 -70.9000 1946-09-15 1983-02-28 12390 3015.77483 553 2544.0132 2626 ... NaN NaN NaN NaN 1134 0.1839100 0.024036478820 22 0.1995000 0
4523002 Rio Grande En Puntilla San Juan -30.7047 -70.9244 1942-03-01 2018-03-09 25038 3529.39699 436 2483.5774 2522 ... 109.11014586 29.842857 0.0000000000 0.622453074633 1134 0.1839100 0.020630349139 23 0.2013000 0
7102001 Rio Teno En Los QueÑEs -34.9931 -70.8097 1938-04-01 1985-01-31 16737 848.90491 800 2190.6503 2273 ... NaN NaN NaN NaN 59 0.1881000 0.004722311305 1 0.0055000 1
7102005 Rio Teno Bajo Quebrada Infiernillo -35.0450 -70.6353 1985-01-14 2017-02-21 8414 600.73655 1264 2412.0217 2484 ... NaN NaN NaN NaN 39 0.1000000 0.003582191171 0 0.0000000 1
7104002 Rio Teno Despues De Junta Con Claro -34.9961 -70.8206 1947-09-29 2018-03-09 21859 1205.26412 651 2090.3034 2146 ... 11.09861818 7.620690 0.0000000000 0.153429268412 81 0.5871000 0.010686223674 1 0.0055000 1
12284005 Rio Don Guillermo En Cerro Castillo -51.2667 -72.4833 1980-06-07 2013-04-17 8780 500.09546 748 438.9808 428 ... 194.69760303 22.635714 0.0000000000 NaN 2 0.0000000 0.000000000000 2 0.0026000 0
12286002 Rio Rincon En Ruta Y-290 -51.3139 -72.8292 2010-01-29 2018-03-09 2837 75.83370 33 720.6250 723 ... NaN NaN NaN NaN 2 0.2804167 0.049319662345 0 0.0000000 0
12287001 Rio Grey Antes Junta Serrano -51.1833 -73.0167 1981-10-25 2018-03-09 12997 867.01569 22 837.7016 806 ... 19.73771525 10.722222 0.0000000000 NaN 2 0.0020417 0.000016598690 0 0.0000000 0
12288002 Rio Geikie En Desembocadura -51.3019 -73.2075 2011-07-21 2016-08-03 1000 473.77500 11 867.2147 901 ... NaN NaN NaN NaN 0 0.0000000 0.000000000000 0 0.0000000 0
12288003 Rio Tindall En Desembocadura -51.2564 -73.1561 2011-07-19 2015-03-29 1004 120.03402 16 484.8138 545 ... NaN NaN NaN NaN 0 0.0000000 0.000000000000 0 0.0000000 0
12288004 Rio Caadon 1 En Desembocadura -51.3128 -73.2750 2009-10-17 2012-10-07 1022 158.75649 62 682.7917 590 ... NaN NaN NaN NaN 0 0.0000000 0.000000000000 0 0.0000000 0
12289001 Rio Serrano En Desembocadura -51.3328 -73.1092 1994-12-15 2018-03-09 7975 8583.25515 24 590.6925 455 ... NaN NaN NaN NaN 94 1.3087491 0.003252225460 3 0.0036000 0
12289002 Rio Serrano En Desague Lago Del Toro -51.2000 -72.9333 1986-05-22 2018-03-09 11301 5287.76796 28 544.3793 426 ... 3.37106698 6.600000 0.0000000000 NaN 55 0.9357999 0.011007888440 2 0.0026000 0
12289003 Rio Serrano Antes Junta Grey -51.2167 -72.9833 1970-04-29 1986-03-13 3182 5295.86969 20 543.6802 425 ... NaN NaN NaN NaN 56 0.9378207 0.013860117211 2 0.0026000 0
12825002 Rio Azopardo En Desembocadura -54.5028 -68.8244 2006-02-14 2017-01-01 3823 3524.51740 27 321.8879 252 ... NaN NaN NaN NaN 3 0.1000000 0.002005982793 0 0.0000000 0
12876004 Rio Catalina En Pampa Guanacos -54.0411 -68.7975 2007-09-26 2016-08-31 1475 82.69491 152 270.3954 239 ... NaN NaN NaN NaN 0 0.0000000 0.000000000000 0 0.0000000 0
12878001 Rio Rasmussen En Frontera (Estancia VicuÑA) -54.0181 -68.6528 2004-01-21 2017-05-31 4799 468.92621 130 307.6843 271 ... NaN NaN NaN NaN 0 0.0000000 0.000000000000 0 0.0000000 0
12930001 Rio Robalo En Puerto Williams -54.9469 -67.6392 2003-01-01 2018-03-09 4846 20.64562 57 520.8493 542 ... NaN NaN NaN NaN 0 0.0000000 0.000000000000 0 0.0000000 0

23 rows × 104 columns

Selection of carbonate-rock basins¶

The filter criteria were carb_rocks_frac > 0.05 (more than 5 % carbonate rocks) and forest_frac > 0.25 (more than 25 % forest cover); the same selection can also be written as a single query, as sketched below.
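Equivalently, the two boolean selections from the cell above can be combined into a single pandas query:

Chili_carbonicRockFr = Chili_attT.query("carb_rocks_frac > 0.05 and forest_frac > 0.25")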

In [60]:
Chili_carbonicRockFr.index  # gauge_id
Out[60]:
Index([' 4515001', ' 4515002', ' 4516001', ' 4522001', ' 4522002', ' 4523001',
       ' 4523002', ' 7102001', ' 7102005', ' 7104002', '12284005', '12286002',
       '12287001', '12288002', '12288003', '12288004', '12289001', '12289002',
       '12289003', '12825002', '12876004', '12878001', '12930001'],
      dtype='object')
In [14]:
limestone = pd.read_csv("chili_carbon_basins_meteo_aridity_index.csv", sep=";", index_col=["BasinID"], usecols=[0, 1, 2, 4, 7, 9, 11])
limestone
Out[14]:
Basinname Location Area Precip_anual_media_CR2MET_mm Index_aridity Elev_max,Elev_media,Elev_puntosalida
BasinID
4515001 Rio Mostazal Antes Junta Rio Tulahuencito (Lat. -30.85, Lon. -70.71) 463.1 367 2.1 4401, 2825, 842
4515002 Rio Mostazal En Caren (Lat. -30.84, Lon. -70.77) 640.2 347 2.5 4401, 2589, 692
4516001 Rio Grande En Coipo (Lat. -30.78, Lon. -70.82) 2134.2 304 3.1 4401, 2548, 576
4522001 Rio Rapel En Paloma (Lat. -30.73, Lon. -70.62) 510.5 367 1.8 4825, 3221, 1188
4522002 Rio Rapel En Junta (Lat. -30.71, Lon. -70.87) 820.6 333 2.5 4825, 2661, 496
4523001 Rio Grande En Agua Chica (Lat. -30.7, Lon. -70.9) 3015.8 310 3.0 4825, 2544, 447
4523002 Rio Mostazal En Caren (Lat. -30.84, Lon. -70.77) 640.2 347 2.5 4401, 2589, 692
7102001 Rio Teno En Los QueÑEs (Lat. -34.99, Lon. -70.81) 848.9 1636 0.5 3944, 2191, 660
7102005 Rio Teno Bajo Quebrada Infiernillo (Lat. -35.05, Lon. -70.64) 600.7 1661 0.5 3944, 2412, 996
7104002 Estero El Manzano Antes Junta Rio Teno (Lat. -34.97, Lon. -70.94) 133.7 1289 0.8 2581, 1276, 522
12284005 Rio Don Guillermo En Cerro Castillo (Lat. -51.27, Lon. -72.48) 500.0 402 1.9 1101, 439, 34
12286002 Rio Rincon En Ruta Y-290 (Lat. -51.31, Lon. -72.83) 75.7 1523 0.4 1551, 721, 33
12287001 Rio Grey Antes Junta Serrano (Lat. -51.18, Lon. -73.02) 865.4 1785 0.3 5876, 838, 22
12288002 Rio Geikie En Desembocadura (Lat. -51.3, Lon. -73.21) 472.8 2374 0.2 5168, 867, 10
12288003 Rio Tindall En Desembocadura (Lat. -51.26, Lon. -73.16) 119.8 1499 0.4 1522, 485, 6
12288004 Rio Caadon 1 En Desembocadura (Lat. -51.31, Lon. -73.28) 158.4 2336 0.3 5168, 683, 43
12289001 Rio Serrano En Desembocadura (Lat. -51.33, Lon. -73.11) 8574.6 816 0.9 5876, 591, 6
12289002 Rio Serrano En Desague Lago Del Toro (Lat. -51.2, Lon. -72.93) 5284.5 448 1.8 2163, 544, 19
12289003 Rio Serrano Antes Junta Grey (Lat. -51.22, Lon. -72.98) 5292.6 450 1.7 2163, 544, 18
12825002 Rio Azopardo En Desembocadura (Lat. -54.5, Lon. -68.82) 3524.5 379 1.7 1397, 322, 29
12876004 Rio Catalina En Pampa Guanacos (Lat. -54.04, Lon. -68.8) 82.7 507 1.3 748, 270, 149
12878001 Rio Rasmussen En Frontera (Estancia VicuÑA) (Lat. -54.02, Lon. -68.65) 468.9 529 1.3 877, 308, 104
12930001 Rio Robalo En Puerto Williams (Lat. -54.95, Lon. -67.64) 20.6 520 1.2 1009, 521, 68
In [15]:
limestone.query('Index_aridity > 0.65')
Out[15]:
Basinname Location Area Precip_anual_media_CR2MET_mm Index_aridity Elev_max,Elev_media,Elev_puntosalida
BasinID
4515001 Rio Mostazal Antes Junta Rio Tulahuencito (Lat. -30.85, Lon. -70.71) 463.1 367 2.1 4401, 2825, 842
4515002 Rio Mostazal En Caren (Lat. -30.84, Lon. -70.77) 640.2 347 2.5 4401, 2589, 692
4516001 Rio Grande En Coipo (Lat. -30.78, Lon. -70.82) 2134.2 304 3.1 4401, 2548, 576
4522001 Rio Rapel En Paloma (Lat. -30.73, Lon. -70.62) 510.5 367 1.8 4825, 3221, 1188
4522002 Rio Rapel En Junta (Lat. -30.71, Lon. -70.87) 820.6 333 2.5 4825, 2661, 496
4523001 Rio Grande En Agua Chica (Lat. -30.7, Lon. -70.9) 3015.8 310 3.0 4825, 2544, 447
4523002 Rio Mostazal En Caren (Lat. -30.84, Lon. -70.77) 640.2 347 2.5 4401, 2589, 692
7104002 Estero El Manzano Antes Junta Rio Teno (Lat. -34.97, Lon. -70.94) 133.7 1289 0.8 2581, 1276, 522
12284005 Rio Don Guillermo En Cerro Castillo (Lat. -51.27, Lon. -72.48) 500.0 402 1.9 1101, 439, 34
12289001 Rio Serrano En Desembocadura (Lat. -51.33, Lon. -73.11) 8574.6 816 0.9 5876, 591, 6
12289002 Rio Serrano En Desague Lago Del Toro (Lat. -51.2, Lon. -72.93) 5284.5 448 1.8 2163, 544, 19
12289003 Rio Serrano Antes Junta Grey (Lat. -51.22, Lon. -72.98) 5292.6 450 1.7 2163, 544, 18
12825002 Rio Azopardo En Desembocadura (Lat. -54.5, Lon. -68.82) 3524.5 379 1.7 1397, 322, 29
12876004 Rio Catalina En Pampa Guanacos (Lat. -54.04, Lon. -68.8) 82.7 507 1.3 748, 270, 149
12878001 Rio Rasmussen En Frontera (Estancia VicuÑA) (Lat. -54.02, Lon. -68.65) 468.9 529 1.3 877, 308, 104
12930001 Rio Robalo En Puerto Williams (Lat. -54.95, Lon. -67.64) 20.6 520 1.2 1009, 521, 68

Training run on the Chile dataset - carbonate selection¶

  • The Chile dataset ends on 31/12/2016 for the features precip_cr2met and pet_hargreaves.
  • I might merge the temperature data from POWER_Daily_T2M_T2M_MAX_T2M_MIN.csv so that an additional feature becomes available (a sketch of this merge follows below).
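The POWER temperature columns could be merged into a basin netCDF file roughly as follows (a sketch; the file names and relative paths are assumptions, and the merged file still has to follow the GenericDataset naming rules):

import pandas as pd
import xarray as xr

power = pd.read_csv("POWER_Daily_T2M_T2M_MAX_T2M_MIN.csv",
                    parse_dates=["Date"], index_col="Date")
power.index.name = "date"  # match the coordinate name expected by the GenericDataset

ds = xr.open_dataset("GenericDataset/time_series/44444444.nc")
ds = ds.merge(power.to_xarray(), join="left")  # keep only the basin's date range
ds.to_netcdf("GenericDataset/time_series/44444444_merged.nc")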
In [2]:
POWER_Daily_T2M = pd.read_csv(r"C:\Users\VanOp\Documents\Notebooks\XGBoost\acea-water-prediction\POWER_Daily_T2M_T2M_MAX_T2M_MIN.csv", parse_dates=["Date"], index_col=["Date"])
POWER_Daily_T2M
Out[2]:
T2M T2M_MAX T2M_MIN
Date
2000-01-01 -0.77 6.28 -4.07
2000-01-02 1.12 7.52 -2.31
2000-01-03 1.83 9.37 -3.14
2000-01-04 3.89 7.69 0.69
2000-01-05 4.11 9.60 -0.07
... ... ... ...
2021-07-18 22.64 27.59 17.92
2021-07-19 24.52 30.96 19.05
2021-07-20 26.51 35.16 17.04
2021-07-21 27.45 34.83 21.20
2021-07-22 27.26 35.01 19.70

7874 rows × 3 columns

In [5]:
POWER_Daily_T2M.loc["2017"].plot()
Out[5]:
<Axes: xlabel='Date'>
In [4]:
from neuralhydrology.datasetzoo import GenericDataset 
import pickle
from pathlib import Path
import ruamel.yaml  # note: raises ModuleNotFoundError if the ruamel.yaml package is not installed
import matplotlib.pyplot as plt
import torch
from neuralhydrology.evaluation import metrics
from neuralhydrology.nh_run import start_run, eval_run

# by default we assume that you have at least one CUDA-capable NVIDIA GPU
if torch.cuda.is_available():
    start_run(config_file=Path("train_on_chili_carbon_basins.yml"))  # "chili_basin.yml"

# fall back to CPU-only mode
else:
    start_run(config_file=Path("train_on_chili_carbon_basins.yml"), gpu=-1)
2023-07-21 15:00:48,974: Logging to c:\Users\VanOp\Documents\Notebooks\NeuralHydrology\data\runs\test_on_chili_carbon_basins_netcdf_2107_150048\output.log initialized.
2023-07-21 15:00:48,975: ### Folder structure created at c:\Users\VanOp\Documents\Notebooks\NeuralHydrology\data\runs\test_on_chili_carbon_basins_netcdf_2107_150048
2023-07-21 15:00:48,976: ### Run configurations for test_on_chili_carbon_basins_netcdf
2023-07-21 15:00:48,977: experiment_name: test_on_chili_carbon_basins_netcdf
2023-07-21 15:00:48,978: train_basin_file: basin_44444423.txt
2023-07-21 15:00:48,978: validation_basin_file: basin_44444423.txt
2023-07-21 15:00:48,979: test_basin_file: basin_44444423.txt
2023-07-21 15:00:48,981: train_start_date: 2000-01-01 00:00:00
2023-07-21 15:00:48,983: train_end_date: 2016-06-30 00:00:00
2023-07-21 15:00:48,983: validation_start_date: 2016-06-30 00:00:00
2023-07-21 15:00:48,984: validation_end_date: 2019-06-30 00:00:00
2023-07-21 15:00:48,985: test_start_date: 2019-06-29 00:00:00
2023-07-21 15:00:48,986: test_end_date: 2020-06-29 00:00:00
2023-07-21 15:00:48,986: device: cuda:0
2023-07-21 15:00:48,987: validate_every: 5
2023-07-21 15:00:48,988: validate_n_random_basins: 1
2023-07-21 15:00:48,989: metrics: ['mse']
2023-07-21 15:00:48,990: model: cudalstm
2023-07-21 15:00:48,991: head: regression
2023-07-21 15:00:48,992: output_activation: linear
2023-07-21 15:00:48,992: hidden_size: 200
2023-07-21 15:00:48,993: initial_forget_bias: 3
2023-07-21 15:00:48,994: output_dropout: 0.3
2023-07-21 15:00:48,994: optimizer: Adam
2023-07-21 15:00:48,995: loss: MSE
2023-07-21 15:00:48,996: learning_rate: {0: 0.01, 3: 0.005, 8: 0.002, 11: 0.001, 15: 0.0005}
2023-07-21 15:00:48,997: batch_size: 365
2023-07-21 15:00:48,997: epochs: 20
2023-07-21 15:00:48,998: clip_gradient_norm: 1
2023-07-21 15:00:48,998: predict_last_n: 1
2023-07-21 15:00:48,999: seq_length: 365
2023-07-21 15:00:49,000: num_workers: 4
2023-07-21 15:00:49,000: log_interval: 5
2023-07-21 15:00:49,001: log_tensorboard: True
2023-07-21 15:00:49,002: log_n_figures: 1
2023-07-21 15:00:49,002: save_weights_every: 5
2023-07-21 15:00:49,003: dataset: generic
2023-07-21 15:00:49,004: data_dir: C:\Users\VanOp\Documents\Notebooks\NeuralHydrology\genericdataset
2023-07-21 15:00:49,005: dynamic_inputs: ['4_CAMELScl', '10_CAMELScl', '12_CAMELScl']
2023-07-21 15:00:49,005: target_variables: ['2_CAMELScl']
2023-07-21 15:00:49,006: clip_targets_to_zero: ['2_CAMELScl']
2023-07-21 15:00:49,007: number_of_basins: 1
2023-07-21 15:00:49,007: run_dir: c:\Users\VanOp\Documents\Notebooks\NeuralHydrology\data\runs\test_on_chili_carbon_basins_netcdf_2107_150048
2023-07-21 15:00:49,008: train_dir: c:\Users\VanOp\Documents\Notebooks\NeuralHydrology\data\runs\test_on_chili_carbon_basins_netcdf_2107_150048\train_data
2023-07-21 15:00:49,009: img_log_dir: c:\Users\VanOp\Documents\Notebooks\NeuralHydrology\data\runs\test_on_chili_carbon_basins_netcdf_2107_150048\img_log
2023-07-21 15:00:49,015: ### Device cuda:0 will be used for training
cfg.head= regression
2023-07-21 15:00:49,017: Loading basin data into xarray data set.
100%|██████████| 1/1 [00:00<00:00,  1.63it/s]
2023-07-21 15:00:49,644: Create lookup table and convert to pytorch tensor
100%|██████████| 1/1 [00:02<00:00,  2.32s/it]
# Epoch 1: 100%|██████████| 6/6 [00:20<00:00,  3.50s/it, Loss: 0.7190]
2023-07-21 15:01:17,489: Epoch 1 average loss: avg_loss: 0.55176, avg_total_loss: 0.55176
# Epoch 2: 100%|██████████| 6/6 [00:17<00:00,  2.89s/it, Loss: 0.5191]
2023-07-21 15:01:34,822: Epoch 2 average loss: avg_loss: 0.50348, avg_total_loss: 0.50348
2023-07-21 15:01:34,823: Setting learning rate to 0.005
# Epoch 3: 100%|██████████| 6/6 [00:19<00:00,  3.18s/it, Loss: 0.4038]
2023-07-21 15:01:53,922: Epoch 3 average loss: avg_loss: 0.48461, avg_total_loss: 0.48461
# Epoch 4: 100%|██████████| 6/6 [00:17<00:00,  2.97s/it, Loss: 0.4205]
2023-07-21 15:02:11,752: Epoch 4 average loss: avg_loss: 0.43860, avg_total_loss: 0.43860
# Epoch 5: 100%|██████████| 6/6 [00:17<00:00,  2.84s/it, Loss: 0.3455]
2023-07-21 15:02:28,809: Epoch 5 average loss: avg_loss: 0.37346, avg_total_loss: 0.37346
# Validation: 100%|██████████| 1/1 [00:01<00:00,  1.05s/it]
2023-07-21 15:02:30,242: Epoch 5 average validation loss: 0.20557 -- Median validation metrics: avg_loss: 0.20557, MSE: 2165.29199
# Epoch 6: 100%|██████████| 6/6 [00:17<00:00,  2.85s/it, Loss: 0.5503]
2023-07-21 15:02:47,376: Epoch 6 average loss: avg_loss: 0.43224, avg_total_loss: 0.43224
# Epoch 7: 100%|██████████| 6/6 [00:17<00:00,  2.86s/it, Loss: 0.3894]
2023-07-21 15:03:04,559: Epoch 7 average loss: avg_loss: 0.38553, avg_total_loss: 0.38553
2023-07-21 15:03:04,560: Setting learning rate to 0.002
# Epoch 8: 100%|██████████| 6/6 [00:17<00:00,  2.88s/it, Loss: 0.3126]
2023-07-21 15:03:21,858: Epoch 8 average loss: avg_loss: 0.36875, avg_total_loss: 0.36875
# Epoch 9: 100%|██████████| 6/6 [00:17<00:00,  2.85s/it, Loss: 0.2664]
2023-07-21 15:03:38,976: Epoch 9 average loss: avg_loss: 0.32242, avg_total_loss: 0.32242
# Epoch 10: 100%|██████████| 6/6 [00:17<00:00,  2.87s/it, Loss: 0.1909]
2023-07-21 15:03:56,212: Epoch 10 average loss: avg_loss: 0.27256, avg_total_loss: 0.27256
# Validation: 100%|██████████| 1/1 [00:00<00:00,  1.46it/s]
2023-07-21 15:03:57,207: Epoch 10 average validation loss: 0.15328 -- Median validation metrics: avg_loss: 0.15328, MSE: 1696.08008
2023-07-21 15:03:57,209: Setting learning rate to 0.001
# Epoch 11: 100%|██████████| 6/6 [00:16<00:00,  2.83s/it, Loss: 0.1914]
2023-07-21 15:04:14,200: Epoch 11 average loss: avg_loss: 0.20622, avg_total_loss: 0.20622
# Epoch 12: 100%|██████████| 6/6 [00:17<00:00,  2.87s/it, Loss: 0.1557]
2023-07-21 15:04:31,447: Epoch 12 average loss: avg_loss: 0.17937, avg_total_loss: 0.17937
# Epoch 13: 100%|██████████| 6/6 [00:17<00:00,  2.94s/it, Loss: 0.1270]
2023-07-21 15:04:49,090: Epoch 13 average loss: avg_loss: 0.15748, avg_total_loss: 0.15748
# Epoch 14: 100%|██████████| 6/6 [00:17<00:00,  2.85s/it, Loss: 0.2334]
2023-07-21 15:05:06,186: Epoch 14 average loss: avg_loss: 0.18797, avg_total_loss: 0.18797
2023-07-21 15:05:06,188: Setting learning rate to 0.0005
# Epoch 15: 100%|██████████| 6/6 [00:17<00:00,  2.84s/it, Loss: 0.1837]
2023-07-21 15:05:23,216: Epoch 15 average loss: avg_loss: 0.17510, avg_total_loss: 0.17510
# Validation: 100%|██████████| 1/1 [00:00<00:00,  1.45it/s]
2023-07-21 15:05:24,179: Epoch 15 average validation loss: 0.19724 -- Median validation metrics: avg_loss: 0.19724, MSE: 2179.04346
# Epoch 16: 100%|██████████| 6/6 [00:17<00:00,  2.91s/it, Loss: 0.1134]
2023-07-21 15:05:41,641: Epoch 16 average loss: avg_loss: 0.13859, avg_total_loss: 0.13859
# Epoch 17: 100%|██████████| 6/6 [00:17<00:00,  2.87s/it, Loss: 0.1289]
2023-07-21 15:05:58,880: Epoch 17 average loss: avg_loss: 0.12768, avg_total_loss: 0.12768
# Epoch 18: 100%|██████████| 6/6 [00:17<00:00,  2.88s/it, Loss: 0.0915]
2023-07-21 15:06:16,147: Epoch 18 average loss: avg_loss: 0.11773, avg_total_loss: 0.11773
# Epoch 19: 100%|██████████| 6/6 [00:17<00:00,  2.85s/it, Loss: 0.1177]
2023-07-21 15:06:33,278: Epoch 19 average loss: avg_loss: 0.11736, avg_total_loss: 0.11736
# Epoch 20: 100%|██████████| 6/6 [00:17<00:00,  2.86s/it, Loss: 0.0963]
2023-07-21 15:06:50,440: Epoch 20 average loss: avg_loss: 0.10327, avg_total_loss: 0.10327
# Validation: 100%|██████████| 1/1 [00:00<00:00,  1.44it/s]
2023-07-21 15:06:51,411: Epoch 20 average validation loss: 0.17807 -- Median validation metrics: avg_loss: 0.17807, MSE: 1850.04150
Renaming features to match the preprocessed column names¶

The preprocessing function created an intermediary data table with different feature names, possibly to avoid confusion between the different data sources:

KeyError: "The following features are not available in the data: ['pet_hargreaves', 'precip_cr2met', 'streamflow_m3s', 'tmean_cr2met'].
These are the available features: ['10_CAMELScl', '11_CAMELScl_pet', '12_CAMELScl', '2_CAMELScl', '3_CAMELScl', '4_CAMELScl', '5_CAMELScl', '6_CAMELScl', '7_CAMELScl', '8_CAMELScl', '9_CAMELScl']"


The config files used for training, testing and validation:¶

  • chili_basin.yml:
    • experiment_name: train_CHILI_carbonicrocks
  • train_on_chili_carbon_basins.yml:
    • train on Chilean basins, test on the Italian water body.
    • experiment_name: test_on_chili_carbon_basins_netcdf
In [1]:
import pickle
from pathlib import Path

import matplotlib.pyplot as plt
import torch
from neuralhydrology.evaluation import metrics
from neuralhydrology.nh_run import start_run, eval_run

KeyError: "The following features are not available in the data: ['10_CAMELScl', '12_CAMELScl', '2_CAMELScl', '4_CAMELScl']. These are the available features:
['Date_excel', 'Rainfall_Terni', 'Flow_Rate_Lupa', 'doy', 'Month', 'Year', 'ET01', 'Infilt_', 'Infiltsum', 'Rainfall_Ter', 'P5', 'Week', 'log_Flow', 'Lupa_Mean99_2011', 'α1_negatives', 'ro', 'SMroot', 'Neradebit', 'smian', 'DroughtIndex', 'Deficit',
'PET_hg', 'rr', 'pp', 'log_Rainfall', 'GWETTOP', 'Flow_Rate_diff', 'Flow_Rate_diff2', 'Nera', 'Nera40', 'Rainfall_40', 'Rainfall_240', 'Rainfall_720', 'pp_10', 'pp_40']"

The preprocessing function created an intermediary data table with different feature names, so I'll have to rename the data variables of my nc file via xarray:

  • Rainfall_Terni : 4_CAMELScl #precip_cr2met
  • T2M : 10_CAMELScl #tmean_cr2met
  • PET_hg : 12_CAMELScl #pet_hargreaves
  • Flow_Rate_Lupa : 2_CAMELScl #streamflow_m3s
In [7]:
import xarray as xr
ds_44444444 = xr.open_dataset(r"C:\Users\VanOp\Documents\Notebooks\NeuralHydrology\GenericDataset\time_series\44444444.nc")
ds_44444444
Out[7]:
<xarray.Dataset>
Dimensions:           (date: 3833)
Coordinates:
  * date              (date) datetime64[ns] 2010-01-01 2010-01-02 ... 2020-06-29
Data variables: (12/38)
    Date_excel        (date) datetime64[ns] ...
    Rainfall_Terni    (date) float64 ...
    Flow_Rate_Lupa    (date) float64 ...
    doy               (date) float64 ...
    Month             (date) float64 ...
    Year              (date) float64 ...
    ...                ...
    Rainfall_720      (date) float64 ...
    pp_10             (date) float64 ...
    pp_40             (date) float64 ...
    T2M               (date) float64 ...
    T2M_MAX           (date) float64 ...
    T2M_MIN           (date) float64 ...
Attributes:
    long_name:     Water spring Lupa [Italy]
    Italian_name:  Sorgente di Lupa, Monte Coserno, Italia
    units:         Liters/ minute
    Frequency:     Daily
    description:   Outflow and other key features of water spring Lupa. Start...
In [8]:
rename44444444vars = {"Rainfall_Terni": "4_CAMELScl", "PET_hg": "12_CAMELScl",
                      "Flow_Rate_Lupa": "2_CAMELScl", "T2M": "10_CAMELScl"}  # T2M stands in for tmean_cr2met
ds_44444444_carbonicChile = ds_44444444.rename(rename44444444vars)
ds_44444444_carbonicChile
Out[8]:
<xarray.Dataset>
Dimensions:           (date: 3833)
Coordinates:
  * date              (date) datetime64[ns] 2010-01-01 2010-01-02 ... 2020-06-29
Data variables: (12/38)
    Date_excel        (date) datetime64[ns] ...
    4_CAMELScl        (date) float64 ...
    2_CAMELScl        (date) float64 ...
    doy               (date) float64 ...
    Month             (date) float64 ...
    Year              (date) float64 ...
    ...                ...
    Rainfall_720      (date) float64 ...
    pp_10             (date) float64 ...
    pp_40             (date) float64 ...
    10_CAMELScl       (date) float64 ...
    T2M_MAX           (date) float64 ...
    T2M_MIN           (date) float64 ...
Attributes:
    long_name:     Water spring Lupa [Italy]
    Italian_name:  Sorgente di Lupa, Monte Coserno, Italia
    units:         Liters/ minute
    Frequency:     Daily
    description:   Outflow and other key features of water spring Lupa. Start...
In [9]:
ds_44444444_carbonicChile.to_netcdf('44444444_carbonicChile.nc')

The Aridity Index (AI) is a numerical indicator of the dryness of the climate at a given place (the inverse of a humidity index). It is calculated as the ratio P/PET, where P is the average annual precipitation and PET is the potential evapotranspiration.

In [27]:
# total Rainfall_Terni over the year 2015
ds_44444444.sel(date=slice("2015-01-01", "2015-12-31")).Rainfall_Terni.sum()
Out[27]:
<xarray.DataArray 'Rainfall_Terni' ()>
array(702.8)
      In [18]:
      # aridity index P/PET, plus the mean daily rainfall and mean daily PET it is computed from
      print(ds_44444444.Rainfall_Terni.mean() / ds_44444444.PET_hg.mean(),
            ds_44444444.Rainfall_Terni.mean(),
            ds_44444444.PET_hg.mean())
      
      <xarray.DataArray ()>
      array(0.68574257) <xarray.DataArray 'Rainfall_Terni' ()>
      array(2.89232977) <xarray.DataArray 'PET_hg' ()>
      array(4.21780696)
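
      With a mean daily rainfall of about 2.89 and a mean daily PET of about 4.22, the aridity index of the Lupa catchment is roughly 0.69; since both variables are averaged over the same days, the ratio of the daily means equals the ratio of the annual averages.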
      

      We have to rename the file we saved for ds_44444444_carbonicChile to the purely numeric basin id used in the basin list files: 44444444_carbonicChile.nc => 44444423.nc.
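
      A minimal sketch of this renaming step, assuming the file has been placed in the time_series folder of the data directory (the paths mirror the ones used elsewhere in this notebook):

      from pathlib import Path

      # assumed location of the preprocessed time series files (cf. data_dir in the run configuration below)
      time_series_dir = Path(r"C:\Users\VanOp\Documents\Notebooks\NeuralHydrology\GenericDataset\time_series")

      # rename the saved netCDF file to the numeric basin id
      (time_series_dir / "44444444_carbonicChile.nc").rename(time_series_dir / "44444423.nc")

      # the basin list file referenced by the run configuration simply contains this id
      Path("basin_44444423.txt").write_text("44444423\n")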

      In [10]:
      import pickle
      from pathlib import Path

      import matplotlib.pyplot as plt
      import torch

      from neuralhydrology.evaluation import metrics
      from neuralhydrology.nh_run import start_run, eval_run

      # by default we assume that you have at least one CUDA-capable NVIDIA GPU
      if torch.cuda.is_available():
          start_run(config_file=Path("train_on_chili_carbon_basins.yml"))

      # otherwise, fall back to CPU-only mode
      else:
          start_run(config_file=Path("train_on_chili_carbon_basins.yml"), gpu=-1)
      
      2023-07-16 19:38:07,007: Logging to c:\Users\VanOp\Documents\Notebooks\NeuralHydrology\data\runs\test_on_chili_carbon_basins_netcdf_1607_193807\output.log initialized.
      2023-07-16 19:38:07,008: ### Folder structure created at c:\Users\VanOp\Documents\Notebooks\NeuralHydrology\data\runs\test_on_chili_carbon_basins_netcdf_1607_193807
      2023-07-16 19:38:07,009: ### Run configurations for test_on_chili_carbon_basins_netcdf
      2023-07-16 19:38:07,011: experiment_name: test_on_chili_carbon_basins_netcdf
      2023-07-16 19:38:07,012: train_basin_file: basin_44444423.txt
      2023-07-16 19:38:07,013: validation_basin_file: basin_44444423.txt
      2023-07-16 19:38:07,014: test_basin_file: basin_44444423.txt
      2023-07-16 19:38:07,015: train_start_date: 2010-01-01 00:00:00
      2023-07-16 19:38:07,016: train_end_date: 2018-06-29 00:00:00
      2023-07-16 19:38:07,017: validation_start_date: 2018-06-30 00:00:00
      2023-07-16 19:38:07,017: validation_end_date: 2019-06-29 00:00:00
      2023-07-16 19:38:07,019: test_start_date: 2019-06-30 00:00:00
      2023-07-16 19:38:07,020: test_end_date: 2020-06-29 00:00:00
      2023-07-16 19:38:07,021: device: cuda:0
      2023-07-16 19:38:07,022: validate_every: 5
      2023-07-16 19:38:07,022: validate_n_random_basins: 1
      2023-07-16 19:38:07,023: metrics: ['NSE']
      2023-07-16 19:38:07,023: model: cudalstm
      2023-07-16 19:38:07,024: head: regression
      2023-07-16 19:38:07,026: output_activation: linear
      2023-07-16 19:38:07,026: hidden_size: 100
      2023-07-16 19:38:07,027: initial_forget_bias: 3
      2023-07-16 19:38:07,028: output_dropout: 0.3
      2023-07-16 19:38:07,029: optimizer: Adam
      2023-07-16 19:38:07,029: loss: MSE
      2023-07-16 19:38:07,030: learning_rate: {0: 0.01, 5: 0.005, 10: 0.002}
      2023-07-16 19:38:07,031: batch_size: 256
      2023-07-16 19:38:07,032: epochs: 15
      2023-07-16 19:38:07,033: clip_gradient_norm: 1
      2023-07-16 19:38:07,033: predict_last_n: 1
      2023-07-16 19:38:07,034: seq_length: 365
      2023-07-16 19:38:07,035: num_workers: 4
      2023-07-16 19:38:07,036: log_interval: 5
      2023-07-16 19:38:07,037: log_tensorboard: True
      2023-07-16 19:38:07,038: log_n_figures: 1
      2023-07-16 19:38:07,039: save_weights_every: 5
      2023-07-16 19:38:07,039: dataset: generic
      2023-07-16 19:38:07,040: data_dir: C:\Users\VanOp\Documents\Notebooks\NeuralHydrology\genericdataset
      2023-07-16 19:38:07,041: dynamic_inputs: ['4_CAMELScl', '10_CAMELScl', '12_CAMELScl']
      2023-07-16 19:38:07,042: target_variables: ['2_CAMELScl']
      2023-07-16 19:38:07,043: clip_targets_to_zero: ['2_CAMELScl']
      2023-07-16 19:38:07,043: number_of_basins: 1
      2023-07-16 19:38:07,044: run_dir: c:\Users\VanOp\Documents\Notebooks\NeuralHydrology\data\runs\test_on_chili_carbon_basins_netcdf_1607_193807
      2023-07-16 19:38:07,045: train_dir: c:\Users\VanOp\Documents\Notebooks\NeuralHydrology\data\runs\test_on_chili_carbon_basins_netcdf_1607_193807\train_data
      2023-07-16 19:38:07,045: img_log_dir: c:\Users\VanOp\Documents\Notebooks\NeuralHydrology\data\runs\test_on_chili_carbon_basins_netcdf_1607_193807\img_log
      2023-07-16 19:38:07,127: ### Device cuda:0 will be used for training
      cfg.head= regression
      2023-07-16 19:38:07,129: Loading basin data into xarray data set.
      100%|██████████| 1/1 [00:00<00:00, 17.09it/s]
      2023-07-16 19:38:07,260: Create lookup table and convert to pytorch tensor
      100%|██████████| 1/1 [00:02<00:00,  2.49s/it]
      # Epoch 1: 100%|██████████| 11/11 [00:19<00:00,  1.76s/it, Loss: 0.3042]
      2023-07-16 19:38:33,670: Epoch 1 average loss: avg_loss: 0.41924, avg_total_loss: 0.41924
      # Epoch 2: 100%|██████████| 11/11 [00:19<00:00,  1.76s/it, Loss: 0.4652]
      2023-07-16 19:38:53,076: Epoch 2 average loss: avg_loss: 0.46479, avg_total_loss: 0.46479
      # Epoch 3: 100%|██████████| 11/11 [00:19<00:00,  1.78s/it, Loss: 0.1472]
      2023-07-16 19:39:12,711: Epoch 3 average loss: avg_loss: 0.26870, avg_total_loss: 0.26870
      # Epoch 4: 100%|██████████| 11/11 [00:17<00:00,  1.56s/it, Loss: 0.1258]
      2023-07-16 19:39:29,906: Epoch 4 average loss: avg_loss: 0.16194, avg_total_loss: 0.16194
      2023-07-16 19:39:29,907: Setting learning rate to 0.005
      # Epoch 5: 100%|██████████| 11/11 [00:19<00:00,  1.74s/it, Loss: 0.1014]
      2023-07-16 19:39:49,070: Epoch 5 average loss: avg_loss: 0.09892, avg_total_loss: 0.09892
      # Validation: 100%|██████████| 1/1 [00:00<00:00,  1.86it/s]
      2023-07-16 19:39:49,920: Epoch 5 average validation loss: 0.49444 -- Median validation metrics: avg_loss: 0.49444, NSE: -2.20408
      # Epoch 6: 100%|██████████| 11/11 [00:16<00:00,  1.52s/it, Loss: 0.0590]
      2023-07-16 19:40:06,694: Epoch 6 average loss: avg_loss: 0.09935, avg_total_loss: 0.09935
      # Epoch 7: 100%|██████████| 11/11 [00:16<00:00,  1.53s/it, Loss: 0.1127]
      2023-07-16 19:40:23,476: Epoch 7 average loss: avg_loss: 0.09637, avg_total_loss: 0.09637
      # Epoch 8: 100%|██████████| 11/11 [00:15<00:00,  1.38s/it, Loss: 0.0502]
      2023-07-16 19:40:38,672: Epoch 8 average loss: avg_loss: 0.07980, avg_total_loss: 0.07980
      # Epoch 9: 100%|██████████| 11/11 [00:16<00:00,  1.52s/it, Loss: 0.0362]
      2023-07-16 19:40:55,346: Epoch 9 average loss: avg_loss: 0.04693, avg_total_loss: 0.04693
      2023-07-16 19:40:55,347: Setting learning rate to 0.002
      # Epoch 10: 100%|██████████| 11/11 [00:16<00:00,  1.52s/it, Loss: 0.0313]
      2023-07-16 19:41:12,086: Epoch 10 average loss: avg_loss: 0.03799, avg_total_loss: 0.03799
      # Validation: 100%|██████████| 1/1 [00:00<00:00,  5.70it/s]
      2023-07-16 19:41:12,559: Epoch 10 average validation loss: 0.46570 -- Median validation metrics: avg_loss: 0.46570, NSE: -1.86134
      # Epoch 11: 100%|██████████| 11/11 [00:16<00:00,  1.54s/it, Loss: 0.0315]
      2023-07-16 19:41:29,494: Epoch 11 average loss: avg_loss: 0.03324, avg_total_loss: 0.03324
      # Epoch 12: 100%|██████████| 11/11 [00:16<00:00,  1.52s/it, Loss: 0.0365]
      2023-07-16 19:41:46,256: Epoch 12 average loss: avg_loss: 0.03136, avg_total_loss: 0.03136
      # Epoch 13: 100%|██████████| 11/11 [00:16<00:00,  1.54s/it, Loss: 0.0289]
      2023-07-16 19:42:03,213: Epoch 13 average loss: avg_loss: 0.03017, avg_total_loss: 0.03017
      # Epoch 14: 100%|██████████| 11/11 [00:16<00:00,  1.54s/it, Loss: 0.0219]
      2023-07-16 19:42:20,176: Epoch 14 average loss: avg_loss: 0.02727, avg_total_loss: 0.02727
      # Epoch 15: 100%|██████████| 11/11 [00:15<00:00,  1.37s/it, Loss: 0.0298]
      2023-07-16 19:42:35,264: Epoch 15 average loss: avg_loss: 0.02743, avg_total_loss: 0.02743
      # Validation: 100%|██████████| 1/1 [00:00<00:00, 12.18it/s]
      2023-07-16 19:42:35,639: Epoch 15 average validation loss: 0.32271 -- Median validation metrics: avg_loss: 0.32271, NSE: -1.11577
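
      For reference, the run configuration logged above corresponds to a YAML config file roughly like the one below. This is a sketch reconstructed from the log, not the exact train_on_chili_carbon_basins.yml; in particular, NeuralHydrology config files specify dates as DD/MM/YYYY strings.

      experiment_name: test_on_chili_carbon_basins_netcdf
      train_basin_file: basin_44444423.txt
      validation_basin_file: basin_44444423.txt
      test_basin_file: basin_44444423.txt
      train_start_date: "01/01/2010"
      train_end_date: "29/06/2018"
      validation_start_date: "30/06/2018"
      validation_end_date: "29/06/2019"
      test_start_date: "30/06/2019"
      test_end_date: "29/06/2020"
      device: cuda:0
      validate_every: 5
      validate_n_random_basins: 1
      metrics:
        - NSE
      model: cudalstm
      head: regression
      output_activation: linear
      hidden_size: 100
      initial_forget_bias: 3
      output_dropout: 0.3
      optimizer: Adam
      loss: MSE
      learning_rate:
        0: 0.01
        5: 0.005
        10: 0.002
      batch_size: 256
      epochs: 15
      clip_gradient_norm: 1
      predict_last_n: 1
      seq_length: 365
      num_workers: 4
      log_interval: 5
      log_tensorboard: True
      log_n_figures: 1
      save_weights_every: 5
      dataset: generic
      data_dir: C:\Users\VanOp\Documents\Notebooks\NeuralHydrology\genericdataset
      dynamic_inputs:
        - 4_CAMELScl
        - 10_CAMELScl
        - 12_CAMELScl
      target_variables:
        - 2_CAMELScl
      clip_targets_to_zero:
        - 2_CAMELScl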
      

      Evaluation run on the test period of the dataset, based on Chilean carbonic-rock basins¶

      In [5]:
      run_dir = Path("runs/test_on_chili_carbon_basins_netcdf_2107_150048")
      eval_run(run_dir=run_dir, period="test")
      
      2023-07-21 15:22:50,719: Using the model weights from runs\test_on_chili_carbon_basins_netcdf_2107_150048\model_epoch020.pt
      # Evaluation: 100%|██████████| 1/1 [00:00<00:00,  2.38it/s]
      2023-07-21 15:22:51,168: Stored results at runs\test_on_chili_carbon_basins_netcdf_2107_150048\test\model_epoch020\test_results.p
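
      Note that this evaluation loads a different run directory than the training shown above: a later, 20-epoch run (model_epoch020), not the 15-epoch run trained here.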
      

      Load and inspect model predictions¶

      In [7]:
      with open(run_dir / "test" / "model_epoch020" / "test_results.p", "rb") as fp:
          results = pickle.load(fp)
      
      results.keys()
      
      Out[7]:
      dict_keys(['44444423'])
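      The results dictionary is keyed by basin id; for each frequency (here '1D') it holds the computed metrics and, under the key 'xr', an xarray Dataset with the observed and simulated values, as the next cells show.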
      In [14]:
      import numpy as np

      # RMSE is the square root of the stored MSE
      np.sqrt(results['44444423']['1D']['MSE'])
      
      Out[14]:
      21.117903561435213
      In [8]:
      results['44444423']['1D']['xr']
      
      Out[8]:
      <xarray.Dataset>
      Dimensions:         (date: 367, time_step: 1)
      Coordinates:
        * date            (date) datetime64[ns] 2019-06-29 2019-06-30 ... 2020-06-29
        * time_step       (time_step) int64 0
      Data variables:
          2_CAMELScl_obs  (date, time_step) float32 131.7 131.4 131.3 ... 73.14 72.88
          2_CAMELScl_sim  (date, time_step) float32 154.9 152.4 151.2 ... 141.7 140.3
      In [12]:
      # extract observations and simulations
      qobs = results['44444423']['1D']['xr']['2_CAMELScl_obs']
      qsim = results['44444423']['1D']['xr']['2_CAMELScl_sim']
      
      fig, ax = plt.subplots(figsize=(16, 10))
      ax.grid(True, which="both", axis="both")
      ax.plot(qobs['date'], qobs, label="observed")
      ax.plot(qsim['date'], qsim, label="simulated")
      ax.legend()
      ax.set_ylabel("2_CAMELScl")
      ax.set_title(f"Test period - MSE {results['44444423']['1D']['MSE']:.3f}")
      

      Over a period of eight months the predictions are quite good, considering a model training time of only about five minutes.

      When we train on the whole Chilean dataset instead of a comparable selection of basins, we get the following results after a similarly short training period.

      In [ ]:
      # extract observations and simulations
      qobs = results['44444423']['1D']['xr']['2_CAMELScl_obs']
      qsim = results['44444423']['1D']['xr']['2_CAMELScl_sim']
      
      fig, ax = plt.subplots(figsize=(16, 10))
      ax.grid(True, which="both", axis="both")
      ax.plot(qobs['date'], qobs, label="observed")
      ax.plot(qsim['date'], qsim, label="simulated")
      ax.legend()
      ax.set_ylabel("2_CAMELScl")
      ax.set_title(f"Test period - NSE {results['44444423']['1D']['NSE']:.3f}")
      
      In [15]:
      values = metrics.calculate_all_metrics(qobs.isel(time_step=-1), qsim.isel(time_step=-1))
      for key, val in values.items():
          print(f"{key}: {val:.3f}")
      
      NSE: -0.954
      MSE: 445.966
      RMSE: 21.118
      KGE: 0.352
      Alpha-NSE: 1.364
      Beta-KGE: 1.097
      Beta-NSE: 0.620
      Pearson-r: 0.473
      FHV: 17.839
      FMS: -2.468
      FLV: -38.208
      Peak-Timing: 0.000
      Peak-MAPE: 7.292
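
      Keep in mind that an NSE below zero means the simulation performs worse than simply predicting the mean observed flow over the test period.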
      

      flowchart "How to add a Chilean basins dataset to NH"¶

      In [5]:
      from diagrams import Diagram
      from diagrams.programming.flowchart import Action, Decision, InputOutput, InternalStorage, Preparation

      graph_attr = {"fontsize": "14", "bgcolor": "grey"}

      with Diagram("Adding Chilean basins 'CamelsCL' data for training a pyTorch model",
                   outformat="svg", graph_attr=graph_attr, show=False) as diag:
          (Decision("Add Chilean basins\n'CamelsCL' data")
           >> Action("download Chilean\nCamels data\nzip files")
           >> Preparation("preprocess Chilean\nCamels data\nzip files")
           >> InputOutput("select basins containing\ncarbonic/limestone rocks\nof Chilean Camels\npreprocessed files")
           >> InternalStorage("Chilean Camels carbon.netcdf"))
      diag
      
      Warning: node '70d893c049e14a24af4b03b90b529546', graph 'Adding Chilean basins 'CamelsCL' data for training a pyTorch model' size too small for label
      Warning: node '843fb6a0f7774830a7f7477beb398097', graph 'Adding Chilean basins 'CamelsCL' data for training a pyTorch model' size too small for label
      Warning: node '1a8456ffd30244479ee8f9960b048ed5', graph 'Adding Chilean basins 'CamelsCL' data for training a pyTorch model' size too small for label
      Warning: node '991bdf3ebe714613ba0dabea41131474', graph 'Adding Chilean basins 'CamelsCL' data for training a pyTorch model' size too small for label
      Warning: node '76761a147e5b4fb1bab797b7e6506b26', graph 'Adding Chilean basins 'CamelsCL' data for training a pyTorch model' size too small for label
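
      The "size too small for label" warnings are cosmetic Graphviz messages about labels overflowing the default node size; the diagram still renders. Note that the diagrams package draws via Graphviz, so a system Graphviz installation is required in addition to pip install diagrams.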
      
      Out[5]:
      (rendered flowchart: "Adding Chilean basins 'CamelsCL' data for training a pyTorch model")