from pathlib import Path

import torch
from torch.utils.data import DataLoader
from torch.nn.utils import clip_grad_norm_
from torchmetrics.regression import (
    MeanAbsolutePercentageError,
    MeanSquaredError,
    MeanAbsoluteError,
)

# Custom imports
from metrics import RMSEMetric
from datasets import TabularMFSequentialBuckling
from models import NeuralNetwork


def eval_model(test_dataset, model_file, mlp_config):
    """
    Load a trained MLP checkpoint, run it on the test set and report its
    performance (RMSE and MAPE over all test predictions).

    Parameters
    ----------
    test_dataset : TabularMFSequentialBuckling
        Test dataset; also used to infer the model's input/output sizes.
    model_file : str
        Checkpoint filename under ./models/varying_dataset_size/.
    mlp_config : dict
        MLP hyper-parameters; must at least contain 'batch_size'.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Define test dataloader (no shuffling so predictions stay aligned
    # with the dataset order)
    test_dataloader = DataLoader(
        test_dataset,
        batch_size=mlp_config['batch_size'],
        shuffle=False
    )

    # Load checkpoint. map_location lets a GPU-saved checkpoint be loaded
    # on a CPU-only machine (and vice versa).
    model_state = torch.load(
        Path.cwd().joinpath(
            './models/varying_dataset_size/{:s}'.format(model_file)
        ),
        map_location=device
    )
    # FIX: sizes were previously read from the global `train_dataset`
    # (defined only under __main__); use the dataset actually passed in
    # so the function is self-contained.
    input_size = test_dataset[0][0].shape[0]
    output_size = test_dataset[0][1].shape[0]
    model = NeuralNetwork(input_size, output_size, mlp_config)
    model.load_state_dict(model_state['model_state_dict'])
    model.to(device)

    # Predict test set and collect per-batch metrics plus raw predictions
    metrics = {
        'MSE': MeanSquaredError().to(device),
        'MAPE': MeanAbsolutePercentageError().to(device),
        'RMSE': RMSEMetric().to(device),
        'MAE': MeanAbsoluteError().to(device)
    }
    test_metrics, pred_data = eval_func(model, metrics, test_dataset, test_dataloader)
    # pred_data columns: [0:4] de-normalised features, [4] target, [5] prediction
    features = pred_data[:, :4]
    y = pred_data[:, 4]
    preds = pred_data[:, 5]

    # Compute final RMSE, MAPE over the full test set (more accurate than
    # averaging per-batch values)
    rmse_func = RMSEMetric()
    mape_func = MeanAbsolutePercentageError()
    rmse = rmse_func(preds, y).item()
    mape = mape_func(preds, y).item()
    print(' RMSE = {:f}'.format(rmse))
    print(' MAPE = {:f} %'.format(mape * 100))


def eval_func(model, metrics, dataset, eval_dataloader):
    """
    Iterate through the evaluation dataloader, make predictions, update the
    running metrics, and gather (features, target, prediction) rows.

    Parameters
    ----------
    model : torch.nn.Module
        Trained model, already moved to its target device.
    metrics : dict[str, torchmetrics.Metric]
        Metrics to accumulate per batch (already on the model's device).
    dataset : TabularMFSequentialBuckling
        Dataset the dataloader wraps; provides normalisation stats.
    eval_dataloader : torch.utils.data.DataLoader
        Dataloader to iterate.

    Returns
    -------
    (dict, torch.Tensor)
        Per-metric batch-averaged values, and an (N, 6) CPU tensor whose
        columns are [first 4 de-normalised features, target, prediction].
    """
    model.eval()
    # Run batches on the same device as the model; the dataloader yields
    # CPU tensors.
    device = next(model.parameters()).device

    running_eval_metrics = {key: 0. for key in metrics}
    full_features = []
    full_targets = []
    full_preds = []
    with torch.no_grad():
        for features, target in eval_dataloader:
            # FIX: move the batch to the model's device; previously CPU
            # tensors were fed to a (potentially) CUDA model and CUDA metrics.
            features = features.to(device)
            target = target.to(device)
            outputs = model(features)
            for key, metric_func in metrics.items():
                running_eval_metrics[key] += metric_func(outputs, target).item()
            # Undo input normalisation so the saved features are in
            # physical units
            if dataset.normalize_input:
                features = (features * dataset.input_std.to(device)
                            + dataset.input_mean.to(device))
            # Only the first 4 feature columns are kept for the report
            full_features.append(features[:, :4].cpu())
            full_targets.append(target.cpu())
            full_preds.append(outputs.cpu())

    full_features = torch.cat(full_features, dim=0)
    full_targets = torch.cat(full_targets, dim=0)
    full_preds = torch.cat(full_preds, dim=0)
    preds_array = torch.cat((full_features, full_targets, full_preds), dim=1)

    # Average each running metric over the number of batches
    for key in running_eval_metrics:
        running_eval_metrics[key] /= len(eval_dataloader)
    return running_eval_metrics, preds_array


if __name__ == '__main__':
    # Input vals
    DATASET_ROOT = Path('F:/ConcreteShellFEA')  # Change to path to ConcreteShellFEA

    # Prep datasets
    # Training
    train_input_path = DATASET_ROOT.joinpath(
        './datasets/PerfectShell_NonlinearFEA/tabular/nonlinear/input/training/pca_input/pca_training.h5'
    )
    train_buckling_path = DATASET_ROOT.joinpath(
        './datasets/PerfectShell_NonlinearFEA/tabular/nonlinear/output/training/buckling_output/buckling_training.csv'
    )
    train_dataset = TabularMFSequentialBuckling(train_input_path, train_buckling_path)

    # Validation
    val_input_path = DATASET_ROOT.joinpath(
        './datasets/PerfectShell_NonlinearFEA/tabular/nonlinear/input/validation/pca_input/pca_validation.h5'
    )
    val_buckling_path = DATASET_ROOT.joinpath(
        './datasets/PerfectShell_NonlinearFEA/tabular/nonlinear/output/validation/buckling_output/buckling_validation.csv'
    )
    val_dataset = TabularMFSequentialBuckling(val_input_path, val_buckling_path)

    # Testing
    test_input_path = DATASET_ROOT.joinpath(
        './datasets/PerfectShell_NonlinearFEA/tabular/nonlinear/input/testing/pca_input/pca_testing.h5'
    )
    test_buckling_path = DATASET_ROOT.joinpath(
        './datasets/PerfectShell_NonlinearFEA/tabular/nonlinear/output/testing/buckling_output/buckling_testing.csv'
    )
    test_dataset = TabularMFSequentialBuckling(test_input_path, test_buckling_path)

    # Calculate input mean and std for normalisation based on the training
    # dataset (concatenate the input tensors once instead of twice)
    stacked_inputs = torch.cat(
        (
            train_dataset.shell_characteristics,
            train_dataset.preproc_inputs,
            train_dataset.linear_buckling_data
        ),
        dim=1
    )
    input_mean = torch.mean(stacked_inputs, dim=0)
    input_std = torch.std(stacked_inputs, dim=0)

    # Update all datasets with the training-set mean and std
    for dataset in (train_dataset, val_dataset, test_dataset):
        dataset.input_mean = input_mean
        dataset.input_std = input_std
        dataset.normalize_input = True

    # Model configuration
    mlp_config = {
        'num_layers': 5,
        'neurons_per_layer': 32,
        'activation': 'Softplus',
        'dropout': 0.,
        'batch_norm': False,
        'initializer': 'xavier_uniform_',
        'optimizer': 'Adam',
        'learning_rate': 0.001,
        'batch_size': 8,
    }

    training_set_sizes = [
        50, 100, 250, 500, 750, 1000, 1250, 1500, 1750, 2000,
        2250, 2500, 2750, 3000, 3200
    ]
    num_samples_lf_MLP = [800, 12800]

    for lf_samples in num_samples_lf_MLP:
        print('*** Number of samples used to train LF MLP = {:d} ***'.format(lf_samples))
        for d_size in training_set_sizes:
            print('*** Training set size = {:d} ***'.format(d_size))
            # Evaluate accuracy of each checkpoint on the test set
            model_file = 'mf_sequential_{:d}_lf_{:d}.pth'.format(d_size, lf_samples)
            eval_model(
                test_dataset,
                model_file,
                mlp_config
            )