# Load the precomputed reference data: energies, forces and (if present and desired) stress.
# This can be one or more TrainingSets; here the first object read from the file is used.
loaded_training_sets = nlread("training_data.hdf5", TrainingSet)
training_set = loaded_training_sets[0]

# Calculator used to obtain the isolated atom energies during training.
# Either take it from the training set or, if it is not stored there, set it up
# in the same way as it was configured in the training set generator.
calculator = LCAOCalculator()

# Build a list of fitting parameters, one entry per initial guess of the
# non-linear coefficients. Scanning several starting points is particularly
# useful for finding the best initial non-linear coefficients for MTP models.
fitting_parameters_list = scanOverNonLinearCoefficients(
    number_of_initial_guesses=30,
    basis_size=PredefinedBasisSmall,
    outer_cutoff_radii=3.0 * Angstrom,
    mtp_filename_suffix="MTP_fit.mtp",
    random_seed=42,
    perform_optimization=False,
)

# Create the training object that handles all models in the parameter list.
# train_test_split and random_seed are optional and can be adjusted as desired.
multiple_machine_learned_force_field_trainers = MultipleMachineLearnedForceFieldTrainers(
    fitting_parameters_list=fitting_parameters_list,
    training_sets=training_set,
    calculator=calculator,
    train_test_split=0.8,
    random_seed=1234,
)

# Train every model.
multiple_machine_learned_force_field_trainers.train()

# Once training has finished, fetch the model collection for validation/analysis
# and print a summary of all trained models.
model_collection = multiple_machine_learned_force_field_trainers.modelCollection()
nlprint(model_collection)

# Select the best model according to the R2Score evaluated on the test dataset.
best_model_info = model_collection.getBestModel(
    statistical_measure=R2Score,
    dataset_type=MLParameterOptions.DATASET_TYPE.TEST,
)
best_score, best_index, best_identifier, best_evaluator = best_model_info

# The evaluator of the best model provides the calculator for further use.
best_calculator = best_evaluator.calculator()
