Corrected resampled t-test to assess the significance of model improvement
import os
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
from mlproject.postprocess.utils import load_cv_results, get_ttest_summary_df
from mlproject.plotting.model_comparison import plot_errors
# Set the font type used in saved PDF figures to 42
# (per the matplotlib documentation, 42 selects TrueType font embedding instead of Type 3).
matplotlib.rcParams['pdf.fonttype'] = 42
# Names of all targets that the loops below iterate over.
# The two suffix-indexed groups are generated rather than spelled out;
# the resulting list and its order are the same as the hand-written version.
target_names = [
    "last_phdos_peak",
    "max_pfc",
    "log_g_vrh",
    "log_k_vrh",
    "log_klat_300",
    "log_kp_300",
    # log_msd_{all,max,mean}_{300,600}
    *(
        f"log_msd_{kind}_{suffix}"
        for kind in ("all", "max", "mean")
        for suffix in (300, 600)
    ),
    # {Cv,H,S,U}_{25,305,705}
    *(
        f"{prop}_{suffix}"
        for prop in ("Cv", "H", "S", "U")
        for suffix in (25, 305, 705)
    ),
]
# Mapping of each target name to its unit string; the loops below look it up
# with `target_units.get(target_name)` and pass it to `plot_errors` as `target_unit`.
# Log-scaled targets are assigned None units.
target_units = {
    # Raw string: "\A" is not a valid Python escape sequence, so a plain
    # literal triggers an invalid-escape warning on recent Python versions.
    # The runtime value (backslash included) is unchanged.
    "max_pfc": r"$eV/\AA^2$",
    "last_phdos_peak": "$cm^{-1}$",
    "log_g_vrh": None,
    "log_k_vrh": None,
    "log_klat_300": None,
    "log_kp_300": None,
    "log_msd_all_300": None,
    "log_msd_all_600": None,
    "log_msd_mean_300": None,
    "log_msd_mean_600": None,
    "log_msd_max_300": None,
    "log_msd_max_600": None,
    "Cv_25": "meV/atom",
    "Cv_305": "meV/atom",
    "Cv_705": "meV/atom",
    "U_25": "meV/atom",
    "U_305": "meV/atom",
    "U_705": "meV/atom",
    "S_25": "meV/atom",
    "S_305": "meV/atom",
    "S_705": "meV/atom",
    "H_25": "meV/atom",
    "H_305": "meV/atom",
    "H_705": "meV/atom",
}
# Top directory where the 10-fold CV run results are saved (must be set by the user).
models_parent_dir = "/path/to/parent/dir/with/saved/models/"

# Root output folder for the summaries and plots written by the cells below.
os.makedirs("corrected_resampled_t_test_results", exist_ok=True)
⚠️ Caution
Before running the following cells, make sure the `models_parent_dir` variable above is set to the path containing the 10-fold CV results.
An incorrect path will lead to inconsistent results.
Run the paired corrected resampled t-test on the RF model results obtained with (Matminer+LOB) and without (Matminer) the LOBSTER descriptor set, and save the summary and plots
# Collects one t-test summary dataframe per target; concatenated and saved after the loop.
rf_summary_dfs = []
for target_name in target_names:
    # Arguments shared by the summary and CV-result loaders for this target.
    rf_run_kwargs = dict(
        model_type="rf",
        models_dir=models_parent_dir,
        target_name=target_name,
    )
    summary_ttest_df = get_ttest_summary_df(alternative="greater", **rf_run_kwargs)
    mat_fold_mae, mat_fold_err = load_cv_results(
        feat_set_type="matminer", n_folds=10, **rf_run_kwargs
    )
    mat_lob_fold_mae, mat_lob_fold_err = load_cv_results(
        feat_set_type="matminer_lob", n_folds=10, **rf_run_kwargs
    )

    # Plot options common to both figure types.
    rf_plot_kwargs = dict(
        summary_ttest_df=summary_ttest_df,
        target=target_name,
        model_type="RF",
        bins=40,
        show_stats_in_title=True,
        target_unit=target_units.get(target_name),
    )
    rf_out_dir = f"corrected_resampled_t_test_results/{target_name}"

    # Box plot of the per-fold errors of both feature sets.
    fig = plot_errors(
        error_lists=[mat_fold_mae, mat_lob_fold_mae],
        labels=['MATMINER', 'MATMINER+LOBSTER'],
        plot_type="boxplot",
        **rf_plot_kwargs,
    )
    os.makedirs(rf_out_dir, exist_ok=True)
    fig.savefig(f"{rf_out_dir}/rf_t_test.png", dpi=300)
    # fig.savefig(f"{rf_out_dir}/rf_t_test.pdf", dpi=300)

    # Fold-by-fold comparison of the same errors.
    fig = plot_errors(
        error_lists=[mat_fold_mae, mat_lob_fold_mae],
        labels=['MATMINER', 'MATMINER+LOBSTER'],
        plot_type="fold_comparison",
        **rf_plot_kwargs,
    )
    fig.savefig(f"{rf_out_dir}/rf_fold_comparison.png", dpi=300)
    # fig.savefig(f"{rf_out_dir}/rf_fold_comparison.pdf", dpi=300)

    rf_summary_dfs.append(summary_ttest_df)

# Persist the combined RF summary for all targets.
pd.concat(rf_summary_dfs).to_json("corrected_resampled_t_test_results/rf_ttest_summary.json")
Run the paired corrected resampled t-test on the MODNet model results obtained with (Matminer+LOB) and without (Matminer) the LOBSTER descriptor set, and save the summary and plots
# Collects one t-test summary dataframe per target; concatenated and saved after the loop.
modnet_summary_dfs = []
for target_name in target_names:
    # Summary of the t-test between the two feature sets for this target.
    summary_ttest_df = get_ttest_summary_df(
        model_type="modnet",
        models_dir=models_parent_dir,
        target_name=target_name,
        alternative="greater",
    )
    # Per-fold results without (matminer) and with (matminer_lob) LOBSTER descriptors.
    mat_fold_mae, mat_fold_err = load_cv_results(
        model_type="modnet",
        models_dir=models_parent_dir,
        target_name=target_name,
        feat_set_type="matminer",
        n_folds=10,
    )
    mat_lob_fold_mae, mat_lob_fold_err = load_cv_results(
        model_type="modnet",
        models_dir=models_parent_dir,
        target_name=target_name,
        feat_set_type="matminer_lob",
        n_folds=10,
    )

    fig = plot_errors(
        error_lists=[mat_fold_mae, mat_lob_fold_mae],
        labels=['MATMINER', 'MATMINER+LOBSTER'],
        summary_ttest_df=summary_ttest_df,
        target=target_name,
        model_type="MODNet",
        bins=40,
        show_stats_in_title=True,
        plot_type="boxplot",
        target_unit=target_units.get(target_name),
    )
    # Create the per-target folder here as well, so this cell does not depend
    # on an earlier cell having created it before savefig is called.
    os.makedirs(f"corrected_resampled_t_test_results/{target_name}", exist_ok=True)
    fig.savefig(f"corrected_resampled_t_test_results/{target_name}/modnet_t_test.png", dpi=300)
    # fig.savefig(f"corrected_resampled_t_test_results/{target_name}/modnet_t_test.pdf", dpi=300)

    fig = plot_errors(
        error_lists=[mat_fold_mae, mat_lob_fold_mae],
        labels=['MATMINER', 'MATMINER+LOBSTER'],
        summary_ttest_df=summary_ttest_df,
        target=target_name,
        model_type="MODNet",
        bins=40,
        show_stats_in_title=True,
        plot_type="fold_comparison",
        target_unit=target_units.get(target_name),
    )
    fig.savefig(f"corrected_resampled_t_test_results/{target_name}/modnet_fold_comparison.png", dpi=300)
    # fig.savefig(f"corrected_resampled_t_test_results/{target_name}/modnet_fold_comparison.pdf", dpi=300)

    modnet_summary_dfs.append(summary_ttest_df)

# Persist the combined MODNet summary for all targets.
pd.concat(modnet_summary_dfs).to_json("corrected_resampled_t_test_results/modnet_ttest_summary.json")