# ARFS Top Descriptors
Script to sort and plot top descriptors selected by ARFS
import os
import matplotlib
import matplotlib.pyplot as plt
from mlproject.postprocess.feature_importances import get_arfs_mean_feature_importances
from mlproject.plotting.importances import plot_feature_importance
# Write fonts into PDF output as Type 42 (TrueType) rather than matplotlib's
# default Type 3.
matplotlib.rcParams['pdf.fonttype'] = 42
After cloning the repository locally, set the `data_parent_dir` variable below to the absolute path of its data directory (https://github.com/DigiMatChem/paper-ml-with-lobster-descriptors/tree/main/data).
# Placeholder: replace with the absolute path to the ``data`` directory of a
# local clone of the paper-ml-with-lobster-descriptors repository.
data_parent_dir = "absolute/path/to/paper-ml-with-lobster-descriptors/data/"

# Placeholder: replace with the top-level directory under which all models
# were saved.
models_parent_dir = "/path/to/parent/dir/with/saved/models/"
# Names of all targets processed by this script, in processing order.
# The irregular names are spelled out; the two regular families are generated.
target_names = [
    "last_phdos_peak",
    "max_pfc",
    "log_g_vrh",
    "log_k_vrh",
    "log_klat_300",
    "log_kp_300",
    # log_msd_{all,max,mean}_{300,600}
    *(
        f"log_msd_{stat}_{temp}"
        for stat in ("all", "max", "mean")
        for temp in (300, 600)
    ),
    # {Cv,H,S,U}_{25,305,705}
    *(
        f"{quantity}_{temp}"
        for quantity in ("Cv", "H", "S", "U")
        for temp in (25, 305, 705)
    ),
]
# For every target: collect the mean ARFS feature importances, save them as
# JSON, and save the corresponding importance plot as both PDF and PNG.
output_dir = "arfs_descriptors"
os.makedirs(output_dir, exist_ok=True)

# Targets whose name starts with one of these prefixes are plotted with
# n_feats=30; every other target uses n_feats=20.
many_feature_prefixes = ("Cv_", "H_", "U_", "S_")

for target_name in target_names:
    # str.startswith accepts a tuple, so this is a genuine prefix test instead
    # of a substring search anywhere inside the target name.
    n_feats = 30 if target_name.startswith(many_feature_prefixes) else 20

    arfs_summary = get_arfs_mean_feature_importances(
        models_parent_dir=models_parent_dir,
        target_name=target_name,
    )
    arfs_summary.to_json(f"{output_dir}/arfs_summary_{target_name}.json")

    fig = plot_feature_importance(
        arfs_summary,
        target_name=target_name,
        model_name="",
        importance_type="ARFS selected",
        n_feats=n_feats,
    )
    fig.savefig(f"{output_dir}/arfs_{target_name}.pdf")
    fig.savefig(f"{output_dir}/arfs_{target_name}.png", dpi=300)

    # Close exactly the figure that was just saved; a bare plt.close() only
    # closes whichever figure happens to be current, which could leave figures
    # accumulating across the loop.
    # NOTE(review): assumes plot_feature_importance returns a matplotlib
    # Figure - confirm against its implementation.
    plt.close(fig)