ARFS Top Descriptors

ARFS Top Descriptors

Script to sort and plot top descriptors selected by ARFS

import os
import matplotlib
import matplotlib.pyplot as plt
from mlproject.postprocess.feature_importances import get_arfs_mean_feature_importances
from mlproject.plotting.importances import plot_feature_importance
# Font type 42 embeds TrueType fonts in PDF output (matplotlib's default is Type 3),
# which keeps the text in the saved PDFs editable in vector-graphics tools.
matplotlib.rcParams['pdf.fonttype'] = 42

After cloning the repository locally, set the data_parent_dir variable below to the absolute path of its data directory (https://github.com/DigiMatChem/paper-ml-with-lobster-descriptors/tree/main/data).

# --- User configuration ------------------------------------------------------
# Top directory with all models saved.
models_parent_dir = "/path/to/parent/dir/with/saved/models/"
# Absolute path to the data directory of the locally cloned repository.
# NOTE(review): this variable is not read anywhere in this script section —
# confirm whether one of the helpers below is expected to receive it.
data_parent_dir = "absolute/path/to/paper-ml-with-lobster-descriptors/data/"

# Targets for which the ARFS-selected descriptors are summarised and plotted.
target_names = [
    "last_phdos_peak", "max_pfc",
    "log_g_vrh", "log_k_vrh",
    "log_klat_300", "log_kp_300",
    "log_msd_all_300", "log_msd_all_600",
    "log_msd_max_300", "log_msd_max_600",
    "log_msd_mean_300", "log_msd_mean_600",
    "Cv_25", "Cv_305", "Cv_705",
    "H_25", "H_305", "H_705",
    "S_25", "S_305", "S_705",
    "U_25", "U_305", "U_705",
]

# Targets whose name starts with one of these prefixes get 30 descriptors in
# the plot; every other target gets 20.
more_feats_prefixes = ("Cv_", "H_", "U_", "S_")

# All JSON summaries and figures are written into this directory.
output_dir = "arfs_descriptors"
os.makedirs(output_dir, exist_ok=True)

for target_name in target_names:
    n_feats = 30 if target_name.startswith(more_feats_prefixes) else 20

    arfs_summary = get_arfs_mean_feature_importances(
        models_parent_dir=models_parent_dir,
        target_name=target_name,
    )

    # Keep the summary on disk next to the figures.
    arfs_summary.to_json(f"{output_dir}/arfs_summary_{target_name}.json")

    fig = plot_feature_importance(
        arfs_summary,
        target_name=target_name,
        model_name="",
        importance_type="ARFS selected",
        n_feats=n_feats,
    )
    fig.savefig(f"{output_dir}/arfs_{target_name}.pdf")
    fig.savefig(f"{output_dir}/arfs_{target_name}.png", dpi=300)

    # Close this specific figure (not just whichever one is "current") so
    # figures do not accumulate in memory across the loop.
    plt.close(fig)