Correlation Plots#

import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
from scipy.stats import pearsonr
from mlproject.data.preprocessing import get_dataset
matplotlib.rcParams['pdf.fonttype'] = 42

Provide absolute path to https://github.com/DigiMatChem/paper-ml-with-lobster-descriptors/tree/main/data after cloning the repository locally to data_parent_dir variable below

data_parent_dir = "absolute/path/to/paper-ml-with-lobster-descriptors/data/"
ph_peak, peak_feat = get_dataset(data_parent_dir=data_parent_dir, target_name="last_phdos_peak",
                           feat_type="matminer_lob")
max_pfc, pfc_feat = get_dataset(data_parent_dir=data_parent_dir, target_name="max_pfc",
                           feat_type="matminer_lob")

Get figure 5 (a) and (b)#

plt.figure(figsize=(10, 10))  # set a figure size
plt.scatter(x=max_pfc.loc[ph_peak.index, :], y=ph_peak.loc[ph_peak.index, :], s=50, alpha=0.80)
plt.xscale("log")
plt.yscale("log")
plt.xlabel(r"Log10 (max_pfc [eV / $\AA ^2$])", fontsize=18)
plt.ylabel(r"Log10 (last_ph_peak [$cm^{-1}$])", fontsize=18)
plt.xticks(fontsize=18)
plt.yticks(fontsize=18);
#plt.savefig(f"max_pfc_peak_pp.pdf")
plt.figure(figsize=(10, 10))  # set a figure size
plt.scatter(x=max_pfc.loc[ph_peak.index, :], y=-1*pfc_feat.loc[ph_peak.index, ["bwdf_at_dist0 (eV)"]], s=50, alpha=0.80)
plt.xscale("log")
plt.yscale("log")
plt.xlabel(r"Log10 (max_pfc [eV / $\AA ^2$])", fontsize=18)
plt.ylabel(r"Log10 (Strongest bond $=$ | min ICOHP | [eV])", fontsize=18)
plt.xticks(fontsize=18)
plt.yticks(fontsize=18);
#plt.savefig(f"max_pfc_icohp_pp.pdf")

Get figure S1 (a), (b), (c) and (d)#

Plot normalized ICOHP vs log_klat_300 and ICOBI vs log_klat_300#

The norm_icohp_icobi_w_log_klat.json can be found here

df_s1 = pd.read_json("norm_icohp_icobi_w_log_klat.json")
plt.figure(figsize=(10, 10))  # set a figure size
plt.scatter(df_s1.loc[:, "normalized_ICOBI"], 10**df_s1.loc[:,"log_klat_300"], s=50, alpha=0.80)
plt.xscale('log')
plt.yscale('log')
xticks = np.logspace(np.log10(df_s1.loc[:, "normalized_ICOBI"].min()), np.log10(df_s1.loc[:, "normalized_ICOBI"].max()), num=5)
plt.xticks(xticks, labels=[f"{t:.2f}" for t in xticks], fontsize=14)
r_value = round(pearsonr(df_s1.loc[:, "normalized_ICOBI"], df_s1.loc[:,"log_klat_300"]).statistic, 2)
plt.annotate(fr"$r = {r_value}$ ", (0.12, 0.1), xycoords="figure fraction", fontsize=18)
plt.xlabel("Log10 (normalized ICOBI)", fontsize=14)
plt.ylabel("Log10 (klat_300 [W/m/K]))", fontsize=14)
plt.xticks(fontsize=14)
plt.yticks(fontsize=14)
plt.tight_layout()
#plt.savefig("norm_icobi_klat.pdf", dpi=300)
plt.figure(figsize=(10, 10))  # set a figure size
plt.scatter(df_s1.loc[:, "normalized_ICOHP"], 10**df_s1.loc[:,"log_klat_300"], s=50, alpha=0.80)
plt.xscale('log')
plt.yscale('log')
xticks = np.logspace(np.log10(df_s1.loc[:, "normalized_ICOHP"].min()), np.log10(df_s1.loc[:, "normalized_ICOHP"].max()), num=5)
plt.xticks(xticks, labels=[f"{t:.2f}" for t in xticks], fontsize=14)
r_value = round(pearsonr(df_s1.loc[:, "normalized_ICOHP"], df_s1.loc[:,"log_klat_300"]).statistic, 2)
plt.annotate(fr"$r = {r_value}$ ", (0.12, 0.1), xycoords="figure fraction", fontsize=18)
plt.xlabel("Log10 (|normalized ICOHP [eV]|)", fontsize=14)
plt.ylabel("Log10 (klat_300 [W/m/K]))", fontsize=14)
plt.xticks(fontsize=14)
plt.yticks(fontsize=14)
plt.tight_layout()
#plt.savefig("norm_icohp_klat.pdf", dpi=300)

Plot SISSO identified Rung 1 and Rung 2 against log_klat_300#

log_klat_300, klat_feat = get_dataset(data_parent_dir=data_parent_dir, target_name="log_klat_300",
                           feat_type="matminer_lob")

Rung 2 SISSO descriptor:#

\(\left(\left(\frac{\text{pair\_bwdf\_skew\_mean}}{\text{DensityFeatures\_vpa}}\right)\left(\sqrt[3]{\text{EIN\_ICOHP}}\right)\right) [\text{\unicode{x212B}}^{-3}]\)#

Rung 1 SISSO descriptor:#

\(\left(\frac{\text{pair\_bwdf\_skew\_mean}}{\text{DensityFeatures\_vpa}}\right)\,[\text{\unicode{x212B}}^{-3}]\)#

# Calculate the descriptor values
rung_2_desc = (klat_feat.loc[:, "pair_bwdf_skew_mean"] / klat_feat.loc[:, "DensityFeatures_vpa (A^3/atom)"]) * klat_feat.loc[:, "EIN_ICOHP"]**(1/3)
rung_1_desc = (klat_feat.loc[:, "pair_bwdf_skew_mean"] / klat_feat.loc[:, "DensityFeatures_vpa (A^3/atom)"])
plt.figure(figsize=(10, 10))  # set a figure size
plt.scatter(np.arcsinh(rung_2_desc),10**log_klat_300.loc[:,"log_klat_300"],s=50, alpha=0.80)
#plt.xscale('log')
plt.yscale('log')
r_value = round(pearsonr(rung_2_desc, log_klat_300.loc[:,"log_klat_300"]).statistic, 2)
plt.annotate(rf"$r = {r_value}$ ", (0.12, 0.1), xycoords="figure fraction", fontsize=18)
plt.xlabel(r"$\operatorname{arcsinh}\left(\left(\frac{\text{pair\_bwdf\_skew\_mean}}{\text{DensityFeatures\_vpa}}\right)\left(\sqrt[3]{\text{EIN\_ICOHP}}\right)\right) [\AA^{-3}]$", fontsize=14)
plt.ylabel("Log10 (klat_300 [W/m/K]))", fontsize=14)
plt.xticks(fontsize=14)
plt.yticks(fontsize=14)
plt.tight_layout()
#plt.savefig("rung2_des_klat.pdf", dpi=300)
plt.figure(figsize=(10, 10))  # set a figure size
plt.scatter(np.arcsinh(rung_1_desc),10**log_klat_300.loc[:,"log_klat_300"],s=50, alpha=0.80)
#plt.xscale('log')
plt.yscale('log')
r_value = round(pearsonr(rung_1_desc, log_klat_300.loc[:,"log_klat_300"]).statistic, 2)
plt.annotate(rf"$r = {r_value}$ ", (0.12, 0.1), xycoords="figure fraction", fontsize=18)
plt.xlabel(r"$\operatorname{arcsinh}\left(\frac{\text{pair\_bwdf\_skew\_mean}}{\text{DensityFeatures\_vpa}}\right)\,[\AA^{-3}]$", fontsize=14)
plt.ylabel("Log10 (klat_300 [W/m/K]))", fontsize=14)
plt.xticks(fontsize=14)
plt.yticks(fontsize=14)
plt.tight_layout()
#plt.savefig("rung1_des_klat.pdf", dpi=300)