Correlation Plots#
import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
from scipy.stats import pearsonr
from mlproject.data.preprocessing import get_dataset
matplotlib.rcParams['pdf.fonttype'] = 42
Provide absolute path to https://github.com/DigiMatChem/paper-ml-with-lobster-descriptors/tree/main/data after cloning the repository locally to data_parent_dir variable below
data_parent_dir = "absolute/path/to/paper-ml-with-lobster-descriptors/data/"
ph_peak, peak_feat = get_dataset(data_parent_dir=data_parent_dir, target_name="last_phdos_peak",
feat_type="matminer_lob")
max_pfc, pfc_feat = get_dataset(data_parent_dir=data_parent_dir, target_name="max_pfc",
feat_type="matminer_lob")
Get figure 5 (a) and (b)#
plt.figure(figsize=(10, 10)) # set a figure size
plt.scatter(x=max_pfc.loc[ph_peak.index, :], y=ph_peak.loc[ph_peak.index, :], s=50, alpha=0.80)
plt.xscale("log")
plt.yscale("log")
plt.xlabel(r"Log10 (max_pfc [eV / $\AA ^2$])", fontsize=18)
plt.ylabel(r"Log10 (last_ph_peak [$cm^{-1}$])", fontsize=18)
plt.xticks(fontsize=18)
plt.yticks(fontsize=18);
#plt.savefig(f"max_pfc_peak_pp.pdf")
plt.figure(figsize=(10, 10)) # set a figure size
plt.scatter(x=max_pfc.loc[ph_peak.index, :], y=-1*pfc_feat.loc[ph_peak.index, ["bwdf_at_dist0 (eV)"]], s=50, alpha=0.80)
plt.xscale("log")
plt.yscale("log")
plt.xlabel(r"Log10 (max_pfc [eV / $\AA ^2$])", fontsize=18)
plt.ylabel(r"Log10 (Strongest bond $=$ | min ICOHP | [eV])", fontsize=18)
plt.xticks(fontsize=18)
plt.yticks(fontsize=18);
#plt.savefig(f"max_pfc_icohp_pp.pdf")
Get figure S1 (a), (b), (c) and (d)#
Plot normalized ICOHP vs log_klat_300 and ICOBI vs log_klat_300#
The norm_icohp_icobi_w_log_klat.json can be found here
df_s1 = pd.read_json("norm_icohp_icobi_w_log_klat.json")
plt.figure(figsize=(10, 10)) # set a figure size
plt.scatter(df_s1.loc[:, "normalized_ICOBI"], 10**df_s1.loc[:,"log_klat_300"], s=50, alpha=0.80)
plt.xscale('log')
plt.yscale('log')
xticks = np.logspace(np.log10(df_s1.loc[:, "normalized_ICOBI"].min()), np.log10(df_s1.loc[:, "normalized_ICOBI"].max()), num=5)
plt.xticks(xticks, labels=[f"{t:.2f}" for t in xticks], fontsize=14)
r_value = round(pearsonr(df_s1.loc[:, "normalized_ICOBI"], df_s1.loc[:,"log_klat_300"]).statistic, 2)
plt.annotate(fr"$r = {r_value}$ ", (0.12, 0.1), xycoords="figure fraction", fontsize=18)
plt.xlabel("Log10 (normalized ICOBI)", fontsize=14)
plt.ylabel("Log10 (klat_300 [W/m/K]))", fontsize=14)
plt.xticks(fontsize=14)
plt.yticks(fontsize=14)
plt.tight_layout()
#plt.savefig("norm_icobi_klat.pdf", dpi=300)
plt.figure(figsize=(10, 10)) # set a figure size
plt.scatter(df_s1.loc[:, "normalized_ICOHP"], 10**df_s1.loc[:,"log_klat_300"], s=50, alpha=0.80)
plt.xscale('log')
plt.yscale('log')
xticks = np.logspace(np.log10(df_s1.loc[:, "normalized_ICOHP"].min()), np.log10(df_s1.loc[:, "normalized_ICOHP"].max()), num=5)
plt.xticks(xticks, labels=[f"{t:.2f}" for t in xticks], fontsize=14)
r_value = round(pearsonr(df_s1.loc[:, "normalized_ICOHP"], df_s1.loc[:,"log_klat_300"]).statistic, 2)
plt.annotate(fr"$r = {r_value}$ ", (0.12, 0.1), xycoords="figure fraction", fontsize=18)
plt.xlabel("Log10 (|normalized ICOHP [eV]|)", fontsize=14)
plt.ylabel("Log10 (klat_300 [W/m/K]))", fontsize=14)
plt.xticks(fontsize=14)
plt.yticks(fontsize=14)
plt.tight_layout()
#plt.savefig("norm_icohp_klat.pdf", dpi=300)
Plot SISSO identified Rung 1 and Rung 2 against log_klat_300#
log_klat_300, klat_feat = get_dataset(data_parent_dir=data_parent_dir, target_name="log_klat_300",
feat_type="matminer_lob")
Rung 2 SISSO descriptor:#
\(\left(\left(\frac{\text{pair\_bwdf\_skew\_mean}}{\text{DensityFeatures\_vpa}}\right)\left(\sqrt[3]{\text{EIN\_ICOHP}}\right)\right) [\text{\unicode{x212B}}^{-3}]\)#
Rung 1 SISSO descriptor:#
\(\left(\frac{\text{pair\_bwdf\_skew\_mean}}{\text{DensityFeatures\_vpa}}\right)\,[\text{\unicode{x212B}}^{-3}]\)#
# Calculate the descriptor values
rung_2_desc = (klat_feat.loc[:, "pair_bwdf_skew_mean"] / klat_feat.loc[:, "DensityFeatures_vpa (A^3/atom)"]) * klat_feat.loc[:, "EIN_ICOHP"]**(1/3)
rung_1_desc = (klat_feat.loc[:, "pair_bwdf_skew_mean"] / klat_feat.loc[:, "DensityFeatures_vpa (A^3/atom)"])
plt.figure(figsize=(10, 10)) # set a figure size
plt.scatter(np.arcsinh(rung_2_desc),10**log_klat_300.loc[:,"log_klat_300"],s=50, alpha=0.80)
#plt.xscale('log')
plt.yscale('log')
r_value = round(pearsonr(rung_2_desc, log_klat_300.loc[:,"log_klat_300"]).statistic, 2)
plt.annotate(rf"$r = {r_value}$ ", (0.12, 0.1), xycoords="figure fraction", fontsize=18)
plt.xlabel(r"$\operatorname{arcsinh}\left(\left(\frac{\text{pair\_bwdf\_skew\_mean}}{\text{DensityFeatures\_vpa}}\right)\left(\sqrt[3]{\text{EIN\_ICOHP}}\right)\right) [\AA^{-3}]$", fontsize=14)
plt.ylabel("Log10 (klat_300 [W/m/K]))", fontsize=14)
plt.xticks(fontsize=14)
plt.yticks(fontsize=14)
plt.tight_layout()
#plt.savefig("rung2_des_klat.pdf", dpi=300)
plt.figure(figsize=(10, 10)) # set a figure size
plt.scatter(np.arcsinh(rung_1_desc),10**log_klat_300.loc[:,"log_klat_300"],s=50, alpha=0.80)
#plt.xscale('log')
plt.yscale('log')
r_value = round(pearsonr(rung_1_desc, log_klat_300.loc[:,"log_klat_300"]).statistic, 2)
plt.annotate(rf"$r = {r_value}$ ", (0.12, 0.1), xycoords="figure fraction", fontsize=18)
plt.xlabel(r"$\operatorname{arcsinh}\left(\frac{\text{pair\_bwdf\_skew\_mean}}{\text{DensityFeatures\_vpa}}\right)\,[\AA^{-3}]$", fontsize=14)
plt.ylabel("Log10 (klat_300 [W/m/K]))", fontsize=14)
plt.xticks(fontsize=14)
plt.yticks(fontsize=14)
plt.tight_layout()
#plt.savefig("rung1_des_klat.pdf", dpi=300)