Lattice thermal conductivity

Lattice thermal conductivity

📝 Note

The clean_data function is reused as-is, as provided by the authors of the lattice thermal conductivity dataset (from here). If you use this code, make sure you also include the LICENSE and follow its terms.

Please cite the work accordingly if you use the following code snippet:

Phonon Data: M. Ohnishi et al., arXiv:2504.21245 (2025).

import numpy as np
import pandas as pd
def clean_data(df, tol1={'gap': 10, 'kappa': 500}, tol2={'kappa': 2000},
               thred_fc2=0.1, thred_fc3=0.1):
    """
    Filter a thermal-conductivity table in three successive stages.

    1. Keep only rows where ``fc2_error < thred_fc2`` and
       ``fc3_error < thred_fc3``.
    2. Drop rows where both ``max_gap >= tol1['gap']`` and
       ``kp >= tol1['kappa']`` hold at the same time.
    3. Drop rows where ``kp >= tol2['kappa']``.

    The row index is reset after every stage and a short summary of the
    number of removed rows is printed. The input frame is not modified;
    a new DataFrame is returned. The dict defaults are only read, never
    mutated.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide the columns ``fc2_error``, ``fc3_error``, ``max_gap``
        and ``kp``.
    tol1 : dict
        Needs the keys ``'gap'`` and ``'kappa'`` (combined criterion).
    tol2 : dict
        Needs the key ``'kappa'`` (stand-alone upper bound on ``kp``).
    thred_fc2, thred_fc3 : float
        Exclusive upper bounds for ``fc2_error`` and ``fc3_error``.

    Returns
    -------
    pandas.DataFrame
        The filtered rows with a fresh 0..n-1 index.
    """
    print(" Number of original data : ", len(df))

    # Stage 1: discard rows whose fc2 / fc3 errors reach the thresholds.
    rows_before = len(df)
    fc_errors_ok = (df['fc2_error'] < thred_fc2) & (df['fc3_error'] < thred_fc3)
    df = df[fc_errors_ok].reset_index(drop=True)
    n_removed = rows_before - len(df)
    if n_removed:
        print(f" - Removed {n_removed} rows with fc2_error>={thred_fc2} or fc3_error>={thred_fc3}")

    # Stage 2: discard rows that combine a large gap with a large kappa
    # (large kappa due to absence of 4ph scattering).
    rows_before = len(df)
    gap_and_kappa_large = (df["max_gap"] >= tol1['gap']) & (df["kp"] >= tol1['kappa'])
    df = df[~gap_and_kappa_large].reset_index(drop=True)
    n_removed = rows_before - len(df)
    if n_removed:
        print(f" - Removed {n_removed} rows with gap>={tol1['gap']} and kappa>={tol1['kappa']}")

    # Stage 3: discard rows with an excessively large kappa on its own.
    rows_before = len(df)
    kappa_excessive = df["kp"] >= tol2['kappa']
    df = df[~kappa_excessive].reset_index(drop=True)
    n_removed = rows_before - len(df)
    if n_removed:
        print(f" - Removed {n_removed} rows with kappa>={tol2['kappa']}")

    print(" Number of available data : ", len(df))

    return df
# Read the mp-ids for which LOBSTER calculations exist in our LOBSTER database
# (one id per line). They are used further below to find the entries that
# overlap with the kappa database.
with open("lobster_calc_mpids.txt", encoding="utf-8") as f:
    raw_lines = f.readlines()
# Strip only the trailing newline so any other whitespace in an id is preserved.
lobster_calc_mpids = [entry.rstrip("\n") for entry in raw_lines]
# NOTE(review): lobster_calcs is not referenced in the visible part of this
# file; presumably it is consumed elsewhere.
lobster_calcs = pd.read_json("all_calcs_smaller_basis.json")

The data_all.csv file with lattice thermal conductivity data needs to be downloaded from the following GitHub repository: phonon_e3nn

kappa_df = pd.read_csv("data_all.csv")
# exclude unphysical entries
cleaned_kappa_df = clean_data(kappa_df)
# exclude duplicates: index by mp_id and keep only the first row of each id
cleaned_kappa_df.set_index("mp_id", inplace=True)
kappa_df = cleaned_kappa_df[~cleaned_kappa_df.index.duplicated()]

# Store the mp-ids of the kappa dataset that also have LOBSTER calculations,
# in kappa_df row order. A set gives O(1) membership tests, so the overlap
# search no longer scans the whole id list for every row.
lobster_mpid_set = set(lobster_calc_mpids)
overlapp = [mp_id for mp_id in kappa_df.index if mp_id in lobster_mpid_set]

# Take explicit copies so that adding/dropping columns below works on
# independent frames and cannot trigger pandas' SettingWithCopyWarning.
klat_df = kappa_df.loc[overlapp, ["klat"]].copy()
kp_df = kappa_df.loc[overlapp, ["kp"]].copy()

# Take the log of the selected rows only (the original used the full
# kappa_df here and relied on index alignment to discard the rest).
# NOTE: np.log10 yields -inf / NaN for non-positive inputs; no guard is applied.
klat_df["log_klat_300"] = np.log10(klat_df["klat"])
klat_df.drop(columns=["klat"], inplace=True)

#klat_df.to_json("log_klat_300.json") # Uncomment to save data
kp_df["log_kp_300"] = np.log10(kp_df["kp"])
kp_df.drop(columns=["kp"], inplace=True)

#kp_df.to_json("log_kp_300.json") # Uncomment to save data