Read and extract the last phonon DOS peak frequency (cm^{-1})

Read and extract the last phonon DOS peak frequency (\(cm^{-1}\))

import os
import warnings
import json
import pandas as pd
from scipy.signal import find_peaks
import matplotlib.pyplot as plt

# Suppress every warning emitted after this point so the output stays uncluttered.
warnings.filterwarnings("ignore")

To obtain the complete dataset, please download the required data from here.

Then provide the path to these JSON files in the `phonon_data_path` variable below. For demonstration, we have provided just one example entry in this repository.

# Directory that holds one phonon JSON file per material.
phonon_data_path = "example_phonon_db_files/phonon_data/"
# Kept for backward compatibility: the working directory is no longer changed,
# so nothing has to be restored afterwards.
parent_dir = os.getcwd()


def load_benchmark_mpids(path):
    """Return the material ids listed one per line in the text file *path*.

    Surrounding whitespace is stripped and blank lines are ignored so that a
    trailing newline or stray spaces cannot break the id comparison later on.
    """
    with open(path, "r", encoding="utf-8") as f:
        stripped_lines = (line.strip() for line in f)
        return [mpid for mpid in stripped_lines if mpid]


def load_ph_dos_data(directory):
    """Load the phonon DOS of every ``*.json`` file found in *directory*.

    Returns a dict mapping ``data["metadata"]["material_id"]`` to a dict with
    the keys ``"ph_dos"`` and ``"dos_frequencies"`` taken from
    ``data["phonon"]``.

    Files are addressed by their full path instead of ``os.chdir``-ing into
    the directory, so a failure while parsing one file cannot leave the
    process in a different working directory.  Sub-directories (for example
    ``.ipynb_checkpoints``) and non-JSON files are skipped.  Entries are read
    in sorted order to make the result reproducible.
    """
    ph_dos_data = {}
    for name in sorted(os.listdir(directory)):
        path = os.path.join(directory, name)
        if not (name.lower().endswith(".json") and os.path.isfile(path)):
            continue
        with open(path, "r", encoding="utf-8") as file:
            data = json.load(file)
        phonon = data["phonon"]
        ph_dos_data[data["metadata"]["material_id"]] = {
            "ph_dos": phonon["ph_dos"],
            "dos_frequencies": phonon["dos_frequencies"],
        }
    return ph_dos_data


def last_peak_frequency(ph_dos, dos_frequencies):
    """Return the frequency at the last local maximum of *ph_dos*.

    *ph_dos* and *dos_frequencies* are parallel sequences.  ``None`` is
    returned when ``scipy.signal.find_peaks`` detects no peak (e.g. a
    monotonic or too short DOS) instead of raising ``IndexError``.
    """
    peak_indices = find_peaks(ph_dos)[0]
    if len(peak_indices) == 0:
        return None
    return dos_frequencies[peak_indices[-1]]


def extract_last_peak_frequencies(ph_dos_data, benchmark_mpids, plot_dir=None):
    """Map each benchmark material id to its last phonon DOS peak frequency.

    Parameters
    ----------
    ph_dos_data : dict
        Mapping as returned by ``load_ph_dos_data``.
    benchmark_mpids : iterable of str
        Only materials whose id is contained here are processed.
    plot_dir : str or None
        When given, a PNG showing the DOS and the detected peak is written to
        ``{plot_dir}/{mpid}.png`` for every processed material.

    Materials without any detectable peak are reported and left out of the
    result rather than aborting the whole extraction.
    """
    wanted = set(benchmark_mpids)  # O(1) membership tests inside the loop
    peak_freqs = {}
    for mpid, entry in ph_dos_data.items():
        if mpid not in wanted:
            continue
        freq = last_peak_frequency(entry["ph_dos"], entry["dos_frequencies"])
        if freq is None:
            print(f"No peak found in phonon dos of {mpid}; skipping.")
            continue
        peak_freqs[mpid] = freq

        if plot_dir is not None:
            plt.plot(entry["dos_frequencies"], entry["ph_dos"])
            plt.vlines(freq, ymax=max(entry["ph_dos"]), ymin=-1, linestyles="--", color="red")
            plt.xlabel("Frequencies")
            plt.ylabel("Phonon Dos")
            plt.savefig(f"{plot_dir}/{mpid}.png")
            plt.close()
    return peak_freqs


if __name__ == "__main__":
    # load a list of mpids that are part of matbench benchmark dataset for last phonon dos peak dataset
    benchmark_mpids = load_benchmark_mpids("pfc_mpids.txt")
    ph_dos_data = load_ph_dos_data(phonon_data_path)
    os.makedirs("last_ph_dos_peak", exist_ok=True)
    # Pass plot_dir="last_ph_dos_peak" to save the peak identified in phonon dos
    ph_dos_peak_freq = extract_last_peak_frequencies(ph_dos_data, benchmark_mpids)
    df = pd.DataFrame.from_dict(ph_dos_peak_freq, columns=["last_phdos_peak"], orient="index")
    df.to_json("last_phdos_peak.json")