Read and extract the last phonon DOS peak frequency (\(\mathrm{cm}^{-1}\))
import os
import warnings
import json
import pandas as pd
from scipy.signal import find_peaks
import matplotlib.pyplot as plt
# Suppress all warnings for the rest of the run: the "ignore" action is
# installed without a category or message filter, so it matches every warning.
warnings.filterwarnings("ignore")
Please download the required data from here to get the complete dataset.
Then provide the path to these JSON files in the `phonon_data_path` variable below. For demonstration, we have provided just one example entry in this repository.
# Directory that holds the phonon database JSON files.
phonon_data_path = "example_phonon_db_files/phonon_data/"

# Working directory at the moment this cell runs.
parent_dir = os.getcwd()

# Read the mpids that belong to the matbench benchmark dataset for the
# last phonon DOS peak task: one id per line, trailing newline removed.
benchmark_mpids = []
with open("pfc_mpids.txt", mode="r", encoding="utf-8") as mpid_file:
    for raw_line in mpid_file:
        benchmark_mpids.append(raw_line.rstrip("\n"))
# Load the phonon DOS and its frequency grid from every JSON file in
# `phonon_data_path`, keyed by the material id stored in the file's metadata.
#
# File paths are built with os.path.join instead of changing the working
# directory, so a failure while reading one file cannot leave the process
# stranded inside the data directory (which would break later relative paths).
ph_dos_data = {}
# sorted() makes the insertion order (and hence the output row order)
# independent of the arbitrary order returned by os.listdir().
for file_name in sorted(os.listdir(phonon_data_path)):
    # Skip anything that is not a JSON file (hidden files, checkpoint folders, ...).
    if not file_name.endswith(".json"):
        continue
    with open(os.path.join(phonon_data_path, file_name), encoding="utf-8") as file:
        data = json.load(file)
    ph_dos_data[data["metadata"]["material_id"]] = {
        "ph_dos": data["phonon"]["ph_dos"],
        "dos_frequencies": data["phonon"]["dos_frequencies"],
    }
# Output folder for the optional per-material plots (see the commented-out
# plotting lines at the end of the loop body).
os.makedirs("last_ph_dos_peak", exist_ok=True)

# A set gives constant-time membership tests inside the loop.
benchmark_mpid_set = set(benchmark_mpids)

# For every benchmark material, record the frequency at which the last
# (highest-index) peak of its phonon DOS occurs.
ph_dos_peak_freq = {}
for mpid, entry in ph_dos_data.items():
    if mpid not in benchmark_mpid_set:
        continue

    # find_peaks returns (peak_indices, properties); only the indices are needed.
    peak_indices, _ = find_peaks(entry["ph_dos"])
    if peak_indices.size == 0:
        # Without this guard, indexing [-1] on an empty array raises IndexError
        # and aborts the whole run without saying which material caused it.
        print(f"No peak found in the phonon DOS of {mpid}; skipping.")
        continue

    dos_peak_index = peak_indices[-1]
    ph_dos_peak_freq[mpid] = entry["dos_frequencies"][dos_peak_index]

    # Uncomment the following lines to save the peak identified in phonon dos
    #plt.plot(ph_dos_data[mpid]["dos_frequencies"], ph_dos_data[mpid]["ph_dos"])
    #plt.vlines(ph_dos_data[mpid]["dos_frequencies"][dos_peak_index], ymax=max(ph_dos_data[mpid]["ph_dos"]), ymin=-1, linestyles="--", color="red")
    #plt.xlabel("Frequencies")
    #plt.ylabel("Phonon Dos")
    #plt.savefig(f"last_ph_dos_peak/{mpid}.png")
    #plt.close()
# Turn the {mpid: frequency} mapping into a one-column table indexed by mpid
# and write it to disk as JSON.
df = pd.DataFrame.from_dict(
    ph_dos_peak_freq,
    orient="index",
    columns=["last_phdos_peak"],
)
df.to_json("last_phdos_peak.json")