Extract converged MSD values for all(mean of unique sites), max and mean @ 300 and 600K

Extract converged MSD values for all(mean of unique sites), max and mean @ 300 and 600K#

import pickle
import json
import pandas as pd
import numpy as np
from pymatgen.core import Structure
from pymatgen.analysis.structure_matcher import StructureMatcher
from pymatgen.symmetry.analyzer import SpacegroupAnalyzer

def mapp_phonon_pr_to_lob_pr(mpid, lobster_calc_path="", phononpy_calc_path="", **kwargs):
    """
    Mapps sites of phonopy primitive cell to primitive cell used for LOBSTER calculation

    Args:
        mpid: name of the calculation directory
        phononpy_calc_path: parent directory where phonopy calculation files are stored for all mpids
        lobster_calc_path: parent directory where LOBSTER calculation files are stored for all mpids
        **kwargs: kwargs for StructureMatcher class of pymatgen 

    Returns:
        Dict with phonopy primitive bonds mapped to icoxxs for equivalent bonds of lobster primitive and a bool indicating if mapping was successfull
    """


    lob_st = Structure.from_file(f"{lobster_calc_path}/{mpid}/CONTCAR.gz")
    phonopy_primitive = Structure.from_file(f"{phononpy_calc_path}/{mpid}/POSCAR")


    # wrap coordinates to unit cell of phonon primitive structure
    phonopy_primitive = Structure(lattice=phonopy_primitive.lattice,
         species=phonopy_primitive.species,
         coords=phonopy_primitive.frac_coords,
         to_unit_cell=True)
    
    # initialize structure matcher
    default_kwargs = {"primitive_cell": False, "scale": False, "attempt_supercell": False, "ltol": 0.2, "stol": 0.3, "angle_tol": 5}
    if kwargs:
        default_kwargs.update(kwargs) 
    sm = StructureMatcher(**default_kwargs)
    
    # get sc matrix and translation
    sc_mat, shift , mapping = sm.get_transformation(phonopy_primitive, lob_st)
    M_inv = np.linalg.inv(sc_mat)

    # check if values in shift are close to zero, if yes then set it explicitly to zero
    shift = np.array([i if not np.isclose(i, 0, atol=1e-5) else 0 for i in shift])

    
    # set a mapping dict of phonopy primitive atoms as keys and equivalent atoms in lobster primitive atoms as values
    mapping_dict = {ix: i for ix, i in enumerate(mapping)}


    site_matches = []
    for ix, i in enumerate(mapping):
        new_basis_coord = np.mod(np.dot(M_inv.T, lob_st.frac_coords[i]) + shift, 1)
        
        site_match = np.allclose(phonopy_primitive.frac_coords[ix], new_basis_coord, atol=0.02)

        site_matches.append(site_match)

        
    return all(site_matches), mapping_dict

def map_msd_data(
    mpid: str,
    lobster_path: str,
    phonon_path: str,
    msd_data: dict,
):
    """
    Maps phonon sites to LOBSTER sites and extracts mean squared displacement data.

    Parameters
    ----------
    mpid : str
        Materials Project ID.
    lobster_path : str
        Path to the LOBSTER calculation directory.
    phonon_path : str
        Path to the phonon calculation directory.
    msd_data : dict
        Dictionary containing phonon/thermal displacement data.

    Returns
    -------
    mapped_data : dict
        Dictionary with mapped msd data for inequivalent sites at 300 and 600K.
    """

    # Map phonon sites to lobster sites
    all_mapped, mapping_dict = mapp_phonon_pr_to_lob_pr(
        mpid=mpid,
        lobster_calc_path=lobster_path,
        phononpy_calc_path=phonon_path,
    )

    # Load structures
    ph_struct = Structure.from_file(f"{phonon_path}/{mpid}/POSCAR")
    lob_struct = Structure.from_file(f"{lobster_path}/{mpid}/CONTCAR.gz")

    # Symmetry analysis
    sga_lob = SpacegroupAnalyzer(structure=lob_struct)
    inequivalent_sites = {
        ix: site for ix, site in enumerate(sga_lob.get_symmetry_dataset().equivalent_atoms)
    }

    sg_ph_struct = ph_struct.get_space_group_info()[-1]
    sg_lob_struct = lob_struct.get_space_group_info()[-1]

    mapped_data={mpid : {}}
    
    if all_mapped and sg_lob_struct == sg_ph_struct:
        converged_at_mesh = tdisp_data[mpid][0.1]["converged_at"]
        index_mesh = tdisp_data[mpid][0.1]["mesh"].index(converged_at_mesh)

        for temp in (300, 600):
            mapped_data[mpid].update({temp : {}})
            tdisp_temp  = tdisp_data[mpid][0.1]["tdisp_sites_mean"][temp]

            for ph_site, lob_site in mapping_dict.items():
                equ_site = inequivalent_sites.get(lob_site)
                tdip_val = tdisp_temp[ph_struct.species[ph_site].symbol + str(ph_site + 1)][index_mesh]
                
                if equ_site not in mapped_data[mpid][temp]:
                    mapped_data[mpid][temp][equ_site] = [tdip_val]
                else:
                    mapped_data[mpid][temp][equ_site].append(tdip_val)

    return mapped_data

Load the save convergence_data.pkl obtained after running the msd_convergence.ipynb script

with open("convergence_data.pkl", "rb") as f:
    msd_data = pickle.load(f)

with open("msd_stable_mpids.txt", "r", encoding="utf-8") as f:
    stable_mpids = [line.rstrip("\n") for line in f]

Adjust the lobster_path and phonon_path as per locations where you have stored the files

lobster_path = "parent/path/to/all/lobster/calcs/"
phonon_path = "example_phonon_db_files/phonopy_fc"

# Initialize empty dict to store converged msd data
all_data = {}
for mpid in stable_mpids:
    mapped_data = map_msd_data(mpid=mpid, lobster_path=lobster_path, phonon_path=phonon_path, tdisp_data=msd_data)
    all_data.update(mapped_data)

with open("all_data.json", "w") as f:
    json.dump(all_data, f)

# Initialize empty dicts to store converged msd data seperately i.e all sites, mean and max for each temperature
msd_300_all = {}
msd_600_all = {}
msd_300_mean = {}
msd_600_mean = {}
msd_300_max = {}
msd_600_max = {}

for mpid in all_data:
    if all_data[mpid]:
        for temp in (300, 600):
            if temp == 300:
                site_msd_300 = []
                for site in all_data[mpid][temp]:
                    msd_300_all[f"{mpid}_{site}"] = np.log10(np.mean(all_data[mpid][temp][site])) # mean of msd at unique site
                    site_msd_300.extend(all_data[mpid][temp][site])
                msd_300_mean[mpid] = np.log10(np.mean(site_msd_300)) # mean of msd from all sites in a structure
                msd_300_max[mpid] = np.log10(np.max(site_msd_300)) # max of msd from all sites in a structure
            else:
                site_msd_600 = []
                for site in all_data[mpid][temp]:
                    msd_600_all[f"{mpid}_{site}"] = np.log10(np.mean(all_data[mpid][temp][site])) # mean of msd at unique site
                    site_msd_600.extend(all_data[mpid][temp][site])
                msd_600_mean[mpid] = np.log10(np.mean(site_msd_600)) # mean of msd from all sites in a structure
                msd_600_max[mpid] = np.log10(np.max(site_msd_600)) # max of msd from all sites in a structure
                    

df_log_msd_300_all = pd.DataFrame.from_dict(data=msd_300_all, columns=["log_msd_all_300"], orient="index")
df_log_msd_600_all = pd.DataFrame.from_dict(data=msd_600_all, columns=["log_msd_all_600"], orient="index")
df_log_msd_300_mean = pd.DataFrame.from_dict(data=msd_300_mean, columns=["log_msd_mean_300"], orient="index")
df_log_msd_600_mean = pd.DataFrame.from_dict(data=msd_600_mean, columns=["log_msd_mean_600"], orient="index")
df_log_msd_300_max = pd.DataFrame.from_dict(data=msd_300_max, columns=["log_msd_max_300"], orient="index")
df_log_msd_600_max = pd.DataFrame.from_dict(data=msd_600_max, columns=["log_msd_max_600"], orient="index")

# Save msd datasets
df_log_msd_300_all.to_json("log_msd_all_300.json")
df_log_msd_600_all.to_json("log_msd_all_600.json")
df_log_msd_300_mean.to_json("log_msd_mean_300.json")
df_log_msd_600_mean.to_json("log_msd_mean_600.json")
df_log_msd_300_max.to_json("log_msd_max_300.json")
df_log_msd_600_max.to_json("log_msd_max_600.json")