Source code for pylimer_tools.io.read_pylimer_tools_output_file
"""
This module provides a few functions to read output from pylimer_tools_cpp's simulators.
"""
import pandas as pd
from pylimer_tools.utils.cache_utility import do_cache, load_cache
[docs]
def read_avg_file(filename: str) -> pd.DataFrame:
"""
Read an averages-output file from one of the simulators shipped with pylimer_tools.
This function parses the output file format used by pylimer_tools_cpp simulators,
handling multiple data sections and converting them to a pandas DataFrame.
The function also caches results to improve performance on subsequent reads.
:param filename: Path to the averages file to read
:type filename: str
:return: DataFrame containing the parsed averages data, grouped by OutputStep
:rtype: pd.DataFrame
:note: The function automatically filters out lines containing "-nan" values,
null characters, or fewer than 3 columns.
:note: The returned DataFrame is grouped by OutputStep, keeping only the last
entry for each step.
"""
cache = load_cache(filename, "my-avg")
if cache is not None:
return cache
data_frames = []
with open(filename, "r") as f:
first_line_split = f.readline().removeprefix("#").strip().split()
data = []
for line in f:
if "-nan" in line or "\x00" in line or len(line.split()) < 3:
continue
stripped_line = line.removeprefix("#").strip()
if stripped_line.startswith(first_line_split[0]):
data_frames.append(
pd.DataFrame(
data, columns=first_line_split))
first_line_split = stripped_line.split()
data = []
elif stripped_line != "":
data.append(stripped_line.split())
if not len(data) == 0:
data_frames.append(pd.DataFrame(data, columns=first_line_split))
df = pd.concat(data_frames, ignore_index=True)
result = df.apply(pd.to_numeric, errors="ignore")
result = result.groupby("OutputStep", as_index=False).last()
assert not result["OutputStep"].duplicated().any()
do_cache(result, filename, "my-avg")
return result