import warnings
class BaseOutput:
    """
    Base class that takes the output file content as a str and performs
    various parsing operations on it.

    The text is stored (after passing through ``ResultFromOutput``) under
    ``self.result``.  Subclasses are mainly defined via the ``self.parsers``
    list: every object in it must offer a ``parse(output)`` method.
    """

    def __init__(self, outfile: str):
        """Store the (pre-processed) output text and start with no parsers."""
        self.result = self.ResultFromOutput(outfile)
        self.parsers = []

    def ResultFromOutput(self, outfile: str):
        """Pre-processing hook for the output text; the base class returns it unchanged."""
        # TODO
        return outfile

    def parseAll(self):
        """Run every registered parser, handing each one this output object."""
        for registered_parser in self.parsers:
            registered_parser.parse(self)
class BaseParser:
    """
    Base class that takes the whole result text, turns it into the section to
    be parsed and furthermore searches for values via search tuples of the
    form ``(key, number_index[, unit_index])``:

    1) ``key`` -- the words to look for in the section (a str is split with
       ``str.split()``)
    2) ``number_index`` -- result index relative to the key, as by
       ``str.split()``; ``1`` is the first word after the key.  A list of
       indices joins the words found there with single spaces.
    3) ``unit_index`` (optional) -- relative unit index, same convention

    Subclasses are to be mainly defined via specifying ``self.location_dict``
    and, optionally, the ``get_section`` and ``parse`` methods.
    """

    def __init__(self, result):
        self.section = self.get_section(result)
        # Maps a result name to a search tuple; consumed by location_group().
        self.location_dict = dict()

    def get_section(self, result):
        """
        Preformatting for the section which gets parsed.

        The base implementation returns the flat word list produced by
        ``split_with_newlines``.
        """
        # TODO
        return self.split_with_newlines(result)

    def locate(self, search_tuple: tuple):
        """
        Find the search term and return the value as a NumUnit.

        Returns None when the key is not present in the section.
        Raises KeyError when an index entry is neither an int nor a list.
        An index pointing past the end of the section raises IndexError, and
        NumUnit raises whatever ``float()`` raises for a non-numeric word.
        """
        _, end = self.find_sublist(search_tuple[0])
        if end is None:
            return None
        number = self._words_at(end, search_tuple[1])
        unit = None
        if len(search_tuple) == 3:
            unit = self._words_at(end, search_tuple[2])
        return NumUnit(number, unit)

    def _words_at(self, end, offsets):
        """Return the word(s) at the 1-based ``offsets`` counted from ``end``."""
        if isinstance(offsets, int):
            return self.section[end + offsets - 1]
        if isinstance(offsets, list):
            return " ".join([self.section[end + x - 1] for x in offsets])
        raise KeyError("wrong type of search key passed")

    def location_group(self):
        """Locate every entry of ``self.location_dict``; returns name -> result."""
        return {key: self.locate(value)
                for key, value in self.location_dict.items()}

    def find_sublist(self, search):
        """
        Method to make it easier to search sublists resulting from str.split().

        ``search`` is either a str (split on whitespace first) or a list of
        words.  Returns the start index and the index one past the end of the
        first occurrence, or ``(None, None)`` when there is none (an empty
        search never matches).
        """
        if isinstance(search, str):
            search = search.split()
        search_length = len(search)
        if search_length == 0:
            return None, None
        try:
            i = self.section.index(search[0])
            while True:
                if self.section[i:i + search_length] == search:
                    return i, i + search_length
                # Continue behind the failed candidate without copying the tail.
                i = self.section.index(search[0], i + 1)
        except ValueError:
            # list.index() found no further candidate for the first word.
            return None, None

    def set_result(self, outputclass, search_tuple):
        """
        Shorthand function that directly sets the search result on the output
        class.

        The item key is derived from the search key: surrounding "=" and
        whitespace are stripped and inner blanks become underscores.  Nothing
        is set when the key is not found.
        """
        search_result = self.locate(search_tuple)
        if isinstance(search_result, NumUnit):
            key = search_tuple[0].strip("=").strip().replace(" ", "_")
            outputclass[key] = search_result

    def split_with_newlines(self, text):
        """
        Split ``text`` into words, line by line.

        A line without any word contributes one empty string, so blank lines
        stay visible in the word list.
        """
        return [word for line in text.splitlines() for word in (line.split() or [''])]

    def parse(self, outputclass):
        """Placeholder to be overridden by subclasses; only emits a warning."""
        warnings.warn("parser not implemented")
class NumUnit:
    """
    Class to wrap a typical calculated value with its unit.

    The number is converted with ``float()`` on construction.  The textual
    form is ``"<number> <unit>"``, or the number alone when no (or an empty)
    unit is set; ``float(instance)`` gives the number back.
    """

    def __init__(self, number, unit=None):
        self.number = float(number)
        self.unit = unit

    def __float__(self):
        return self.number

    def __repr__(self):
        text = str(self.number)
        if not self.unit:
            return text
        return f"{text} {self.unit}"
class ValUnit:
    """
    Class to wrap an arbitrary value with a unit.

    A str value is converted to float when possible, after replacing every
    "D" with "e" (so a Fortran-style exponent such as ``1.5D+02`` is
    accepted).  Any value that cannot be converted is stored unchanged.
    """

    def __init__(self, value, unit=None):
        try:
            self.value = float(value.replace("D", "e"))
        except (AttributeError, TypeError, ValueError):
            # AttributeError/TypeError: value has no usable str.replace();
            # ValueError: the text is not a number.  Keep the value as given.
            self.value = value
        self.unit = unit

    def __repr__(self):
        if self.unit:
            return f"{self.value} {self.unit}"
        else:
            return str(self.value)
class MopacOutput(BaseOutput):
    """
    Main Output class, calls parsers and structures outputs.

    Custom parsers can be written, inheriting from BaseParser.
    Every Output class has a list at self.parsers to which custom parsers can
    simply be added. If a parser has been added after Output initialization,
    parsing can be redone using Output.parseAll().

    By convention, custom parsers are able to set attributes on the Output
    class by having the Output passed as an argument at parse time.

    If the content of a .aux file is passed (as is standard when creating the
    output via the MopacInput.run() method), it is parsed as well, so that
    all properties can be parsed in an unsupervised manner. Results of this
    can be found under self.auxDict.

    Attributes set on the instance can also be read and written with item
    syntax (``output["key"]``); ``keys()`` lists them.
    """

    def __init__(self, outfile: str, stdout=None, stderr=None, aux: str = None):
        """
        outfile -- text of the main output file
        stdout, stderr -- stored as given, not interpreted here
        aux -- text of the .aux file; when empty or None no AuxParser is run
        """
        super().__init__(outfile)
        self.outfile = outfile
        if aux:
            self.aux = aux
        try:
            lines = outfile.split("\n")[:2]
            self.header = lines[0].strip()
            self.comment = lines[1].strip()
        except (AttributeError, IndexError):
            # Fewer than two lines (or not a str): neither value is reported.
            self.header = "Not found"
            self.comment = "Not found"
        self.stdout = stdout
        self.stderr = stderr
        self.parsers = [XyzParser(self.result), StandardParser(self.result)]
        if hasattr(self, "aux"):
            self.parsers.append(AuxParser(self.aux))
        self.parseAll()

    def keys(self):
        """Return the names of all attributes currently set on the instance."""
        return self.__dict__.keys()

    def __getitem__(self, key):
        return self.__dict__[key]

    def __setitem__(self, key, value):
        self.__dict__[key] = value

    def toMol(self):
        """
        Returns an rdkit.Chem mol object built from the parsed xyz block.

        Connectivity is determined with rdDetermineBonds.DetermineConnectivity;
        if that fails a warning is emitted and the molecule is returned as
        obtained from the xyz block.  When no xyz block was parsed a warning
        is emitted and None is returned.
        """
        from rdkit import Chem
        from rdkit.Chem import rdDetermineBonds
        if not hasattr(self, "xyz"):
            warnings.warn("no xyz block available, cannot create mol")
            return None
        mol = Chem.MolFromXYZBlock(self.xyz)
        try:
            rdDetermineBonds.DetermineConnectivity(mol, useHueckel=False,
                                                   useVdw=True)
        except Exception:
            # Best effort: keep whatever MolFromXYZBlock produced.
            warnings.warn("failed to infer bond order")
        return mol
class XyzParser(BaseParser):
    """
    Parser that collects the coordinate table following the first
    "CARTESIAN COORDINATES" key and stores it as an xyz block.

    Expected word layout after the key: one word that is skipped, then
    records of five words each -- a running number starting at 1 followed by
    four words that are copied to the xyz line (presumably element symbol and
    x, y, z -- confirm against an actual output file).
    """

    def parse(self, outputclass):
        """
        Set ``outputclass.xyz`` to "<count>", an empty line and one line per
        record.  Nothing is set when the key is missing or no record follows.
        """
        _, end = self.find_sublist("CARTESIAN COORDINATES")
        if not isinstance(end, int):
            return
        # Skip the single word directly behind the key.
        words = self.section[end + 1:]
        rows = []
        pos = 0
        # A record is accepted only while its first word continues the
        # numbering; the bounds check ends the loop when the table runs up to
        # the very end of the text.
        while pos < len(words) and words[pos] == str(len(rows) + 1):
            rows.append(" ".join(words[pos + 1:pos + 5]))
            pos += 5
        if rows:
            outputclass.xyz = f"{len(rows)}\n\n" + "\n".join(rows)
class StandardParser(BaseParser):
    """
    Parser for the scalar summary values of the main output text.

    Every value found is set as an item on the output class; values whose key
    does not occur in the text are skipped.
    """

    def parse(self, outputclass):
        """
        Items set (when found): FINAL_HEAT_OF_FORMATION, COSMO_AREA,
        COSMO_VOLUME, GRADIENT_NORM, IONIZATION_POTENTIAL, HOMO, LUMO,
        NO._OF_FILLED_LEVELS, MOLECULAR_WEIGHT, POINT_GROUP,
        Empirical_Formula and Atom_Count.
        """
        for search_tuple in (
            ("FINAL HEAT OF FORMATION =", 4, 5),
            ("COSMO AREA =", 1, [2, 3]),
            ("COSMO VOLUME =", 1, [2, 3]),
            ("GRADIENT NORM =", 3, [4, 5]),
            ("IONIZATION POTENTIAL =", 1, 2),
        ):
            self.set_result(outputclass, search_tuple)

        # Both energies follow the same key: the first word behind it is the
        # HOMO, the second one the LUMO.
        for name, offset in (("HOMO", 1), ("LUMO", 2)):
            energy = self.locate(("HOMO LUMO ENERGIES (EV) =", offset))
            if isinstance(energy, NumUnit):
                energy.unit = "EV"
                outputclass[name] = energy

        self.set_result(outputclass,
                        ("NO. OF FILLED LEVELS =", 1))
        self.set_result(outputclass,
                        ("MOLECULAR WEIGHT =", 1))

        _, pointgroup_i = self.find_sublist("POINT GROUP:")
        if pointgroup_i:
            outputclass["POINT_GROUP"] = self.section[pointgroup_i]

        _, formula_i = self.find_sublist("Empirical Formula:")
        if formula_i:
            try:
                # The formula runs up to the next "=" word.
                equals_i = self.section.index("=", formula_i)
            except ValueError:
                equals_i = None
            if equals_i is not None:
                outputclass["Empirical_Formula"] = " ".join(
                    self.section[formula_i:equals_i])
                # Expected continuation: "= <count> atoms"; the slice keeps a
                # text ending right after the "=" from raising IndexError.
                if self.section[equals_i + 2:equals_i + 3] == ["atoms"]:
                    outputclass["Atom_Count"] = self.section[equals_i + 1]
                    # Compare only once a count is known; the first line of
                    # the xyz block holds its atom count as text.
                    if "xyz" in outputclass.keys() and outputclass["xyz"].split("\n")[0] != outputclass["Atom_Count"]:
                        warnings.warn(
                            "Atom count of molecular formula incongruent with xyz block, proceed with caution")
class AuxParser(BaseParser):
    """
    Parser for the text of a .aux file.

    The text is expected to be framed by two start and two end marker lines.
    In between, banner blocks delimited by lines containing " ########" name
    a section, and each section consists of entries ``KEY=value`` or
    ``KEY:UNIT=value`` whose value may continue on the following lines.

    The result is stored as ``outputclass.auxDict``: a dict mapping each
    section name to its own dict of entry name -> ValUnit.
    """

    def get_section(self, result):
        """Keep the text as raw lines; the frame markers are compared verbatim."""
        return result.split("\n")

    @staticmethod
    def _store_entry(section_dic, title, unit, line_memory):
        """Store the entry collected so far, if it has a name and any value line."""
        if title != "" and len(line_memory) > 0:
            section_dic[title] = ValUnit("\n".join(line_memory), unit)

    def parse(self, outputclass):
        """
        Build ``outputclass.auxDict`` from the stored lines.

        ``self.section`` is left untouched, so parsing can be repeated.
        A warning is emitted when the frame markers are not where expected;
        parsing continues regardless.
        """
        frame = (
            (0, " START OF MOPAC PROGRAM"),
            (1, " START OF MOPAC FILE"),
            (-3, " END OF MOPAC FILE"),
            (-2, " END OF MOPAC PROGRAM"),
        )
        try:
            framed = all(self.section[i] == text for i, text in frame)
        except IndexError:
            framed = False
        if not framed:
            warnings.warn("AUX assertions failed, proceed with caution")

        # Work on a local copy without the frame: trimming self.section
        # itself would cut off further lines on every repeated parse.
        body = self.section[2:-3]

        main_dic = dict()
        section_dic = dict()
        section_header = ""
        in_header = False
        title = ""
        unit = None
        line_memory = []
        for line in body:
            if " ########" in line:
                in_header = not in_header
                if in_header:
                    # A new banner opens: close the section read so far and
                    # start the next one with a dict of its own.
                    self._store_entry(section_dic, title, unit, line_memory)
                    if len(section_dic) > 0:
                        main_dic[section_header] = section_dic
                    section_dic = dict()
                    section_header = ""
                    title = ""
                    unit = None
                    line_memory = []
                continue
            if in_header:
                section_header += line.strip().strip("#").strip()
                continue
            if section_header == "":
                # Data outside of a named section has nowhere to go.
                continue
            if "=" not in line:
                # Continuation line of the current entry's value.
                line_memory.append(line.strip())
                continue
            # A new entry starts; store the previous one first.
            self._store_entry(section_dic, title, unit, line_memory)
            key_part, _, value_part = line.partition("=")
            # The unit separator is looked for in the key part only, so a
            # ":" inside the value is not mistaken for it.
            name, colon, unit_text = key_part.partition(":")
            if colon:
                title = name.strip()
                unit = unit_text
            else:
                title = key_part.strip()
                unit = None
            line_memory = [value_part.strip()] if value_part else []
        # The last entry and the last section are not followed by anything
        # that would trigger storing them inside the loop.
        self._store_entry(section_dic, title, unit, line_memory)
        if len(section_dic) > 0:
            main_dic[section_header] = section_dic
        outputclass.auxDict = main_dic