# Source code for pymopac.output

import warnings


class BaseOutput:
    """
    Foundation for output objects built from an outfile given as a string.

    The outfile is passed through ``ResultFromOutput`` once and kept on
    ``self.result``. Parsing itself is delegated to the objects stored in
    ``self.parsers``; subclasses mostly differ in what they put in that list.
    """

    def __init__(self, outfile: str):
        """Store the converted outfile and start with no parsers."""
        self.result = self.ResultFromOutput(outfile)
        self.parsers = list()

    def ResultFromOutput(self, outfile: str):
        """Return the text the parsers work on (currently the outfile as is)."""
        # TODO
        return outfile

    def parseAll(self):
        """Call ``parse`` on every registered parser, passing this instance."""
        for registered in self.parsers:
            registered.parse(self)


class BaseParser:
    """
    Base class that takes the whole result text, extracts the relevant section
    and then searches it for
        1) a key in the section
        2) a relative result index, counted in tokens as produced by str.split()
        3) (optional) a relative unit index
    Subclasses are mainly defined by specifying self.location_dict and,
    optionally, by overriding get_section.
    """

    def __init__(self, result):
        self.section = self.get_section(result)
        self.location_dict = dict()

    def get_section(self, result):
        """
        Preformat the text that gets parsed.

        The default implementation tokenises ``result`` with
        ``split_with_newlines``.
        """
        # TODO
        return self.split_with_newlines(result)

    def _tokens_at(self, end, offsets):
        """
        Return the token(s) at 1-based ``offsets`` counted from index ``end``.

        An int yields a single token, a list yields the tokens joined by a
        single space. Any other type raises KeyError.
        """
        if isinstance(offsets, int):
            return self.section[end + offsets - 1]
        if isinstance(offsets, list):
            return " ".join([self.section[end + x - 1] for x in offsets])
        raise KeyError("wrong type of search key passed")

    def locate(self, search_tuple: tuple):
        """
        Find a search term and return the value behind it as a NumUnit.

        ``search_tuple`` is ``(key, value_offset[, unit_offset])``. Offsets are
        counted in tokens from the end of the matched key (1 = the first token
        after the key) and may be an int or a list of ints.

        Returns None when the key is not found, or when an offset points past
        the end of the section (e.g. truncated output). Raises KeyError when an
        offset is neither an int nor a list.
        """
        _, end = self.find_sublist(search_tuple[0])
        if end is None:
            return None

        value_offsets = search_tuple[1]
        unit_offsets = search_tuple[2] if len(search_tuple) == 3 else None
        try:
            number = self._tokens_at(end, value_offsets)
            unit = None
            if len(search_tuple) == 3:
                unit = self._tokens_at(end, unit_offsets)
        except IndexError:
            # The key was found but the value lies beyond the available
            # tokens; treat it like a missing key instead of crashing.
            return None

        return NumUnit(number, unit)

    def location_group(self):
        """Run ``locate`` for every entry of ``self.location_dict``."""
        return {key: self.locate(value)
                for key, value in self.location_dict.items()}

    def find_sublist(self, search):
        """
        Search ``self.section`` for a consecutive run of tokens.

        ``search`` is either a list of tokens or a string, which is split on
        whitespace first. Returns ``(start, end)`` of the first match, with
        ``end`` exclusive, or ``(None, None)`` when there is no match or the
        search is empty.
        """
        if isinstance(search, str):
            search = search.split()
        search_length = len(search)
        if search_length == 0:
            return None, None

        first = search[0]
        start = 0
        while True:
            try:
                # Resume after the previous candidate without copying the list.
                i = self.section.index(first, start)
            except ValueError:
                return None, None
            if self.section[i:i + search_length] == search:
                return i, i + search_length
            start = i + 1

    def set_result(self, outputclass, search_tuple):
        """
        Shorthand that stores the search result directly on the output class.

        The key is derived from the search term: "=" and surrounding
        whitespace are stripped and remaining spaces become underscores.
        Nothing is stored when ``locate`` does not return a NumUnit.
        """
        search_result = self.locate(search_tuple)
        if isinstance(search_result, NumUnit):
            key = search_tuple[0].strip("=").strip().replace(" ", "_")
            outputclass[key] = search_result

    def split_with_newlines(self, text):
        """
        Split ``text`` into whitespace-separated tokens, keeping one empty
        string for every blank line.
        """
        return [word for line in text.splitlines() for word in (line.split() or [''])]

    def parse(self, outputclass):
        """Placeholder; subclasses override this to set results."""
        warnings.warn("parser not implemented")


class NumUnit:
    """
    Pairs a calculated number with the unit it is given in.

    ``number`` is converted with ``float()`` on construction; ``unit`` is
    stored unchanged and may be None.
    """

    def __init__(self, number, unit=None):
        self.number = float(number)
        self.unit = unit

    def __repr__(self):
        # A missing or empty unit is left out of the textual form.
        return f"{self.number} {self.unit}" if self.unit else str(self.number)

    def __float__(self):
        return self.number


class ValUnit:
    """
    Class to wrap an arbitrary value with a unit.

    String values are converted to float when possible, with "D" accepted as
    exponent marker (e.g. "0.15D+03"). Anything that cannot be converted is
    stored unchanged.
    """

    def __init__(self, value, unit=None):
        try:
            self.value = float(value.replace("D", "e"))
        except (AttributeError, TypeError, ValueError):
            # Not a string, or not a number: keep the value as given.
            self.value = value
        self.unit = unit

    def __repr__(self):
        if self.unit:
            return f"{self.value} {self.unit}"
        return str(self.value)



# [docs]
class MopacOutput(BaseOutput):
    """
    Main Output class, calls parsers and structures outputs

    Custom parsers can be written, inheriting from BaseParser.
    Every Output class has a list at self.parsers to which custom parsers can
    simply be added. If a parser has been added after Output initialization,
    parsing can be redone using Output.parseAll()
    By convention, custom parsers are able to set attributes on the Output
    class by having the Output passed as an argument at parse time.

    If the content of an .aux file is passed as ``aux``, it is parsed as well
    and the result is stored under self.auxDict.
    """

    def __init__(self, outfile: str, stdout=None, stderr=None, aux: str = None):
        """
        Parse ``outfile`` (and ``aux`` if given) and store the results.

        ``stdout`` and ``stderr`` are stored unchanged. ``self.aux`` is only
        set when ``aux`` is truthy.
        """
        super().__init__(outfile)
        self.outfile = outfile
        if aux:
            self.aux = aux
        try:
            lines = outfile.split("\n")[:2]
            self.header = lines[0].strip()
            self.comment = lines[1].strip()
        except (AttributeError, IndexError):
            # Fewer than two lines (or not a string): report both as missing.
            self.header = "Not found"
            self.comment = "Not found"
        self.stdout = stdout
        self.stderr = stderr
        self.parsers = [XyzParser(self.result), StandardParser(self.result)]
        if hasattr(self, "aux"):
            self.parsers.append(AuxParser(self.aux))
        self.parseAll()

    def keys(self):
        """Return the names of all attributes currently set on the instance."""
        return self.__dict__.keys()

    def __getitem__(self, key):
        return self.__dict__[key]

    def __setitem__(self, key, value):
        self.__dict__[key] = value

    def toMol(self):
        """
        returns an rdkit.Chem mol object

        Returns None when no xyz block has been parsed. A failure while
        determining connectivity only triggers a warning; the molecule is
        returned as it is at that point.
        """
        from rdkit import Chem
        from rdkit.Chem import rdDetermineBonds
        if not hasattr(self, "xyz"):
            return None
        mol = Chem.MolFromXYZBlock(self.xyz)
        try:
            rdDetermineBonds.DetermineConnectivity(mol, useHueckel=False,
                                                   useVdw=True)
        except Exception:
            warnings.warn("failed to infer bond order")
        return mol




class XyzParser(BaseParser):
    """
    Extracts the atom table following "CARTESIAN COORDINATES" as an xyz block.
    """

    def parse(self, outputclass):
        """
        Set ``outputclass.xyz`` to "<count>\\n\\n<rows>" if atoms are found.

        Rows are expected as five tokens each: a running number starting at 1
        followed by four tokens that are copied into the xyz block. Reading
        stops at the first token that is not the next running number or at
        the end of the section. Nothing is set when the header is missing or
        no row follows it.
        """
        _, end = self.find_sublist("CARTESIAN COORDINATES")
        if end is None:
            return

        # The token right after the header is skipped, as in the layout
        # where a blank line separates header and table.
        tokens = self.section[end + 1:]
        rows = []
        pos = 0
        while pos < len(tokens) and tokens[pos] == str(len(rows) + 1):
            rows.append(" ".join(tokens[pos + 1:pos + 5]))
            pos += 5

        if rows:
            outputclass.xyz = "\n".join([str(len(rows)), "", *rows])


class StandardParser(BaseParser):
    """
    Reads the standard scalar results and the formula summary from the output.
    """

    def parse(self, outputclass):
        """
        Store every value that is found on ``outputclass``; missing ones are
        skipped. Warns when the atom count of the formula line differs from
        the first line of an already parsed xyz block.
        """
        self.set_result(outputclass,
                        ("FINAL HEAT OF FORMATION =", 4, 5))
        self.set_result(outputclass,
                        ("COSMO AREA              =", 1, [2, 3]))
        self.set_result(outputclass,
                        ("COSMO VOLUME            =", 1, [2, 3]))
        self.set_result(outputclass,
                        ("GRADIENT NORM           =", 3, [4, 5]))
        self.set_result(outputclass,
                        ("IONIZATION POTENTIAL    =", 1, 2))

        # Both energies are on the same line: HOMO is the first value after
        # the key, LUMO the second.
        homo = self.locate(("HOMO LUMO ENERGIES (EV) =", 1))
        if isinstance(homo, NumUnit):
            homo.unit = "EV"
            outputclass["HOMO"] = homo
        lumo = self.locate(("HOMO LUMO ENERGIES (EV) =", 2))
        if isinstance(lumo, NumUnit):
            lumo.unit = "EV"
            outputclass["LUMO"] = lumo

        self.set_result(outputclass,
                        ("NO. OF FILLED LEVELS    =", 1))
        self.set_result(outputclass,
                        ("MOLECULAR WEIGHT        =", 1))

        _, pointgroup_i = self.find_sublist("POINT GROUP:")
        if pointgroup_i:
            outputclass["POINT_GROUP"] = self.section[pointgroup_i]

        _, formula_i = self.find_sublist("Empirical Formula:")
        if formula_i:
            # The formula runs up to the next "="; the atom count follows it.
            formula_e = self.section[formula_i:].index("=")
            outputclass["Empirical_Formula"] = " ".join(
                self.section[formula_i:formula_i+formula_e])
            if self.section[formula_i+formula_e+2] == "atoms":
                outputclass["Atom_Count"] = self.section[formula_i+formula_e+1]
                if ("xyz" in outputclass.keys()
                        and outputclass["xyz"].split("\n")[0]
                        != outputclass["Atom_Count"]):
                    warnings.warn(
                        "Atom count of molecular formula incongruent with xyz block, proceed with caution")


class AuxParser(BaseParser):
    """
    Parses the text of an .aux file into a nested dict
    ``{section header: {title: ValUnit}}`` stored as ``outputclass.auxDict``.
    """

    def get_section(self, result):
        """The aux text is processed line by line, not token by token."""
        return result.split("\n")

    @staticmethod
    def _store_entry(section_dic, title, unit, line_memory):
        """Add the collected entry to ``section_dic`` if it has a title and data."""
        if title != "" and len(line_memory) > 0:
            section_dic[title] = ValUnit("\n".join(line_memory), unit)

    def parse(self, outputclass):
        """
        Build ``outputclass.auxDict`` from the aux lines.

        Lines containing " ########" toggle between banner and data; the text
        between two such lines becomes the section header. In a data part, a
        line containing "=" starts an entry "TITLE[:UNIT]=value" and lines
        without "=" are appended to the current entry. Entries that appear
        before the first header are ignored.

        ``self.section`` is left untouched so that parsing can be repeated.
        """
        section = self.section
        framed = (
            len(section) >= 4
            and section[0] == " START OF MOPAC PROGRAM"
            and section[1] == " START OF MOPAC FILE"
            and section[-3] == " END OF MOPAC FILE"
            and section[-2] == " END OF MOPAC PROGRAM"
        )
        if not framed:
            warnings.warn("AUX assertions failed, proceed with caution")

        # Drop the two start markers and the last three lines.
        lines = section[2:-3]
        main_dic = dict()
        section_dic = dict()
        section_header = ""
        in_header = False

        title = ""
        unit = None
        line_memory = []

        for line in lines:
            if " ########" in line:
                in_header = not in_header
                if in_header:
                    # A new banner opens: finish the previous section and
                    # start a fresh dict so sections do not share entries.
                    self._store_entry(section_dic, title, unit, line_memory)
                    if len(section_dic) > 0:
                        main_dic[section_header] = section_dic
                    section_dic = dict()
                    section_header = ""
                    title, unit, line_memory = "", None, []
                continue
            if in_header:
                section_header += line.strip().strip("#").strip()
                continue
            if "=" not in line:
                line_memory.append(line.strip())
                continue
            if section_header == "":
                continue

            # A new entry starts: store the one collected so far.
            self._store_entry(section_dic, title, unit, line_memory)
            name, _, value = line.partition("=")
            if ":" in name:
                # Only a ":" in front of the "=" separates title and unit.
                title, _, unit = name.partition(":")
                title = title.strip()
            else:
                title = name.strip()
                unit = None
            line_memory = []
            if value:
                line_memory.append(value.strip())

        # Flush the last entry and the last section.
        self._store_entry(section_dic, title, unit, line_memory)
        if len(section_dic) > 0:
            main_dic[section_header] = section_dic

        outputclass.auxDict = main_dic