Source code for fragmenstein.monster.mcs_mapping.compare_atom

from rdkit import Chem
from rdkit.Chem import rdFMCS
# TypedDict & Unpack fixed in .legacy:
from typing import Dict, List, Union, Tuple, Optional, TypeVar, Sequence, TypedDict, Unpack  # noqa
from functools import singledispatchmethod  # monkeypatched by .legacy (for Py3.7)
import itertools

from .types import IndexMap, BasicFMCSMode, ExtendedFMCSMode


class VanillaCompareAtoms(rdFMCS.MCSAtomCompare):
    """
    Python re-implementation of the stock ``rdFMCS`` atom comparison.

    A subclass of ``rdFMCS.MCSAtomCompare`` cannot simply defer to its parent:

    .. code-block:: python
        :caption: This will raise an error:

        super().__call__(parameters, mol1, atom_idx1, mol2, atom_idx2)

    so the comparison is spelled out in full here, which gives subclasses
    a working "vanilla" implementation to fall back on.
    """

    def __init__(self, comparison: rdFMCS.AtomCompare = rdFMCS.AtomCompare.CompareAnyHeavyAtom):
        """
        :param comparison: member of the ``rdFMCS.AtomCompare`` enum selecting
            which element rule ``__call__`` applies.

        The enum itself is not callable, and the parameters object handed to
        ``__call__`` carries no "compare elements"-like booleans (only the isotope one),
        so the chosen rule has to be remembered on the instance.
        As seen in https://github.com/rdkit/rdkit/blob/master/Code/GraphMol/FMCS/Wrap/testFMCS.py
        one is expected to write one's own comparator.
        """
        super().__init__()  # noqa the p_object (python object aka ctypes.py_object) is not used
        self.comparison = comparison

    def __call__(self,  # noqa signature matches... it is just Boost being Boost
                 parameters: rdFMCS.MCSAtomCompareParameters,
                 mol1: Chem.Mol,
                 atom_idx1: int,
                 mol2: Chem.Mol,
                 atom_idx2: int) -> bool:
        """
        Tell whether atom ``atom_idx1`` of ``mol1`` may be matched to atom ``atom_idx2`` of ``mol2``.

        The checks are, in order: isotope (if ``parameters.MatchIsotope``),
        the element rule chosen by ``self.comparison``, then the valence, chirality,
        formal charge and ring membership checks enabled in ``parameters``.

        :return: ``False`` at the first failed check, ``True`` if none fails.
        """
        first: Chem.Atom = mol1.GetAtomWithIdx(atom_idx1)
        second: Chem.Atom = mol2.GetAtomWithIdx(atom_idx2)
        # ------- isotope ------------------------
        if parameters.MatchIsotope and first.GetIsotope() != second.GetIsotope():  # noqa
            return False
        # ------- element rule ------------------------
        z1, z2 = first.GetAtomicNum(), second.GetAtomicNum()
        # a dummy atom (atomic number zero) can only be paired with another dummy atom
        if (z1 == 0) != (z2 == 0):
            return False
        mode = self.comparison
        if mode == rdFMCS.AtomCompare.CompareIsotopes:
            if first.GetIsotope() != second.GetIsotope():
                return False
        elif mode == rdFMCS.AtomCompare.CompareElements:
            if z1 != z2:
                return False
        elif mode == rdFMCS.AtomCompare.CompareAnyHeavyAtom:
            # NOTE(review): a hydrogen is rejected here even against another hydrogen
            # -- confirm this is the intended equivalent of RDKit's own rule.
            if z1 == 1 or z2 == 1:
                return False
        # rdFMCS.AtomCompare.CompareAny: no restriction on the element
        # ------- valence ------------------------
        if parameters.MatchValences and first.GetTotalValence() != second.GetTotalValence():  # noqa
            return False
        # the inherited checkers all take the very same arguments as this method
        where = (parameters, mol1, atom_idx1, mol2, atom_idx2)
        # ------- chiral ------------------------
        if parameters.MatchChiralTag and not self.CheckAtomChirality(*where):
            return False
        # ------- formal ------------------------
        if parameters.MatchFormalCharge and not self.CheckAtomCharge(*where):
            return False
        # ------- ring ------------------------
        if parameters.RingMatchesRingOnly and not self.CheckAtomRingMatch(*where):
            return False
        # ------- complete ------------------------
        if parameters.CompleteRingsOnly:
            # not acted upon: it is unclear how it is intended to be used at the atom level
            pass
        # ------- distance ------------------------
        if parameters.MaxDistance:  # integer...
            # todo fill!
            pass
        return True



[docs]
class SpecialCompareAtoms(VanillaCompareAtoms):
    """
    Atom comparison that obeys a user-defined hit-to-followup atom mapping,
    deferring to the vanilla comparison for everything the mapping does not settle.

    This works like the ``_get_atom_maps`` did prior to Fragmenstein version 0.9.
    The mapping as discussed in GitHub issue #23 is in the format

        mapping = {'hit1': {1: 1, 2: 5}, 'hit2': {3: 3, 4: 4, 4: 6}}

    The hit index is first, the followup index is second.
    The index `-1` for a followup index is the same as not providing the hit index,
    it is described here solely for clarity, not for use.

        mapping = {'hit1': {1: 1, 2: 5, 3: -1}, 'hit2': {3: 3, 4: 4, 4: 6}}

    The index `-2` for a followup index will result in the hit atom index
    not matching any followup index.

        mapping = {'hit1': {1: 1, 2: 5, 3: -2}, 'hit2': {3: 3, 4: 4, 4: 6}}

    If the ``exclusive_mapping`` argument of ``__init__`` is True,
    then if a followup index is present in one hit, but not in a second hit,
    then no atom of the second hit will match that followup atom.
    A negative index for a hit atom index means that no atom in that hit will match the
    corresponding followup index.

        mapping = {'hit1': {1: 1, 2: 5, -1: 3, -2: 7}, 'hit2': {3: 3, 4: 4, 4: 6}}

    However, a positive integer on a different hit overrides it, therefore,
    in the above followup atom 3 cannot be matched to any atom in hit1, but will match
    atom 3 in hit2. Followup atom 7 will not match either.

    .. code-block:: python

        SpecialCompareAtoms(custom_map=mapping, exclusive_mapping=True)
    """


[docs]
    def __init__(self,
                 comparison: rdFMCS.AtomCompare = rdFMCS.AtomCompare.CompareAnyHeavyAtom,
                 custom_map: Optional[Dict[str, Dict[int, int]]] = None, exclusive_mapping: bool = True):
        """
        :param comparison: element rule passed on to the parent comparison.
        :param custom_map: user-defined mapping, hit name to {hit atom index: followup atom index}.
            ``None`` means no mapping. It is normalised by ``fix_custom_map``.
        :param exclusive_mapping: if True, every followup index listed in any hit mapping
            is banned from matching atoms it was not explicitly mapped to;
            if False, only the followup indices listed under a negative hit index are banned.
        """
        super().__init__(comparison=comparison)  # what is p_object?
        self.custom_map = self.fix_custom_map(custom_map)  # custom as in user-defined
        # ``banned``: followup indices which may only be matched via an explicit custom mapping.
        if exclusive_mapping:
            self.banned = self._get_strict_banned()
        else:
            # formerly called the undefined ``_get_lax_banned``, an AttributeError at construction
            self.banned = self._get_lax_flipped_map()


    def _get_strict_banned(self) -> List[int]:
        """
        A list of followup indices that cannot be unmapped
        called if exclusive_mapping is True
        """
        return [foll_idx for mapping in self.custom_map.values() for foll_idx in mapping.values()]

    def _get_lax_flipped_map(self) -> List[int]:
        """
        A list of followup indices that cannot be mapped as per negative hit index
        called if exclusive_mapping is False
        """
        return [foll_idx for mapping in self.custom_map.values() for hit_idx, foll_idx in mapping.items()
                if hit_idx < 0]


[docs]
    def get_custom(self, hit_mol: Chem.Mol, hit_atom_idx: int) -> int:
        """
        What idx of followup corresponds to the ``hit_atom_idx`` index of ``hit_mol``?

        The hit is looked up in the custom map by its ``_Name`` property.

        :param hit_mol: the hit molecule; it must have the ``_Name`` property set.
        :param hit_atom_idx: index of the atom within ``hit_mol``.
        :return: the user-assigned followup index (which may be a negative placeholder),
            or -1 if nothing is assigned, be it because the atom is not listed
            or because the hit has no entry in the custom map at all.
        """
        name: str = hit_mol.GetProp('_Name')
        # a hit absent from the custom map has nothing assigned: answer -1 as documented,
        # rather than leaking a KeyError
        return self.custom_map.get(name, {}).get(hit_atom_idx, -1)


    def __call__(self,
                 parameters: rdFMCS.MCSAtomCompareParameters,
                 hit: Chem.Mol,
                 hit_atom_idx: int,
                 followup: Chem.Mol,
                 followup_atom_idx: int) -> bool:
        """
        Tell whether atom ``hit_atom_idx`` of ``hit`` may be matched to
        atom ``followup_atom_idx`` of ``followup``.

        Molecules sharing the same ``_Name``, and hits without an entry in the custom map,
        are judged by the parent comparison alone. Otherwise the verdict is, in order:

        1. the user-defined mapping: a match if it points to this followup atom,
           a mismatch if it points anywhere else (including negative placeholders other than -1);
        2. a mismatch if the followup atom is in the ban list;
        3. a mismatch if either atom is a dummy or a hydrogen and the symbols differ;
        4. whatever the parent comparison says.
        """
        delegate = super().__call__
        followup_name = followup.GetProp('_Name')
        hit_name = hit.GetProp('_Name')
        if followup_name == hit_name or hit_name not in self.custom_map:
            # self to self mapping, or a hit the user said nothing about
            return delegate(parameters, hit, hit_atom_idx, followup, followup_atom_idx)
        hit_symbol = hit.GetAtomWithIdx(hit_atom_idx).GetSymbol()
        followup_symbol = followup.GetAtomWithIdx(followup_atom_idx).GetSymbol()
        # ------- Custom -----------------------
        # the followup index the user assigned to this hit atom (-1 if none), see class docstring
        assigned: int = self.get_custom(hit, hit_atom_idx)
        if assigned == followup_atom_idx:
            # it is the custom map!
            return True
        if assigned != -1:
            # a different index was given, or a non -1 negative number
            return False
        if followup_atom_idx in self.banned:
            # banned is fully filled if exclusive_mapping was true during init
            # otherwise it holds the user provided negatives
            return False
        # followup index not assigned: carry on with the generic rules
        # ------- Dummy & protons ------------------------
        # a dummy atom cannot match a non-dummy atom, nor a proton a non-proton
        if hit_symbol != followup_symbol and {'*', 'H'} & {hit_symbol, followup_symbol}:
            return False
        # ------- vanilla ------------------------
        return delegate(parameters, hit, hit_atom_idx, followup, followup_atom_idx)


[docs]
    @singledispatchmethod
    def get_valid_matches(self,
                          parameters: rdFMCS.MCSAtomCompareParameters,
                          common: Chem.Mol,
                          hit: Chem.Mol,
                          followup: Chem.Mol) -> List[Dict[int, int]]:
        """
        Returns a list of possible matches, each a dictionary of hit to follow indices,
        that obey the criteria of the atomic comparison.
        (Formerly this was a IndexAtom)

        This however does not check if all the atoms in custom are present.
        For that, ``Monster._validate_vs_custom`` is called in the method ``Monster.get_mcs_mappings``,
        after calling ``Monster._get_atom_maps``, which calls this method.
        (``Monster.get_mcs_mappings`` tries different matching schema,
        while ``Monster._get_atom_maps`` is for one single scheme).
        The primary reason why this is so, is that there are two tiers of requirements:

        1. The custom map must be present vs
        2. The custom map may be present, but has to be obeyed.

        :param parameters: can be rdFMCS.MCSAtomCompareParameters or rdFMCS.MCSParameters
            (the latter is handled by the registered overload).
        :param common: the common substructure to look for in both molecules.
        :param hit: the hit molecule.
        :param followup: the followup molecule.
        :return: the distinct hit-to-followup index mappings in which every atom pair
            is accepted by this comparison (``self(...)``).
        """
        # GetSubstructMatches is independent of AtomCompare, hence every pairing of a hit
        # match with a followup match has to be vetted atom by atom.
        # re `int`: it is unclear under what condition an index is not an int already...
        # but it was cast so in a previous iteration
        hit_matches = [tuple(map(int, match))
                       for match in hit.GetSubstructMatches(common, uniquify=False)]
        followup_matches = [tuple(map(int, match))
                            for match in followup.GetSubstructMatches(common, uniquify=False)]
        # keyed by the sorted (hit, followup) pairs themselves, not by their hash,
        # so that two distinct matches can never be mistaken for duplicates
        unique_matches: Dict[Tuple[Tuple[int, int], ...], Dict[int, int]] = {}
        for hit_match, followup_match in itertools.product(hit_matches, followup_matches):
            # one index not matching is enough to discard the whole pairing
            if all(self(parameters=parameters,
                        hit=hit,
                        followup=followup,
                        hit_atom_idx=h,
                        followup_atom_idx=f)
                   for h, f in zip(hit_match, followup_match)):
                match = dict(zip(hit_match, followup_match))
                unique_matches[tuple(sorted(match.items()))] = match
        return list(unique_matches.values())


    @get_valid_matches.register
    def _(self,
          parameters: rdFMCS.MCSParameters,
          common: Chem.Mol,
          hit: Chem.Mol,
          followup: Chem.Mol) -> List[IndexMap]:
        """
        Overload of ``get_valid_matches`` for a whole ``rdFMCS.MCSParameters`` object:
        its ``AtomCompareParameters`` member is extracted and the call is dispatched again.
        """
        atom_parameters = parameters.AtomCompareParameters
        return self.get_valid_matches(atom_parameters, common, hit, followup)


[docs]
    def fix_custom_map(
            self,
            custom_map: Dict[str, Union[Sequence[Tuple[int, int]], Dict[int, int]]]
    ) -> Dict[str, Dict[int, int]]:
        """
        This will be deprecated in the future. As Monster.fix_custom_map is better.

        Normalise the user-defined map to ``Dict[str, Dict[int, int]]``:
        each per-hit entry, be it a sequence of 2-element tuples or already a dict,
        is replaced by a (new) dict.

        :param custom_map: hit name to mapping, or ``None``.
        :return: an empty dict if ``custom_map`` is ``None``,
            else the very same ``custom_map`` object, modified in place.
        :raises AssertionError: if ``custom_map`` is neither ``None`` nor a dict.
        """
        if custom_map is None:
            # in Monster {h.GetProp('_Name'): {} for h in self.hits}
            return {}
        assert isinstance(custom_map, dict), 'User defined map has to be mol name to Dict[int, int]'
        # keys are snapshotted first; only the values get replaced
        for hit_name in list(custom_map):
            custom_map[hit_name] = dict(custom_map[hit_name])
        return custom_map