# Source code for ampal.assembly

"""Defines various containers for AMPAL objects."""

from collections import Counter
import itertools

from ampal.base_ampal import BaseAmpal, Polymer, find_atoms_within_distance
from ampal.ligands import LigandGroup, Ligand
from ampal.analyse_protein import (
    sequence_molecular_weight,
    sequence_molar_extinction_280,
    sequence_isoelectric_point,
)


class AmpalContainer(object):
    """Custom list type class that holds multiple model states.

    Notes
    -----
    In this case, a state is defined as a set of coordinates that
    represents a protein model and an associated score or set of scores.

    Parameters
    ----------
    ampal_objects : [AMPAL], optional
        A list of AMPAL objects with which to initialise the
        AMPAL container. This can be an `Assembly`, `Polymer`
        or `Monomer`. The list is copied, so later changes to the
        container do not modify the list that was passed in.
    id : str, optional
        Identifier for the AMPAL container.

    Attributes
    ----------
    id : str
        Identifier for the AMPAL container.
    """

    def __init__(self, ampal_objects=None, id=None):
        self.id = "AMPAL Container" if not id else id
        # Take a copy: storing the caller's list directly would let
        # `append`/`extend` silently mutate an object owned by the caller.
        self._ampal_objects = list(ampal_objects) if ampal_objects else []

    def __add__(self, other):
        """Merges two `AmpalContainers`.

        Notes
        -----
        Generates new `AmpalContainer`. The new container is created
        with the default id; neither operand is modified.

        Raises
        ------
        TypeError
            Raised if other is any type other than `AmpalContainer`.
        """
        if not isinstance(other, AmpalContainer):
            raise TypeError(
                "Only AmpalContainer objects may be merged with an "
                'AmpalContainer using binary operator "+".'
            )
        merged_ac = self._ampal_objects + other._ampal_objects
        return AmpalContainer(ampal_objects=merged_ac)

    def __repr__(self):
        return "<AmpalContainer ({}) containing {} AMPAL Objects>".format(
            self.id, len(self._ampal_objects)
        )

    def __len__(self):
        return len(self._ampal_objects)

    def __getitem__(self, item):
        """Retrieves contained objects by id suffix, index or slice.

        Parameters
        ----------
        item : str or int or slice
            A `str` is matched against the part of each object's `id`
            that follows the final underscore. An `int` indexes the
            underlying list. Anything else (e.g. a slice) is applied to
            the underlying list and the result is wrapped in a new
            `AmpalContainer`.

        Raises
        ------
        KeyError
            Raised if a `str` key matches no contained object.
        IndexError
            Raised if an `int` index is out of range.
        """
        if isinstance(item, str):
            # If several ids share a suffix, the last object wins.
            id_dict = {p.id.split("_")[-1]: p for p in self._ampal_objects}
            return id_dict[item]
        if isinstance(item, int):
            return self._ampal_objects[item]
        return AmpalContainer(self._ampal_objects[item])

    def append(self, item):
        """Adds an AMPAL object to the `AmpalContainer`."""
        self._ampal_objects.append(item)

    def extend(self, ampal_container):
        """Extends an `AmpalContainer` with another `AmpalContainer`.

        Raises
        ------
        TypeError
            Raised if `ampal_container` is not an `AmpalContainer`.
        """
        if not isinstance(ampal_container, AmpalContainer):
            raise TypeError(
                "Only AmpalContainer objects may be merged with an AmpalContainer."
            )
        self._ampal_objects.extend(ampal_container._ampal_objects)

    @property
    def pdb(self):
        """Compiles the PDB strings for each state into a single file.

        Notes
        -----
        `Assembly` members are rendered with `make_pdb` without their own
        header and footer; any other member contributes its `pdb`
        attribute. Every state is followed by an `ENDMDL` line.
        """
        header_title = "{:<80}\n".format("HEADER    {}".format(self.id))
        data_type = "{:<80}\n".format("EXPDTA    ISAMBARD Model")
        pdb_strs = []
        for ampal in self._ampal_objects:
            if isinstance(ampal, Assembly):
                pdb_str = ampal.make_pdb(header=False, footer=False)
            else:
                pdb_str = ampal.pdb
            pdb_strs.append(pdb_str)
        merged_strs = "ENDMDL\n".join(pdb_strs) + "ENDMDL\n"
        return "".join([header_title, data_type, merged_strs])

    def sort_by_tag(self, tag):
        """Returns a new `AmpalContainer` sorted by a tag on its objects.

        Notes
        -----
        The container itself is left unchanged; the sorted copy is
        created with the default id.

        Parameters
        ----------
        tag : str
            Key of tag used for sorting.

        Returns
        -------
        sorted_container : AmpalContainer
            New container holding the same objects ordered by
            `tags[tag]`.
        """
        return AmpalContainer(sorted(self._ampal_objects, key=lambda x: x.tags[tag]))


class Assembly(BaseAmpal):
    """A container that holds `Polymer` type objects.

    Notes
    -----
    Has a simple hierarchy: `Assembly` contains one or more `Polymer`,
    which in turn contains one or more `Monomer`.

    Parameters
    ----------
    molecules : Polymer or [Polymer], optional
        `Polymer` or list containing `Polymer` objects to be assembled.
    assembly_id : str, optional
        An ID that the user can use to identify the `Assembly`. This
        is used when generating a pdb file using `Assembly().pdb`.

    Raises
    ------
    TypeError
        `Assembly` objects can only be initialised empty, using a `Polymer`
        or a list of `Polymers`.
    """

    def __init__(self, molecules=None, assembly_id=""):
        if isinstance(molecules, Polymer):
            # Checked before the truthiness test below so that a Polymer
            # that evaluates as falsy (e.g. one with no monomers, if its
            # length is zero) is kept rather than silently discarded.
            self._molecules = [molecules]
        elif not molecules:
            self._molecules = []
        elif isinstance(molecules, list) and all(
            isinstance(mol, Polymer) for mol in molecules
        ):
            # Every element is validated, matching what `append` enforces.
            self._molecules = list(molecules)
        else:
            raise TypeError(
                "Assembly objects can only be initialised empty, using "
                "a Polymer or a list of Polymers."
            )
        self.id = str(assembly_id)
        self.tags = {}

    def __add__(self, other):
        """Merges together two `Assemblies`.

        Notes
        -----
        Generates a new `Assembly` that carries the id of the left-hand
        operand.

        Raises
        ------
        TypeError
            Raised if other is any type other than `Assembly`.
        """
        if not isinstance(other, Assembly):
            raise TypeError(
                "Only Assembly objects may be merged with an Assembly using "
                'binary operator "+".'
            )
        merged_assembly = self._molecules + other._molecules
        return Assembly(molecules=merged_assembly, assembly_id=self.id)

    def __len__(self):
        return len(self._molecules)

    def __getitem__(self, item):
        """Retrieves component molecules by id, index or slice.

        Parameters
        ----------
        item : str or int or slice
            A `str` is matched against `str(polymer.id)`. An `int`
            indexes the list of molecules. Anything else (e.g. a slice)
            is applied to the list of molecules and the result is
            wrapped in a new `Assembly` with the same id.

        Raises
        ------
        KeyError
            Raised if a `str` key matches no component molecule.
        IndexError
            Raised if an `int` index is out of range.
        """
        if isinstance(item, str):
            # If several molecules share an id, the last one wins.
            id_dict = {str(p.id): p for p in self._molecules}
            return id_dict[item]
        if isinstance(item, int):
            return self._molecules[item]
        return Assembly(self._molecules[item], assembly_id=self.id)

    def __repr__(self):
        mol_types = Counter(x.molecule_type for x in self._molecules)
        repr_strs = []

        def describe(mol_type, singular, plural):
            # Pluralise on the count of this molecule type, not on the
            # total number of molecules in the Assembly.
            count = mol_types[mol_type]
            if count:
                repr_strs.append(
                    "{} {}".format(count, singular if count == 1 else plural)
                )

        describe("protein", "Polypeptide", "Polypeptides")
        describe("nucleic_acid", "Polynucleotide", "Polynucleotides")
        describe("ligands", "Ligand Group", "Ligand Groups")
        # Individual ligands: members of ligand groups plus any ligands
        # attached to the other molecules.
        ligand_count = 0
        for mol in self._molecules:
            if mol.molecule_type == "ligands":
                ligand_count += len(mol)
            else:
                ligand_count += 0 if not mol.ligands else len(mol.ligands)
        if ligand_count:
            repr_strs.append(
                "{} {}".format(
                    ligand_count, "Ligand" if ligand_count == 1 else "Ligands"
                )
            )
        describe("pseudo_group", "Pseudo Group", "Pseudo Groups")
        id_str = "" if not self.id else "({}) ".format(self.id)
        return "<Assembly {}containing {}>".format(id_str, ", ".join(repr_strs))

    def append(self, item):
        """Adds a `Polymer` to the `Assembly`.

        Raises
        ------
        TypeError
            Raised if other is any type other than `Polymer`.
        """
        if not isinstance(item, Polymer):
            raise TypeError("Only Polymer objects can be appended to an Assembly.")
        self._molecules.append(item)

    def extend(self, assembly):
        """Extends the `Assembly` with the contents of another `Assembly`.

        Raises
        ------
        TypeError
            Raised if other is any type other than `Assembly`.
        """
        if not isinstance(assembly, Assembly):
            raise TypeError("Only Assembly objects may be merged with an Assembly.")
        self._molecules.extend(assembly._molecules)

    def _included_molecules(self, ligands, pseudo_group):
        """Returns the molecules left after dropping disabled molecule types.

        A molecule type ("ligands", "pseudo_group") is excluded when its
        corresponding flag is falsy.
        """
        flags = (("ligands", ligands), ("pseudo_group", pseudo_group))
        excluded = [mol_type for mol_type, include in flags if not include]
        return self.filter_mol_types(excluded)

    def get_monomers(self, ligands=True, pseudo_group=False):
        """Retrieves all the `Monomers` from the `Assembly` object.

        Parameters
        ----------
        ligands : bool, optional
            If `True`, will include ligand `Monomers`.
        pseudo_group : bool, optional
            If `True`, will include pseudo atoms.

        Returns
        -------
        monomers : itertools.chain
            Iterator over the `Monomers` of every included molecule.
        """
        in_groups = self._included_molecules(ligands, pseudo_group)
        monomers = itertools.chain(
            *(p.get_monomers(ligands=ligands) for p in in_groups)
        )
        return monomers

    def get_ligands(self, solvent=True):
        """Retrieves all ligands from the `Assembly`.

        Parameters
        ----------
        solvent : bool, optional
            If `True`, solvent molecules will be included. Otherwise
            ligands whose `mol_code` is "HOH" are left out.

        Returns
        -------
        ligand_group : LigandGroup
            `LigandGroup` built from the selected `Ligand` objects.
        """
        ligand_list = [
            x
            for x in self.get_monomers()
            if isinstance(x, Ligand) and (solvent or x.mol_code != "HOH")
        ]
        return LigandGroup(monomers=ligand_list)

    def get_atoms(self, ligands=True, pseudo_group=False, inc_alt_states=False):
        """Flat list of all the `Atoms` in the `Assembly`.

        Parameters
        ----------
        ligands : bool, optional
            Include ligand `Atoms`.
        pseudo_group : bool, optional
            Include pseudo_group `Atoms`.
        inc_alt_states : bool, optional
            Include alternate sidechain conformations.

        Returns
        -------
        atoms : itertools.chain
            All the `Atoms` as a iterator.
        """
        atoms = itertools.chain(
            *(
                list(m.get_atoms(inc_alt_states=inc_alt_states))
                for m in self.get_monomers(ligands=ligands, pseudo_group=pseudo_group)
            )
        )
        return atoms

    def is_within(self, cutoff_dist, point, ligands=True):
        """Returns all atoms in AMPAL object within `cut-off` distance from the `point`.

        Parameters
        ----------
        cutoff_dist : float
            Cut-off distance passed to `find_atoms_within_distance`.
        point : coordinate
            Reference point passed to `find_atoms_within_distance`.
        ligands : bool, optional
            If `True`, ligand `Atoms` are included in the search.
        """
        return find_atoms_within_distance(
            self.get_atoms(ligands=ligands), cutoff_dist, point
        )

    def relabel_all(self):
        """Relabels all Polymers, Monomers and Atoms with default labeling."""
        self.relabel_polymers()
        self.relabel_monomers()
        self.relabel_atoms()

    def relabel_polymers(self, labels=None):
        """Relabels the component Polymers either in alphabetical order or
           using a list of labels.

        Parameters
        ----------
        labels : list, optional
            A list of new labels.

        Raises
        ------
        ValueError
            Raised if the number of labels does not match the number of
            component Polymer objects.
        """
        if labels:
            if len(self._molecules) != len(labels):
                raise ValueError(
                    "Number of polymers ({}) and number of labels ({}) must be equal.".format(
                        len(self._molecules), len(labels)
                    )
                )
            for polymer, label in zip(self._molecules, labels):
                polymer.id = label
        else:
            # 65 is ord("A"): default labels run "A", "B", "C", ...
            for i, polymer in enumerate(self._molecules):
                polymer.id = chr(i + 65)

    def relabel_monomers(self):
        """Relabels all Monomers in the component Polymers in numerical order."""
        for polymer in self._molecules:
            polymer.relabel_monomers()

    def relabel_atoms(self, start=1):
        """Relabels all Atoms in numerical order, offset by the start parameter.

        Parameters
        ----------
        start : int, optional
            Defines an offset for the labelling.
        """
        for counter, atom in enumerate(self.get_atoms(ligands=True), start):
            atom.id = counter

    @property
    def pdb(self):
        """Runs make_pdb in default mode."""
        return self.make_pdb()

    def make_pdb(
        self,
        ligands=True,
        alt_states=False,
        pseudo_group=False,
        header=True,
        footer=True,
    ):
        """Generates a PDB string for the Assembly.

        Parameters
        ----------
        ligands : bool, optional
            If `True`, will include ligands in the output.
        alt_states : bool, optional
            If `True`, will include alternate conformations in the output.
        pseudo_group : bool, optional
            If `True`, will include pseudo atoms in the output.
        header : bool, optional
            If `True` will write a header for output.
        footer : bool, optional
            If `True` will write a footer for output.

        Returns
        -------
        pdb_str : str
            String of the pdb for the Assembly. Generated by collating
            Polymer().make_pdb calls for the component Polymers, each
            followed by a `TER` line.
        """
        in_groups = self._included_molecules(ligands, pseudo_group)

        # NOTE(review): "HEADER " plus an 80-column padded field gives a
        # line wider than 80 columns; kept as-is so existing output does
        # not change.
        pdb_header = (
            "HEADER {:<80}\n".format("ISAMBARD Model {}".format(self.id))
            if header
            else ""
        )
        pdb_body = "".join(
            x.make_pdb(alt_states=alt_states, inc_ligands=ligands)
            + "{:<80}\n".format("TER")
            for x in in_groups
        )
        pdb_footer = "{:<80}\n".format("END") if footer else ""
        return "".join([pdb_header, pdb_body, pdb_footer])

    # Protein specific methods
    @property
    def backbone(self):
        """Generates a new `Assembly` containing only the backbone atoms.

        Notes
        -----
        Metadata is not currently preserved from the parent object.
        Sequence data is retained, but only the main chain atoms are
        retained. Molecules without a `backbone` attribute are skipped.

        Returns
        -------
        bb_assembly : Assembly
            `Assembly` containing only the backbone atoms of the original
            `Assembly`.
        """
        bb_molecules = [p.backbone for p in self._molecules if hasattr(p, "backbone")]
        return Assembly(bb_molecules, assembly_id=self.id)

    @property
    def primitives(self):
        """Generates a new `Assembly` containing the primitives of each Polymer.

        Notes
        -----
        Metadata is not currently preserved from the parent object.
        Molecules without a `primitive` attribute are skipped.

        Returns
        -------
        prim_assembly : Assembly
            `Assembly` containing only the primitives of the `Polymers`
            in the original `Assembly`.
        """
        prim_molecules = [
            p.primitive for p in self._molecules if hasattr(p, "primitive")
        ]
        return Assembly(molecules=prim_molecules, assembly_id=self.id)

    @property
    def sequences(self):
        """Returns the sequence of each `Polymer` in the `Assembly` as a list.

        Returns
        -------
        sequences : [str]
            List of sequences. Molecules without a `sequence` attribute
            are skipped.
        """
        return [x.sequence for x in self._molecules if hasattr(x, "sequence")]

    @property
    def molecular_weight(self):
        """Returns the molecular weight of the `Assembly` in Daltons."""
        return sequence_molecular_weight("".join(self.sequences))

    @property
    def molar_extinction_280(self):
        """Returns the extinction co-efficient of the `Assembly` at 280 nm."""
        return sequence_molar_extinction_280("".join(self.sequences))

    @property
    def isoelectric_point(self):
        """Returns the isoelectric point of the `Assembly`."""
        return sequence_isoelectric_point("".join(self.sequences))

    @property
    def fasta(self):
        """Generates a FASTA string for the `Assembly`.

        Notes
        -----
        Explanation of FASTA format: https://en.wikipedia.org/wiki/FASTA_format
        Recommendation that all lines of text be shorter than 80
        characters is adhered to. Format of PDBID|CHAIN|SEQUENCE is
        consistent with files downloaded from the PDB. Uppercase
        PDBID used for consistency with files downloaded from the PDB.
        Useful for feeding into cdhit and then running sequence clustering.

        Returns
        -------
        fasta_str : str
            String of the fasta file for the `Assembly`.
        """
        max_line_length = 79
        lines = []
        for p in self._molecules:
            if hasattr(p, "sequence"):
                lines.append(
                    ">{0}:{1}|PDBID|CHAIN|SEQUENCE".format(self.id.upper(), p.id)
                )
                seq = p.sequence
                # Wrap the sequence so no line exceeds max_line_length.
                lines.extend(
                    seq[i : i + max_line_length]
                    for i in range(0, len(seq), max_line_length)
                )
        return "".join("{0}\n".format(line) for line in lines)

    def tag_torsion_angles(self, force=False):
        """Tags each `Monomer` in the `Assembly` with its torsion angles.

        Notes
        -----
        Only molecules whose `molecule_type` is "protein" are tagged.

        Parameters
        ----------
        force : bool, optional
            If `True`, the tag will be run even if `Monomers` are already
            tagged.
        """
        for polymer in self._molecules:
            if polymer.molecule_type == "protein":
                polymer.tag_torsion_angles(force=force)

    def tag_ca_geometry(
        self, force=False, reference_axis=None, reference_axis_name="ref_axis"
    ):
        """Tags each `Monomer` in the `Assembly` with its helical geometry.

        Notes
        -----
        Only molecules whose `molecule_type` is "protein" are tagged.

        Parameters
        ----------
        force : bool, optional
            If True the tag will be run even if `Monomers` are already tagged.
        reference_axis : list(numpy.array or tuple or list), optional
            Coordinates to feed to geometry functions that depend on
            having a reference axis.
        reference_axis_name : str, optional
            Used to name the keys in tags at `Chain` and `Residue` level.
        """
        for polymer in self._molecules:
            if polymer.molecule_type == "protein":
                polymer.tag_ca_geometry(
                    force=force,
                    reference_axis=reference_axis,
                    reference_axis_name=reference_axis_name,
                )

    def filter_mol_types(self, mol_types):
        """Returns the component molecules whose type is not in `mol_types`.

        Parameters
        ----------
        mol_types : [str]
            Values of `molecule_type` to exclude.

        Returns
        -------
        molecules : list
            Molecules of the `Assembly` whose `molecule_type` is not
            listed in `mol_types`, in their original order.
        """
        return [x for x in self._molecules if x.molecule_type not in mol_types]


# Module-level authorship metadata.
__author__ = "Christopher W. Wood, Gail J. Bartlett"
# Source code for ampal.assembly
#
# AMPAL
#
# Navigation
#
# Related Topics