"""AMPAL objects that represent protein."""
from collections import OrderedDict
import warnings
import numpy
from ampal.base_ampal import Polymer, Monomer, Atom
from ampal.pseudo_atoms import Primitive
from ampal.analyse_protein import (
make_primitive_extrapolate_ends,
measure_torsion_angles,
residues_per_turn,
polymer_to_reference_axis_distances,
crick_angles,
alpha_angles,
sequence_molecular_weight,
sequence_molar_extinction_280,
sequence_isoelectric_point,
measure_sidechain_torsion_angles,
)
from ampal.interactions import (
generate_covalent_bond_graph,
generate_bond_subgraphs_from_break,
find_covalent_bonds,
)
from .amino_acids import (
get_aa_code,
get_aa_letter,
ideal_backbone_bond_lengths,
ideal_backbone_bond_angles,
)
from .geometry import (
Quaternion,
unit_vector,
dihedral,
find_transformations,
distance,
angle_between_vectors,
)
from .ampal_warnings import MalformedPDBWarning
[docs]def flat_list_to_polymer(atom_list, atom_group_s=4):
"""Takes a flat list of atomic coordinates and converts it to a `Polymer`.
Parameters
----------
atom_list : [Atom]
Flat list of coordinates.
atom_group_s : int, optional
Size of atom groups.
Returns
-------
polymer : Polypeptide
`Polymer` object containing atom coords converted `Monomers`.
Raises
------
ValueError
Raised if `atom_group_s` != 4 or 5
"""
atom_labels = ["N", "CA", "C", "O", "CB"]
atom_elements = ["N", "C", "C", "O", "C"]
atoms_coords = [
atom_list[x : x + atom_group_s] for x in range(0, len(atom_list), atom_group_s)
]
atoms = [[Atom(x[0], x[1]) for x in zip(y, atom_elements)] for y in atoms_coords]
if atom_group_s == 5:
monomers = [Residue(OrderedDict(zip(atom_labels, x)), "ALA") for x in atoms]
elif atom_group_s == 4:
monomers = [Residue(OrderedDict(zip(atom_labels, x)), "GLY") for x in atoms]
else:
raise ValueError(
"Parameter atom_group_s must be 4 or 5 so atoms can be labeled correctly."
)
polymer = Polypeptide(monomers=monomers)
return polymer
[docs]def flat_list_to_dummy_chain(atom_list, atom_group_s=1):
"""Converts flat list of coordinates into dummy C-alpha carbons
Parameters
----------
atom_list : [Atom]
Flat list of co-ordinates.
atom_group_s : int, optional
Size of atom groups.
Returns
-------
polymer : Polypeptide
`Polymer` object containing atom coord converted `Monomers`
with 'DUM' atom name.
"""
atom_labels = ["CA"]
atom_elements = ["C"]
atoms_coords = [
atom_list[x : x + atom_group_s] for x in range(0, len(atom_list), atom_group_s)
]
atoms = [[Atom(x[0], x[1]) for x in zip(y, atom_elements)] for y in atoms_coords]
monomers = [Residue(OrderedDict(zip(atom_labels, x)), "DUM") for x in atoms]
polymer = Polypeptide(monomers=monomers)
return polymer
[docs]def align(target, mobile, target_i=0, mobile_i=0):
"""Aligns one Polypeptide (mobile) to another (target).
Notes
-----
This function directly modifies atoms of the mobile Polypeptide!
It does not return a new object.
Parameters
----------
target : Polypeptide
Polypeptide to be aligned to.
mobile : Polypeptide
Polypeptide to be moved during alignment.
target_i : int, optional
Index of `Residue` in target to align to.
mobile_i : int, optional
Index of `Residue` in mobile to be aligned.
"""
# First, align N->CA vectors.
s1, e1, s2, e2 = [
x._vector
for x in [
mobile[mobile_i]["N"],
mobile[mobile_i]["CA"],
target[target_i]["N"],
target[target_i]["CA"],
]
]
translation, angle, axis, point = find_transformations(
s1, e1, s2, e2, radians=False
)
# Rotation first, Then translation.
mobile.rotate(angle=angle, axis=axis, point=point, radians=False)
mobile.translate(vector=translation)
# Second, rotate about N->CA axis to align CA->C vectors.
angle = dihedral(
mobile[mobile_i]["C"],
mobile[mobile_i]["N"],
mobile[mobile_i]["CA"],
target[target_i]["C"],
)
axis = target[target_i]["CA"] - target[target_i]["N"]
point = target[target_i]["N"]._vector
mobile.rotate(angle=angle, axis=axis, point=point)
return
[docs]class Polypeptide(Polymer):
"""Container for `Residues`, inherits from `Polymer`.
Parameters
----------
monomers : Residue or [Residue], optional
`Residue` or list containing `Residue` objects to form the
`Polypeptide`.
polymer_id : str, optional
An ID that the user can use to identify the `Polypeptide`. This is
used when generating a pdb file using `Polypeptide().pdb`.
parent : ampal.Assembly, optional
Reference to `Assembly` containing the `Polymer`.
sl : int, optional
The default smoothing level used when calculating the
backbone primitive.
Attributes
----------
id : str
`Polypeptide` ID
parent : ampal.Assembly or None
Reference to `Assembly` containing the `Polypeptide`
molecule_type : str
A description of the type of `Polymer` i.e. Protein, DNA etc.
ligands : ampal.LigandGroup
A `LigandGroup` containing all the `Ligands` associated with this
`Polypeptide` chain.
tags : dict
A dictionary containing information about this AMPAL object.
The tags dictionary is used by AMPAL to cache information
about this object, but is also intended to be used by users
to store any relevant information they have.
sl : int
The default smoothing level used when calculating the
backbone primitive.
Raises
------
TypeError
`Polymer` type objects can only be initialised empty or using
a `Monomer`.
"""
def __init__(self, monomers=None, polymer_id=" ", parent=None, sl=2):
super().__init__(
monomers=monomers,
polymer_id=polymer_id,
molecule_type="protein",
parent=parent,
sl=sl,
)
def __add__(self, other):
if isinstance(other, Polymer):
merged_polymer = self._monomers + other._monomers
else:
raise TypeError("Only Polymer objects may be merged with a Polymer.")
return Polypeptide(monomers=merged_polymer, polymer_id=self.id)
def __getitem__(self, item):
if isinstance(item, str):
id_dict = {str(m.id): m for m in self._monomers}
return id_dict[item]
elif isinstance(item, int):
return self._monomers[item]
return Polypeptide(self._monomers[item], polymer_id=self.id)
def __repr__(self):
if len(self.sequence) > 15:
seq = self.sequence[:12] + "..."
else:
seq = self.sequence
return "<Polypeptide containing {} {}. Sequence: {}>".format(
len(self._monomers),
"Residue" if len(self._monomers) == 1 else "Residues",
seq,
)
[docs] def get_slice_from_res_id(self, start, end):
"""Returns a new `Polypeptide` containing the `Residues` in start/end range.
Parameters
----------
start : str
string representing start residue id (PDB numbering)
end : str
string representing end residue id (PDB numbering)
Returns
-------
slice_polymer : Polymer
Polymer containing the residue range specified by start-end
"""
id_dict = {str(m.id): m for m in self._monomers}
slice_polymer = Polypeptide(
[id_dict[str(x)] for x in range(int(start), int(end) + 1)], self.id
)
return slice_polymer
@property
def backbone(self):
"""Returns a new `Polymer` containing only the backbone atoms.
Notes
-----
Metadata is not currently preserved from the parent object.
Sequence data is retained, but only the main chain atoms are retained.
Returns
-------
bb_poly : Polypeptide
Polymer containing only the backbone atoms of the original
Polymer.
"""
bb_poly = Polypeptide([x.backbone for x in self._monomers], self.id)
return bb_poly
@property
def primitive(self):
"""Primitive of the backbone.
Notes
-----
This is the average of the positions of all the CAs in frames
of `sl` `Residues`.
"""
cas = self.get_reference_coords()
primitive_coords = make_primitive_extrapolate_ends(cas, smoothing_level=self.sl)
primitive = Primitive.from_coordinates(primitive_coords)
primitive.relabel_monomers([x.id for x in self])
primitive.id = self.id
primitive.parent = self
return primitive
@property
def fasta(self):
"""Generates sequence data for the protein in FASTA format."""
max_line_length = 79
fasta_str = ">{0}:{1}|PDBID|CHAIN|SEQUENCE\n".format(
self.parent.id.upper(), self.id
)
seq = self.sequence
split_seq = [
seq[i : i + max_line_length] for i in range(0, len(seq), max_line_length)
]
for seq_part in split_seq:
fasta_str += "{0}\n".format(seq_part)
return fasta_str
@property
def sequence(self):
"""Returns the sequence of the `Polymer` as a string.
Returns
-------
sequence : str
String of the `Residue` sequence of the `Polypeptide`.
"""
seq = [x.mol_letter for x in self._monomers]
return "".join(seq)
@property
def molecular_weight(self):
"""Returns the molecular weight of the `Assembly` in Daltons."""
return sequence_molecular_weight(self.sequence)
@property
def molar_extinction_280(self):
"""Returns the extinction co-efficient of the `Assembly` at 280 nm."""
return sequence_molar_extinction_280(self.sequence)
@property
def isoelectric_point(self):
"""Returns the isoelectric point of the `Assembly`."""
return sequence_isoelectric_point(self.sequence)
@property
def backbone_bond_lengths(self):
"""Dictionary containing backbone bond lengths as lists of floats.
Returns
-------
bond_lengths : dict
Keys are `n_ca`, `ca_c`, `c_o` and `c_n`, referring to the
N-CA, CA-C, C=O and C-N bonds respectively. Values are
lists of floats : the bond lengths in Angstroms.
The lists of n_ca, ca_c and c_o are of length k for
a Polypeptide containing k Residues. The list of c_n bonds
is of length k-1 for a Polypeptide containing k Residues
(C-N formed between successive `Residue` pairs).
"""
bond_lengths = dict(
n_ca=[distance(r["N"], r["CA"]) for r in self.get_monomers(ligands=False)],
ca_c=[distance(r["CA"], r["C"]) for r in self.get_monomers(ligands=False)],
c_o=[distance(r["C"], r["O"]) for r in self.get_monomers(ligands=False)],
c_n=[
distance(r1["C"], r2["N"])
for r1, r2 in [(self[i], self[i + 1]) for i in range(len(self) - 1)]
],
)
return bond_lengths
@property
def backbone_bond_angles(self):
"""Dictionary containing backbone bond angles as lists of floats.
Returns
-------
bond_angles : dict
Keys are `n_ca_c`, `ca_c_o`, `ca_c_n` and `c_n_ca`, referring
to the N-CA-C, CA-C=O, CA-C-N and C-N-CA angles respectively.
Values are lists of floats : the bond angles in degrees.
The lists of n_ca_c, ca_c_o are of length k for a `Polypeptide`
containing k `Residues`. The list of ca_c_n and c_n_ca are of
length k-1 for a `Polypeptide` containing k `Residues` (These
angles are across the peptide bond, and are therefore formed
between successive `Residue` pairs).
"""
bond_angles = dict(
n_ca_c=[
angle_between_vectors(r["N"] - r["CA"], r["C"] - r["CA"])
for r in self.get_monomers(ligands=False)
],
ca_c_o=[
angle_between_vectors(r["CA"] - r["C"], r["O"] - r["C"])
for r in self.get_monomers(ligands=False)
],
ca_c_n=[
angle_between_vectors(r1["CA"] - r1["C"], r2["N"] - r1["C"])
for r1, r2 in [(self[i], self[i + 1]) for i in range(len(self) - 1)]
],
c_n_ca=[
angle_between_vectors(r1["C"] - r2["N"], r2["CA"] - r2["N"])
for r1, r2 in [(self[i], self[i + 1]) for i in range(len(self) - 1)]
],
)
return bond_angles
[docs] def c_join(
self,
other,
psi=-40.76,
omega=-178.25,
phi=-65.07,
o_c_n_angle=None,
c_n_ca_angle=None,
c_n_length=None,
relabel=True,
):
"""Joins other to self at the C-terminus via a peptide bond.
Notes
-----
This function directly modifies self. It does not return a new object.
Parameters
----------
other: Residue or Polypeptide
psi: float, optional
Psi torsion angle (degrees) between final `Residue` of self
and first `Residue` of other.
omega: float, optional
Omega torsion angle (degrees) between final `Residue` of
self and first `Residue` of other.
phi: float, optional
Phi torsion angle (degrees) between final `Residue` of self
and first `Residue` of other.
o_c_n_angle: float or None, optional
Desired angle between O, C (final `Residue` of self) and N
(first `Residue` of other) atoms. If `None`, default value is
taken from `ideal_backbone_bond_angles`.
c_n_ca_angle: float or None, optional
Desired angle between C (final `Residue` of self) and N, CA
(first `Residue` of other) atoms. If `None`, default value is
taken from `ideal_backbone_bond_angles`.
c_n_length: float or None, optional
Desired peptide bond length between final `Residue` of self
and first `Residue` of other. If `None`, default value is taken
from `ideal_backbone_bond_lengths`.
relabel: bool, optional
If `True`, `relabel_all` is run on self before returning.
Raises
------
TypeError:
If other is not a `Residue` or a Polypeptide.
"""
if isinstance(other, Residue):
other = Polypeptide([other])
if not isinstance(other, Polypeptide):
raise TypeError(
"Only Polypeptide or Residue objects can be joined to a Polypeptide"
)
if abs(omega) >= 90:
peptide_conformation = "trans"
else:
peptide_conformation = "cis"
if o_c_n_angle is None:
o_c_n_angle = ideal_backbone_bond_angles[peptide_conformation]["o_c_n"]
if c_n_ca_angle is None:
c_n_ca_angle = ideal_backbone_bond_angles[peptide_conformation]["c_n_ca"]
if c_n_length is None:
c_n_length = ideal_backbone_bond_lengths["c_n"]
r1 = self[-1]
r1_ca = r1["CA"]._vector
r1_c = r1["C"]._vector
r1_o = r1["O"]._vector
# p1 is point that will be used to position the N atom of r2.
p1 = r1_o[:]
# rotate p1 by o_c_n_angle, about axis perpendicular to the
# r1_ca, r1_c, r1_o plane, passing through r1_c.
axis = numpy.cross((r1_ca - r1_c), (r1_o - r1_c))
q = Quaternion.angle_and_axis(angle=o_c_n_angle, axis=axis)
p1 = q.rotate_vector(v=p1, point=r1_c)
# Ensure p1 is separated from r1_c by the correct distance.
p1 = r1_c + (c_n_length * unit_vector(p1 - r1_c))
# rotate p1 and r1['O'] by to obtain desired psi value at the join.
measured_psi = dihedral(r1["N"], r1["CA"], r1["C"], p1)
q = Quaternion.angle_and_axis(angle=(psi - measured_psi), axis=(r1_c - r1_ca))
p1 = q.rotate_vector(v=p1, point=r1_c)
r1["O"]._vector = q.rotate_vector(v=r1_o, point=r1_c)
# translate other so that its first N atom is at p1
other.translate(vector=(p1 - other[0]["N"]._vector))
# rotate other so that c_n_ca angle is correct.
v1 = r1_c - other[0]["N"]._vector
v2 = other[0]["CA"]._vector - other[0]["N"]._vector
measured_c_n_ca = angle_between_vectors(v1, v2)
axis = numpy.cross(v1, v2)
other.rotate(
angle=(c_n_ca_angle - measured_c_n_ca),
axis=axis,
point=other[0]["N"]._vector,
)
# rotate other to obtain desired omega and phi values at the join
measured_omega = dihedral(r1["CA"], r1["C"], other[0]["N"], other[0]["CA"])
other.rotate(
angle=(omega - measured_omega),
axis=(other[0]["N"] - r1["C"]),
point=other[0]["N"]._vector,
)
measured_phi = dihedral(r1["C"], other[0]["N"], other[0]["CA"], other[0]["C"])
other.rotate(
angle=(phi - measured_phi),
axis=(other[0]["CA"] - other[0]["N"]),
point=other[0]["CA"]._vector,
)
self.extend(other)
if relabel:
self.relabel_all()
self.tags["assigned_ff"] = False
return
[docs] def n_join(
self,
other,
psi=-40.76,
omega=-178.25,
phi=-65.07,
o_c_n_angle=None,
c_n_ca_angle=None,
c_n_length=None,
relabel=True,
):
"""Joins other to self at the N-terminus via a peptide bond.
Notes
-----
This function directly modifies self. It does not return a new object.
Parameters
----------
other: Residue or Polypeptide
psi: float
Psi torsion angle (degrees) between final `Residue` of other
and first `Residue` of self.
omega: float
Omega torsion angle (degrees) between final `Residue` of
other and first `Residue` of self.
phi: float
Phi torsion angle (degrees) between final `Residue` of other
and first `Residue` of self.
o_c_n_angle: float or None
Desired angle between O, C (final `Residue` of other) and N
(first `Residue` of self) atoms. If `None`, default value is
taken from `ideal_backbone_bond_angles`.
c_n_ca_angle: float or None
Desired angle between C (final `Residue` of other) and N, CA
(first `Residue` of self) atoms. If `None`, default value is taken
from `ideal_backbone_bond_angles`.
c_n_length: float or None
Desired peptide bond length between final `Residue` of other
and first `Residue` of self. If None, default value is taken
from ideal_backbone_bond_lengths.
relabel: bool
If True, relabel_all is run on self before returning.
Raises
------
TypeError:
If other is not a `Residue` or a `Polypeptide`
"""
if isinstance(other, Residue):
other = Polypeptide([other])
if not isinstance(other, Polypeptide):
raise TypeError(
"Only Polypeptide or Residue objects can be joined to a Polypeptide"
)
if abs(omega) >= 90:
peptide_conformation = "trans"
else:
peptide_conformation = "cis"
if o_c_n_angle is None:
o_c_n_angle = ideal_backbone_bond_angles[peptide_conformation]["o_c_n"]
if c_n_ca_angle is None:
c_n_ca_angle = ideal_backbone_bond_angles[peptide_conformation]["c_n_ca"]
if c_n_length is None:
c_n_length = ideal_backbone_bond_lengths["c_n"]
r1 = self[0]
r1_n = r1["N"]._vector
r1_ca = r1["CA"]._vector
r1_c = r1["C"]._vector
# p1 is point that will be used to position the C atom of r2.
p1 = r1_ca[:]
# rotate p1 by c_n_ca_angle, about axis perpendicular to the
# r1_n, r1_ca, r1_c plane, passing through r1_ca.
axis = numpy.cross((r1_ca - r1_n), (r1_c - r1_n))
q = Quaternion.angle_and_axis(angle=c_n_ca_angle, axis=axis)
p1 = q.rotate_vector(v=p1, point=r1_n)
# Ensure p1 is separated from r1_n by the correct distance.
p1 = r1_n + (c_n_length * unit_vector(p1 - r1_n))
# translate other so that its final C atom is at p1
other.translate(vector=(p1 - other[-1]["C"]._vector))
# Force CA-C=O-N to be in a plane, and fix O=C-N angle accordingly
measured_dihedral = dihedral(
other[-1]["CA"], other[-1]["C"], other[-1]["O"], r1["N"]
)
desired_dihedral = 180.0
axis = other[-1]["O"] - other[-1]["C"]
other.rotate(
angle=(measured_dihedral - desired_dihedral),
axis=axis,
point=other[-1]["C"]._vector,
)
axis = numpy.cross(other[-1]["O"] - other[-1]["C"], r1["N"] - other[-1]["C"])
measured_o_c_n = angle_between_vectors(
other[-1]["O"] - other[-1]["C"], r1["N"] - other[-1]["C"]
)
other.rotate(
angle=(measured_o_c_n - o_c_n_angle),
axis=axis,
point=other[-1]["C"]._vector,
)
# rotate other to obtain desired phi, omega, psi values at the join.
measured_phi = dihedral(other[-1]["C"], r1["N"], r1["CA"], r1["C"])
other.rotate(angle=(phi - measured_phi), axis=(r1_n - r1_ca), point=r1_ca)
measured_omega = dihedral(other[-1]["CA"], other[-1]["C"], r1["N"], r1["CA"])
other.rotate(
angle=(measured_omega - omega), axis=(r1["N"] - other[-1]["C"]), point=r1_n
)
measured_psi = dihedral(
other[-1]["N"], other[-1]["CA"], other[-1]["C"], r1["N"]
)
other.rotate(
angle=-(measured_psi - psi),
axis=(other[-1]["CA"] - other[-1]["C"]),
point=other[-1]["CA"]._vector,
)
self._monomers = other._monomers + self._monomers
if relabel:
self.relabel_all()
self.tags["assigned_ff"] = False
return
[docs] def tag_sidechain_dihedrals(self, force=False):
"""Tags each monomer with side-chain dihedral angles. For residues that
do not have any rotamers (Alanine and Glycine) the rotamer is tagged as
0 and chi_angles as None.
force: bool, optional
If `True` the tag will be run even if `Residues` are
already tagged.
"""
chi_tagged = ["chi_angles" in x.tags.keys() for x in self._monomers]
rot_tagged = ["rotamers" in x.tags.keys() for x in self._monomers]
if (not all(chi_tagged)) or (not all(rot_tagged)) or force:
for monomer in self._monomers:
if monomer.mol_letter == "G" or monomer.mol_letter == "A":
monomer.tags["rotamers"] = [0]
monomer.tags["chi_angles"] = None
else:
chi_angles_rotamer = measure_sidechain_torsion_angles(
monomer, verbose=False
)
if chi_angles_rotamer:
chi_angles, rotamer = chi_angles_rotamer
monomer.tags["rotamers"] = rotamer
monomer.tags["chi_angles"] = chi_angles
# Rotamer not found: either residue does not have rotamers or
# it is not a common residue
else:
monomer.tags["rotamers"] = None
monomer.tags["chi_angles"] = None
return
[docs] def tag_torsion_angles(self, force=False):
"""Tags each Monomer of the Polymer with its omega, phi and psi torsion angle.
Parameters
----------
force : bool, optional
If `True` the tag will be run even if `Residues` are
already tagged.
"""
tagged = ["omega" in x.tags.keys() for x in self._monomers]
if (not all(tagged)) or force:
tas = measure_torsion_angles(self._monomers)
for monomer, (omega, phi, psi) in zip(self._monomers, tas):
monomer.tags["omega"] = omega
monomer.tags["phi"] = phi
monomer.tags["psi"] = psi
monomer.tags["tas"] = (omega, phi, psi)
return
[docs] def rise_per_residue(self):
"""List of rise per residue values along the `Polypeptide`.
Notes
-----
Calculated from `Polypeptide.primitive`."""
return self.primitive.rise_per_residue()
[docs] def radii_of_curvature(self):
"""List of radius of curvature values along the `Polypeptide`."""
return self.primitive.radii_of_curvature()
[docs] def tag_ca_geometry(
self, force=False, reference_axis=None, reference_axis_name="ref_axis"
):
"""Tags each `Residue` with rise_per_residue, radius_of_curvature and residues_per_turn.
Parameters
----------
force : bool, optional
If `True` the tag will be run even if `Residues` are already
tagged.
reference_axis : list(numpy.array or tuple or list), optional
Coordinates to feed to geometry functions that depend on
having a reference axis.
reference_axis_name : str, optional
Used to name the keys in tags at `Polypeptide` and `Residue` level.
"""
tagged = ["rise_per_residue" in x.tags.keys() for x in self._monomers]
if (not all(tagged)) or force:
# Assign tags None if Polymer is too short to have a primitive.
if len(self) < 7:
rprs = [None] * len(self)
rocs = [None] * len(self)
rpts = [None] * len(self)
else:
rprs = self.rise_per_residue()
rocs = self.radii_of_curvature()
rpts = residues_per_turn(self)
for monomer, rpr, roc, rpt in zip(self._monomers, rprs, rocs, rpts):
monomer.tags["rise_per_residue"] = rpr
monomer.tags["radius_of_curvature"] = roc
monomer.tags["residues_per_turn"] = rpt
# Functions that require a reference_axis.
if (reference_axis is not None) and (len(reference_axis) == len(self)):
# Set up arguments to pass to functions.
ref_axis_args = dict(
p=self,
reference_axis=reference_axis,
tag=True,
reference_axis_name=reference_axis_name,
)
# Run the functions.
polymer_to_reference_axis_distances(**ref_axis_args)
crick_angles(**ref_axis_args)
alpha_angles(**ref_axis_args)
return
[docs] def valid_backbone_bond_lengths(self, atol=0.1):
"""True if all backbone bonds are within atol Angstroms of the expected distance.
Notes
-----
Ideal bond lengths taken from [1].
References
----------
.. [1] Schulz, G. E, and R. Heiner Schirmer. Principles Of
Protein Structure. New York: Springer-Verlag, 1979.
Parameters
----------
atol : float, optional
Tolerance value in Angstoms for the absolute deviation
away from ideal backbone bond lengths.
"""
bond_lengths = self.backbone_bond_lengths
a1 = numpy.allclose(
bond_lengths["n_ca"],
[ideal_backbone_bond_lengths["n_ca"]] * len(self),
atol=atol,
)
a2 = numpy.allclose(
bond_lengths["ca_c"],
[ideal_backbone_bond_lengths["ca_c"]] * len(self),
atol=atol,
)
a3 = numpy.allclose(
bond_lengths["c_o"],
[ideal_backbone_bond_lengths["c_o"]] * len(self),
atol=atol,
)
a4 = numpy.allclose(
bond_lengths["c_n"],
[ideal_backbone_bond_lengths["c_n"]] * (len(self) - 1),
atol=atol,
)
return all([a1, a2, a3, a4])
[docs] def valid_backbone_bond_angles(self, atol=20):
"""True if all backbone bond angles are within atol degrees of their expected values.
Notes
-----
Ideal bond angles taken from [1].
References
----------
.. [1] Schulz, G. E, and R. Heiner Schirmer. Principles Of
Protein Structure. New York: Springer-Verlag, 1979.
Parameters
----------
atol : float, optional
Tolerance value in degrees for the absolute deviation
away from ideal backbone bond angles.
"""
bond_angles = self.backbone_bond_angles
omegas = [x[0] for x in measure_torsion_angles(self)]
trans = [
"trans" if (omega is None) or (abs(omega) >= 90) else "cis"
for omega in omegas
]
ideal_n_ca_c = [ideal_backbone_bond_angles[x]["n_ca_c"] for x in trans]
ideal_ca_c_o = [
ideal_backbone_bond_angles[trans[i + 1]]["ca_c_o"]
for i in range(len(trans) - 1)
]
ideal_ca_c_o.append(ideal_backbone_bond_angles["trans"]["ca_c_o"])
ideal_ca_c_n = [ideal_backbone_bond_angles[x]["ca_c_n"] for x in trans[1:]]
ideal_c_n_ca = [ideal_backbone_bond_angles[x]["c_n_ca"] for x in trans[1:]]
a1 = numpy.allclose(bond_angles["n_ca_c"], [ideal_n_ca_c], atol=atol)
a2 = numpy.allclose(bond_angles["ca_c_o"], [ideal_ca_c_o], atol=atol)
a3 = numpy.allclose(bond_angles["ca_c_n"], [ideal_ca_c_n], atol=atol)
a4 = numpy.allclose(bond_angles["c_n_ca"], [ideal_c_n_ca], atol=atol)
return all([a1, a2, a3, a4])
[docs]class Residue(Monomer):
"""Represents a amino acid `Residue`.
Parameters
----------
atoms : OrderedDict, optional
OrderedDict containing Atoms for the Monomer. OrderedDict
is used to maintain the order items were added to the
dictionary.
mol_code : str, optional
One or three letter code that represents the monomer.
monomer_id : str, optional
String used to identify the residue.
insertion_code : str, optional
Insertion code of monomer, used if reading from pdb.
is_hetero : bool, optional
True if is a hetero atom in pdb. Helps with PDB formatting.
parent : ampal.Polypeptide, optional
Reference to `Polypeptide` containing the `Residue`.
Attributes
----------
mol_code : str
PDB molecule code that represents the `Residue`.
insertion_code : str
Insertion code of `Residue`, used if reading from pdb.
is_hetero : bool
True if is a hetero atom in pdb. Helps with PDB formatting.
states : dict
Contains an `OrderedDicts` containing atom information for each
state available for the `Residue`.
id : str
String used to identify the residue.
reference_atom : str
The key that corresponds to the reference atom. This is used
by various functions, for example backbone primitives are
calculated using the atom defined using this key.
parent : Polypeptide or None
A reference to the `Polypeptide` containing this `Residue`.
tags : dict
A dictionary containing information about this AMPAL object.
The tags dictionary is used by AMPAL to cache information
about this object, but is also intended to be used by users
to store any relevant information they have.
Raises
------
ValueError
Raised if `mol_code` is not length 1 or 3.
"""
def __init__(
self,
atoms=None,
mol_code="UNK",
monomer_id=" ",
insertion_code=" ",
is_hetero=False,
parent=None,
):
super(Residue, self).__init__(atoms, monomer_id, parent=parent)
if len(mol_code) == 3:
self.mol_code = mol_code
self.mol_letter = get_aa_letter(mol_code)
elif len(mol_code) == 1:
self.mol_code = get_aa_code(mol_code)
self.mol_letter = mol_code
else:
raise ValueError(
"Monomer requires either a 1-letter or a 3-letter "
"amino acid code ({})".format(mol_code)
)
self.insertion_code = insertion_code
self.is_hetero = is_hetero
self.reference_atom = "CA"
def __repr__(self):
return "<Residue containing {} {}. Residue code: {}>".format(
len(self.atoms), "Atom" if len(self.atoms) == 1 else "Atoms", self.mol_code
)
@property
def backbone(self):
"""Returns a new `Residue` containing only the backbone atoms.
Returns
-------
bb_monomer : Residue
`Residue` containing only the backbone atoms of the original
`Monomer`.
Raises
------
IndexError
Raise if the `atoms` dict does not contain the backbone
atoms (N, CA, C, O).
"""
try:
backbone = OrderedDict(
[
("N", self.atoms["N"]),
("CA", self.atoms["CA"]),
("C", self.atoms["C"]),
("O", self.atoms["O"]),
]
)
except KeyError:
missing_atoms = filter(
lambda x: x not in self.atoms.keys(), ("N", "CA", "C", "O")
)
raise KeyError(
"Error in residue {} {} {}, missing ({}) atoms. "
"`atoms` must be an `OrderedDict` with coordinates "
"defined for the backbone (N, CA, C, O) atoms.".format(
self.parent.id, self.mol_code, self.id, ", ".join(missing_atoms)
)
)
bb_monomer = Residue(
backbone,
self.mol_code,
monomer_id=self.id,
insertion_code=self.insertion_code,
is_hetero=self.is_hetero,
)
return bb_monomer
@property
def unique_id(self):
"""Generates a tuple that uniquely identifies a `Monomer` in an `Assembly`.
Notes
-----
The unique_id will uniquely identify each monomer within a polymer.
If each polymer in an assembly has a distinct id, it will uniquely
identify each monomer within the assembly.
The hetero-flag is defined as in Biopython as a string that is
either a single whitespace in the case of a non-hetero atom,
or 'H_' plus the name of the hetero-residue (e.g. 'H_GLC' in
the case of a glucose molecule), or 'W' in the case of a water
molecule.
For more information, see the Biopython documentation or this
Biopython wiki page:
http://biopython.org/wiki/The_Biopython_Structural_Bioinformatics_FAQ
Returns
-------
unique_id : tuple
unique_id[0] is the polymer_id unique_id[1] is a triple
of the hetero-flag, the monomer id (residue number) and the
insertion code.
"""
if self.is_hetero:
if self.mol_code == "HOH":
hetero_flag = "W"
else:
hetero_flag = "H_{0}".format(self.mol_code)
else:
hetero_flag = " "
return self.parent.id, (hetero_flag, self.id, self.insertion_code)
@property
def side_chain(self):
"""List of the side-chain atoms (R-group).
Notes
-----
Returns empty list for glycine.
Returns
-------
side_chain_atoms: list(`Atoms`)
"""
side_chain_atoms = []
if self.mol_code != "GLY":
covalent_bond_graph = generate_covalent_bond_graph(
find_covalent_bonds(self)
)
try:
subgraphs = generate_bond_subgraphs_from_break(
covalent_bond_graph, self["CA"], self["CB"]
)
if len(subgraphs) == 1:
subgraphs = generate_bond_subgraphs_from_break(
subgraphs[0], self["CD"], self["N"]
)
if len(subgraphs) == 2:
for g in subgraphs:
if self["CB"] in g:
side_chain_atoms = g.nodes()
break
except:
warning_message = "Malformed PDB for Residue {0}: {1}.".format(
self.id, self
)
if "CB" in self.atoms.keys():
side_chain_atoms.append(self["CB"])
warning_message += " Side-chain is just the CB atom."
else:
warning_message += " Empty side-chain."
warnings.warn(warning_message, MalformedPDBWarning)
return side_chain_atoms
@property
def centroid(self):
"""Calculates the centroid of the residue.
Returns
-------
centroid : numpy.array or None
Returns a 3D coordinate for the residue unless a CB
atom is not available, in which case `None` is
returned.
Notes
-----
Uses the definition of the centroid from Huang *et al* [2]_.
References
----------
.. [2] Huang ES, Subbiah S and Levitt M (1995) Recognizing Native
Folds by the Arrangement of Hydrophobic and Polar Residues, J. Mol.
Biol return., **252**, 709-720.
"""
if "CB" in self.atoms:
cb_unit_vector = unit_vector(self["CB"]._vector - self["CA"]._vector)
return self["CA"]._vector + (3.0 * cb_unit_vector)
return None
__author__ = (
"Jack W. Heal, Christopher W. Wood, Gail J. Bartlett, "
"Andrew R. Thomson, Kieran L. Hudson"
)