"""Contains various tools for analysing protein structure."""
from collections import Counter
import typing as t
import warnings
import numpy
from .pseudo_atoms import Primitive
from .geometry import (
dihedral,
distance,
find_foot,
unit_vector,
is_acute,
)
from .amino_acids import (
residue_mwt,
water_mass,
residue_ext_280,
residue_pka,
residue_charge,
side_chain_dihedrals,
)
from .ampal_warnings import NoncanonicalWarning
_nc_warning_str = "Unnatural amino acid detected, this value may be inaccurate."
[docs]def sequence_molecular_weight(seq):
"""Returns the molecular weight of the polypeptide sequence.
Notes
-----
Units = Daltons
Parameters
----------
seq : str
Sequence of amino acids.
"""
if "X" in seq:
warnings.warn(_nc_warning_str, NoncanonicalWarning)
return sum([residue_mwt[aa] * n for aa, n in Counter(seq).items()]) + water_mass
# TODO How to account for cystine Vs cysteine.
# (when there's a disuplhide bond, ext coefficient changes to 125.
[docs]def sequence_molar_extinction_280(seq):
"""Returns the molar extinction coefficient of the sequence at 280 nm.
Notes
-----
Units = M/cm
Parameters
----------
seq : str
Sequence of amino acids.
"""
if "X" in seq:
warnings.warn(_nc_warning_str, NoncanonicalWarning)
return sum([residue_ext_280[aa] * n for aa, n in Counter(seq).items()])
[docs]def partial_charge(aa, pH):
"""Calculates the partial charge of the amino acid.
Parameters
----------
aa : str
Amino acid single-letter code.
pH : float
pH of interest.
"""
difference = pH - residue_pka[aa]
if residue_charge[aa] > 0:
difference *= -1
ratio = (10**difference) / (1 + 10**difference)
return ratio
[docs]def sequence_charge(seq, pH=7.4):
"""Calculates the total charge of the input polypeptide sequence.
Parameters
----------
seq : str
Sequence of amino acids.
pH : float
pH of interest.
"""
if "X" in seq:
warnings.warn(_nc_warning_str, NoncanonicalWarning)
adj_protein_charge = sum(
[
partial_charge(aa, pH) * residue_charge[aa] * n
for aa, n in Counter(seq).items()
]
)
adj_protein_charge += partial_charge("N-term", pH) * residue_charge["N-term"]
adj_protein_charge += partial_charge("C-term", pH) * residue_charge["C-term"]
return adj_protein_charge
[docs]def charge_series(seq, granularity=0.1):
"""Calculates the charge for pH 1-13.
Parameters
----------
seq : str
Sequence of amino acids.
granularity : float, optional
Granularity of pH values i.e. if 0.1 pH = [1.0, 1.1, 1.2...]
"""
if "X" in seq:
warnings.warn(_nc_warning_str, NoncanonicalWarning)
ph_range = numpy.arange(1, 13, granularity)
charge_at_ph = [sequence_charge(seq, ph) for ph in ph_range]
return ph_range, charge_at_ph
[docs]def sequence_isoelectric_point(seq, granularity=0.1):
"""Calculates the isoelectric point of the sequence for ph 1-13.
Parameters
----------
seq : str
Sequence of amino acids.
granularity : float, optional
Granularity of pH values i.e. if 0.1 pH = [1.0, 1.1, 1.2...]
"""
if "X" in seq:
warnings.warn(_nc_warning_str, NoncanonicalWarning)
ph_range, charge_at_ph = charge_series(seq, granularity)
abs_charge_at_ph = [abs(ch) for ch in charge_at_ph]
pi_index = min(enumerate(abs_charge_at_ph), key=lambda x: x[1])[0]
return ph_range[pi_index]
[docs]def classify_angle_as_rotamer(angle: float) -> t.Optional[int]:
"""Classifies a chi angle into a rotamer.
Notes
-----
This function uses the convention 1:g+ 2:t 3:g- as from 10.1006/jmbi.1993.1170
R L Dunbrack Jr, M Karplus (1993) Backbone-dependent rotamer library for proteins.
Application to side-chain prediction.
Parameters
----------
angle : float
Chi angle to classify.
"""
if 0 < angle < 120:
return 1 # g+
elif -120 < angle < 0:
return 3 # g-
elif -120 > angle > -180 or 120 < angle < 180:
return 2 # t
else:
print(f"Got angle {angle} which does not between 0 < x < 180 or -180 > x > 0 ")
return None
[docs]def measure_sidechain_torsion_angles(
residue, verbose=True
) -> t.Tuple[t.List[t.Optional[float]], t.List[t.Optional[int]]]:
"""Calculates sidechain dihedral angles for a residue
Parameters
----------
residue : [ampal.Residue]
`Residue` object.
verbose : bool, optional
If `true`, tells you when a residue does not have any known
dihedral angles to measure.
Returns
-------
chi_angles: [float]
Length depends on residue type, in range [-pi, pi]
[0] = chi1 [if applicable]
[1] = chi2 [if applicable]
[2] = chi3 [if applicable]
[3] = chi4 [if applicable]
rotamers: [int]
Rotamer class according to Dunbrack 1993 {1, 2, 3}
[0] = chi1 [if applicable]
[1] = chi2 [if applicable]
[2] = chi3 [if applicable]
[3] = chi4 [if applicable]
"""
chi_angles = []
rotamers = []
aa = residue.mol_code
if aa not in side_chain_dihedrals:
if verbose:
print("Amino acid {} has no known side-chain dihedral".format(aa))
else:
for set_atoms in side_chain_dihedrals[aa]:
required_for_dihedral = set_atoms[0:4]
try:
angle = dihedral(
residue[required_for_dihedral[0]]._vector,
residue[required_for_dihedral[1]]._vector,
residue[required_for_dihedral[2]]._vector,
residue[required_for_dihedral[3]]._vector,
)
chi_angles.append(angle)
rot = classify_angle_as_rotamer(angle)
rotamers.append(rot)
except KeyError as k:
print(
"{0} atom missing from residue {1} {2} "
"- can't assign dihedral".format(k, residue.mol_code, residue.id)
)
chi_angles.append(None)
rotamers.append(None)
return (chi_angles, rotamers)
[docs]def measure_torsion_angles(residues):
"""Calculates the dihedral angles for a list of backbone atoms.
Parameters
----------
residues : [ampal.Residue]
List of `Residue` objects.
Returns
-------
torsion_angles : (float, float, float)
One triple for each residue, containing torsion angles in
the range [-pi, pi].
[0] omega
[1] phi
[2] psi
For the first residue, omega and phi are not defined. For
the final residue, psi is not defined.
Raises
------
ValueError
If the number of input residues is less than 2.
"""
if len(residues) < 2:
torsion_angles = [(None, None, None)] * len(residues)
else:
torsion_angles = []
for i in range(len(residues)):
if i == 0:
res1 = residues[i]
res2 = residues[i + 1]
omega = None
phi = None
try:
psi = dihedral(
res1["N"]._vector,
res1["CA"]._vector,
res1["C"]._vector,
res2["N"]._vector,
)
except KeyError as k:
print("{0} atom missing - can't assign psi".format(k))
psi = None
torsion_angles.append((omega, phi, psi))
elif i == len(residues) - 1:
res1 = residues[i - 1]
res2 = residues[i]
try:
omega = dihedral(
res1["CA"]._vector,
res1["C"]._vector,
res2["N"]._vector,
res2["CA"]._vector,
)
except KeyError as k:
print("{0} atom missing - can't assign omega".format(k))
omega = None
try:
phi = dihedral(
res1["C"]._vector,
res2["N"]._vector,
res2["CA"]._vector,
res2["C"]._vector,
)
except KeyError as k:
print("{0} atom missing - can't assign phi".format(k))
phi = None
psi = None
torsion_angles.append((omega, phi, psi))
else:
res1 = residues[i - 1]
res2 = residues[i]
res3 = residues[i + 1]
try:
omega = dihedral(
res1["CA"]._vector,
res1["C"]._vector,
res2["N"]._vector,
res2["CA"]._vector,
)
except KeyError as k:
print("{0} atom missing - can't assign omega".format(k))
omega = None
try:
phi = dihedral(
res1["C"]._vector,
res2["N"]._vector,
res2["CA"]._vector,
res2["C"]._vector,
)
except KeyError as k:
print("{0} atom missing - can't assign phi".format(k))
phi = None
try:
psi = dihedral(
res2["N"]._vector,
res2["CA"]._vector,
res2["C"]._vector,
res3["N"]._vector,
)
except KeyError as k:
print("{0} atom missing - can't assign psi".format(k))
psi = None
torsion_angles.append((omega, phi, psi))
return torsion_angles
# TODO: Find a home for this
[docs]def cc_to_local_params(pitch, radius, oligo):
"""Returns local parameters for an oligomeric assembly.
Parameters
----------
pitch : float
Pitch of assembly
radius : float
Radius of assembly
oligo : int
Oligomeric state of assembly
Returns
-------
pitchloc : float
Local pitch of assembly (between 2 adjacent component helices)
rloc : float
Local radius of assembly
alphaloc : float
Local pitch-angle of assembly
"""
rloc = numpy.sin(numpy.pi / oligo) * radius
alpha = numpy.arctan((2 * numpy.pi * radius) / pitch)
alphaloc = numpy.cos((numpy.pi / 2) - ((numpy.pi) / oligo)) * alpha
pitchloc = (2 * numpy.pi * rloc) / numpy.tan(alphaloc)
return pitchloc, rloc, numpy.rad2deg(alphaloc)
[docs]def residues_per_turn(p):
"""The number of residues per turn at each Monomer in the Polymer.
Notes
-----
Each element of the returned list is the number of residues
per turn, at a point on the Polymer primitive. Calculated using
the relative positions of the CA atoms and the primitive of the
Polymer. Element i is the calculated from the dihedral angle using
the CA atoms of the Monomers with indices [i, i+1] and the
corresponding atoms of the primitive. The final value is None.
Parameters
----------
p : ampal.Polypeptide
`Polypeptide` from which residues per turn will be calculated.
Returns
-------
rpts : [float]
Residue per turn values.
"""
cas = p.get_reference_coords()
prim_cas = p.primitive.coordinates
dhs = [
abs(dihedral(cas[i], prim_cas[i], prim_cas[i + 1], cas[i + 1]))
for i in range(len(prim_cas) - 1)
]
rpts = [360.0 / dh for dh in dhs]
rpts.append(None)
return rpts
[docs]def polymer_to_reference_axis_distances(
p, reference_axis, tag=True, reference_axis_name="ref_axis"
):
"""Returns distances between the primitive of a Polymer and a reference_axis.
Notes
-----
Distances are calculated between each point of the Polymer primitive
and the corresponding point in reference_axis. In the special case
of the helical barrel, if the Polymer is a helix and the reference_axis
represents the centre of the barrel, then this function returns the
radius of the barrel at each point on the helix primitive. The points
of the primitive and the reference_axis are run through in the same
order, so take care with the relative orientation of the reference
axis when defining it.
Parameters
----------
p : ampal.Polymer
reference_axis : list(numpy.array or tuple or list)
Length of reference_axis must equal length of the Polymer.
Each element of reference_axis represents a point in R^3.
tag : bool, optional
If True, tags the Chain with the reference axis coordinates
and each Residue with its distance to the ref axis.
Distances are stored at the Residue level, but refer to
distances from the CA atom.
reference_axis_name : str, optional
Used to name the keys in tags at Chain and Residue level.
Returns
-------
distances : list(float)
Distance values between corresponding points on the
reference axis and the `Polymer` `Primitive`.
Raises
------
ValueError
If the Polymer and the reference_axis have unequal length.
"""
if not len(p) == len(reference_axis):
raise ValueError(
"The reference axis must contain the same number of points "
"as the Polymer primitive."
)
prim_cas = p.primitive.coordinates
ref_points = reference_axis.coordinates
distances = [distance(prim_cas[i], ref_points[i]) for i in range(len(prim_cas))]
if tag:
p.tags[reference_axis_name] = reference_axis
monomer_tag_name = "distance_to_{0}".format(reference_axis_name)
for m, d in zip(p._monomers, distances):
m.tags[monomer_tag_name] = d
return distances
[docs]def crick_angles(p, reference_axis, tag=True, reference_axis_name="ref_axis"):
"""Returns the Crick angle for each CA atom in the `Polymer`.
Notes
-----
The final value is in the returned list is `None`, since the angle
calculation requires pairs of points on both the primitive and
reference_axis.
Parameters
----------
p : ampal.Polymer
Reference `Polymer`.
reference_axis : list(numpy.array or tuple or list)
Length of reference_axis must equal length of the Polymer.
Each element of reference_axis represents a point in R^3.
tag : bool, optional
If `True`, tags the `Polymer` with the reference axis coordinates
and each Residue with its Crick angle. Crick angles are stored
at the Residue level, but are calculated using the CA atom.
reference_axis_name : str, optional
Used to name the keys in tags at Chain and Residue level.
Returns
-------
cr_angles : list(float)
The crick angles in degrees for each CA atom of the Polymer.
Raises
------
ValueError
If the Polymer and the reference_axis have unequal length.
"""
if not len(p) == len(reference_axis):
raise ValueError(
"The reference axis must contain the same number of points"
" as the Polymer primitive."
)
prim_cas = p.primitive.coordinates
p_cas = p.get_reference_coords()
ref_points = reference_axis.coordinates
cr_angles = [
dihedral(ref_points[i], prim_cas[i], prim_cas[i + 1], p_cas[i])
for i in range(len(prim_cas) - 1)
]
cr_angles.append(None)
if tag:
p.tags[reference_axis_name] = reference_axis
monomer_tag_name = "crick_angle_{0}".format(reference_axis_name)
for m, c in zip(p._monomers, cr_angles):
m.tags[monomer_tag_name] = c
return cr_angles
[docs]def alpha_angles(p, reference_axis, tag=True, reference_axis_name="ref_axis"):
"""Alpha angle calculated using points on the primitive of helix and axis.
Notes
-----
The final value is None, since the angle calculation requires pairs
of points along the primitive and axis. This is a generalisation
of the calculation used to measure the tilt of a helix in a
coiled-coil with respect to the central axis of the coiled coil.
Parameters
----------
p : ampal.Polymer
Reference `Polymer`.
reference_axis : list(numpy.array or tuple or list)
Length of reference_axis must equal length of the Polymer.
Each element of reference_axis represents a point in R^3.
tag : bool, optional
If `True`, tags the Chain with the reference axis coordinates
and each Residue with its alpha angle. Alpha angles are stored
at the Residue level, but are calculated using the CA atom.
reference_axis_name : str, optional
Used to name the keys in tags at Chain and Residue level.
Returns
-------
alphas : list of float
The alpha angle for the Polymer at each point of its primitive,
in degrees.
Raises
------
ValueError
If the Polymer and the reference_axis have unequal length.
"""
if not len(p) == len(reference_axis):
raise ValueError(
"The reference axis must contain the same number of points "
"as the Polymer primitive."
)
prim_cas = p.primitive.coordinates
ref_points = reference_axis.coordinates
alphas = [
abs(dihedral(ref_points[i + 1], ref_points[i], prim_cas[i], prim_cas[i + 1]))
for i in range(len(prim_cas) - 1)
]
alphas.append(None)
if tag:
p.tags[reference_axis_name] = reference_axis
monomer_tag_name = "alpha_angle_{0}".format(reference_axis_name)
for m, a in zip(p._monomers, alphas):
m.tags[monomer_tag_name] = a
return alphas
[docs]def polypeptide_vector(p, start_index=0, end_index=-1, unit=True):
"""Vector along the Chain primitive (default is from N-terminus to C-terminus).
Notes
-----
`start_index` and `end_index` can be changed to examine smaller
sections of the Chain, or reversed to change the direction of
the vector.
Parameters
----------
p : ampal.Polymer
Reference `Polymer`.
start_index : int, optional
Default is 0 (start at the N-terminus of the Chain)
end_index : int, optional
Default is -1 (start at the C-terminus of the Chain)
unit : bool
If True, the vector returned has a magnitude of 1.
Returns
-------
vector : a numpy.array
vector has shape (1, 3)
"""
if len(p) <= 1:
raise ValueError(
"Polymer should have length greater than 1. Polymer length = {0}".format(
len(p)
)
)
try:
prim_cas = p.primitive.coordinates
direction_vector = prim_cas[end_index] - prim_cas[start_index]
except ValueError:
direction_vector = p[end_index]["CA"].array - p[start_index]["CA"].array
if unit:
direction_vector = unit_vector(direction_vector)
return direction_vector
# TODO Change functionality so that a Primitive object is returned.
# (e.g. all CA ALA atoms like with primitives).
[docs]def reference_axis_from_chains(chains):
"""Average coordinates from a set of primitives calculated from Chains.
Parameters
----------
chains : list(Chain)
Returns
-------
reference_axis : numpy.array
The averaged (x, y, z) coordinates of the primitives for
the list of Chains. In the case of a coiled coil barrel,
this would give the central axis for calculating e.g. Crick
angles.
Raises
------
ValueError :
If the Chains are not all of the same length.
"""
if not len(set([len(x) for x in chains])) == 1:
raise ValueError("All chains must be of the same length")
# First array in coords is the primitive coordinates of the first chain.
# The orientation of the first chain orients the reference_axis.
coords = [numpy.array(chains[0].primitive.coordinates)]
orient_vector = polypeptide_vector(chains[0])
# Append the coordinates for the remaining chains, reversing the
# direction in antiparallel arrangements.
for c in chains[1:]:
if is_acute(polypeptide_vector(c), orient_vector):
coords.append(numpy.array(c.primitive.coordinates))
else:
coords.append(numpy.flipud(numpy.array(c.primitive.coordinates)))
# Average across the x, y and z coordinates to get the reference_axis
# coordinates
reference_axis = numpy.mean(numpy.array(coords), axis=0)
return Primitive.from_coordinates(reference_axis)
[docs]def flip_reference_axis_if_antiparallel(p, reference_axis, start_index=0, end_index=-1):
"""Flips reference axis if direction opposes the direction of the `Polymer`.
Notes
-----
If the angle between the vector for the Polymer and the vector
for the reference_axis is > 90 degrees, then the reference axis
is reversed. This is useful to run before running
polymer_to_reference_axis_distances, crick_angles, or alpha_angles.
For more information on the start and end indices, see chain_vector.
Parameters
----------
p : ampal.Polymer
Reference `Polymer`.
reference_axis : list(numpy.array or tuple or list)
Length of reference_axis must equal length of the Polymer.
Each element of reference_axis represents a point in R^3.
start_index : int, optional
Default is 0 (start at the N-terminus of the Polymer)
end_index : int, optional
Default is -1 (start at the C-terminus of the Polymer)
Returns
-------
reference_axis : list(numpy.array or tuple or list)
"""
p_vector = polypeptide_vector(p, start_index=start_index, end_index=end_index)
if is_acute(p_vector, reference_axis[end_index] - reference_axis[start_index]):
reference_axis = numpy.flipud(reference_axis)
return reference_axis
[docs]def make_primitive(cas_coords, window_length=3):
"""Calculates running average of cas_coords with a fixed averaging window_length.
Parameters
----------
cas_coords : list(numpy.array or float or tuple)
Each element of the list must have length 3.
window_length : int, optional
The number of coordinate sets to average each time.
Returns
-------
s_primitive : list(numpy.array)
Each array has length 3.
Raises
------
ValueError
If the length of cas_coords is smaller than the window_length.
"""
if len(cas_coords) >= window_length:
primitive = []
count = 0
for _ in cas_coords[: -(window_length - 1)]:
group = cas_coords[count : count + window_length]
average_x = sum([x[0] for x in group]) / window_length
average_y = sum([y[1] for y in group]) / window_length
average_z = sum([z[2] for z in group]) / window_length
primitive.append(numpy.array([average_x, average_y, average_z]))
count += 1
else:
raise ValueError(
"A primitive cannot be generated for {0} atoms using a (too large) "
"averaging window_length of {1}.".format(len(cas_coords), window_length)
)
return primitive
[docs]def make_primitive_smoothed(cas_coords, smoothing_level=2):
"""Generates smoothed primitive from a list of coordinates.
Parameters
----------
cas_coords : list(numpy.array or float or tuple)
Each element of the list must have length 3.
smoothing_level : int, optional
Number of times to run the averaging.
Returns
-------
s_primitive : list(numpy.array)
Each array has length 3.
Raises
------
ValueError
If the smoothing level is too great compared to the length
of cas_coords.
"""
try:
s_primitive = make_primitive(cas_coords)
for _ in range(smoothing_level):
s_primitive = make_primitive(s_primitive)
except ValueError:
raise ValueError(
"Smoothing level {0} too high, try reducing the number of rounds"
" or give a longer Chain (curent length = {1}).".format(
smoothing_level, len(cas_coords)
)
)
return s_primitive
__author__ = (
"Jack W. Heal, Christopher W. Wood, Gail J. Bartlett, "
"Derek N. Woolfson, Kieran L. Hudson"
)