Source code for ampal.amino_acids

import json
import requests

from .data import AMINO_ACIDS_DATA, AA_PATH

water_mass = 18.01528

ideal_backbone_bond_lengths = {
    # Ideal bond distances from:
    #  Schulz, G. E, and R. Heiner Schirmer. Principles Of Protein Structure. New York: Springer-Verlag, 1979.
    "n_ca": 1.47,
    "ca_c": 1.53,
    "c_o": 1.24,
    # peptide bond length for residues.
    "c_n": 1.32,
}

ideal_backbone_bond_angles = {
    # Ideal bond angles from:
    #  Schulz, G. E, and R. Heiner Schirmer. Principles Of Protein Structure. New York: Springer-Verlag, 1979.
    "trans": {
        "n_ca_c": 110.0,
        "ca_c_o": 121.0,
        "ca_c_n": 114.0,
        "c_n_ca": 123.0,
        "o_c_n": 125.0,
    },
    "cis": {
        "n_ca_c": 110.0,
        "ca_c_o": 119.0,
        "ca_c_n": 118.0,
        "c_n_ca": 126.0,
        "o_c_n": 123.0,
    },
}

residue_mwt = {
    "A": 71.0779,
    "C": 103.1429,
    "D": 115.0874,
    "E": 129.114,
    "F": 147.1739,
    "G": 57.0513,
    "H": 137.1393,
    "I": 113.1576,
    "K": 128.1723,
    "L": 113.1576,
    "M": 131.1961,
    "N": 114.1026,
    "P": 97.1152,
    "Q": 128.1292,
    "R": 156.1857,
    "S": 87.0773,
    "T": 101.1039,
    "V": 99.1311,
    "W": 186.2099,
    "Y": 163.1733,
    "X": 57.0513,
}

residue_charge = {
    "A": 0,
    "C": -1,
    "D": -1,
    "E": -1,
    "F": 0,
    "G": 0,
    "H": +1,
    "I": 0,
    "K": +1,
    "L": 0,
    "M": 0,
    "N": 0,
    "P": 0,
    "Q": 0,
    "R": +1,
    "S": 0,
    "T": 0,
    "V": 0,
    "W": 0,
    "Y": -1,
    "N-term": +1,
    "C-term": -1,
    "X": 0,
}

residue_pka = {
    "A": 0.0,
    "C": 8.3,
    "D": 3.65,
    "E": 4.25,
    "F": 0.0,
    "G": 0.0,
    "H": 6.1,
    "I": 0.0,
    "K": 10.53,
    "L": 0.0,
    "M": 0.0,
    "N": 0.0,
    "P": 0.0,
    "Q": 0.0,
    "R": 12.48,
    "S": 0.0,
    "T": 0.0,
    "V": 0.0,
    "W": 0.0,
    "Y": 10.1,
    "N-term": 8.0,
    "C-term": 3.1,
    "X": 0.0,
}

residue_ext_280 = {
    "A": 0,
    "C": 0,
    "D": 0,
    "E": 0,
    "F": 0,
    "G": 0,
    "H": 0,
    "I": 0,
    "K": 0,
    "L": 0,
    "M": 0,
    "N": 0,
    "P": 0,
    "Q": 0,
    "R": 0,
    "S": 0,
    "T": 0,
    "V": 0,
    "W": 5690,
    "Y": 1280,
    "X": 0,
}

standard_amino_acids = {
    "A": "ALA",
    "C": "CYS",
    "D": "ASP",
    "E": "GLU",
    "F": "PHE",
    "G": "GLY",
    "H": "HIS",
    "I": "ILE",
    "K": "LYS",
    "L": "LEU",
    "M": "MET",
    "N": "ASN",
    "P": "PRO",
    "Q": "GLN",
    "R": "ARG",
    "S": "SER",
    "T": "THR",
    "V": "VAL",
    "W": "TRP",
    "Y": "TYR",
}

side_chain_dihedrals = {
    "ARG": [
        ["N", "CA", "CB", "CG", "CD", "NE", "CZ", "NH1", "NH2"],
        ["CA", "CB", "CG", "CD", "NE", "CZ", "NH1", "NH2"],
        ["CB", "CG", "CD", "NE", "CZ", "NH1", "NH2"],
        ["CG", "CD", "NE", "CZ", "NH1", "NH2"],
    ],
    "ASN": [["N", "CA", "CB", "CG", "OD1", "ND2"], ["CA", "CB", "CG", "OD1", "ND2"]],
    "ASP": [["N", "CA", "CB", "CG", "OD1", "OD2"], ["CA", "CB", "CG", "OD1", "OD2"]],
    "CYS": [["N", "CA", "CB", "SG"]],
    "GLN": [
        ["N", "CA", "CB", "CG", "CD", "OE1", "NE2"],
        ["CA", "CB", "CG", "CD", "OE1", "NE2"],
        ["CB", "CG", "CD", "OE1", "NE2"],
    ],
    "GLU": [
        ["N", "CA", "CB", "CG", "CD", "OE1", "OE2"],
        ["CA", "CB", "CG", "CD", "OE1", "OE2"],
        ["CB", "CG", "CD", "OE1", "OE2"],
    ],
    "HIS": [
        ["N", "CA", "CB", "CG", "ND1", "CE1", "ND2"],
        ["CA", "CB", "CG", "ND1", "CE1", "ND2"],
    ],
    "ILE": [["N", "CA", "CB", "CG1", "CG2", "CD1"], ["CA", "CB", "CG1", "CD1", "CG2"]],
    "LEU": [["N", "CA", "CB", "CG", "CD1", "CD2"], ["CA", "CB", "CG", "CD1", "CD2"]],
    "LYS": [
        ["N", "CA", "CB", "CG", "CD", "CE", "NZ"],
        ["CA", "CB", "CG", "CD", "CE", "NZ"],
        ["CB", "CG", "CD", "CE", "NZ"],
        ["CG", "CD", "CE", "NZ"],
    ],
    "MET": [
        ["N", "CA", "CB", "CG", "SD", "CE"],
        ["CA", "CB", "CG", "SD", "CE"],
        ["CB", "CG", "SD", "CE"],
    ],
    "PHE": [
        ["N", "CA", "CB", "CG", "CD1", "CD2", "CE1", "CE2", "CZ"],
        ["CA", "CB", "CG", "CD1", "CD2", "CE1", "CE2", "CZ"],
    ],
    "PRO": [["N", "CA", "CB", "CG", "CD"], ["CA", "CB", "CG", "CD"]],
    "SER": [["N", "CA", "CB", "OG"]],
    "THR": [["N", "CA", "CB", "OG1", "CG2"]],
    "TRP": [
        ["N", "CA", "CB", "CG", "CD1", "CD2", "CE2", "CE3", "NE1", "CZ2", "CZ3", "CH2"],
        ["CA", "CB", "CG", "CD1", "CD2", "CE2", "CE3", "NE1", "CZ2", "CZ3", "CH2"],
    ],
    "TYR": [
        ["N", "CA", "CB", "CG", "CD1", "CD2", "CE1", "CE2", "CZ", "OH"],
        ["CA", "CB", "CG", "CD1", "CD2", "CE1", "CE2", "CZ", "OH"],
    ],
    "VAL": [["N", "CA", "CB", "CG1", "CG2"]],
}

# Data taken from http://web.expasy.org/protscale/ unless otherwise stated. Original reference also given.
# Levitt M. Biochemistry 17:4277-4285(1978)
a_helix_Levitt = {
    "A": 1.29,
    "C": 1.11,
    "D": 1.04,
    "E": 1.44,
    "F": 1.07,
    "G": 0.56,
    "H": 1.22,
    "I": 0.97,
    "K": 1.23,
    "L": 1.3,
    "M": 1.47,
    "N": 0.9,
    "P": 0.52,
    "Q": 1.27,
    "R": 0.96,
    "S": 0.82,
    "T": 0.82,
    "V": 0.91,
    "W": 0.99,
    "Y": 0.72,
}


# Janin J. Nature 277:491-492(1979)
accessibility_Janin = {
    "A": 6.6,
    "C": 0.9,
    "D": 7.7,
    "E": 5.7,
    "F": 2.4,
    "G": 6.7,
    "H": 2.5,
    "I": 2.8,
    "K": 10.3,
    "L": 4.8,
    "M": 1.0,
    "N": 6.7,
    "P": 4.8,
    "Q": 5.2,
    "R": 4.5,
    "S": 9.4,
    "T": 7.0,
    "V": 4.5,
    "W": 1.4,
    "Y": 5.1,
}

# Bhaskaran R., Ponnuswamy P.K. Int. J. Pept. Protein. Res. 32:242-255(1988)
avg_flex_index = {
    "A": 0.36,
    "C": 0.35,
    "D": 0.51,
    "E": 0.5,
    "F": 0.31,
    "G": 0.54,
    "H": 0.32,
    "I": 0.46,
    "K": 0.47,
    "L": 0.37,
    "M": 0.3,
    "N": 0.46,
    "P": 0.51,
    "Q": 0.49,
    "R": 0.53,
    "S": 0.51,
    "T": 0.44,
    "V": 0.39,
    "W": 0.31,
    "Y": 0.42,
}

# Levitt M. Biochemistry 17:4277-4285(1978)
beta_sheet_Levitt = {
    "A": 0.9,
    "C": 0.74,
    "D": 0.72,
    "E": 0.75,
    "F": 1.32,
    "G": 0.92,
    "H": 1.08,
    "I": 1.45,
    "K": 0.77,
    "L": 1.02,
    "M": 0.97,
    "N": 0.76,
    "P": 0.64,
    "Q": 0.8,
    "R": 0.99,
    "S": 0.95,
    "T": 1.21,
    "V": 1.49,
    "W": 1.14,
    "Y": 1.25,
}

# Levitt M. Biochemistry 17:4277-4285(1978)
beta_turn_Levitt = {
    "A": 0.77,
    "C": 0.81,
    "D": 1.41,
    "E": 0.99,
    "F": 0.59,
    "G": 1.64,
    "H": 0.68,
    "I": 0.51,
    "K": 0.96,
    "L": 0.58,
    "M": 0.41,
    "N": 1.28,
    "P": 1.91,
    "Q": 0.98,
    "R": 0.88,
    "S": 1.32,
    "T": 1.04,
    "V": 0.47,
    "W": 0.76,
    "Y": 1.05,
}

# Zimmerman J.M., Eliezer N., Simha R. J. Theor. Biol. 21:170-201(1968)
bulkiness = {
    "A": 11.5,
    "C": 13.46,
    "D": 11.68,
    "E": 13.57,
    "F": 19.8,
    "G": 3.4,
    "H": 13.69,
    "I": 21.4,
    "K": 15.71,
    "L": 21.4,
    "M": 16.25,
    "N": 12.82,
    "P": 17.43,
    "Q": 14.45,
    "R": 14.28,
    "S": 9.47,
    "T": 15.77,
    "V": 21.57,
    "W": 21.67,
    "Y": 18.03,
}

# Kyte J., Doolittle R.F. J. Mol. Biol. 157:105-132(1982)
hydropathicity = {
    "A": 1.8,
    "C": 2.5,
    "D": -3.5,
    "E": -3.5,
    "F": 2.8,
    "G": -0.4,
    "H": -3.2,
    "I": 4.5,
    "K": -3.9,
    "L": 3.8,
    "M": 1.9,
    "N": -3.5,
    "P": -1.6,
    "Q": -3.5,
    "R": -4.5,
    "S": -0.8,
    "T": -0.7,
    "V": 4.2,
    "W": -0.9,
    "Y": -1.3,
}

# http://biopython.org/DIST/docs/api/Bio.PDB.DSSP%27-pysrc.html
# Sander & Rost, (1994), Proteins, 20:216-226
max_asa = {
    "A": 106.0,
    "C": 135.0,
    "D": 163.0,
    "E": 194.0,
    "F": 197.0,
    "G": 84.0,
    "H": 184.0,
    "I": 169.0,
    "K": 205.0,
    "L": 164.0,
    "M": 188.0,
    "N": 157.0,
    "P": 136.0,
    "Q": 198.0,
    "R": 135.0,
    "S": 130.0,
    "T": 142.0,
    "V": 142.0,
    "W": 227.0,
    "Y": 222.0,
}

# Grantham R. Science 185:862-864(1974)
polarity_Grantham = {
    "A": 8.1,
    "C": 5.5,
    "D": 13.0,
    "E": 12.3,
    "F": 5.2,
    "G": 9.0,
    "H": 10.4,
    "I": 5.2,
    "K": 11.3,
    "L": 4.9,
    "M": 5.7,
    "N": 11.6,
    "P": 8.0,
    "Q": 10.5,
    "R": 10.5,
    "S": 9.2,
    "T": 8.6,
    "V": 5.9,
    "W": 5.4,
    "Y": 6.2,
}

# Zimmerman J.M., Eliezer N., Simha R. J. Theor. Biol. 21:170-201(1968)
polarity_Zimmerman = {
    "A": 0.0,
    "C": 1.48,
    "D": 49.7,
    "E": 49.9,
    "F": 0.35,
    "G": 0.0,
    "H": 51.6,
    "I": 0.13,
    "K": 49.5,
    "L": 0.13,
    "M": 1.43,
    "N": 3.38,
    "P": 1.58,
    "Q": 3.53,
    "R": 52.0,
    "S": 1.67,
    "T": 1.66,
    "V": 0.13,
    "W": 2.1,
    "Y": 1.61,
}

# Fraga S. Can. J. Chem. 60:2606-2610(1982)
recognition_factors = {
    "A": 78.0,
    "C": 89.0,
    "D": 81.0,
    "E": 78.0,
    "F": 81.0,
    "G": 84.0,
    "H": 84.0,
    "I": 88.0,
    "K": 87.0,
    "L": 85.0,
    "M": 80.0,
    "N": 94.0,
    "P": 91.0,
    "Q": 87.0,
    "R": 95.0,
    "S": 107.0,
    "T": 93.0,
    "V": 89.0,
    "W": 104.0,
    "Y": 84.0,
}

# Jones. D.D. J. Theor. Biol. 50:167-184(1975)
refractivity = {
    "A": 4.34,
    "C": 35.77,
    "D": 12.0,
    "E": 17.26,
    "F": 29.4,
    "G": 0.0,
    "H": 21.81,
    "I": 19.06,
    "K": 21.29,
    "L": 18.78,
    "M": 21.64,
    "N": 13.28,
    "P": 10.93,
    "Q": 17.56,
    "R": 26.66,
    "S": 6.35,
    "T": 11.01,
    "V": 13.92,
    "W": 42.53,
    "Y": 31.53,
}

# Dayhoff M.O., Schwartz R.M., Orcutt B.C. In "Atlas of Protein Sequence and Structure", Vol.5, Suppl.3 (1978)
relative_mutability = {
    "A": 100.0,
    "C": 20.0,
    "D": 106.0,
    "E": 102.0,
    "F": 41.0,
    "G": 49.0,
    "H": 66.0,
    "I": 96.0,
    "K": 56.0,
    "L": 40.0,
    "M": 94.0,
    "N": 134.0,
    "P": 56.0,
    "Q": 93.0,
    "R": 65.0,
    "S": 120.0,
    "T": 97.0,
    "V": 74.0,
    "W": 18.0,
    "Y": 41.0,
}

# Meek J.L. Proc. Natl. Acad. Sci. USA 77:1632-1636(1980)
retention_coeff_hplc_pH7pt4 = {
    "A": 0.5,
    "C": -6.8,
    "D": -8.2,
    "E": -16.9,
    "F": 13.2,
    "G": 0.0,
    "H": -3.5,
    "I": 13.9,
    "K": 0.1,
    "L": 8.8,
    "M": 4.8,
    "N": 0.8,
    "P": 6.1,
    "Q": -4.8,
    "R": 0.8,
    "S": 1.2,
    "T": 2.7,
    "V": 2.7,
    "W": 14.9,
    "Y": 6.1,
}

# Zhao, G., London E. Protein Sci. 15:1987-2001(2006)
transmembrane_tendancy = {
    "A": 0.38,
    "C": -0.3,
    "D": -3.27,
    "E": -2.9,
    "F": 1.98,
    "G": -0.19,
    "H": -1.44,
    "I": 1.97,
    "K": -3.46,
    "L": 1.82,
    "M": 1.4,
    "N": -1.62,
    "P": -1.44,
    "Q": -1.84,
    "R": -2.57,
    "S": -0.53,
    "T": -0.32,
    "V": 1.46,
    "W": 1.53,
    "Y": 0.49,
}

# Bairoch A. Release notes for UniProtKB/Swiss-Prot release 2013_04 - April 2013
uniprot_composition_2013 = {
    "A": 8.25,
    "C": 1.37,
    "D": 5.45,
    "E": 6.75,
    "F": 3.86,
    "G": 7.07,
    "H": 2.27,
    "I": 5.96,
    "K": 5.84,
    "L": 9.66,
    "M": 2.42,
    "N": 4.06,
    "P": 4.7,
    "Q": 3.93,
    "R": 5.53,
    "S": 6.56,
    "T": 5.34,
    "V": 6.87,
    "W": 1.08,
    "Y": 2.92,
}


number_of_codons = {
    "A": 4,
    "C": 1,
    "D": 2,
    "E": 2,
    "F": 2,
    "G": 4,
    "H": 2,
    "I": 3,
    "K": 2,
    "L": 6,
    "M": 1,
    "N": 2,
    "P": 4,
    "Q": 2,
    "R": 6,
    "S": 6,
    "T": 4,
    "V": 4,
    "W": 1,
    "Y": 2,
}

# pI, pk_COOH, pK_NH3, pK_Rgroup all taken from:
# http://www.sigmaaldrich.com/life-science/metabolomics/learning-center/amino-acid-reference-chart.html#prop'
# D. R. Lide, Handbook of Chemistry and Physics, 72nd Edition, CRC Press, Boca Raton, FL, 1991.
pI = {
    "A": 6.0,
    "C": 5.07,
    "D": 2.77,
    "E": 3.22,
    "F": 5.48,
    "G": 5.97,
    "H": 7.59,
    "I": 6.02,
    "K": 9.74,
    "L": 5.98,
    "M": 5.74,
    "N": 5.41,
    "P": 6.3,
    "Q": 5.65,
    "R": 10.76,
    "S": 5.68,
    "T": 5.6,
    "V": 5.96,
    "W": 5.89,
    "Y": 5.66,
}

pK_COOH = {
    "A": 2.34,
    "C": 1.96,
    "D": 1.88,
    "E": 2.19,
    "F": 1.83,
    "G": 2.34,
    "H": 1.82,
    "I": 2.36,
    "K": 2.18,
    "L": 2.36,
    "M": 2.28,
    "N": 2.02,
    "P": 1.99,
    "Q": 2.17,
    "R": 2.17,
    "S": 2.21,
    "T": 2.09,
    "V": 2.32,
    "W": 2.83,
    "Y": 2.2,
}

pK_NH3 = {
    "A": 9.69,
    "C": 10.28,
    "D": 9.6,
    "E": 9.67,
    "F": 9.13,
    "G": 9.6,
    "H": 9.17,
    "I": 9.6,
    "K": 8.95,
    "L": 9.6,
    "M": 9.21,
    "N": 8.8,
    "P": 10.6,
    "Q": 9.13,
    "R": 9.04,
    "S": 9.15,
    "T": 9.1,
    "V": 9.62,
    "W": 9.39,
    "Y": 9.11,
}

pK_Rgroup = {
    "A": None,
    "C": 8.18,
    "D": 3.65,
    "E": 4.25,
    "F": None,
    "G": None,
    "H": 6.0,
    "I": None,
    "K": 10.53,
    "L": None,
    "M": None,
    "N": None,
    "P": None,
    "Q": None,
    "R": 12.48,
    "S": None,
    "T": None,
    "V": None,
    "W": None,
    "Y": 10.07,
}


[docs]def get_aa_code(aa_letter): """Get three-letter aa code if possible. If not, return None. If three-letter code is None, will have to find this later from the filesystem. Parameters ---------- aa_letter : str One-letter amino acid code. Returns ------- aa_code : str, or None Three-letter aa code. """ aa_code = None if aa_letter != "X": for key, val in standard_amino_acids.items(): if key == aa_letter: aa_code = val return aa_code
[docs]def get_aa_letter(aa_code): """Get one-letter version of aa_code if possible. If not, return 'X'. Parameters ---------- aa_code : str Three-letter amino acid code. Returns ------- aa_letter : str One-letter aa code. Default value is 'X'. """ aa_letter = "X" for key, val in standard_amino_acids.items(): if val == aa_code: aa_letter = key return aa_letter
[docs]def get_aa_info(code): """Get dictionary of information relating to a new amino acid code not currently in the database. Notes ----- Use this function to get a dictionary that is then to be sent to the function add_amino_acid_to_json(). use to fill in rows of amino_acid table for new amino acid code. Parameters ---------- code : str Three-letter amino acid code. Raises ------ IOError If unable to locate the page associated with the amino acid name on the PDBE site. Returns ------- aa_dict : dict Keys are AminoAcidDB field names. Values are the str values for the new amino acid, scraped from the PDBE if possible. None if not found. """ letter = "X" # Try to get content from PDBE. url_string = ( "http://www.ebi.ac.uk/pdbe-srv/pdbechem/chemicalCompound/show/{0}".format(code) ) r = requests.get(url_string) # Raise error if content not obtained. if not r.ok: raise IOError("Could not get to url {0}".format(url_string)) # Parse r.text in an ugly way to get the required information. description = r.text.split("<h3>Molecule name")[1].split("</tr>")[0] description = description.strip().split("\n")[3].strip()[:255] modified = r.text.split("<h3>Standard parent ")[1].split("</tr>")[0] modified = modified.replace(" ", "").replace("\n", "").split("<")[-3].split(">")[-1] if modified == "NotAssigned": modified = None # Add the required information to a dictionary which can then be passed to add_amino_acid_to_json. aa_dict = { "code": code, "description": description, "modified": modified, "letter": letter, } return aa_dict
[docs]def add_amino_acid_to_json( code, description, letter="X", modified=None, force_add=False ): """Add an amino acid to the amino_acids.json file used to populate the amino_acid table. Parameters ---------- code : str New code to be added to amino acid table. description : str Description of the amino acid, e.g. 'amidated terminal carboxy group'. letter : str, optional One letter code for the amino acid. Defaults to 'X' modified : str or None, optional Code of modified amino acid, e.g. 'ALA', or None. Defaults to None force_add : bool, optional If True, will over-write existing dictionary value for code if already in amino_acids.json. If False, then an IOError is raised if code is already in amino_acids.json. Raises ------ IOError If code is already in amino_acids.json and force_add is False. Returns ------- None """ # If code is already in the dictionary, raise an error if (not force_add) and code in AMINO_ACIDS_DATA.keys(): raise IOError( "{0} is already in the amino_acids dictionary, with values: {1}".format( code, AMINO_ACIDS_DATA[code] ) ) # Prepare data to be added. add_code = code add_code_dict = {"description": description, "letter": letter, "modified": modified} # Check that data does not already exist, and if not, add it to the dictionary. AMINO_ACIDS_DATA[add_code] = add_code_dict # Write over json file with updated dictionary. with open(AA_PATH, "w") as foo: foo.write(json.dumps(AMINO_ACIDS_DATA)) return
__author__ = "Jack W. Heal"