Source code for isambard.modelling.scwrl

"""This module provides an interface to the program Scwrl4.

The Scwrl executable must be on your path. Run the `test_scwrl`
function to determine if it is available.

For more information on Scwrl see [1].

References
----------
.. [1] Krivov GG, Shapovalov MV, and Dunbrack Jr RL (2009) "Improved
   prediction of protein side-chain conformations with SCWRL4.",
   Proteins.
"""

import os
import subprocess
import tempfile
import re

import ampal


[docs]def scwrl_available(): """True if Scwrl is available.""" available = False try: subprocess.check_output(['Scwrl4'], stderr=subprocess.DEVNULL) except subprocess.CalledProcessError: available = True except FileNotFoundError: print("Scwrl4 has not been found on your path. If you have already " "installed Scwrl but are unsure how to add it to your path, " "check out this: https://stackoverflow.com/a/14638025")
return available
[docs]def run_scwrl(pdb, sequence, path=True, rigid_rotamer_model=True, hydrogens=False): """Runs SCWRL on input PDB strong or path to PDB and a sequence string. Parameters ---------- pdb : str PDB string or a path to a PDB file. sequence : str Amino acid sequence for SCWRL to pack in single-letter code. path : bool, optional True if pdb is a path. rigid_rotamer_model : bool, optional If True, Scwrl will use the rigid-rotamer model, which is faster but less accurate. hydrogens : bool, optional If False, the hydrogens produced by Scwrl will be ommitted. Returns ------- scwrl_std_out : str Std out from SCWRL. scwrl_pdb : str String of packed SCWRL PDB. Raises ------ ChildProcessError Raised if SCWRL failed to run. """ if path: with open(pdb, 'r') as inf: pdb = inf.read() pdb = pdb.encode() sequence = sequence.encode() try: with tempfile.NamedTemporaryFile(delete=False) as scwrl_tmp,\ tempfile.NamedTemporaryFile(delete=False) as scwrl_seq,\ tempfile.NamedTemporaryFile(delete=False) as scwrl_out: scwrl_tmp.write(pdb) scwrl_tmp.seek(0) # Resets the buffer back to the first line scwrl_seq.write(sequence) scwrl_seq.seek(0) scwrl_command = ['Scwrl4', '-i', scwrl_tmp.name, '-o', scwrl_out.name, '-s', scwrl_seq.name] if rigid_rotamer_model: scwrl_command.append('-v') if not hydrogens: scwrl_command.append('-h') scwrl_std_out = subprocess.check_output(scwrl_command) scwrl_out.seek(0) scwrl_pdb = scwrl_out.read() finally: os.remove(scwrl_tmp.name) os.remove(scwrl_out.name) os.remove(scwrl_seq.name) if not scwrl_pdb: raise ChildProcessError( 'SCWRL failed to run. SCWRL:\n{}'.format(scwrl_std_out))
return scwrl_std_out.decode(), scwrl_pdb.decode()
[docs]def parse_scwrl_out(scwrl_std_out, scwrl_pdb): """Parses SCWRL output and returns PDB and SCWRL score. Parameters ---------- scwrl_std_out : str Std out from SCWRL. scwrl_pdb : str String of packed SCWRL PDB. Returns ------- fixed_scwrl_str : str String of packed SCWRL PDB, with correct PDB format. score : float SCWRL Score """ score = re.findall( r'Total minimal energy of the graph = ([-0-9.]+)', scwrl_std_out)[0] # Add temperature factors to SCWRL out split_scwrl = scwrl_pdb.splitlines() fixed_scwrl = [] for line in split_scwrl: if len(line) < 80: line += ' ' * (80 - len(line)) if re.search(r'H?E?T?ATO?M\s+\d+.+', line): front = line[:61] temp_factor = ' 0.00' back = line[66:] fixed_scwrl.append(''.join([front, temp_factor, back])) else: fixed_scwrl.append(line) fixed_scwrl_str = '\n'.join(fixed_scwrl) + '\n'
return fixed_scwrl_str, float(score)
[docs]def pack_side_chains_scwrl(assembly, sequences, rigid_rotamer_model=True, hydrogens=False): """Packs side chains onto a protein structure. Parameters ---------- assembly : AMPAL Assembly AMPAL object containing some protein structure. sequence : [str] A list of amino acid sequences in single-letter code for Scwrl to pack. rigid_rotamer_model : bool, optional If True, Scwrl will use the rigid-rotamer model, which is faster but less accurate. hydrogens : bool, optional If False, the hydrogens produced by Scwrl will be ommitted. Returns ------- packed_structure : AMPAL Assembly A new AMPAL Assembly containing the packed structure, with the Scwrl score in the tags. """ if not scwrl_available(): raise ValueError('Scwrl4 is unavailable on your system path.') protein = [x for x in assembly if isinstance(x, ampal.Polypeptide)] total_seq_len = sum([len(x) for x in sequences]) total_aa_len = sum([len(x) for x in protein]) if total_seq_len != total_aa_len: raise ValueError('Total sequence length ({}) does not match ' 'total Polypeptide length ({}).'.format( total_seq_len, total_aa_len)) if len(protein) != len(sequences): raise ValueError('Number of sequences ({}) does not match ' 'number of Polypeptides ({}).'.format( len(sequences), len(protein))) scwrl_std_out, scwrl_pdb = run_scwrl( assembly.pdb, ''.join(sequences), path=False, rigid_rotamer_model=rigid_rotamer_model, hydrogens=hydrogens) packed_structure, scwrl_score = parse_scwrl_out(scwrl_std_out, scwrl_pdb) new_assembly = ampal.load_pdb(packed_structure, path=False) new_assembly.tags['scwrl_score'] = scwrl_score
return new_assembly __author__ = 'Christopher W. Wood'