import warnings
try:
from pymol import cmd
except ImportError:
warnings.warn(
'Cannot import PyMOL: functionality will suffer (you can ignore this if you are just building the documentation).')
import itertools
from .utils import set_dihedral
from typing import Union, List, Tuple, Optional, Iterable
from .secstructdb import SecondaryStructureDB
import re
def is_amino_acid(selection: str) -> bool:
    """Decide if the selection is a single amino-acid

    The selection qualifies when each of the following atom counts, as
    reported by PyMOL, is exactly one:

    1. nitrogen atoms named "N" in the selection,
    2. carbon atoms named "C" in the selection,
    3. carbon atoms named "CA" in the selection which are neighbours of a
       carbon atom named "C".

    A count of zero means the selection is not an amino-acid, a count above
    one means it is not a single residue. The checks are evaluated in this
    order and stop at the first one that fails.

    :param selection: the selection
    :type selection: str
    :return: True or False
    :rtype: bool
    """
    single_atom_templates = (
        # backbone nitrogen
        '({}) and name N and symbol N',
        # carbonyl carbon
        '({}) and name C and symbol C',
        # the "C" atom must have a "CA" neighbour
        '({}) and (neighbor (name C and symbol C)) and (name CA and symbol C)',
    )
    return all(
        cmd.count_atoms(template.format(selection)) == 1
        for template in single_atom_templates
    )
def is_beta(selection: str) -> bool:
    """Decide if a residue is a beta-amino acid

    The selection must first pass :func:`is_amino_acid`. It is then reported
    as a beta-amino acid when it contains exactly one atom named "CB" or
    "CB1" which is a neighbour of both the "N" and the "CA" atoms of the
    selection.

    :param selection: the selection to analyze
    :type selection: str
    :return: True or False
    :rtype: bool
    """
    if not is_amino_acid(selection):
        return False
    # an atom called "CB" or "CB1" bridging "N" and "CA"
    bridging_carbon = (
        "(neighbor (({0}) and name N)) and (neighbor (({0}) and name CA)) "
        "and ({0}) and name CB+CB1"
    ).format(selection)
    return cmd.count_atoms(bridging_carbon) == 1
def is_alpha(selection: str) -> bool:
    """Decide if a residue is an alpha-amino acid

    The selection must first pass :func:`is_amino_acid`. It is then reported
    as an alpha-amino acid when it contains exactly one atom named "CA"
    which is a neighbour of the "N" atom of the selection.

    :param selection: the selection to analyze
    :type selection: str
    :return: True or False
    :rtype: bool
    """
    if not is_amino_acid(selection):
        return False
    # "CA" directly bound to "N"
    alpha_carbon = "(neighbor (({0}) and name N)) and ({0}) and name CA".format(selection)
    return cmd.count_atoms(alpha_carbon) == 1
def fold_bp(sstype: str, selection: str = '(all)'):
    """
    DESCRIPTION

        Adjust the torsion angles of an alpha- or beta-peptide to
        fold it into the desired secondary structure

    USAGE

        fold_bp sstype [, selection]

    ARGUMENTS

        sstype = string: the desired secondary structure. The following possibilities exist:
            1. a name from the secondary structure database (see command ssdb_list)
            2. two or three (for alpha- and beta-amino acids, respectively) space-separated
               floating point numbers in parentheses, corresponding to the backbone dihedral angles
            A secondary structure given this way will be applied to all residues in the selection.
            Additionally, a space-separated list of the same length as the residues in the
            selection can be given in square brackets, containing entry names or angle tuples
            (or a mix of them), corresponding to the residues.

        selection = the selection to operate on. Must be a single peptide chain with
            unique and consecutive residue IDs (default: all)

    EXAMPLES

        fold_bp H14M, model valxval

        fold_bp (-140.3 66.5 -136.8), model valxval

        fold_bp [(-140.3 66.5 -136.8) (180 180 180) H14M], model tripeptide

    SEE ALSO

        ssdb_add, ssdb_del, ssdb_dihedrals, ssdb_resetdefaults, ssdb_list
    """
    if isinstance(sstype, str):
        # parse the string into an iterable of (phi, theta, psi) tuples
        sstype = parse_sstype(sstype)
    elif isinstance(sstype, tuple):
        # we should have a tuple of floats or Nones
        if not all(isinstance(x, float) or x is None for x in sstype):
            raise ValueError('Error in secondary structure type: {}'.format(sstype))
        sstype = itertools.cycle([sstype])  # use this for all residues
    # otherwise try to use sstype as an iterable, producing tuples of floats or Nones

    # unique residue numbers of the selection, in ascending order
    residues = sorted({a.resi_number for a in cmd.get_model(selection).atom})
    if not residues:
        # nothing to fold; previously this case crashed in max() on an empty list
        print('Warning: no residues found in selection {}'.format(selection))
        return

    r = None
    for r, angles in zip(residues, sstype):
        phi, theta, psi = angles
        residue_selection = '({}) and resi {}'.format(selection, r)
        if is_beta(residue_selection):
            # beta-residue: three backbone torsions, N - CB(CB1) - CA - C
            set_dihedral(selection, ('C', r - 1), ('N', r), ('CB+CB1', r), ('CA', r), phi)
            set_dihedral(selection, ('N', r), ('CB+CB1', r), ('CA', r), ('C', r), theta)
            set_dihedral(selection, ('CB+CB1', r), ('CA', r), ('C', r), ('N', r + 1), psi)
        elif is_alpha(residue_selection):
            # alpha-residue: only phi and psi exist, theta is ignored
            set_dihedral(selection, ('C', r - 1), ('N', r), ('CA', r), ('C', r), phi)
            set_dihedral(selection, ('N', r), ('CA', r), ('C', r), ('N', r + 1), psi)
        # anything else is not an amino acid: leave it untouched

    if r != residues[-1]:
        # after the for loop, r must be the largest residue number. If this is not the case,
        # too few angle triplets were given. Do nothing at present, just warn the user
        print('Warning: not all residues have been processed (too few secondary structures given)')
    cmd.unpick()
    cmd.orient(selection)
# A floating point number, including the exponential form (re.VERBOSE syntax).
# The pattern has to end with a newline: it is substituted into the middle of
# a line of another verbose pattern, and the newline terminates its last comment.
_FLOAT_REGEX = r"""([+-]\s*)?   # optional sign and whitespace
    (                           # start of the mantissa
        (\d+(\.\d*)?)           # one option: some digits, then optionally some decimals
        |                       # or
        (\.\d+)                 # second option: only the decimals, led in by a decimal point
    )                           # end of the mantissa
    ([eE][+-]?\d+)?             # optionally, an exponent
"""

# a parenthesized part (without nested parentheses)
_PAREN_REGEX = re.compile(r"\([^()]*\)")

# a dihedral angle tuple; the angles must be separated by at least one whitespace
_SSTUPLE_REGEX = re.compile(r"""
    (   # two or three floats in parentheses, whitespace separated
        \(\s*(?P<angle1>{0})\s+(?P<angle2>{0})(\s+(?P<angle3>{0}))?\s*\)
    )
    |   # or
    (   # two floats with a literal None between them, in parentheses, whitespace separated
        \(\s*(?P<phi>{0})\s+(?P<theta>None)\s+(?P<psi>{0})\s*\)
    )
    """.format(_FLOAT_REGEX), re.VERBOSE)


def _angle_to_float(text: str) -> float:
    """Convert a matched angle to float, dropping whitespace between sign and digits."""
    return float(''.join(text.split()))


def _parse_parentuple(parenpart: str) -> Tuple[float, Optional[float], float]:
    """Parse a parenthesized part of a sec.structure definition string"""
    m = _SSTUPLE_REGEX.fullmatch(parenpart.strip())
    if m is None:
        raise ValueError('Invalid dihedral angle tuple: {}'.format(parenpart))
    if m.group('angle1') is not None:
        if m.group('angle3') is None:
            # two angles: phi and psi of an alpha-amino acid
            return (_angle_to_float(m.group('angle1')), None, _angle_to_float(m.group('angle2')))
        # three angles
        return (_angle_to_float(m.group('angle1')),
                _angle_to_float(m.group('angle2')),
                _angle_to_float(m.group('angle3')))
    # the only remaining alternative: (phi None psi)
    return (_angle_to_float(m.group('phi')), None, _angle_to_float(m.group('psi')))


def _validate_angle_tuple(angles: tuple) -> tuple:
    """Check that `angles` is (float, float or None, float) and return it unchanged"""
    if len(angles) != 3:
        raise ValueError('Invalid tuple length: {}'.format(len(angles)))
    if not (isinstance(angles[0], float) and
            (isinstance(angles[1], float) or angles[1] is None) and
            isinstance(angles[2], float)):
        raise ValueError(
            'Incorrect torsion angle type(s): {}, {}, {}'.format(type(angles[0]), type(angles[1]), type(angles[2])))
    return angles


def parse_sstype(sstype: Union[
    str, Tuple[float, Optional[float], float], List[Union[str, Tuple[float, Optional[float], float]]]]) -> Iterable[
    Tuple[float, Optional[float], float]]:
    """Parse the secondary structure information for beta- and alpha-peptides

    The desired secondary structure type can be given in two ways:

    1. two or three floating point numbers in parentheses, separated by
       whitespace: the backbone torsion angles. When three items are given,
       the middle one can be None for alpha-amino acids.
    2. a name of an entry in the secondary structure database (simple string)

    Additionally, a list can be given from any of the above, inside square brackets and
    separated by whitespace. E.g.:

        [ (-120 80 -136) H14M (130 50 45) ... ]

    The thing is more complicated because PyMOL gives all parameters in string format.

    :param sstype: a string as described above, an already parsed
        (float, float or None, float) tuple, or a list of entry names and such tuples
    :return: for a single tuple or a single entry name an endless iterator
        repeating the same angle tuple (one-applies-for-all case); for lists a
        list with one item per element
    :raises ValueError: on unbalanced brackets or parentheses, misplaced
        opening bracket or parenthesis, malformed angle tuples, or tuples
        of wrong length or element type
    :raises TypeError: if `sstype` or a list element has an unsupported type

    Entry names are resolved with ``SecondaryStructureDB.dihedrals``; whatever
    that call raises for unknown names is propagated unchanged.
    """
    if isinstance(sstype, tuple):
        # this must be a tuple of three floats, the middle one can be None
        return itertools.cycle([_validate_angle_tuple(sstype)])  # one-applies-for-all case

    if isinstance(sstype, list):
        # every element must be a correct tuple or the name of a database entry
        sstype_parsed = []
        for item in sstype:
            if isinstance(item, str):
                sstype_parsed.append(SecondaryStructureDB.dihedrals(item))
            elif isinstance(item, tuple):
                sstype_parsed.append(_validate_angle_tuple(item))
            else:
                raise TypeError('Invalid type: {}, ({})'.format(item, type(item)))
        return sstype_parsed  # list of tuples

    if not isinstance(sstype, str):
        raise TypeError('Invalid type: {}, ({})'.format(sstype, type(sstype)))

    sstype = sstype.strip()
    # some validation (not complete, i.e. mismatched/mixed up parentheses or brackets are not detected)
    if sstype.count('[') != sstype.count(']'):  # 0==0 can also happen
        raise ValueError('Unmatched brackets in secondary structure type {}'.format(sstype))
    if sstype.count('(') != sstype.count(')'):  # 0==0 can also happen
        raise ValueError('Unmatched parentheses in secondary structure type {}'.format(sstype))

    if sstype.startswith('[') and sstype.endswith(']'):
        # a list: angle tuples in parentheses, entry names everywhere else
        inner = sstype[1:-1].strip()  # cut the brackets

        def lookup_names(text: str) -> list:
            # whitespace-separated entry names -> dihedrals from the database
            return [SecondaryStructureDB.dihedrals(name) for name in text.split()]

        items = []
        cursor = 0  # end of the previously handled parenthesized part
        for paren in _PAREN_REGEX.finditer(inner):
            # entry names between the previous tuple (or the start) and this one
            items.extend(lookup_names(inner[cursor:paren.start()]))
            items.append(_parse_parentuple(paren.group()))
            cursor = paren.end()
        # entry names after the last tuple (or the whole string if there were no tuples)
        items.extend(lookup_names(inner[cursor:]))
        return items  # list of tuples

    if '[' in sstype:
        raise ValueError(
            'Opening bracket must be the first character in the secondary structure type {}'.format(sstype))
    if sstype.startswith('(') and sstype.endswith(')'):
        return itertools.cycle([_parse_parentuple(sstype)])  # one-applies-for-all case
    if '(' in sstype:
        raise ValueError(
            'Opening parenthesis must be the first character in the secondary structure type {}'.format(sstype))
    # no [, no ], no ( and no ) in the string, it must be an entry in the secondary structure database
    return itertools.cycle([SecondaryStructureDB.dihedrals(sstype)])  # one-applies-for-all case