# Source code for pmlbeta.setbetahelix

import warnings

try:
    from pymol import cmd
except ImportError:
    warnings.warn(
        'Cannot import PyMOL: functionality will suffer (you can ignore this if you are just building the documentation).')
import itertools
from .utils import set_dihedral
from typing import Union, List, Tuple, Optional, Iterable
from .secstructdb import SecondaryStructureDB
import re


def is_amino_acid(selection: str) -> bool:
    """Tell whether the selection looks like exactly one amino-acid residue.

    The selection qualifies only if each of the following sub-selections
    contains exactly one atom:

    * a nitrogen named "N",
    * a carbon named "C",
    * a carbon named "CA" which is bonded to a carbon named "C".

    A count of zero means the atom is missing (not an amino acid); a count
    above one means the selection spans more than a single residue.

    :param selection: the selection
    :type selection: str
    :return: True if all three checks pass, False otherwise
    :rtype: bool
    """
    # The checks are evaluated in this order and stop at the first failure.
    single_atom_templates = (
        '({}) and name N and symbol N',
        '({}) and name C and symbol C',
        '({}) and (neighbor (name C and symbol C)) and (name CA and symbol C)',
    )
    for template in single_atom_templates:
        if cmd.count_atoms(template.format(selection)) != 1:
            return False
    return True


def is_beta(selection: str) -> bool:
    """Tell whether the selected residue is a beta-amino acid.

    On top of the generic amino-acid test, the selection must contain exactly
    one atom named "CB" or "CB1" that is bonded both to the "N" and to the
    "CA" atom of the selection.

    :param selection: the selection to analyze
    :type selection: str
    :return: True if the residue is a beta-amino acid, False otherwise
    :rtype: bool
    """
    # atoms named CB/CB1 sitting between the backbone nitrogen and CA
    bridging_carbon = (
        "(neighbor (({0}) and name N)) and (neighbor (({0}) and name CA)) and ({0}) and name CB+CB1"
    ).format(selection)
    return is_amino_acid(selection) and cmd.count_atoms(bridging_carbon) == 1


def is_alpha(selection: str) -> bool:
    """Tell whether the selected residue is an alpha-amino acid.

    On top of the generic amino-acid test, the selection must contain exactly
    one atom named "CA" that is directly bonded to the "N" atom of the
    selection.

    :param selection: the selection to analyze
    :type selection: str
    :return: True if the residue is an alpha-amino acid, False otherwise
    :rtype: bool
    """
    if not is_amino_acid(selection):
        return False
    # "CA" atoms of the selection that are neighbours of its "N" atom
    ca_bonded_to_n = "(neighbor (({0}) and name N)) and ({0}) and name CA".format(selection)
    n_matches = cmd.count_atoms(ca_bonded_to_n)
    return n_matches == 1


def fold_bp(sstype: str, selection: str = '(all)'):
    """
    DESCRIPTION

        Adjust the torsion angles of an alpha- or beta-peptide to fold it into
        the desired secondary structure

    USAGE

        fold_bp sstype [, selection]

    ARGUMENTS

        sstype = string: the desired secondary structure. The following
        possibilities exist:

            1. a name from the secondary structure database (see command
               ssdb_list)

            2. two or three (for alpha- and beta-amino acids, respectively)
               space-separated floating point numbers in parentheses,
               corresponding to the backbone dihedral angles

        A secondary structure given this way is applied to all residues in
        the selection.

        Additionally, a space-separated list with one item per residue of the
        selection can be given in square brackets, containing entry names or
        angle tuples (or a mix of them), corresponding to the residues.

        selection = the selection to operate on. Must be a single peptide
        chain with unique and consecutive residue IDs (default: all)

    EXAMPLES

        fold_bp H14M, model valxval

        fold_bp (-140.3 66.5 -136.8), model valxval

        fold_bp [(-140.3 66.5 -136.8) (180 180 180) H14M], model tripeptide

    NOTES

        A ValueError is raised if a tuple given as sstype contains anything
        other than floats or None, or if the selection contains no atoms.
        If fewer secondary structures are given than there are residues, the
        remaining residues are left untouched and a warning is printed.

    SEE ALSO

        ssdb_add, ssdb_del, ssdb_dihedrals, ssdb_resetdefaults, ssdb_list
    """
    if isinstance(sstype, str):
        # parse the string into an iterable of (phi, theta, psi) tuples of floats or Nones
        sstype = parse_sstype(sstype)
    elif isinstance(sstype, tuple):
        # we should have a tuple of floats or Nones
        if not all(isinstance(x, float) or x is None for x in sstype):
            raise ValueError('Error in secondary structure type: {}'.format(sstype))
        sstype = itertools.cycle([sstype])  # use this for all residues
    # otherwise try to use sstype as an iterable, producing tuples of floats or Nones

    # unique residue numbers of the selection, in increasing order
    residues = sorted({a.resi_number for a in cmd.get_model(selection).atom})
    if not residues:
        # fail with a meaningful message instead of an opaque error further below
        raise ValueError('No residues found in selection: {}'.format(selection))

    r = None
    for r, angles in zip(residues, sstype):
        phi, theta, psi = angles
        residue_selection = '({}) and resi {}'.format(selection, r)
        if is_beta(residue_selection):
            # beta-residue backbone: C(i-1) - N - CB - CA - C - N(i+1)
            set_dihedral(selection, ('C', r - 1), ('N', r), ('CB+CB1', r), ('CA', r), phi)
            set_dihedral(selection, ('N', r), ('CB+CB1', r), ('CA', r), ('C', r), theta)
            set_dihedral(selection, ('CB+CB1', r), ('CA', r), ('C', r), ('N', r + 1), psi)
        elif is_alpha(residue_selection):
            # alpha-residue backbone: C(i-1) - N - CA - C - N(i+1); theta is not used
            set_dihedral(selection, ('C', r - 1), ('N', r), ('CA', r), ('C', r), phi)
            set_dihedral(selection, ('N', r), ('CA', r), ('C', r), ('N', r + 1), psi)
        else:
            # not an amino acid, do nothing with this.
            continue

    if r != residues[-1]:
        # after the for loop, r must be the largest residue number. If this is not the case,
        # too few angle triplets were given. Do nothing at present, just warn the user
        print('Warning: not all residues have been processed (too few secondary structures given)')
    cmd.unpick()
    cmd.orient(selection)


# A floating point literal: optional sign (whitespace may follow the sign), then either digits with
# optional decimals or a bare decimal part led in by a decimal point, then an optional exponent.
_SS_FLOAT = r"(?:[+-]\s*)?(?:\d+(?:\.\d*)?|\.\d+)(?:[eE][+-]?\d+)?"

# A parenthesized part (without nested parentheses).
_SS_PAREN_RE = re.compile(r"\([^()]*\)")

# Two or three whitespace-separated angles in parentheses. If three are given, the middle one may be
# the literal word "None".
_SS_TUPLE_RE = re.compile(
    r"\(\s*(?P<phi>{0})\s+(?:(?P<theta>{0}|None)\s+)?(?P<psi>{0})\s*\)".format(_SS_FLOAT))


def _ss_to_float(text: str) -> float:
    """Convert a matched float literal to float, tolerating whitespace after the sign."""
    return float(''.join(text.split()))


def _ss_parse_angle_tuple(parenpart: str) -> Tuple[float, Optional[float], float]:
    """Parse one parenthesized dihedral angle tuple, e.g. "(-120 80 -136)" or "(-60 -45)"."""
    m = _SS_TUPLE_RE.fullmatch(parenpart)
    if m is None:
        raise ValueError('Invalid dihedral angle tuple: {}'.format(parenpart))
    theta = m.group('theta')
    if theta is None or theta == 'None':
        # two angles, or an explicit None in the middle
        return (_ss_to_float(m.group('phi')), None, _ss_to_float(m.group('psi')))
    return (_ss_to_float(m.group('phi')), _ss_to_float(theta), _ss_to_float(m.group('psi')))


def _ss_check_angle_tuple(angles: tuple) -> tuple:
    """Validate an already parsed (float, float-or-None, float) tuple and return it unchanged."""
    if len(angles) != 3:
        raise ValueError('Invalid tuple length: {}'.format(len(angles)))
    phi, theta, psi = angles
    if not (isinstance(phi, float) and (theta is None or isinstance(theta, float)) and isinstance(psi, float)):
        raise ValueError(
            'Incorrect torsion angle type(s): {}, {}, {}'.format(type(phi), type(theta), type(psi)))
    return angles


def _ss_lookup_entries(text: str) -> list:
    """Look up every whitespace-separated entry name of `text` in the secondary structure database."""
    return [SecondaryStructureDB.dihedrals(name) for name in text.split()]


def parse_sstype(sstype: Union[
    str, Tuple[float, Optional[float], float], List[Union[str, Tuple[float, Optional[float], float]]]]) -> Iterable[
    Tuple[float, Optional[float], float]]:
    """Parse the secondary structure information for beta- and alpha-peptides

    The desired secondary structure type can be given in two ways:

    1. two or three floating point numbers in parentheses, separated by whitespace: the backbone
       torsion angles. If three are given, the middle one can be None (for alpha-amino acids); if
       only two are given, the middle angle is taken to be None.
    2. a name of an entry in the secondary structure database (simple string)

    Additionally, a list can be given from any of the above, inside square brackets and separated by
    whitespace. E.g.: [ (-120 80 -136) H14M (130 50 45) ... ]

    The thing is more complicated because PyMOL gives all parameters in string format.

    :param sstype: a string as described above, an already parsed (phi, theta, psi) tuple, or a list
        of entry names and/or such tuples
    :type sstype: str, tuple or list
    :return: for a single tuple or a single entry name, an endless iterator repeating that one
        (phi, theta, psi) tuple (to be applied to all residues); for a list, a list with one tuple
        per item
    :rtype: iterable of (float, float or None, float) tuples
    :raises ValueError: on unmatched or misplaced brackets/parentheses, on a malformed angle tuple,
        or on a tuple of wrong length or element types
    :raises TypeError: if `sstype` or an item of the list is of an unsupported type

    Entry names are resolved with SecondaryStructureDB.dihedrals(); unknown names presumably raise
    KeyError there -- confirm against that class.
    """
    if isinstance(sstype, tuple):
        # this must be a tuple of three floats, the middle one can be None
        return itertools.cycle([_ss_check_angle_tuple(sstype)])  # one-applies-for-all case

    if isinstance(sstype, list):
        # every element must be a correct tuple or the name of an entry in the database
        parsed = []
        for item in sstype:
            if isinstance(item, str):
                parsed.append(SecondaryStructureDB.dihedrals(item))
            elif isinstance(item, tuple):
                parsed.append(_ss_check_angle_tuple(item))
            else:
                raise TypeError('Invalid type: {}, ({})'.format(item, type(item)))
        return parsed  # list of tuples

    if not isinstance(sstype, str):
        raise TypeError('Invalid type: {}, ({})'.format(sstype, type(sstype)))

    sstype = sstype.strip()
    # some validation (not complete, i.e. mismatched/mixed up parentheses or brackets are not detected)
    if sstype.count('[') != sstype.count(']'):  # 0==0 can also happen
        raise ValueError('Unmatched brackets in secondary structure type {}'.format(sstype))
    if sstype.count('(') != sstype.count(')'):  # 0==0 can also happen
        raise ValueError('Unmatched parentheses in secondary structure type {}'.format(sstype))

    if sstype.startswith('[') and sstype.endswith(']'):
        # parse it as a list: alternate between entry names and parenthesized angle tuples
        inner = sstype[1:-1].strip()  # cut the brackets
        items = []
        cursor = 0  # end of the previously handled parenthesized part
        for paren in _SS_PAREN_RE.finditer(inner):
            # entry names between the previous parenthesized part and this one
            items.extend(_ss_lookup_entries(inner[cursor:paren.start()]))
            items.append(_ss_parse_angle_tuple(paren.group()))
            cursor = paren.end()
        # entry names after the last parenthesized part (or the whole string if there was none)
        items.extend(_ss_lookup_entries(inner[cursor:]))
        return items  # list of tuples
    if '[' in sstype:
        raise ValueError(
            'Opening bracket must be the first character in the secondary structure type {}'.format(sstype))
    if sstype.startswith('(') and sstype.endswith(')'):
        return itertools.cycle([_ss_parse_angle_tuple(sstype)])  # one-applies-for-all case
    if '(' in sstype:
        raise ValueError(
            'Opening parenthesis must be the first character in the secondary structure type {}'.format(sstype))
    # no [, no ], no ( and no ) in the string, it must be an entry in the secondary structure database
    return itertools.cycle([SecondaryStructureDB.dihedrals(sstype)])  # one-applies-for-all case