Source code for prody.ensemble.functions

"""This module defines a functions for handling conformational ensembles."""

import os.path

import numpy as np

from prody.proteins import fetchPDB, parsePDB, writePDB
from prody.utilities import openFile, showFigure
from prody import LOGGER, SETTINGS

from .ensemble import *
from .pdbensemble import *
from .conformation import *

__all__ = ['saveEnsemble', 'loadEnsemble', 'trimPDBEnsemble',
           'calcOccupancies', 'showOccupancies', 'alignPDBEnsemble']


[docs]def saveEnsemble(ensemble, filename=None, **kwargs): """Save *ensemble* model data as :file:`filename.ens.npz`. If *filename* is ``None``, title of the *ensemble* will be used as the filename, after white spaces in the title are replaced with underscores. Extension is :file:`.ens.npz`. Upon successful completion of saving, filename is returned. This function makes use of :func:`numpy.savez` function.""" if not isinstance(ensemble, Ensemble): raise TypeError('invalid type for ensemble, {0}' .format(type(ensemble))) if len(ensemble) == 0: raise ValueError('ensemble instance does not contain data') dict_ = ensemble.__dict__ attr_list = ['_title', '_confs', '_weights', '_coords'] if isinstance(ensemble, PDBEnsemble): attr_list.append('_labels') attr_list.append('_trans') if filename is None: filename = ensemble.getTitle().replace(' ', '_') attr_dict = {} for attr in attr_list: value = dict_[attr] if value is not None: attr_dict[attr] = value filename += '.ens.npz' ostream = openFile(filename, 'wb', **kwargs) np.savez(ostream, **attr_dict) ostream.close() return filename
[docs]def loadEnsemble(filename): """Return ensemble instance loaded from *filename*. This function makes use of :func:`numpy.load` function. See also :func:`saveEnsemble`""" attr_dict = np.load(filename) if '_weights' in attr_dict: weights = attr_dict['_weights'] else: weights = None isPDBEnsemble = False try: title = str(attr_dict['_title']) except KeyError: title = str(attr_dict['_name']) if weights is not None and weights.ndim == 3: isPDBEnsemble = True ensemble = PDBEnsemble(title) else: ensemble = Ensemble(title) ensemble.setCoords(attr_dict['_coords']) if isPDBEnsemble: ensemble.addCoordset(attr_dict['_confs'], weights) if '_identifiers' in attr_dict.files: ensemble._labels = list(attr_dict['_identifiers']) if '_labels' in attr_dict.files: ensemble._labels = list(attr_dict['_labels']) if '_trans' in attr_dict.files: ensemble._trans = attr_dict['_trans'] else: ensemble.addCoordset(attr_dict['_confs']) if weights is not None: ensemble.setWeights(weights) return ensemble
[docs]def trimPDBEnsemble(pdb_ensemble, **kwargs): """Return a new PDB ensemble obtained by trimming given *pdb_ensemble*. This function helps selecting atoms in a pdb ensemble based on one of the following criteria, and returns them in a new :class:`.PDBEnsemble` instance. **Occupancy** Resulting PDB ensemble will contain atoms whose occupancies are greater or equal to *occupancy* keyword argument. Occupancies for atoms will be calculated using ``calcOccupancies(pdb_ensemble, normed=True)``. :arg occupancy: occupancy for selecting atoms, must satisfy ``0 < occupancy <= 1`` :type occupancy: float """ if not isinstance(pdb_ensemble, PDBEnsemble): raise TypeError('pdb_ensemble argument must be a PDBEnsemble') if pdb_ensemble.numConfs() == 0 or pdb_ensemble.numAtoms() == 0: raise ValueError('pdb_ensemble must have conformations') if 'occupancy' in kwargs: occupancy = float(kwargs['occupancy']) assert 0 < occupancy <= 1, ('occupancy is not > 0 and <= 1: ' '{0}'.format(repr(occupancy))) n_confs = pdb_ensemble.numConfs() assert n_confs > 0, 'pdb_ensemble does not contain any conformations' occupancies = calcOccupancies(pdb_ensemble, normed=True) #assert weights is not None, 'weights must be set for pdb_ensemble' #weights = weights.flatten() #mean_weights = weights / n_confs torf = occupancies >= occupancy else: return None trimmed = PDBEnsemble(pdb_ensemble.getTitle()) coords = pdb_ensemble.getCoords() if coords is not None: trimmed.setCoords(coords[torf]) confs = pdb_ensemble.getCoordsets() if confs is not None: weights = pdb_ensemble.getWeights() trimmed.addCoordset(confs[:, torf], weights[:, torf]) return trimmed
[docs]def calcOccupancies(pdb_ensemble, normed=False): """Return occupancy calculated from weights of a :class:`.PDBEnsemble`. Any non-zero weight will be considered equal to one. Occupancies are calculated by binary weights for each atom over the conformations in the ensemble. When *normed* is ``True``, total weights will be divided by the number of atoms. This function can be used to see how many times a residue is resolved when analyzing an ensemble of X-ray structures.""" if not isinstance(pdb_ensemble, PDBEnsemble): raise TypeError('pdb_ensemble must be a PDBEnsemble instance') if len(pdb_ensemble) == 0: raise ValueError('pdb_ensemble does not contain any conformations') assert isinstance(normed, bool), 'normed must be a boolean' weights = pdb_ensemble.getWeights() if weights is None: raise ValueError('pdb_ensemble weights are not set') occupancies = weights.astype(bool).sum(0).astype(float).flatten() if normed: return occupancies / len(pdb_ensemble) else: return occupancies
[docs]def showOccupancies(pdbensemble, *args, **kwargs): """Show occupancies for the PDB ensemble using :func:`~matplotlib.pyplot. plot`. Occupancies are calculated using :meth:`calcOccupancies`.""" import matplotlib.pyplot as plt if not isinstance(pdbensemble, PDBEnsemble): raise TypeError('pdbensemble must be a PDBEnsemble instance') weights = calcOccupancies(pdbensemble) if weights is None: return None show = plt.plot(weights, *args, **kwargs) axis = list(plt.axis()) axis[2] = 0 axis[3] += 1 plt.axis(axis) plt.xlabel('Atom index') plt.ylabel('Sum of weights') if SETTINGS['auto_show']: showFigure() return show
def checkWeights(weights, n_atoms, n_csets=None): """Return weights if checks pass, otherwise raise an exception.""" assert isinstance(n_atoms, int) and n_atoms > 0, \ 'n_atoms must be a positive integer' assert n_csets is None or isinstance(n_csets, int) and n_csets > 0, \ 'n_csets must be a positive integer' if not isinstance(weights, np.ndarray): raise TypeError('weights must be a Numpy array') elif n_csets is None and weights.ndim not in (1, 2): raise ValueError('weights.dim must be 1 or 2') elif n_csets is not None: if weights.ndim not in (1, 2, 3): raise ValueError('weights.dim must be 1, 2, or 3') elif weights.ndim == 3 and weights.shape[0] != n_csets: raise ValueError('weights.shape must be (n_csets, n_atoms, 1)') elif weights.ndim in (2, 3) and weights.shape[-1] != 1: raise ValueError('shape of weights must be ([n_csets,] n_atoms, 1)') elif weights.dtype != float: try: weights = weights.astype(float) except ValueError: raise ValueError('weights.astype(float) failed, float type could ' 'not be assigned') if np.any(weights < 0): raise ValueError('all weights must greater or equal to 0') if weights.ndim == 1: weights = weights.reshape((n_atoms, 1)) if n_csets is not None and weights.ndim == 2: weights = np.tile(weights.reshape((1, n_atoms, 1)), (n_csets, 1, 1)) return weights
[docs]def alignPDBEnsemble(ensemble, suffix='_aligned', outdir='.', gzip=False): """Align PDB files using transformations from *ensemble*, which may be a :class:`.PDBEnsemble` or a :class:`.PDBConformation` instance. Label of the conformation (see :meth:`~.PDBConformation.getLabel`) will be used to determine the PDB structure and model number. First four characters of the label is expected to be the PDB identifier and ending numbers to be the model number. For example, the :class:`.Transformation` from conformation with label *2k39_ca_selection_'resnum_<_71'_m116* will be applied to 116th model of structure **2k39**. After applicable transformations are made, structure will be written into *outputdir* as :file:`2k39_aligned.pdb`. If *gzip* is **True**, output files will be compressed. Return value is the output filename or list of filenames, in the order files are processed. Note that if multiple models from a file are aligned, that filename will appear in the list multiple times.""" if not isinstance(ensemble, (PDBEnsemble, PDBConformation)): raise TypeError('ensemble must be a PDBEnsemble or PDBConformation') if isinstance(ensemble, PDBConformation): ensemble = [ensemble] if gzip: gzip = '.gz' else: gzip = '' output = [] pdbdict = {} for conf in ensemble: trans = conf.getTransformation() if trans is None: raise ValueError('transformations are not calculated, call ' '`superpose` or `iterpose`') label = conf.getLabel() pdb = label[:4] filename = pdbdict.get(pdb, fetchPDB(pdb)) if filename is None: LOGGER.warning('PDB file for conformation {0} is not found.' .format(label)) output.append(None) continue LOGGER.info('Parsing PDB file {0} for conformation {1}.' .format(pdb, label)) acsi = None model = label.rfind('m') if model > 3: model = label[model+1:] if model.isdigit(): acsi = int(model) - 1 LOGGER.info('Applying transformation to model {0}.' .format(model)) if isinstance(filename, str): ag = parsePDB(filename) else: ag = filename if acsi is not None: if acsi >= ag.numCoordsets(): LOGGER.warn('Model number {0} for {1} is out of range.' .format(model, pdb)) output.append(None) continue ag.setACSIndex(acsi) trans.apply(ag) outfn = os.path.join(outdir, pdb + suffix + '.pdb' + gzip) if ag.numCoordsets() > 1: pdbdict[pdb] = ag else: writePDB(outfn, ag) output.append(os.path.normpath(outfn)) for pdb, ag in pdbdict.items(): # PY3K: OK writePDB(os.path.join(outdir, pdb + suffix + '.pdb' + gzip), ag) if len(output) == 1: return output[0] else: return output
Read the Docs v: v1.5
Versions
latest
v1.5
Downloads
On Read the Docs
Project Home
Builds

Free document hosting provided by Read the Docs.