Source code for prody.ensemble.pdbensemble

"""This module defines a class for handling ensembles of PDB conformations."""

import numpy as np

from prody.atomic import Atomic, AtomGroup
from prody.measure import getRMSD, getTransformation
from prody.utilities import checkCoords
from prody import LOGGER

from .ensemble import Ensemble
from .conformation import PDBConformation

__all__ = ['PDBEnsemble']


[docs]class PDBEnsemble(Ensemble): """This class enables handling coordinates for heterogeneous structural datasets and stores identifiers for individual conformations. See usage usage in :ref:`pca-xray`, :ref:`pca-dimer`, and :ref:`pca-blast`. .. note:: This class is designed to handle conformations with missing coordinates, e.g. atoms that are note resolved in an X-ray structure. For unresolved atoms, the coordinates of the reference structure is assumed in RMSD calculations and superpositions.""" def __init__(self, title='Unknown'): self._labels = [] Ensemble.__init__(self, title) self._trans = None def __repr__(self): return '<PDB' + Ensemble.__repr__(self)[1:] def __str__(self): return 'PDB' + Ensemble.__str__(self) def __add__(self, other): """Concatenate two ensembles. The reference coordinates of *self* is used in the result.""" if not isinstance(other, Ensemble): raise TypeError('an Ensemble instance cannot be added to an {0} ' 'instance'.format(type(other))) elif self.numAtoms() != other.numAtoms(): raise ValueError('Ensembles must have same number of atoms.') ensemble = PDBEnsemble('{0} + {1}'.format(self.getTitle(), other.getTitle())) ensemble.setCoords(self._coords.copy()) ensemble.addCoordset(self._confs.copy(), self._weights.copy()) other_weights = other.getWeights() if other_weights is None: ensemble.addCoordset(other.getCoordsets()) else: ensemble.addCoordset(other._confs.copy(), other_weights) return ensemble def __iter__(self): n_confs = self._n_csets for i in range(n_confs): if n_confs != self._n_csets: raise RuntimeError('number of conformations in the ensemble ' 'changed during iteration') yield PDBConformation(self, i) def __getitem__(self, index): """Return a conformation at given index.""" if isinstance(index, int): return self.getConformation(index) elif isinstance(index, slice): ens = PDBEnsemble('{0} ({1[0]}:{1[1]}:{1[2]})'.format( self._title, index.indices(len(self)))) ens.setCoords(self.getCoords()) ens.addCoordset(self._confs[index].copy(), self._weights[index].copy(), label=self._labels[index]) if self._trans is not None: ens._trans = self._trans[index] return ens elif isinstance(index, (list, np.ndarray)): ens = PDBEnsemble('Conformations of {0}'.format(self._title)) ens.setCoords(self.getCoords()) ens.addCoordset(self._confs[index].copy(), self._weights[index].copy(), label=[self._labels[i] for i in index]) if self._trans is not None: ens._trans = self._trans[index] return ens else: raise IndexError('invalid index') def _superpose(self, **kwargs): """Superpose conformations and update coordinates.""" calcT = getTransformation if kwargs.get('trans', False): if self._trans is not None: LOGGER.info('Existing transformations will be overwritten.') trans = np.zeros((self._n_csets, 4, 4)) else: trans = None indices = self._indices if indices is None: weights = self._weights coords = self._coords confs = self._confs confs_selected = self._confs else: weights = self._weights[:, indices] coords = self._coords[indices] confs = self._confs confs_selected = self._confs[:, indices] for i, conf in enumerate(confs_selected): rmat, tvec = calcT(conf, coords, weights[i]) if trans is not None: trans[i][:3, :3] = rmat trans[i][:3, 3] = tvec confs[i] = tvec + np.dot(confs[i], rmat.T) self._trans = trans
[docs] def iterpose(self, rmsd=0.0001): confs = self._confs.copy() Ensemble.iterpose(self, rmsd) self._confs = confs LOGGER.info('Final superposition to calculate transformations.') self.superpose()
iterpose.__doc__ = Ensemble.iterpose.__doc__
[docs] def addCoordset(self, coords, weights=None, label=None): """Add coordinate set(s) to the ensemble. *coords* must be a Numpy array with suitable shape and dimensionality, or an object with :meth:`getCoordsets` method. *weights* is an optional argument. If provided, its length must match number of atoms. Weights of missing (not resolved) atoms must be ``0`` and weights of those that are resolved can be anything greater than ``0``. If not provided, weights of all atoms for this coordinate set will be set equal to ``1``. *label*, which may be a PDB identifier or a list of identifiers, is used to label conformations.""" atoms = coords try: if self._coords is not None and hasattr(coords, '_getCoordsets'): coords = coords._getCoordsets() else: coords = coords.getCoordsets() except AttributeError: label = label or 'Unknown' else: if coords is None: raise ValueError('coordinates are not set') elif label is None and isinstance(atoms, Atomic): ag = atoms if not isinstance(atoms, AtomGroup): ag = atoms.getAtomGroup() label = ag.getTitle() if coords.shape[0] < ag.numCoordsets(): label += 'm' + str(atoms.getACSIndex()) else: label = label or str(coords) try: checkCoords(coords, csets=True, natoms=self._n_atoms) except TypeError: raise TypeError('coords must be a Numpy array or must have ' '`getCoords` attribute') if coords.ndim == 2: coords = coords.reshape((1, self._n_atoms, 3)) n_csets, n_atoms, _ = coords.shape if not self._n_atoms: self._n_atoms = n_atoms if weights is None: weights = np.ones((n_csets, n_atoms, 1), dtype=float) else: weights = checkWeights(weights, n_atoms, n_csets) if n_csets > 1: if isinstance(label, str): self._labels.extend('{0}_m{1}' .format(label, i+1) for i in range(n_csets)) else: if len(label) != n_csets: raise ValueError('length of label and number of ' 'coordinate sets must be the same') self._labels.extend(label) else: self._labels.append(label) if self._confs is None and self._weights is None: self._confs = coords self._weights = weights self._n_csets = n_csets elif self._confs is not None and self._weights is not None: self._confs = np.concatenate((self._confs, coords), axis=0) self._weights = np.concatenate((self._weights, weights), axis=0) self._n_csets += n_csets else: raise RuntimeError('_confs and _weights must be set or None at ' 'the same time')
[docs] def getLabels(self): """Return identifiers of the conformations in the ensemble.""" return list(self._labels)
[docs] def getCoordsets(self, indices=None): """Return a copy of coordinate set(s) at given *indices* for selected atoms. *indices* may be an integer, a list of integers or ``None``. ``None`` returns all coordinate sets. .. warning:: When there are atoms with weights equal to zero (0), their coordinates will be replaced with the coordinates of the ensemble reference coordinate set.""" if self._confs is None: return None if indices is None: indices = slice(None) else: indices = np.array([indices]).flatten() coords = self._coords if self._indices is None: confs = self._confs[indices].copy() for i, w in enumerate(self._weights[indices]): which = w.flatten() == 0 confs[i, which] = coords[which] else: selids = self._indices coords = coords[selids] confs = self._confs[indices, selids] for i, w in enumerate(self._weights[indices]): which = w[selids].flatten() == 0 confs[i, which] = coords[which] return confs
_getCoordsets = getCoordsets
[docs] def iterCoordsets(self): """Iterate over coordinate sets. A copy of each coordinate set for selected atoms is returned. Reference coordinates are not included.""" conf = PDBConformation(self, 0) for i in range(self._n_csets): conf._index = i yield conf.getCoords()
[docs] def delCoordset(self, index): """Delete a coordinate set from the ensemble.""" Ensemble.delCoordset(self, index) if isinstance(index, int): index = [index] else: index = list(index) index.sort(reverse=True) for i in index: self._labels.pop(i)
[docs] def getConformation(self, index): """Return conformation at given index.""" if self._confs is None: raise AttributeError('conformations are not set') if not isinstance(index, int): raise TypeError('index must be an integer') n_confs = self._n_csets if -n_confs <= index < n_confs: if index < 0: index = n_confs + index return PDBConformation(self, index) else: raise IndexError('conformation index out of range')
[docs] def getMSFs(self): """Calculate and return mean square fluctuations (MSFs). Note that you might need to align the conformations using :meth:`superpose` or :meth:`iterpose` before calculating MSFs.""" if self._confs is None: return indices = self._indices if indices is None: coords = self._coords confs = self._confs weights = self._weights > 0 else: coords = self._coords[indices] confs = self._confs[:, indices] weights = self._weights[:, indices] > 0 weightsum = weights.sum(0) mean = np.zeros(coords.shape) for i, conf in enumerate(confs): mean += conf * weights[i] mean /= weightsum ssqf = np.zeros(mean.shape) for i, conf in enumerate(confs): ssqf += ((conf - mean) * weights[i]) ** 2 return ssqf.sum(1) / weightsum.flatten()
[docs] def getRMSDs(self): """Calculate and return root mean square deviations (RMSDs). Note that you might need to align the conformations using :meth:`superpose` or :meth:`iterpose` before calculating RMSDs.""" if self._confs is None or self._coords is None: return None indices = self._indices if indices is None: return getRMSD(self._coords, self._confs, self._weights) else: return getRMSD(self._coords[indices], self._confs[:, indices], self._weights[:, indices])
[docs] def setWeights(self, weights): """Set atomic weights.""" if self._n_atoms == 0: raise AttributeError('coordinates are not set') elif not isinstance(weights, np.ndarray): raise TypeError('weights must be an ndarray instance') elif weights.shape[:2] != (self._n_csets, self._n_atoms): raise ValueError('shape of weights must (n_confs, n_atoms[, 1])') if weights.dtype not in (np.float32, float): try: weights = weights.astype(float) except ValueError: raise ValueError('coords array cannot be assigned type ' '{0}'.format(float)) if np.any(weights < 0): raise ValueError('weights must greater or equal to 0') if weights.ndim == 2: weights = weights.reshape((self._n_csets, self._n_atoms, 1)) self._weights = weights
Read the Docs v: v1.5
Versions
latest
v1.5
Downloads
On Read the Docs
Project Home
Builds

Free document hosting provided by Read the Docs.