Source code for hippynn.layers.indexers

"""
Layers for encoding, decoding, and indexing states, other than pair-related layers.
"""

import warnings
import torch


class OneHotSpecies(torch.nn.Module):
    """
    Encodes species as one-hot map using `species_set`

    :param species_set: iterable of species types. Typically this will be
        the Z-value of the element.
        Note: 0 denotes a 'blank atom' used only for padding purposes
    :returns: OneHotSpecies
    """

    def __init__(self, species_set):
        super().__init__()
        known_species = torch.as_tensor(species_set)
        self.species_set = known_species
        self.n_species = known_species.shape[0]

        # Lookup table mapping a species value to its position in species_set.
        # Values that do not appear in species_set keep the default entry 0.
        largest = torch.max(known_species)
        lookup = torch.zeros((largest + 1,), dtype=torch.long)
        lookup[known_species] = torch.arange(0, self.n_species, dtype=torch.long)
        self.species_map = torch.nn.Parameter(torch.as_tensor(lookup, dtype=torch.long), requires_grad=False)

    def forward(self, species):
        """
        One-hot encode an array of species values.

        :param species: integer tensor of species values; it is indexed with
            three axes below, so a 2-d input (presumably (molecule, atom) --
            TODO confirm) is expected.
        :return: Initial one-hotted features, nonblank atoms
        """
        identity = torch.eye(self.n_species, dtype=torch.bool, device=species.device)
        encoded = identity[self.species_map[species]]
        # Column 0 belongs to the first entry of species_set, which is treated
        # as the blank (padding) species: it is dropped from the features and
        # its negation marks the real atoms.
        is_real = ~encoded[:, :, 0]
        one_hot = encoded[:, :, 1:]
        return one_hot, is_real
class PaddingIndexer(torch.nn.Module):
    """
    Hipnn's indexer

    Description:
        This indexer allows us to go from rectangular (mol, atom)
        representations to (flatatom) representations and a corresponding
        molecule index for those atoms.

        The 'real_index' allows us to take _values_ from a flattened
        rectangular representation (mol*atom) and select only the real ones.

        The 'inv_real_index', when indexed, converts a (mol*atom) index set
        into a (flatatom) index set.
    """

    def forward(self, features, nonblank):
        """
        Select the real (non-padding) atoms from a padded batch.

        :param features: padded features; reshaped to
            (n_molecules * n_atoms_max, -1) before selection.
        :param nonblank: 2-d boolean mask of shape (n_molecules, n_atoms_max),
            true where an atom is real.
        :return: tuple of
            (indexed_features, real_atoms, inv_real_atoms, mol_index,
            atom_index, n_molecules, n_atoms_max), where

            - indexed_features: 2-d features of the real atoms,
            - real_atoms: flat (mol*atom) index of each real atom,
            - inv_real_atoms: for each flat (mol*atom) slot, the position of
              that atom among the real atoms (0 for padding slots),
            - mol_index / atom_index: molecule and within-molecule index of
              each real atom.
        """
        dev = features.device
        n_molecules, n_atoms_max = nonblank.shape

        # Total number of slots in the padded (mol, atom) layout.
        n_fictitious_atoms = n_molecules * n_atoms_max
        flat_nonblank = nonblank.reshape(n_fictitious_atoms)

        # Give nonzero() an output tensor that was allocated at full padded
        # size and then resized to zero elements.
        # NOTE(review): presumably this reuses a buffer that is guaranteed to
        # be large enough for the result -- confirm the intent before changing.
        large_enough = torch.empty(nonblank.shape, device=dev, dtype=torch.int64)
        large_enough.resize_(0)
        real_atoms = torch.nonzero(flat_nonblank, as_tuple=False, out=large_enough)[:, 0]
        n_real_atoms = real_atoms.shape[0]

        # Inverse map: flat (mol*atom) slot -> position among the real atoms.
        inv_real_atoms = torch.zeros((n_fictitious_atoms,), dtype=torch.long, device=dev)
        inv_real_atoms[real_atoms] = torch.arange(n_real_atoms, dtype=torch.long, device=dev)

        # Flatten incoming features to atom representation. The reshape always
        # yields a 2-d tensor, so the selection is 2-d as well.
        indexed_features = features.reshape(n_fictitious_atoms, -1)[real_atoms]

        # Molecule and within-molecule index of every slot, then keep the real ones.
        mol_index_shaped = torch.arange(n_molecules, dtype=torch.long, device=dev).unsqueeze(1).expand(-1, n_atoms_max)
        atom_index_shaped = torch.arange(n_atoms_max, dtype=torch.long, device=dev).unsqueeze(0).expand(n_molecules, -1)
        atom_index = atom_index_shaped.reshape(n_fictitious_atoms)[real_atoms]
        mol_index = mol_index_shaped.reshape(n_fictitious_atoms)[real_atoms]

        return indexed_features, real_atoms, inv_real_atoms, mol_index, atom_index, n_molecules, n_atoms_max
class AtomReIndexer(torch.nn.Module):
    """
    Select real atoms from a padded (mol, atom, ...) tensor.
    """

    def forward(self, molatom_thing, real_atoms):
        """
        :param molatom_thing: tensor whose first two axes are merged into a
            single flat axis before selection.
        :param real_atoms: indices into that flat axis.
        :return: the selected rows; a trailing axis of size 1 is added when
            the input has no axes beyond the first two.
        """
        n_mol, n_atom, *trailing = molatom_thing.shape
        flattened = molatom_thing.reshape(n_mol * n_atom, *trailing)
        selected = flattened[real_atoms]
        if not trailing:
            return selected.unsqueeze(1)
        return selected
class MolSummer(torch.nn.Module):
    """
    Molecule Summer

    Description:
        This sums (flatatom) things into (mol) things.
        It actually works similarly to the interaction layer
    """

    def forward(self, features, mol_index, n_molecules):
        """
        Sum per-atom features into per-molecule totals.

        :param features: per-atom values, first axis is the atom axis.
            A 1-d input is treated as having a single feature.
        :param mol_index: for each atom, the index of the output row it is
            added to.
        :param n_molecules: number of rows in the output.
        :return: tensor of shape (n_molecules, *feature_shape); rows that no
            atom maps to stay zero.
        """
        # Add an explicit feature axis for 1-d input so that the source handed
        # to index_add_ has the same rank as the (n_molecules, 1) output.
        if features.ndimension() == 1:
            features = features.unsqueeze(1)

        out_shape = (n_molecules, *features.shape[1:])
        result = torch.zeros(*out_shape, device=features.device, dtype=features.dtype)
        result.index_add_(0, mol_index, features)
        return result
class SysMaxOfAtoms(torch.nn.Module):
    """
    Take maximum over atom dimension.
    """

    def forward(self, features, mol_index, n_molecules):
        """
        :param features: per-atom values, first axis is the atom axis.
            A 1-d input gets a trailing feature axis of size 1.
        :param mol_index: for each atom, the index of the output row it
            contributes to.
        :param n_molecules: number of rows in the output.
        :return: tensor of shape (n_molecules, *feature_shape) holding the
            maximum over the atoms mapped to each row; rows with no atoms
            keep their initial value of zero.
        """
        # Guarantee an explicit feature axis.
        if features.ndim == 1:
            features = features.unsqueeze(1)
        trailing = tuple(features.shape[1:])

        result = torch.zeros((n_molecules, *trailing), device=features.device, dtype=features.dtype)

        # scatter_reduce_ needs an index with the same shape as the source:
        # append singleton axes to mol_index, then broadcast over the features.
        singleton_axes = (1,) * len(trailing)
        scatter_index = mol_index.reshape(-1, *singleton_axes).expand((-1, *trailing))

        # include_self=False: the zero initial values do not take part in the max.
        result.scatter_reduce_(0, scatter_index, features, reduce='amax', include_self=False)
        return result
class AtomDeIndexer(torch.nn.Module):
    """
    Scatter (flatatom) features back into a zero-padded (mol, atom) layout.
    """

    def forward(self, features, mol_index, atom_index, n_molecules, n_atoms_max):
        """
        :param features: per-atom values, first axis is the atom axis.
            A 1-d input is treated as having a single feature.
        :param mol_index: row (molecule) position of each atom in the output.
        :param atom_index: column (atom) position of each atom in the output.
        :param n_molecules: size of the first output axis.
        :param n_atoms_max: size of the second output axis.
        :return: tensor of shape (n_molecules, n_atoms_max, *feature_shape),
            zero wherever no atom was written.
        """
        # A bare int cannot be star-unpacked into the output shape, so give
        # 1-d input an explicit feature axis of size 1 instead.
        if features.ndimension() == 1:
            features = features.unsqueeze(1)

        out_shape = (n_molecules, n_atoms_max, *features.shape[1:])
        result = torch.zeros(*out_shape, device=features.device, dtype=features.dtype)
        result[mol_index, atom_index] = features
        return result
class CellScaleInducer(torch.nn.Module):
    """
    Multiply coordinates and cell by a batch of identity "strain" matrices
    that require gradients, and return the strain alongside the results.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.pbc = False

    def forward(self, coordinates, cell):
        """
        :param coordinates: 3-d tensor; axis 0 is the batch axis and axis 2
            sets the size of the strain matrices.
        :param cell: 3-d tensor batch-multiplied by the same strain.
        :return: strained_coordinates, strained_cell, strain
        """
        n_batch = coordinates.shape[0]
        n_dim = coordinates.shape[2]

        # One identity matrix per batch entry. The identity is created with
        # requires_grad=True so the tiled strain is part of the autograd graph.
        identity = torch.eye(n_dim, dtype=coordinates.dtype, device=coordinates.device, requires_grad=True)
        strain = identity.tile(n_batch, 1, 1)

        strained_coordinates = torch.bmm(coordinates, strain)
        strained_cell = torch.bmm(cell, strain)
        return strained_coordinates, strained_cell, strain
class QuadPack(torch.nn.Module):
    """
    Converts quadrupoles to packed triangular form, assumes symmetric.

    Packed form:
        (molecule) XX, YY, ZZ, XY, XZ, YZ
        Index       0,  4,  8,  1,  2,  5   (position in the flattened 3x3)

    Unpacked form:
        (molecule) XX, XY, XZ, YX, YY, YZ, ZX, ZY, ZZ
        Index      00, 01, 02, 10, 11, 12, 20, 21, 22
    """

    def __init__(self):
        super().__init__()
        # Row and column indices of the six packed components, in packed order.
        ind1 = [0, 1, 2, 0, 0, 1]
        ind2 = [0, 1, 2, 1, 2, 2]
        self.register_buffer("ind_1", torch.LongTensor(ind1))
        self.register_buffer("ind_2", torch.LongTensor(ind2))

    def forward(self, quadrupoles):
        """
        :param quadrupoles: tensor indexed as ``quadrupoles[:, i, j]``, i.e.
            with (at least) three axes: a leading batch axis followed by the
            two component axes.
        :return: tensor with the six packed components on axis 1.
        """
        # Use the names the buffers were registered under.
        return quadrupoles[:, self.ind_1, self.ind_2]
class QuadUnpack(torch.nn.Module):
    """
    Converts quadrupoles from packed triangular form to flattened molecule form.

    Packed form:
        (molecule) XX, YY, ZZ, XY, XZ, YZ
        Index       0,  1,  2,  3,  4,  5

    Unpacked form:
        (molecule) XX, XY, XZ, YX, YY, YZ, ZX, ZY, ZZ
        Index       0,  3,  4,  3,  1,  5,  4,  5,  2
    """

    def __init__(self):
        super().__init__()
        # For each of the nine unpacked slots, the packed component to copy.
        packed_source = torch.tensor([0, 3, 4, 3, 1, 5, 4, 5, 2], dtype=torch.long)
        self.register_buffer("index_permutation", packed_source)

    def forward(self, packed_quadrupoles):
        """
        :param packed_quadrupoles: tensor whose axis 1 holds the six packed
            components.
        :return: tensor whose axis 1 holds the nine unpacked components.
        """
        gather_order = self.index_permutation
        return packed_quadrupoles[:, gather_order]
class FilterBondsOneway(torch.nn.Module):
    """
    Keep only the bonds whose first pair index is smaller than the second.
    """

    def forward(self, bonds, pair_first, pair_second):
        """
        :param bonds: per-pair values, first axis is the pair axis.
        :param pair_first: first index of each pair.
        :param pair_second: second index of each pair.
        :return: the entries of ``bonds`` where pair_first < pair_second.
        """
        # in seqm, only bonds with index first < second is used
        return bonds[pair_first < pair_second]
class FuzzyHistogram(torch.nn.Module):
    """
    Transforms a scalar feature into a vectorized feature via
    the fuzzy/soft histogram method.

    :param length: length of vectorized feature (number of bins)
    :param vmin: position of the first bin
    :param vmax: position of the last bin
    :returns: FuzzyHistogram
    """

    def __init__(self, length, vmin, vmax):
        super().__init__()

        err_msg = "The value of 'length' must be a positive integer."
        if not isinstance(length, int) or length <= 0:
            raise ValueError(err_msg)

        bounds_are_numbers = isinstance(vmin, (int, float)) and isinstance(vmax, (int, float))
        if not bounds_are_numbers:
            raise ValueError("The values of 'vmin' and 'vmax' must be floating point numbers.")
        if vmin >= vmax:
            raise ValueError("The value of 'vmin' must be less than the value of 'vmax.'")

        # Bin positions are evenly spaced over [vmin, vmax] and are not trained.
        bin_positions = torch.linspace(vmin, vmax, length)
        self.bins = torch.nn.Parameter(bin_positions, requires_grad=False)
        self.sigma = (vmax - vmin) / length
        self.vmin = vmin
        self.vmax = vmax

    def forward(self, values):
        """
        :param values: tensor of scalar values; a trailing axis is appended
            unless the last axis already has size 1.
        :return: exp(-((values - bins) / sigma) ** 2 / 4), with the first two
            axes of the result flattened together.
        """
        # Warn user if provided values lie outside the range of the histogram bins
        outside = (values < self.vmin) + (values > self.vmax)
        n_outside = outside.sum()
        if n_outside > 0:
            fraction_outside = outside.float().mean()
            warnings.warn(
                "Values out of range for FuzzyHistogrammer\n"
                f"Number of values out of range: {n_outside}\n"
                f"Percentage of values out of range: {fraction_outside * 100:.2f}%\n"
                f"Set range for FuzzyHistogrammer: ({self.vmin:.2f}, {self.vmax:.2f})\n"
                f"Range of values: ({values.min().item():.2f}, {values.max().item():.2f})"
            )

        # Make room for the bin axis so the subtraction broadcasts over bins.
        if values.shape[-1] != 1:
            values = values[..., None]

        offset = values - self.bins
        histo = torch.exp(-((offset / self.sigma) ** 2) / 4)
        return torch.flatten(histo, end_dim=1)