# Source code for pyhdc.components.bundling.selection

#!/usr/bin/env python
"""Random-selection bundling.

Bundles a set of hypervectors by copying each coordinate from one randomly chosen
input vector, rather than summing them. Operates on raw arrays dimension-first (axis 0
is the dimension ``D``, the trailing axis is the batch).
"""

import numpy as np

try:
    import torch

    TORCH_AVAILABLE = True
except ImportError:  # pragma: no cover
    TORCH_AVAILABLE = False
    torch = None


def _is_torch(data):
    """Return whether ``data`` is a torch tensor.

    Always false when the optional torch import at the top of the module
    did not succeed, so callers can branch on this without guarding the
    ``torch`` name themselves.
    """
    if not TORCH_AVAILABLE or torch is None:
        return False
    return torch.is_tensor(data)


def randsel(data, p=None):
    """Random-selection bundling of a ``(D, N)`` batch into a single ``(D,)`` vector.

    Each coordinate is copied from one of the ``N`` input columns, chosen
    independently at random (uniformly, or per the probability weights ``p``
    over the ``N`` columns).

    Args:
        data: A ``(D, N)`` array (numpy or torch) whose columns are the inputs
            to select among.
        p: Optional length-``N`` weights over the columns (default uniform).
            Weights are normalized to a probability distribution, so they need
            not sum to 1. Any array-like is accepted, including a torch tensor
            on any device.

    Returns:
        A ``(D,)`` array of the same backend/dtype as ``data``.

    Raises:
        ValueError: If ``data`` is not 2-D, or if ``p`` is not a length-``N``
            vector of non-negative weights with a finite, positive sum.
    """
    if data.ndim != 2:
        raise ValueError(
            f"data must be a 2-D (D, N) array, got shape {tuple(data.shape)}"
        )
    dim, num = data.shape

    if p is not None:
        if _is_torch(p):
            # np.asarray cannot read tensors that live on an accelerator or
            # that require grad, so bring the weights to host memory first.
            p = p.detach().cpu().numpy()
        weights = np.asarray(p, dtype=np.float64)
        # Check the shape explicitly: the torch path's expand() below would
        # otherwise silently broadcast a length-1 weight vector.
        if weights.shape != (num,):
            raise ValueError(
                f"p must have shape ({num},) to match the columns of data, "
                f"got {weights.shape}"
            )
        total = weights.sum()
        # A NaN/inf weight makes the total non-finite; all-zero weights make
        # it zero. Either would turn the normalization into NaNs.
        if (weights < 0).any() or not np.isfinite(total) or total <= 0:
            raise ValueError(
                "p must contain non-negative weights with a finite, positive sum"
            )
        p = weights / total  # normalize so both backends agree

    if _is_torch(data):
        rows = torch.arange(dim, device=data.device)
        if p is None:
            idx = torch.randint(0, num, (dim,), device=data.device)
        else:
            tw = torch.as_tensor(p).to(data.device)
            # One draw per row of the (dim, num) view: a column index for
            # every coordinate.
            idx = torch.multinomial(tw.expand(dim, num), 1).squeeze(1)
        return data[rows, idx]

    idx = np.random.choice(num, size=dim, p=p)
    return data[np.arange(dim), idx]
def multirandsel(data, count, p=None):
    """Produce ``count`` independent :func:`randsel` draws as a ``(D, count)`` array.

    Args:
        data: A ``(D, N)`` array (numpy or torch) passed unchanged to every
            :func:`randsel` call.
        count: Number of independent draws; each becomes one output column.
            ``0`` yields an empty ``(D, 0)`` result.
        p: Optional column weights, forwarded to :func:`randsel`.

    Returns:
        A ``(D, count)`` array of the same backend as ``data``, with draw ``i``
        in column ``i``.

    Raises:
        ValueError: If ``count`` is negative.
    """
    if count < 0:
        raise ValueError(f"count must be non-negative, got {count}")

    use_torch = _is_torch(data)
    if count == 0:
        # stack() rejects an empty sequence on both backends, so build the
        # empty result directly, keeping the dtype (and device) of ``data``.
        dim = data.shape[0]
        if use_torch:
            return data.new_empty((dim, 0))
        return np.empty((dim, 0), dtype=data.dtype)

    cols = [randsel(data, p=p) for _ in range(count)]
    if use_torch:
        return torch.stack(cols, dim=1)
    return np.stack(cols, axis=1)