Source code for pyhdc.components.bundling.selection

#!/usr/bin/env python
"""Random-selection bundling.

Bundles a set of hypervectors by copying each coordinate from one randomly chosen
input vector, rather than summing them. Operates on raw arrays dimension-first (axis 0
is the dimension ``D``, the trailing axis is the batch).
"""

import numpy as np

try:
    import torch

    TORCH_AVAILABLE = True
except ImportError:  # pragma: no cover
    TORCH_AVAILABLE = False
    torch = None


def _is_torch(data):
    """Return ``True`` when torch is importable and ``data`` is a torch tensor."""
    # Without a usable torch module nothing can be a tensor.
    if not TORCH_AVAILABLE or torch is None:
        return False
    return torch.is_tensor(data)



[docs]
def randsel(data, p=None):
    """Random-selection bundling of a ``(D, N)`` batch into a single ``(D,)`` vector.

    Each coordinate is copied from one of the ``N`` input columns, chosen
    independently at random (uniformly, or per the probability weights ``p`` over the
    ``N`` columns). The numpy path draws from the global ``np.random`` state and the
    torch path from torch's default generator.

    Args:
        data: A ``(D, N)`` array (numpy or torch) whose columns are the inputs to
            select among.
        p: Optional length-``N`` weights over the columns (default uniform). Weights
            are normalized to a probability distribution, so they need not sum to 1.

    Returns:
        A ``(D,)`` array of the same backend/dtype as ``data``.

    Raises:
        ValueError: If ``data`` is not two-dimensional, or if ``p`` is not a 1-D
            sequence of exactly ``N`` finite, non-negative weights with a positive
            sum.
    """
    dim, num = data.shape
    if p is not None:
        weights = np.asarray(p, dtype=np.float64)
        # Validate up front so both backends reject bad weights the same way. The
        # torch branch would otherwise broadcast a scalar or length-1 ``p`` across
        # all N columns and silently sample uniformly.
        if weights.shape != (num,):
            raise ValueError(
                f"p must be a 1-D sequence of {num} weights, got shape {weights.shape}"
            )
        if not np.all(np.isfinite(weights)) or np.any(weights < 0):
            raise ValueError("p must contain only finite, non-negative weights")
        total = weights.sum()
        if not np.isfinite(total) or total <= 0:
            raise ValueError("p must have a positive, finite sum")
        p = weights / total  # normalize so both backends agree
    if _is_torch(data):
        rows = torch.arange(dim, device=data.device)
        if p is None:
            idx = torch.randint(0, num, (dim,), device=data.device)
        else:
            tw = torch.as_tensor(p).to(data.device)
            # One categorical draw per coordinate: every row shares the same weights.
            idx = torch.multinomial(tw.expand(dim, num), 1).squeeze(1)
        return data[rows, idx]
    idx = np.random.choice(num, size=dim, p=p)
    # Pair each coordinate (row) with its chosen column.
    return data[np.arange(dim), idx]




[docs]
def multirandsel(data, count, p=None):
    """Produce ``count`` independent :func:`randsel` draws as a ``(D, count)`` array.

    Args:
        data: A ``(D, N)`` array (numpy or torch) whose columns are the inputs to
            select among.
        count: Number of bundled vectors to draw; must be non-negative.
        p: Optional length-``N`` weights over the columns, forwarded unchanged to
            every :func:`randsel` call.

    Returns:
        A ``(D, count)`` array of the same backend/dtype as ``data``, one draw per
        column. For ``count == 0`` this is an empty ``(D, 0)`` slice of ``data``.

    Raises:
        ValueError: If ``data`` is not two-dimensional or ``count`` is negative.
    """
    if data.ndim != 2:
        raise ValueError(f"data must be a 2-D (D, N) array, got {data.ndim} dimension(s)")
    if count < 0:
        raise ValueError(f"count must be non-negative, got {count}")
    if count == 0:
        # Stacking an empty list raises in both backends. An empty column slice has
        # the right shape, dtype and backend without needing a backend check.
        return data[:, :0]
    cols = [randsel(data, p=p) for _ in range(count)]
    if _is_torch(data):
        return torch.stack(cols, dim=1)
    return np.stack(cols, axis=1)