Source code for pyhdc.components.basis.codebook

#!/usr/bin/env python
"""Family-aware basis builders.

Each builder returns a raw ``(D, count)`` array (numpy or torch, matching the
encoding's backend) whose columns are basis hypervectors in the encoding's value
domain. Builders are component-level and return arrays, not Hypervectors.

- ``random`` is a plain random draw; ``empty`` and ``identity`` are constant fills.
- ``level`` / ``circular`` are family-agnostic. They mix two ordinary
  ``encoding.generate`` draws with a per-coordinate threshold, so they work for every
  family without any per-family special-casing.
- ``thermometer`` is a deterministic cumulative code and needs the
  discrete ``(low, high)`` endpoints, so it is defined only for discrete families.
"""

import numpy as np

from pyhdc.components.basis.domain import binding_identity, family_endpoints

try:
    import torch

    TORCH_AVAILABLE = True
except ImportError:  # pragma: no cover
    TORCH_AVAILABLE = False
    torch = None


def _resolve_dim(encoding, dimension):
    """Return the codebook row count ``D`` for a builder call.

    Args:
        encoding: Object exposing a ``dimension`` attribute, used as the default.
        dimension: Explicit override, coerced with ``int``; ``None`` selects
            ``encoding.dimension`` unchanged.

    Returns:
        ``encoding.dimension`` when no override is given, else ``int(dimension)``.
    """
    if dimension is None:
        return encoding.dimension
    return int(dimension)


def random(encoding, count, dimension=None):
    """Build a ``(D, count)`` codebook of independent random hypervectors.

    Args:
        encoding: Encoding whose ``generate`` method performs the draw.
        count: Number of columns (hypervectors) to draw.
        dimension: Optional override for ``D``; ``None`` uses
            ``encoding.dimension``.

    Returns:
        The raw ``.data`` array of one ``encoding.generate((D, count))`` draw.
    """
    rows = _resolve_dim(encoding, dimension)
    drawn = encoding.generate((rows, count))
    return drawn.data
def identity(encoding, count, dimension=None):
    """Build a ``(D, count)`` codebook whose every column is the binding identity.

    The binding identity ``e`` satisfies ``bind(x, e) == x``. Defined for the MAP,
    HRR, FHRR, and BSC families.

    Args:
        encoding: Encoding passed to ``binding_identity``; its ``backend``
            selects the torch or numpy replication path.
        count: Number of copies (columns) of the identity element.
        dimension: Optional override for ``D``; ``None`` uses
            ``encoding.dimension``.

    Returns:
        A ``(D, count)`` array (torch tensor when ``encoding.backend`` is
        ``"torch"``, numpy array otherwise) with the identity in every column.

    Raises:
        NotImplementedError: For VTB, MBAT, and the BSDC family, which have no
            neutral binding element.
    """
    rows = _resolve_dim(encoding, dimension)
    unit = binding_identity(encoding, rows)  # (D,)
    column = unit[:, None]  # (D, 1), replicated along the column axis below

    if encoding.backend != "torch":
        return np.repeat(column, count, axis=1)
    return column.repeat(1, count)
def empty(encoding, count, dimension=None):
    """Build a ``(D, count)`` array of all-zero hypervectors.

    Args:
        encoding: Encoding whose ``zeros`` method allocates the array.
        count: Number of columns.
        dimension: Optional override for ``D``; ``None`` uses
            ``encoding.dimension``.

    Returns:
        The raw ``.data`` array of ``encoding.zeros((D, count))``.
    """
    rows = _resolve_dim(encoding, dimension)
    blank = encoding.zeros((rows, count))
    return blank.data
def level(encoding, count, dimension=None):
    """Build a linear level codebook: neighbours correlated, ends near-orthogonal.

    Two endpoint vectors are drawn with ``encoding.generate`` and every
    coordinate gets its own uniform threshold in ``[0, 1)``. Column ``i`` takes
    the first endpoint on coordinates whose threshold is ``>= i / (count - 1)``
    and the second endpoint elsewhere, so each coordinate switches endpoint
    exactly once as ``i`` grows. Column 0 equals the first draw and the last
    column equals the second draw.

    Args:
        encoding: Encoding providing ``generate`` and ``backend``.
        count: Number of levels (columns).
        dimension: Optional override for ``D``; ``None`` uses
            ``encoding.dimension``.

    Returns:
        A ``(D, count)`` array in the dtype of the first endpoint draw (torch
        tensor when ``encoding.backend`` is ``"torch"``, numpy array otherwise).
    """
    rows = _resolve_dim(encoding, dimension)
    # Random draws happen in this order: first endpoint, second endpoint,
    # then the per-coordinate thresholds.
    start = encoding.generate(rows).data
    stop = encoding.generate(rows).data
    denom = max(count - 1, 1)  # a single level would otherwise divide 0 by 0

    if encoding.backend != "torch":
        fractions = np.arange(count) / denom
        thresholds = np.random.uniform(0.0, 1.0, rows)
        use_start = thresholds[:, np.newaxis] >= fractions[np.newaxis, :]
        mixed = np.where(use_start, start[:, np.newaxis], stop[:, np.newaxis])
        return mixed.astype(start.dtype)

    assert torch is not None
    dev = start.device
    fractions = torch.arange(count, device=dev, dtype=torch.float32) / denom
    thresholds = torch.rand(rows, device=dev)
    use_start = thresholds[:, None] >= fractions[None, :]  # (D, count) bool
    mixed = torch.where(use_start, start[:, None], stop[:, None])
    return mixed.to(start.dtype)
def circular(encoding, count, dimension=None):
    """Build a circular (ring) level codebook whose similarity wraps around.

    Two endpoint vectors are drawn with ``encoding.generate`` and every
    coordinate gets a random start phase ``p`` in ``[0, count)``. Column ``i``
    takes the first endpoint on coordinates where ``(i - p) mod count`` is below
    ``count / 2`` and the second endpoint elsewhere, i.e. each coordinate holds
    one endpoint over a half-ring arc and the other endpoint over the rest.
    Similarity therefore follows ring distance ``min(|i - j|, count - |i - j|)``:
    the first and last columns are neighbours and the diametrically opposite
    column is the least similar.

    Args:
        encoding: Encoding providing ``generate`` and ``backend``.
        count: Number of ring positions (columns).
        dimension: Optional override for ``D``; ``None`` uses
            ``encoding.dimension``.

    Returns:
        A ``(D, count)`` array in the dtype of the first endpoint draw (torch
        tensor when ``encoding.backend`` is ``"torch"``, numpy array otherwise).
    """
    rows = _resolve_dim(encoding, dimension)
    # Random draws happen in this order: first endpoint, second endpoint,
    # then the per-coordinate phases.
    start = encoding.generate(rows).data
    stop = encoding.generate(rows).data
    arc = count / 2.0

    if encoding.backend != "torch":
        positions = np.arange(count)
        phases = np.random.uniform(0.0, count, rows)
        offsets = np.mod(positions[np.newaxis, :] - phases[:, np.newaxis], count)
        in_arc = offsets < arc
        ring = np.where(in_arc, start[:, np.newaxis], stop[:, np.newaxis])
        return ring.astype(start.dtype)

    assert torch is not None
    dev = start.device
    positions = torch.arange(count, device=dev, dtype=torch.float32)
    phases = torch.rand(rows, device=dev) * count
    # (D, count) offset of every column from each coordinate's start phase.
    offsets = torch.remainder(positions[None, :] - phases[:, None], count)
    in_arc = offsets < arc
    ring = torch.where(in_arc, start[:, None], stop[:, None])
    return ring.to(start.dtype)
def thermometer(encoding, count, dimension=None):
    """Build a thermometer (cumulative unary) codebook. Discrete families only.

    Column ``i`` sets its first ``round(i / (count - 1) * D)`` coordinates to the
    high endpoint and the remaining ones to the low endpoint. The cutoffs never
    decrease, so each column's high-set contains the previous column's (nested);
    two neighbouring columns are identical when their rounded cutoffs tie, which
    happens once ``count - 1`` exceeds ``D``. Column 0 is the constant all-low
    vector and, for ``count > 1``, the last column is the constant all-high
    vector, so the two ends are anti-correlated rather than orthogonal. A single
    column (``count == 1``) is all-low. Distinct from :func:`level`, whose
    endpoints are two independent random draws.

    Args:
        encoding: Encoding providing ``backend``; the torch path also calls
            ``generate`` and the numpy path reads ``_spec.dtype``.
        count: Number of columns.
        dimension: Optional override for ``D``; ``None`` uses
            ``encoding.dimension``.

    Returns:
        A ``(D, count)`` array of low/high endpoint values (torch tensor when
        ``encoding.backend`` is ``"torch"``, numpy array otherwise).

    Raises:
        NotImplementedError: For continuous and phase families (via
            :func:`~pyhdc.components.basis.domain.family_endpoints`).
    """
    rows = _resolve_dim(encoding, dimension)
    low, high = family_endpoints(encoding)
    denom = max(count - 1, 1)  # a single column would otherwise divide 0 by 0

    if encoding.backend != "torch":
        cutoffs = np.rint((np.arange(count) / denom) * rows).astype(np.int64)
        row_index = np.arange(rows)[:, np.newaxis]
        is_high = row_index < cutoffs[np.newaxis, :]
        return np.where(is_high, high, low).astype(encoding._spec.dtype)

    assert torch is not None
    # NOTE(review): this one-element draw is used only to read the target
    # device and dtype; it still goes through ``encoding.generate``.
    probe = encoding.generate(1).data
    dev = probe.device
    row_index = torch.arange(rows, device=dev)[:, None]  # (D, 1)
    fractions = torch.arange(count, device=dev, dtype=torch.float32) / denom
    cutoffs = torch.round(fractions * rows).long()  # (count,)
    is_high = row_index < cutoffs[None, :]  # (D, count) bool
    # Map the 0/1 mask onto the endpoints: 0 -> low, 1 -> high.
    scaled = is_high.to(probe.dtype) * (high - low) + low
    return scaled.to(probe.dtype)