Source code for pyhdc.components.similarity.hamming

from typing import Optional

import numpy as np

try:
    import torch

    TORCH_AVAILABLE = True
except ImportError:
    TORCH_AVAILABLE = False
    torch = None

from pyhdc.components.input_formatting import _normalize_similarity
from pyhdc.types import ArrayLike


[docs] def HammingDistance( *hypervectors: ArrayLike, axis: Optional[int] = None, mode: str = "pairwise" ): """HammingDistance Hamming Distance of hypervectors Counts the number of elements where A[i] != B[i], normalized to the hypervector dimension and mapped to [-1, 1] where 1 is identical, -1 is completely different, and 0 is half-matching. Hypervectors are dimension-first (axis 0 is always the dimension ``D``). Supports these calling conventions:: (a, b) where a and b are 1D: returns a scalar in [-1, 1] (a, b) batches: per-pair scores (trailing axes broadcast) (arr,) where arr is (D, N): sim(col_0, col_i) for i in 1..N-1 (arr,) where arr is (D, N, M, ...): requires ``axis`` With ``mode="cross"`` and two binary ``{0, 1}`` batches ``A=(D, P)``, ``B=(D, M)``, returns the full ``(P, M)`` cross-similarity matrix. Args: *hypervectors: Two hypervectors, or a single batch array axis: For a single ``(D, N, M, ...)`` batch, the batch axis to split on mode: ``"pairwise"`` (default) or ``"cross"`` Returns: Scalar similarity, or an array of similarities over the trailing axes """ a, b, is_torch, scalar = _normalize_similarity(*hypervectors, axis=axis, mode=mode) if mode == "cross": if is_torch: assert torch is not None a_t = torch.as_tensor(a).double() b_t = torch.as_tensor(b).double() dimension = a_t.shape[0] matches = a_t.T @ b_t mism = a_t.sum(dim=0)[:, None] + b_t.sum(dim=0)[None, :] - 2 * matches return 1 - 2 * mism / dimension # Inputs are cast to float64 for a BLAS matmul a_n = np.asarray(a, dtype=np.float64) b_n = np.asarray(b, dtype=np.float64) dimension = a_n.shape[0] matches = a_n.T @ b_n mism = a_n.sum(axis=0)[:, None] + b_n.sum(axis=0)[None, :] - 2 * matches return 1 - 2 * mism / dimension if is_torch: assert torch is not None a_t = torch.as_tensor(a) b_t = torch.as_tensor(b) dimension = a_t.shape[0] mismatches = (a_t != b_t).sum(dim=0).float() sims = 1 - 2 * mismatches / dimension return sims.item() if scalar else sims a_n = np.asarray(a) b_n = np.asarray(b) dimension = a_n.shape[0] mismatches = np.not_equal(a_n, b_n).sum(axis=0) sims = 1 - 2 * mismatches / dimension return sims.item() if scalar else sims