Source code for rwskit.hash

"""Hash utilities."""

from __future__ import annotations

import logging

from typing import Any, Callable, Literal, Optional

from rwskit.collections_ import recursive_sort
from rwskit.numeric import to_signed

import xxhash

from icontract import require
from msgspec import msgpack

# Module-level logger, named after this module via ``__name__``.
[docs] log = logging.getLogger(__name__)
# The hash widths, in bits, accepted as a ``HashSize``: 32, 64 or 128.
[docs] HashSize = Literal[32, 64, 128]
class ObjectHasher:
    """Hash objects using xxHash.

    Objects are first serialized with a ``msgspec`` ``msgpack`` encoder and
    the resulting bytes are hashed, so the only requirement is that the
    object be ``msgpack`` serializable.

    Parameters
    ----------
    hash_size : HashSize, default = 128
        The width, in bits, of the hash values to produce. Must be one of
        32, 64 or 128.
    signed : bool, default = False
        The signedness used by :meth:`hash` and :meth:`hash_sorted` when the
        caller does not pass ``signed`` explicitly.
    """

    # Maps each supported hash width (in bits) to the xxhash constructor
    # used to produce it.
    _hashers: dict[HashSize, Callable] = {
        32: xxhash.xxh32,
        64: xxhash.xxh3_64,
        128: xxhash.xxh3_128,
    }

    @require(lambda hash_size: hash_size in (32, 64, 128))
    def __init__(self, hash_size: HashSize = 128, signed: bool = False):
        self._hash_size = hash_size
        # One encoder instance is created here and reused for every call.
        self._encoder = msgpack.Encoder()
        self._hasher = self._hashers[hash_size]
        self._signed = signed

    @property
    def hash_size(self) -> HashSize:
        """Get integer size of the returned hash values this hasher produces."""
        return self._hash_size

    # NOTE(review): this precondition only inspects the per-call ``signed``
    # argument. A 128-bit hasher constructed with ``signed=True`` and called
    # with ``signed=None`` is not rejected here -- confirm whether that
    # combination should be refused as well.
    @require(lambda self, signed: not signed or self._hash_size < 128)
    def hash(self, obj: Any, signed: Optional[bool] = None) -> int:
        """
        Hash the object using the hash size specified in the constructor.

        ``xxHash`` returns an unsigned value, but it can be converted to a
        signed value if the hash size is less than 128 bits.

        .. note::
            Only values supported by
            `msgpack <https://jcristharif.com/msgspec/supported-types.html>`__
            can be hashed.

        .. note::
            This can only hash integers up to 64-bits.

        Parameters
        ----------
        obj : Any
            The object to hash
        signed : bool, optional
            Whether to convert the value to a signed integer using
            :meth:`~rwskit.numeric.to_signed`. When ``None`` (the default)
            the value given to the constructor is used. An explicit ``True``
            or ``False`` always overrides the constructor value.

        Returns
        -------
        int
            An integer representing the hash of the object.

        Raises
        ------
        OverflowError
            If the data contains an integer that is outside the range
            -2^63 to 2^64-1.
        """
        # Only fall back to the instance default when the caller did not
        # choose; ``signed or self._signed`` would discard an explicit False.
        use_signed = self._signed if signed is None else signed

        data = self._encoder.encode(obj)
        unsigned_value = self._hasher(data).intdigest()

        return to_signed(unsigned_value) if use_signed else unsigned_value

    @require(lambda self, signed: not signed or self._hash_size < 128)
    def hash_sorted(self, obj: Any, signed: Optional[bool] = None) -> int:
        """Sort a collection and hash the result.

        The object is passed through
        :func:`~rwskit.collections_.recursive_sort` and the result is handed
        to :meth:`hash`. This should provide a more robust hash that should
        return the same value for collections containing the same data, but
        in a different order.

        Parameters
        ----------
        obj : Any
            The object to sort and hash.
        signed : bool, optional
            Whether to return a signed value. When ``None`` (the default) the
            value given to the constructor is used. An explicit ``True`` or
            ``False`` always overrides the constructor value.

        Returns
        -------
        int
            An integer representing the hash of the sorted object.
        """
        # Resolve the default here so :meth:`hash` receives an explicit bool
        # and an explicit False from the caller is honoured.
        use_signed = self._signed if signed is None else signed

        return self.hash(recursive_sort(obj), signed=use_signed)