Tree - source-git/mozjs60 - CentOS Git server

source-git / mozjs60

Blame mfbt/HashFunctions.h

Blob History Raw

Packit	f0b94e	`/* -- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -- */`
Packit	f0b94e	`/* vim: set ts=8 sts=2 et sw=2 tw=80: */`
Packit	f0b94e	`/* This Source Code Form is subject to the terms of the Mozilla Public`
Packit	f0b94e	`* License, v. 2.0. If a copy of the MPL was not distributed with this`
Packit	f0b94e	`* file, You can obtain one at http://mozilla.org/MPL/2.0/. */`
Packit	f0b94e
Packit	f0b94e	`/* Utilities for hashing. */`
Packit	f0b94e
Packit	f0b94e	`/*`
Packit	f0b94e	`* This file exports functions for hashing data down to a 32-bit value,`
Packit	f0b94e	`* including:`
Packit	f0b94e	`*`
Packit	f0b94e	`* - HashString Hash a char* or char16_t/wchar_t* of known or unknown`
Packit	f0b94e	`* length.`
Packit	f0b94e	`*`
Packit	f0b94e	`* - HashBytes Hash a byte array of known length.`
Packit	f0b94e	`*`
Packit	f0b94e	`* - HashGeneric Hash one or more values. Currently, we support uint32_t,`
Packit	f0b94e	`* types which can be implicitly cast to uint32_t, data`
Packit	f0b94e	`* pointers, and function pointers.`
Packit	f0b94e	`*`
Packit	f0b94e	`* - AddToHash Add one or more values to the given hash. This supports the`
Packit	f0b94e	`* same list of types as HashGeneric.`
Packit	f0b94e	`*`
Packit	f0b94e	`*`
Packit	f0b94e	`* You can chain these functions together to hash complex objects. For example:`
Packit	f0b94e	`*`
Packit	f0b94e	`* class ComplexObject`
Packit	f0b94e	`* {`
Packit	f0b94e	`* char* mStr;`
Packit	f0b94e	`* uint32_t mUint1, mUint2;`
Packit	f0b94e	`* void (*mCallbackFn)();`
Packit	f0b94e	`*`
Packit	f0b94e	`* public:`
Packit	f0b94e	`* uint32_t hash()`
Packit	f0b94e	`* {`
Packit	f0b94e	`* uint32_t hash = HashString(mStr);`
Packit	f0b94e	`* hash = AddToHash(hash, mUint1, mUint2);`
Packit	f0b94e	`* return AddToHash(hash, mCallbackFn);`
Packit	f0b94e	`* }`
Packit	f0b94e	`* };`
Packit	f0b94e	`*`
Packit	f0b94e	`* If you want to hash an nsAString or nsACString, use the HashString functions`
Packit	f0b94e	`* in nsHashKeys.h.`
Packit	f0b94e	`*/`
Packit	f0b94e
Packit	f0b94e	`#ifndef mozilla_HashFunctions_h`
Packit	f0b94e	`#define mozilla_HashFunctions_h`
Packit	f0b94e
Packit	f0b94e	`#include "mozilla/Assertions.h"`
Packit	f0b94e	`#include "mozilla/Attributes.h"`
Packit	f0b94e	`#include "mozilla/Char16.h"`
Packit	f0b94e	`#include "mozilla/MathAlgorithms.h"`
Packit	f0b94e	`#include "mozilla/Types.h"`
Packit	f0b94e	`#include "mozilla/WrappingOperations.h"`
Packit	f0b94e
Packit	f0b94e	`#include <stdint.h>`
Packit	f0b94e
Packit	f0b94e	`namespace mozilla {`
Packit	f0b94e
/**
 * The golden ratio as a 32-bit fixed-point value: the fractional part of the
 * golden ratio (0.6180339887...) scaled by 2**32.
 *
 * The constant is odd, which the hash mixing step in detail::AddU32ToHash
 * relies on when it multiplies by this value.
 */
static const uint32_t kGoldenRatioU32 = 0x9E3779B9U;
Packit	f0b94e
Packit	f0b94e	`namespace detail {`
Packit	f0b94e
Packit	f0b94e	`inline uint32_t AddU32ToHash(uint32_t aHash, uint32_t aValue) {`
Packit	f0b94e	`/*`
Packit	f0b94e	`* This is the meat of all our hash routines. This hash function is not`
Packit	f0b94e	`* particularly sophisticated, but it seems to work well for our mostly`
Packit	f0b94e	`* plain-text inputs. Implementation notes follow.`
Packit	f0b94e	`*`
Packit	f0b94e	`* Our use of the golden ratio here is arbitrary; we could pick almost any`
Packit	f0b94e	`* number which:`
Packit	f0b94e	`*`
Packit	f0b94e	`* * is odd (because otherwise, all our hash values will be even)`
Packit	f0b94e	`*`
Packit	f0b94e	`* * has a reasonably-even mix of 1's and 0's (consider the extreme case`
Packit	f0b94e	`* where we multiply by 0x3 or 0xeffffff -- this will not produce good`
Packit	f0b94e	`* mixing across all bits of the hash).`
Packit	f0b94e	`*`
Packit	f0b94e	`* The rotation length of 5 is also arbitrary, although an odd number is again`
Packit	f0b94e	`* preferable so our hash explores the whole universe of possible rotations.`
Packit	f0b94e	`*`
Packit	f0b94e	`* Finally, we multiply by the golden ratio after xor'ing, not before.`
Packit	f0b94e	`* Otherwise, if \|aHash\| is 0 (as it often is for the beginning of a`
Packit	f0b94e	`* message), the expression`
Packit	f0b94e	`*`
Packit	f0b94e	`* mozilla::WrappingMultiply(kGoldenRatioU32, RotateBitsLeft(aHash, 5))`
Packit	f0b94e	`* \|xor\|`
Packit	f0b94e	`* aValue`
Packit	f0b94e	`*`
Packit	f0b94e	`* evaluates to \|aValue\|.`
Packit	f0b94e	`*`
Packit	f0b94e	`* (Number-theoretic aside: Because any odd number \|m\| is relatively prime to`
Packit	f0b94e	`* our modulus (2**32), the list`
Packit	f0b94e	`*`
Packit	f0b94e	`* [x * m (mod 232) for 0 <= x < 232]`
Packit	f0b94e	`*`
Packit	f0b94e	`* has no duplicate elements. This means that multiplying by \|m\| does not`
Packit	f0b94e	`* cause us to skip any possible hash values.`
Packit	f0b94e	`*`
Packit	f0b94e	`* It's also nice if \|m\| has large-ish order mod 2**32 -- that is, if the`
Packit	f0b94e	`* smallest k such that mk == 1 (mod 232) is large -- so we can safely`
Packit	f0b94e	`* multiply our hash value by \|m\| a few times without negating the`
Packit	f0b94e	`* multiplicative effect. Our golden ratio constant has order 2**29, which is`
Packit	f0b94e	`* more than enough for our purposes.)`
Packit	f0b94e	`*/`
Packit	f0b94e	`return mozilla::WrappingMultiply(kGoldenRatioU32,`
Packit	f0b94e	`RotateLeft(aHash, 5) ^ aValue);`
Packit	f0b94e	`}`
Packit	f0b94e
Packit	f0b94e	`/**`
Packit	f0b94e	`* AddUintptrToHash takes sizeof(uintptr_t) as a template parameter.`
Packit	f0b94e	`*/`
Packit	f0b94e	`template <size_t PtrSize>`
Packit	f0b94e	`inline uint32_t AddUintptrToHash(uint32_t aHash, uintptr_t aValue) {`
Packit	f0b94e	`return AddU32ToHash(aHash, static_cast<uint32_t>(aValue));`
Packit	f0b94e	`}`
Packit	f0b94e
Packit	f0b94e	`template <>`
Packit	f0b94e	`inline uint32_t AddUintptrToHash<8>(uint32_t aHash, uintptr_t aValue) {`
Packit	f0b94e	`uint32_t v1 = static_cast<uint32_t>(aValue);`
Packit	f0b94e	`uint32_t v2 = static_cast<uint32_t>(static_cast<uint64_t>(aValue) >> 32);`
Packit	f0b94e	`return AddU32ToHash(AddU32ToHash(aHash, v1), v2);`
Packit	f0b94e	`}`
Packit	f0b94e
Packit	f0b94e	`} /* namespace detail */`
Packit	f0b94e
Packit	f0b94e	`/**`
Packit	f0b94e	`* AddToHash takes a hash and some values and returns a new hash based on the`
Packit	f0b94e	`* inputs.`
Packit	f0b94e	`*`
Packit	f0b94e	`* Currently, we support hashing uint32_t's, values which we can implicitly`
Packit	f0b94e	`* convert to uint32_t, data pointers, and function pointers.`
Packit	f0b94e	`*/`
Packit	f0b94e	`template <typename T, bool TypeIsNotIntegral = !mozilla::IsIntegral<T>::value,`
Packit	f0b94e	`typename U = typename mozilla::EnableIf<TypeIsNotIntegral>::Type>`
Packit	f0b94e	`MOZ_MUST_USE inline uint32_t AddToHash(uint32_t aHash, T aA) {`
Packit	f0b94e	`/*`
Packit	f0b94e	`* Try to convert \|A\| to uint32_t implicitly. If this works, great. If not,`
Packit	f0b94e	`* we'll error out.`
Packit	f0b94e	`*/`
Packit	f0b94e	`return detail::AddU32ToHash(aHash, aA);`
Packit	f0b94e	`}`
Packit	f0b94e
Packit	f0b94e	`template <typename A>`
Packit	f0b94e	`MOZ_MUST_USE inline uint32_t AddToHash(uint32_t aHash, A* aA) {`
Packit	f0b94e	`/*`
Packit	f0b94e	`* You might think this function should just take a void*. But then we'd only`
Packit	f0b94e	`* catch data pointers and couldn't handle function pointers.`
Packit	f0b94e	`*/`
Packit	f0b94e
Packit	f0b94e	`static_assert(sizeof(aA) == sizeof(uintptr_t), "Strange pointer!");`
Packit	f0b94e
Packit	f0b94e	`return detail::AddUintptrToHash<sizeof(uintptr_t)>(aHash, uintptr_t(aA));`
Packit	f0b94e	`}`
Packit	f0b94e
Packit	f0b94e	`// We use AddUintptrToHash() for hashing all integral types. 8-byte integral`
Packit	f0b94e	`// types are treated the same as 64-bit pointers, and smaller integral types are`
Packit	f0b94e	`// first implicitly converted to 32 bits and then passed to AddUintptrToHash()`
Packit	f0b94e	`// to be hashed.`
Packit	f0b94e	`template`
Packit	f0b94e	`mozilla::IsIntegral<T>::value>::Type>`
Packit	f0b94e	`MOZ_MUST_USE inline uint32_t AddToHash(uint32_t aHash, T aA) {`
Packit	f0b94e	`return detail::AddUintptrToHash<sizeof(T)>(aHash, aA);`
Packit	f0b94e	`}`
Packit	f0b94e
Packit	f0b94e	`template <typename A, typename... Args>`
Packit	f0b94e	`MOZ_MUST_USE uint32_t AddToHash(uint32_t aHash, A aArg, Args... aArgs) {`
Packit	f0b94e	`return AddToHash(AddToHash(aHash, aArg), aArgs...);`
Packit	f0b94e	`}`
Packit	f0b94e
Packit	f0b94e	`/**`
Packit	f0b94e	`* The HashGeneric class of functions let you hash one or more values.`
Packit	f0b94e	`*`
Packit	f0b94e	`* If you want to hash together two values x and y, calling HashGeneric(x, y) is`
Packit	f0b94e	`* much better than calling AddToHash(x, y), because AddToHash(x, y) assumes`
Packit	f0b94e	`* that x has already been hashed.`
Packit	f0b94e	`*/`
Packit	f0b94e	`template <typename... Args>`
Packit	f0b94e	`MOZ_MUST_USE inline uint32_t HashGeneric(Args... aArgs) {`
Packit	f0b94e	`return AddToHash(0, aArgs...);`
Packit	f0b94e	`}`
Packit	f0b94e
Packit	f0b94e	`namespace detail {`
Packit	f0b94e
/**
 * Hash the elements of |aStr| one by one, starting from a hash of 0, and stop
 * at the first element that compares equal to zero.  The terminating zero is
 * not added to the hash.
 */
template <typename T>
uint32_t HashUntilZero(const T* aStr) {
  uint32_t hash = 0;
  for (T c; (c = *aStr); aStr++) {
    hash = AddToHash(hash, c);
  }
  return hash;
}
Packit	f0b94e
/**
 * Hash the first |aLength| elements of |aStr| one by one, starting from a hash
 * of 0.  Zero-valued elements are hashed like any other element.  Returns 0
 * when |aLength| is 0.
 */
template <typename T>
uint32_t HashKnownLength(const T* aStr, size_t aLength) {
  uint32_t hash = 0;
  for (size_t i = 0; i < aLength; i++) {
    hash = AddToHash(hash, aStr[i]);
  }
  return hash;
}
Packit	f0b94e
Packit	f0b94e	`} /* namespace detail */`
Packit	f0b94e
Packit	f0b94e	`/**`
Packit	f0b94e	`* The HashString overloads below do just what you'd expect.`
Packit	f0b94e	`*`
Packit	f0b94e	`* If you have the string's length, you might as well call the overload which`
Packit	f0b94e	`* includes the length. It may be marginally faster.`
Packit	f0b94e	`*/`
Packit	f0b94e	`MOZ_MUST_USE inline uint32_t HashString(const char* aStr) {`
Packit	f0b94e	`return detail::HashUntilZero(reinterpret_cast<const unsigned char*>(aStr));`
Packit	f0b94e	`}`
Packit	f0b94e
Packit	f0b94e	`MOZ_MUST_USE inline uint32_t HashString(const char* aStr, size_t aLength) {`
Packit	f0b94e	`return detail::HashKnownLength(reinterpret_cast<const unsigned char*>(aStr),`
Packit	f0b94e	`aLength);`
Packit	f0b94e	`}`
Packit	f0b94e
Packit	f0b94e	`MOZ_MUST_USE`
Packit	f0b94e	`inline uint32_t HashString(const unsigned char* aStr, size_t aLength) {`
Packit	f0b94e	`return detail::HashKnownLength(aStr, aLength);`
Packit	f0b94e	`}`
Packit	f0b94e
Packit	f0b94e	`MOZ_MUST_USE inline uint32_t HashString(const char16_t* aStr) {`
Packit	f0b94e	`return detail::HashUntilZero(aStr);`
Packit	f0b94e	`}`
Packit	f0b94e
Packit	f0b94e	`MOZ_MUST_USE inline uint32_t HashString(const char16_t* aStr, size_t aLength) {`
Packit	f0b94e	`return detail::HashKnownLength(aStr, aLength);`
Packit	f0b94e	`}`
Packit	f0b94e
/*
 * On Windows, wchar_t is not the same as char16_t, even though it's
 * the same width!
 *
 * These overloads are only compiled when WIN32 is defined.
 */
#ifdef WIN32
/** Hash a zero-terminated wchar_t string. */
MOZ_MUST_USE inline uint32_t HashString(const wchar_t* aStr) {
  return detail::HashUntilZero(aStr);
}

/** Hash the first |aLength| wchar_t units of |aStr|. */
MOZ_MUST_USE inline uint32_t HashString(const wchar_t* aStr, size_t aLength) {
  return detail::HashKnownLength(aStr, aLength);
}
#endif
Packit	f0b94e
Packit	f0b94e	`/**`
Packit	f0b94e	`* Hash some number of bytes.`
Packit	f0b94e	`*`
Packit	f0b94e	`* This hash walks word-by-word, rather than byte-by-byte, so you won't get the`
Packit	f0b94e	`* same result out of HashBytes as you would out of HashString.`
Packit	f0b94e	`*/`
Packit	f0b94e	`MOZ_MUST_USE extern MFBT_API uint32_t HashBytes(const void* bytes,`
Packit	f0b94e	`size_t aLength);`
Packit	f0b94e
Packit	f0b94e	`/**`
Packit	f0b94e	`* A pseudorandom function mapping 32-bit integers to 32-bit integers.`
Packit	f0b94e	`*`
Packit	f0b94e	`* This is for when you're feeding private data (like pointer values or credit`
Packit	f0b94e	`* card numbers) to a non-crypto hash function (like HashBytes) and then using`
Packit	f0b94e	`* the hash code for something that untrusted parties could observe (like a JS`
Packit	f0b94e	`* Map). Plug in a HashCodeScrambler before that last step to avoid leaking the`
Packit	f0b94e	`* private data.`
Packit	f0b94e	`*`
Packit	f0b94e	`* By itself, this does not prevent hash-flooding DoS attacks, because an`
Packit	f0b94e	`* attacker can still generate many values with exactly equal hash codes by`
Packit	f0b94e	`* attacking the non-crypto hash function alone. Equal hash codes will, of`
Packit	f0b94e	`* course, still be equal however much you scramble them.`
Packit	f0b94e	`*`
Packit	f0b94e	`* The algorithm is SipHash-1-3. See <https://131002.net/siphash/>.`
Packit	f0b94e	`*/`
Packit	f0b94e	`class HashCodeScrambler {`
Packit	f0b94e	`struct SipHasher;`
Packit	f0b94e
Packit	f0b94e	`uint64_t mK0, mK1;`
Packit	f0b94e
Packit	f0b94e	`public:`
Packit	f0b94e	`/** Creates a new scrambler with the given 128-bit key. */`
Packit	f0b94e	`constexpr HashCodeScrambler(uint64_t aK0, uint64_t aK1)`
Packit	f0b94e	`: mK0(aK0), mK1(aK1) {}`
Packit	f0b94e
Packit	f0b94e	`/**`
Packit	f0b94e	`* Scramble a hash code. Always produces the same result for the same`
Packit	f0b94e	`* combination of key and hash code.`
Packit	f0b94e	`*/`
Packit	f0b94e	`uint32_t scramble(uint32_t aHashCode) const {`
Packit	f0b94e	`SipHasher hasher(mK0, mK1);`
Packit	f0b94e	`return uint32_t(hasher.sipHash(aHashCode));`
Packit	f0b94e	`}`
Packit	f0b94e
Packit	f0b94e	`private:`
Packit	f0b94e	`struct SipHasher {`
Packit	f0b94e	`SipHasher(uint64_t aK0, uint64_t aK1) {`
Packit	f0b94e	`// 1. Initialization.`
Packit	f0b94e	`mV0 = aK0 ^ UINT64_C(0x736f6d6570736575);`
Packit	f0b94e	`mV1 = aK1 ^ UINT64_C(0x646f72616e646f6d);`
Packit	f0b94e	`mV2 = aK0 ^ UINT64_C(0x6c7967656e657261);`
Packit	f0b94e	`mV3 = aK1 ^ UINT64_C(0x7465646279746573);`
Packit	f0b94e	`}`
Packit	f0b94e
Packit	f0b94e	`uint64_t sipHash(uint64_t aM) {`
Packit	f0b94e	`// 2. Compression.`
Packit	f0b94e	`mV3 ^= aM;`
Packit	f0b94e	`sipRound();`
Packit	f0b94e	`mV0 ^= aM;`
Packit	f0b94e
Packit	f0b94e	`// 3. Finalization.`
Packit	f0b94e	`mV2 ^= 0xff;`
Packit	f0b94e	`for (int i = 0; i < 3; i++) sipRound();`
Packit	f0b94e	`return mV0 ^ mV1 ^ mV2 ^ mV3;`
Packit	f0b94e	`}`
Packit	f0b94e
Packit	f0b94e	`MOZ_NO_SANITIZE_UNSIGNED_OVERFLOW`
Packit	f0b94e	`void sipRound() {`
Packit	f0b94e	`mV0 += mV1;`
Packit	f0b94e	`mV1 = RotateLeft(mV1, 13);`
Packit	f0b94e	`mV1 ^= mV0;`
Packit	f0b94e	`mV0 = RotateLeft(mV0, 32);`
Packit	f0b94e	`mV2 += mV3;`
Packit	f0b94e	`mV3 = RotateLeft(mV3, 16);`
Packit	f0b94e	`mV3 ^= mV2;`
Packit	f0b94e	`mV0 += mV3;`
Packit	f0b94e	`mV3 = RotateLeft(mV3, 21);`
Packit	f0b94e	`mV3 ^= mV0;`
Packit	f0b94e	`mV2 += mV1;`
Packit	f0b94e	`mV1 = RotateLeft(mV1, 17);`
Packit	f0b94e	`mV1 ^= mV2;`
Packit	f0b94e	`mV2 = RotateLeft(mV2, 32);`
Packit	f0b94e	`}`
Packit	f0b94e
Packit	f0b94e	`uint64_t mV0, mV1, mV2, mV3;`
Packit	f0b94e	`};`
Packit	f0b94e	`};`
Packit	f0b94e
Packit	f0b94e	`} /* namespace mozilla */`
Packit	f0b94e
Packit	f0b94e	`#endif /* mozilla_HashFunctions_h */`

source-git / mozjs60

Source Code

Blame mfbt/HashFunctions.h