From d04d4ab4991c17619192ed234b5817fcac7f2000 Mon Sep 17 00:00:00 2001 From: elsid Date: Mon, 15 Nov 2021 14:48:13 +0100 Subject: [PATCH 1/4] Add MurmurHash3_x64_128 implementation Sources: https://github.com/aappleby/smhasher/blob/92cf3702fcfaadc84eb7bef59825a23e0cd84f56/src/MurmurHash3.h https://github.com/aappleby/smhasher/blob/92cf3702fcfaadc84eb7bef59825a23e0cd84f56/src/MurmurHash3.cpp Other hash functions and unused implementation details are removed. --- extern/CMakeLists.txt | 2 + extern/smhasher/CMakeLists.txt | 2 + extern/smhasher/MurmurHash3.cpp | 148 ++++++++++++++++++++++++++++++++ extern/smhasher/MurmurHash3.h | 33 +++++++ 4 files changed, 185 insertions(+) create mode 100644 extern/smhasher/CMakeLists.txt create mode 100644 extern/smhasher/MurmurHash3.cpp create mode 100644 extern/smhasher/MurmurHash3.h diff --git a/extern/CMakeLists.txt b/extern/CMakeLists.txt index 59d3d15176..0bfdc4c233 100644 --- a/extern/CMakeLists.txt +++ b/extern/CMakeLists.txt @@ -208,3 +208,5 @@ if (NOT OPENMW_USE_SYSTEM_SQLITE3) set(SQLite3_INCLUDE_DIR ${sqlite3_SOURCE_DIR}/ PARENT_SCOPE) set(SQLite3_LIBRARY sqlite3 PARENT_SCOPE) endif() + +add_subdirectory(smhasher) diff --git a/extern/smhasher/CMakeLists.txt b/extern/smhasher/CMakeLists.txt new file mode 100644 index 0000000000..ee03e6c38e --- /dev/null +++ b/extern/smhasher/CMakeLists.txt @@ -0,0 +1,2 @@ +add_library(smhasher STATIC MurmurHash3.cpp) +target_include_directories(smhasher INTERFACE .) diff --git a/extern/smhasher/MurmurHash3.cpp b/extern/smhasher/MurmurHash3.cpp new file mode 100644 index 0000000000..62be4f3cef --- /dev/null +++ b/extern/smhasher/MurmurHash3.cpp @@ -0,0 +1,148 @@ +//----------------------------------------------------------------------------- +// MurmurHash3 was written by Austin Appleby, and is placed in the public +// domain. The author hereby disclaims copyright to this source code. + +// Note - The x86 and x64 versions do _not_ produce the same results, as the +// algorithms are optimized for their respective platforms. You can still +// compile and run any of them on any platform, but your performance with the +// non-native version will be less than optimal. + +#include "MurmurHash3.h" + +//----------------------------------------------------------------------------- +// Platform-specific functions and macros + +// Microsoft Visual Studio + +#if defined(_MSC_VER) + +#define FORCE_INLINE __forceinline + +#include + +#define ROTL64(x,y) _rotl64(x,y) + +#define BIG_CONSTANT(x) (x) + +// Other compilers + +#else // defined(_MSC_VER) + +#define FORCE_INLINE inline __attribute__((always_inline)) + +inline uint64_t rotl64 ( uint64_t x, int8_t r ) +{ + return (x << r) | (x >> (64 - r)); +} + +#define ROTL64(x,y) rotl64(x,y) + +#define BIG_CONSTANT(x) (x##LLU) + +#endif // !defined(_MSC_VER) + +//----------------------------------------------------------------------------- +// Block read - if your platform needs to do endian-swapping or can only +// handle aligned reads, do the conversion here + +FORCE_INLINE uint64_t getblock64 ( const uint64_t * p, int i ) +{ + return p[i]; +} + +//---------- + +FORCE_INLINE uint64_t fmix64 ( uint64_t k ) +{ + k ^= k >> 33; + k *= BIG_CONSTANT(0xff51afd7ed558ccd); + k ^= k >> 33; + k *= BIG_CONSTANT(0xc4ceb9fe1a85ec53); + k ^= k >> 33; + + return k; +} + +//----------------------------------------------------------------------------- + +void MurmurHash3_x64_128 ( const void * key, const int len, + const uint32_t seed, void * out ) +{ + const uint8_t * data = (const uint8_t*)key; + const int nblocks = len / 16; + + uint64_t h1 = seed; + uint64_t h2 = seed; + + const uint64_t c1 = BIG_CONSTANT(0x87c37b91114253d5); + const uint64_t c2 = BIG_CONSTANT(0x4cf5ad432745937f); + + //---------- + // body + + const uint64_t * blocks = (const uint64_t *)(data); + + for(int i = 0; i < nblocks; i++) + { + uint64_t k1 = getblock64(blocks,i*2+0); + uint64_t k2 = getblock64(blocks,i*2+1); + + k1 *= c1; k1 = ROTL64(k1,31); k1 *= c2; h1 ^= k1; + + h1 = ROTL64(h1,27); h1 += h2; h1 = h1*5+0x52dce729; + + k2 *= c2; k2 = ROTL64(k2,33); k2 *= c1; h2 ^= k2; + + h2 = ROTL64(h2,31); h2 += h1; h2 = h2*5+0x38495ab5; + } + + //---------- + // tail + + const uint8_t * tail = (const uint8_t*)(data + nblocks*16); + + uint64_t k1 = 0; + uint64_t k2 = 0; + + switch(len & 15) + { + case 15: k2 ^= ((uint64_t)tail[14]) << 48; + case 14: k2 ^= ((uint64_t)tail[13]) << 40; + case 13: k2 ^= ((uint64_t)tail[12]) << 32; + case 12: k2 ^= ((uint64_t)tail[11]) << 24; + case 11: k2 ^= ((uint64_t)tail[10]) << 16; + case 10: k2 ^= ((uint64_t)tail[ 9]) << 8; + case 9: k2 ^= ((uint64_t)tail[ 8]) << 0; + k2 *= c2; k2 = ROTL64(k2,33); k2 *= c1; h2 ^= k2; + + case 8: k1 ^= ((uint64_t)tail[ 7]) << 56; + case 7: k1 ^= ((uint64_t)tail[ 6]) << 48; + case 6: k1 ^= ((uint64_t)tail[ 5]) << 40; + case 5: k1 ^= ((uint64_t)tail[ 4]) << 32; + case 4: k1 ^= ((uint64_t)tail[ 3]) << 24; + case 3: k1 ^= ((uint64_t)tail[ 2]) << 16; + case 2: k1 ^= ((uint64_t)tail[ 1]) << 8; + case 1: k1 ^= ((uint64_t)tail[ 0]) << 0; + k1 *= c1; k1 = ROTL64(k1,31); k1 *= c2; h1 ^= k1; + }; + + //---------- + // finalization + + h1 ^= len; h2 ^= len; + + h1 += h2; + h2 += h1; + + h1 = fmix64(h1); + h2 = fmix64(h2); + + h1 += h2; + h2 += h1; + + ((uint64_t*)out)[0] = h1; + ((uint64_t*)out)[1] = h2; +} + +//----------------------------------------------------------------------------- + diff --git a/extern/smhasher/MurmurHash3.h b/extern/smhasher/MurmurHash3.h new file mode 100644 index 0000000000..5a7ed73f33 --- /dev/null +++ b/extern/smhasher/MurmurHash3.h @@ -0,0 +1,33 @@ +//----------------------------------------------------------------------------- +// MurmurHash3 was written by Austin Appleby, and is placed in the public +// domain. The author hereby disclaims copyright to this source code. + +#ifndef _MURMURHASH3_H_ +#define _MURMURHASH3_H_ + +//----------------------------------------------------------------------------- +// Platform-specific functions and macros + +// Microsoft Visual Studio + +#if defined(_MSC_VER) && (_MSC_VER < 1600) + +typedef unsigned char uint8_t; +typedef unsigned int uint32_t; +typedef unsigned __int64 uint64_t; + +// Other compilers + +#else // defined(_MSC_VER) + +#include + +#endif // !defined(_MSC_VER) + +//----------------------------------------------------------------------------- + +void MurmurHash3_x64_128 ( const void * key, int len, uint32_t seed, void * out ); + +//----------------------------------------------------------------------------- + +#endif // _MURMURHASH3_H_ From 86bf9d5b8df6b2c136dc9429376a8671c2571d06 Mon Sep 17 00:00:00 2001 From: elsid Date: Mon, 15 Nov 2021 14:50:04 +0100 Subject: [PATCH 2/4] Support 128bit seed for MurmurHash3_x64_128 --- extern/smhasher/MurmurHash3.cpp | 6 +++--- extern/smhasher/MurmurHash3.h | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/extern/smhasher/MurmurHash3.cpp b/extern/smhasher/MurmurHash3.cpp index 62be4f3cef..69d8d6c773 100644 --- a/extern/smhasher/MurmurHash3.cpp +++ b/extern/smhasher/MurmurHash3.cpp @@ -66,13 +66,13 @@ FORCE_INLINE uint64_t fmix64 ( uint64_t k ) //----------------------------------------------------------------------------- void MurmurHash3_x64_128 ( const void * key, const int len, - const uint32_t seed, void * out ) + const uint64_t * seed, void * out ) { const uint8_t * data = (const uint8_t*)key; const int nblocks = len / 16; - uint64_t h1 = seed; - uint64_t h2 = seed; + uint64_t h1 = seed[0]; + uint64_t h2 = seed[1]; const uint64_t c1 = BIG_CONSTANT(0x87c37b91114253d5); const uint64_t c2 = BIG_CONSTANT(0x4cf5ad432745937f); diff --git a/extern/smhasher/MurmurHash3.h b/extern/smhasher/MurmurHash3.h index 5a7ed73f33..8aebdc304d 100644 --- a/extern/smhasher/MurmurHash3.h +++ b/extern/smhasher/MurmurHash3.h @@ -26,7 +26,7 @@ typedef unsigned __int64 uint64_t; //----------------------------------------------------------------------------- -void MurmurHash3_x64_128 ( const void * key, int len, uint32_t seed, void * out ); +void MurmurHash3_x64_128 ( const void * key, int len, const uint64_t * seed, void * out ); //----------------------------------------------------------------------------- From f85053d78c7d21484bcf49d75ab2a9fce5ce79f3 Mon Sep 17 00:00:00 2001 From: elsid Date: Mon, 15 Nov 2021 16:57:58 +0100 Subject: [PATCH 3/4] Support unaligned blocks --- extern/smhasher/MurmurHash3.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/extern/smhasher/MurmurHash3.cpp b/extern/smhasher/MurmurHash3.cpp index 69d8d6c773..c8b774bab9 100644 --- a/extern/smhasher/MurmurHash3.cpp +++ b/extern/smhasher/MurmurHash3.cpp @@ -9,6 +9,8 @@ #include "MurmurHash3.h" +#include + //----------------------------------------------------------------------------- // Platform-specific functions and macros @@ -47,7 +49,9 @@ inline uint64_t rotl64 ( uint64_t x, int8_t r ) FORCE_INLINE uint64_t getblock64 ( const uint64_t * p, int i ) { - return p[i]; + uint64_t result = 0; + std::memcpy(&result, p + i, sizeof(result)); + return result; } //---------- From a665a38aca554515b7e6e2d071183ba325134317 Mon Sep 17 00:00:00 2001 From: elsid Date: Mon, 15 Nov 2021 17:40:22 +0100 Subject: [PATCH 4/4] Use MurmurHash3_x64_128 for file hash --- apps/openmw_test_suite/CMakeLists.txt | 2 + apps/openmw_test_suite/files/hash.cpp | 55 +++++++++++++++++++ .../nifloader/testbulletnifloader.cpp | 4 +- components/CMakeLists.txt | 1 + components/files/hash.cpp | 19 ++++--- components/files/hash.hpp | 3 +- components/nif/niffile.cpp | 3 +- components/nif/niffile.hpp | 6 +- components/nifosg/nifloader.cpp | 4 +- components/resource/bulletshape.hpp | 3 +- components/resource/bulletshapemanager.cpp | 5 +- components/resource/scenemanager.cpp | 4 +- 12 files changed, 85 insertions(+), 24 deletions(-) create mode 100644 apps/openmw_test_suite/files/hash.cpp diff --git a/apps/openmw_test_suite/CMakeLists.txt b/apps/openmw_test_suite/CMakeLists.txt index 29564ef191..ed1d6c413b 100644 --- a/apps/openmw_test_suite/CMakeLists.txt +++ b/apps/openmw_test_suite/CMakeLists.txt @@ -62,6 +62,8 @@ if (GTEST_FOUND AND GMOCK_FOUND) esmloader/load.cpp esmloader/esmdata.cpp + + files/hash.cpp ) source_group(apps\\openmw_test_suite FILES openmw_test_suite.cpp ${UNITTEST_SRC_FILES}) diff --git a/apps/openmw_test_suite/files/hash.cpp b/apps/openmw_test_suite/files/hash.cpp new file mode 100644 index 0000000000..e6dbc8f6cc --- /dev/null +++ b/apps/openmw_test_suite/files/hash.cpp @@ -0,0 +1,55 @@ +#include +#include + +#include +#include + +#include +#include +#include +#include + +namespace +{ + using namespace testing; + using namespace Files; + + struct Params + { + std::size_t mSize; + std::array mHash; + }; + + struct FilesGetHash : TestWithParam {}; + + TEST_P(FilesGetHash, shouldReturnHashForStringStream) + { + const std::string fileName = "fileName"; + std::string content; + std::fill_n(std::back_inserter(content), GetParam().mSize, 'a'); + std::istringstream stream(content); + EXPECT_EQ(getHash(fileName, stream), GetParam().mHash); + } + + TEST_P(FilesGetHash, shouldReturnHashForConstrainedFileStream) + { + std::string fileName(UnitTest::GetInstance()->current_test_info()->name()); + std::replace(fileName.begin(), fileName.end(), '/', '_'); + std::string content; + std::fill_n(std::back_inserter(content), GetParam().mSize, 'a'); + std::fstream(fileName, std::ios_base::out | std::ios_base::binary) + .write(content.data(), static_cast(content.size())); + const auto stream = Files::openConstrainedFileStream(fileName.data(), 0, content.size()); + EXPECT_EQ(getHash(fileName, *stream), GetParam().mHash); + } + + INSTANTIATE_TEST_SUITE_P(Params, FilesGetHash, Values( + Params {0, {0, 0}}, + Params {1, {9607679276477937801ull, 16624257681780017498ull}}, + Params {128, {15287858148353394424ull, 16818615825966581310ull}}, + Params {1000, {11018119256083894017ull, 6631144854802791578ull}}, + Params {4096, {11972283295181039100ull, 16027670129106775155ull}}, + Params {4097, {16717956291025443060ull, 12856404199748778153ull}}, + Params {5000, {15775925571142117787ull, 10322955217889622896ull}} + )); +} diff --git a/apps/openmw_test_suite/nifloader/testbulletnifloader.cpp b/apps/openmw_test_suite/nifloader/testbulletnifloader.cpp index 92aece9075..8fbf5c1b5b 100644 --- a/apps/openmw_test_suite/nifloader/testbulletnifloader.cpp +++ b/apps/openmw_test_suite/nifloader/testbulletnifloader.cpp @@ -335,7 +335,7 @@ namespace MOCK_METHOD(void, setUseSkinning, (bool), (override)); MOCK_METHOD(bool, getUseSkinning, (), (const, override)); MOCK_METHOD(std::string, getFilename, (), (const, override)); - MOCK_METHOD(std::uint64_t, getHash, (), (const, override)); + MOCK_METHOD(std::string, getHash, (), (const, override)); MOCK_METHOD(unsigned int, getVersion, (), (const, override)); MOCK_METHOD(unsigned int, getUserVersion, (), (const, override)); MOCK_METHOD(unsigned int, getBethVersion, (), (const, override)); @@ -382,7 +382,7 @@ namespace ), btVector3(4, 8, 12) }; - const std::uint64_t mHash = 42; + const std::string mHash = "hash"; TestBulletNifLoader() { diff --git a/components/CMakeLists.txt b/components/CMakeLists.txt index 56371a8c02..3fc9cb92a1 100644 --- a/components/CMakeLists.txt +++ b/components/CMakeLists.txt @@ -289,6 +289,7 @@ target_link_libraries(components Base64 SQLite::SQLite3 + smhasher ) target_link_libraries(components ${BULLET_LIBRARIES}) diff --git a/components/files/hash.cpp b/components/files/hash.cpp index 91af1e9283..079a169ae5 100644 --- a/components/files/hash.cpp +++ b/components/files/hash.cpp @@ -1,17 +1,17 @@ #include "hash.hpp" -#include +#include +#include #include -#include #include #include namespace Files { - std::uint64_t getHash(const std::string& fileName, std::istream& stream) + std::array getHash(const std::string& fileName, std::istream& stream) { - std::uint64_t hash = std::hash {}(fileName); + std::array hash {0, 0}; try { const auto start = stream.tellg(); @@ -19,9 +19,14 @@ namespace Files stream.exceptions(std::ios_base::badbit); while (stream) { - std::uint64_t value = 0; - stream.read(reinterpret_cast(&value), sizeof(value)); - Misc::hashCombine(hash, value); + std::array value; + stream.read(value.data(), value.size()); + const std::streamsize read = stream.gcount(); + if (read == 0) + break; + std::array blockHash {0, 0}; + MurmurHash3_x64_128(value.data(), static_cast(read), hash.data(), blockHash.data()); + hash = blockHash; } stream.exceptions(exceptions); stream.clear(); diff --git a/components/files/hash.hpp b/components/files/hash.hpp index 46784ee9be..13d56d5824 100644 --- a/components/files/hash.hpp +++ b/components/files/hash.hpp @@ -1,13 +1,14 @@ #ifndef COMPONENTS_FILES_HASH_H #define COMPONENTS_FILES_HASH_H +#include #include #include #include namespace Files { - std::uint64_t getHash(const std::string& fileName, std::istream& stream); + std::array getHash(const std::string& fileName, std::istream& stream); } #endif diff --git a/components/nif/niffile.cpp b/components/nif/niffile.cpp index f11b75d218..f70b9024f9 100644 --- a/components/nif/niffile.cpp +++ b/components/nif/niffile.cpp @@ -173,7 +173,8 @@ std::string NIFFile::printVersion(unsigned int version) void NIFFile::parse(Files::IStreamPtr stream) { - hash = Files::getHash(filename, *stream); + const std::array fileHash = Files::getHash(filename, *stream); + hash.append(reinterpret_cast(fileHash.data()), fileHash.size() * sizeof(std::uint64_t)); NIFStream nif (this, stream); diff --git a/components/nif/niffile.hpp b/components/nif/niffile.hpp index eb851a74ff..6884f51d58 100644 --- a/components/nif/niffile.hpp +++ b/components/nif/niffile.hpp @@ -34,7 +34,7 @@ struct File virtual std::string getFilename() const = 0; - virtual std::uint64_t getHash() const = 0; + virtual std::string getHash() const = 0; virtual unsigned int getVersion() const = 0; @@ -52,7 +52,7 @@ class NIFFile final : public File /// File name, used for error messages and opening the file std::string filename; - std::uint64_t hash = 0; + std::string hash; /// Record list std::vector records; @@ -144,7 +144,7 @@ public: /// Get the name of the file std::string getFilename() const override { return filename; } - std::uint64_t getHash() const override { return hash; } + std::string getHash() const override { return hash; } /// Get the version of the NIF format used unsigned int getVersion() const override { return ver; } diff --git a/components/nifosg/nifloader.cpp b/components/nifosg/nifloader.cpp index 91d2300161..99aaaa3323 100644 --- a/components/nifosg/nifloader.cpp +++ b/components/nifosg/nifloader.cpp @@ -325,9 +325,7 @@ namespace NifOsg if (!textkeys->mTextKeys.empty()) created->getOrCreateUserDataContainer()->addUserObject(textkeys); - const std::uint64_t nifHash = nif->getHash(); - created->setUserValue(Misc::OsgUserValues::sFileHash, - std::string(reinterpret_cast(&nifHash), sizeof(nifHash))); + created->setUserValue(Misc::OsgUserValues::sFileHash, nif->getHash()); return created; } diff --git a/components/resource/bulletshape.hpp b/components/resource/bulletshape.hpp index 6dfa37aeda..cd8922ec8e 100644 --- a/components/resource/bulletshape.hpp +++ b/components/resource/bulletshape.hpp @@ -1,6 +1,7 @@ #ifndef OPENMW_COMPONENTS_RESOURCE_BULLETSHAPE_H #define OPENMW_COMPONENTS_RESOURCE_BULLETSHAPE_H +#include #include #include @@ -53,7 +54,7 @@ namespace Resource std::map mAnimatedShapes; std::string mFileName; - std::uint64_t mFileHash = 0; + std::string mFileHash; void setLocalScaling(const btVector3& scale); diff --git a/components/resource/bulletshapemanager.cpp b/components/resource/bulletshapemanager.cpp index 3803fbf669..da4672757a 100644 --- a/components/resource/bulletshapemanager.cpp +++ b/components/resource/bulletshapemanager.cpp @@ -169,10 +169,7 @@ osg::ref_ptr BulletShapeManager::getShape(const std::string & if (shape != nullptr) { shape->mFileName = normalized; - std::string fileHash; - constNode->getUserValue(Misc::OsgUserValues::sFileHash, fileHash); - if (!fileHash.empty()) - std::memcpy(&shape->mFileHash, fileHash.data(), std::min(fileHash.size(), sizeof(shape->mFileHash))); + constNode->getUserValue(Misc::OsgUserValues::sFileHash, shape->mFileHash); } } diff --git a/components/resource/scenemanager.cpp b/components/resource/scenemanager.cpp index 16d942ecec..5f2d78d2ed 100644 --- a/components/resource/scenemanager.cpp +++ b/components/resource/scenemanager.cpp @@ -506,7 +506,7 @@ namespace Resource options->setReadFileCallback(new ImageReadCallback(imageManager)); if (ext == "dae") options->setOptionString("daeUseSequencedTextureUnits"); - const std::uint64_t fileHash = Files::getHash(normalizedFilename, model); + const std::array fileHash = Files::getHash(normalizedFilename, model); osgDB::ReaderWriter::ReadResult result = reader->readNode(model, options); if (!result.success()) @@ -538,7 +538,7 @@ namespace Resource } node->setUserValue(Misc::OsgUserValues::sFileHash, - std::string(reinterpret_cast(&fileHash), sizeof(fileHash))); + std::string(reinterpret_cast(fileHash.data()), fileHash.size() * sizeof(std::uint64_t))); return node; }