// UnleashedRecomp/tools/fshasher/fshasher.cpp
// 2024-12-13 21:21:58 +03:00
//
// 203 lines
// 7.3 KiB
// C++

//
// fshasher - CLI tool to generate a hash map from a file system.
//
// This is free and unencumbered software released into the public domain.
//
// Anyone is free to copy, modify, publish, use, compile, sell, or
// distribute this software, either in source code form or as a compiled
// binary, for any purpose, commercial or non-commercial, and by any
// means.
//
// In jurisdictions that recognize copyright laws, the author or authors
// of this software dedicate any and all copyright interest in the
// software to the public domain. We make this dedication for the benefit
// of the public at large and to the detriment of our heirs and
// successors. We intend this dedication to be an overt act of
// relinquishment in perpetuity of all present and future rights to this
// software under copyright law.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
// IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
//
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <filesystem>
#include <fstream>
#include <iostream>
#include <list>
#include <map>
#include <set>
#include <string>
#include <system_error>
#include <vector>
#include <xxh3.h>
#include "plainargs.h"
// Prints the single-line command usage summary to standard output.
void showHelp()
{
    static constexpr const char *usageLine =
        "fshasher --directory <directory1 directory2 ...> --source <source file> --header <header file> --variable <variable name>";

    std::cout << usageLine << std::endl;
}
int process(const std::list<std::filesystem::path> &searchDirectories, std::ofstream &outputSourceStream, std::ofstream &outputHeaderStream, const std::string &variableName) {
auto writeExterns = [&](std::ofstream &outputStream)
{
outputStream << "extern const uint64_t " << variableName << "Hashes[];" << std::endl;
outputStream << "extern const std::pair<const char *, uint32_t> " << variableName << "Files[];" << std::endl;
outputStream << "extern const size_t " << variableName << "FilesSize;" << std::endl << std::endl;
};
// Generate header.
outputHeaderStream << "// File automatically generated by fshasher" << std::endl << std::endl;
outputHeaderStream << "#pragma once" << std::endl << std::endl;
outputHeaderStream << "#include <utility>" << std::endl << std::endl;
writeExterns(outputHeaderStream);
if (outputHeaderStream.bad())
{
std::cerr << "Failed to write to output header." << std::endl;
return 1;
}
outputSourceStream << "// File automatically generated by fshasher" << std::endl << std::endl;
outputSourceStream << "#include <utility>" << std::endl << std::endl;
writeExterns(outputSourceStream);
std::map<std::u8string, std::set<uint64_t>> fileHashSets;
char fileData[65536];
XXH3_state_t xxh3;
for (const std::filesystem::path &searchDirectory : searchDirectories)
{
if (!std::filesystem::is_directory(searchDirectory))
{
std::cerr << "Specified directory " << searchDirectory << " does not exist." << std::endl;
return 1;
}
for (const std::filesystem::directory_entry &entry : std::filesystem::recursive_directory_iterator(searchDirectory))
{
if (!entry.is_regular_file())
{
continue;
}
std::filesystem::path entryPath = entry.path();
std::filesystem::path entryRelative = std::filesystem::relative(entryPath, searchDirectory);
std::ifstream entryStream(entryPath, std::ios::binary);
if (!entryStream.is_open())
{
std::cerr << "Could not open " << entryPath << " for reading." << std::endl;
return 1;
}
std::cout << "Reading " << entryRelative << "." << std::endl;
XXH3_64bits_reset(&xxh3);
while (!entryStream.eof() && !entryStream.bad())
{
entryStream.read(fileData, sizeof(fileData));
XXH3_64bits_update(&xxh3, fileData, entryStream.gcount());
}
if (entryStream.bad())
{
std::cerr << "Could not read " << entryPath << " successfully." << std::endl;
return 1;
}
std::u8string entryRelativeU8 = entryRelative.u8string();
std::replace(entryRelativeU8.begin(), entryRelativeU8.end(), '\\', '/');
fileHashSets[entryRelativeU8].insert(XXH3_64bits_digest(&xxh3));
}
}
outputSourceStream << "const uint64_t " << variableName << "Hashes[] = {" << std::endl;
for (auto &it : fileHashSets)
{
for (uint64_t hash : it.second)
{
outputSourceStream << " " << hash << "ULL," << std::endl;
}
if (outputSourceStream.bad())
{
std::cerr << "Failed to write to output source." << std::endl;
return 1;
}
}
outputSourceStream << "};" << std::endl << std::endl;
outputSourceStream << "const std::pair<const char *, uint32_t> " << variableName << "Files[] = {" << std::endl;
for (const auto &it : fileHashSets)
{
outputSourceStream << " { \"" << (const char *)(it.first.c_str()) << "\", " << it.second.size() << " }," << std::endl;
if (outputSourceStream.bad())
{
std::cerr << "Failed to write to output source." << std::endl;
return 1;
}
}
outputSourceStream << "};" << std::endl << std::endl;
outputSourceStream << "const size_t " << variableName << "FilesSize = std::size(" << variableName << "Files);" << std::endl;
if (outputSourceStream.bad())
{
std::cerr << "Failed to write to output source." << std::endl;
return 1;
}
return 0;
}
// Entry point: parses the command line, opens the output source and header files,
// runs process() over the requested directories and removes the outputs on failure.
//
// Required arguments (long / short name): directory / d (one or more values),
// variable / v, source / s, header / h. If any is missing, the usage line is printed.
//
// Returns 0 on success and 1 on any failure.
int main(int argc, char *argv[])
{
    plainargs::Result argsResult = plainargs::parse(argc, argv);
    std::vector<std::string> directories = argsResult.getValues("directory", "d");
    std::string variable = argsResult.getValue("variable", "v");
    std::string source = argsResult.getValue("source", "s");
    std::string header = argsResult.getValue("header", "h");
    if (directories.empty() || variable.empty() || source.empty() || header.empty())
    {
        showHelp();
        return 1;
    }

    // Best-effort removal of an output file; the non-throwing overload is used so that
    // cleanup itself can never terminate the program.
    auto removeQuietly = [](const std::filesystem::path &path)
    {
        std::error_code ignored;
        std::filesystem::remove(path, ignored);
    };

    const std::filesystem::path sourcePath(source);
    std::ofstream sourceStream(sourcePath);
    if (!sourceStream.is_open())
    {
        std::cerr << "Could not open " << sourcePath << " for writing." << std::endl;
        return 1;
    }

    const std::filesystem::path headerPath(header);
    std::ofstream headerStream(headerPath);
    if (!headerStream.is_open())
    {
        std::cerr << "Could not open " << headerPath << " for writing." << std::endl;

        // Opening the source above already created/truncated it; do not leave an
        // empty file behind.
        sourceStream.close();
        removeQuietly(sourcePath);
        return 1;
    }

    std::list<std::filesystem::path> searchDirectories;
    for (const std::string &directory : directories)
    {
        searchDirectories.emplace_back(directory);
    }

    int resultCode = process(searchDirectories, sourceStream, headerStream, variable);

    // close() flushes the remaining buffered output and sets failbit if that fails,
    // so a successful process() can still end with incomplete files.
    sourceStream.close();
    headerStream.close();
    if (resultCode == 0 && (sourceStream.fail() || headerStream.fail()))
    {
        resultCode = 1;
    }

    if (resultCode != 0)
    {
        std::cerr << "Failed to generate " << sourcePath << " and " << headerPath << "." << std::endl;
        removeQuietly(sourcePath);
        removeQuietly(headerPath);
    }

    return resultCode;
}