diff options
| author | Stefan Boberg <[email protected]> | 2021-09-19 23:19:06 +0200 |
|---|---|---|
| committer | Stefan Boberg <[email protected]> | 2021-09-19 23:19:06 +0200 |
| commit | 3cf9dedfd08fe4d7a049e51b14a937f7a34afce3 (patch) | |
| tree | d68b5203818665f6f54aceea586f550dc6aadca4 /zenstore/filecas.cpp | |
| parent | Added BasicFile::StreamFile helper function to support reading large files in... (diff) | |
| download | zen-3cf9dedfd08fe4d7a049e51b14a937f7a34afce3.tar.xz zen-3cf9dedfd08fe4d7a049e51b14a937f7a34afce3.zip | |
Implemented basic scrubbing / detection of disk corruption. Still needs more code to propagate errors and make adjustments to account for them in higher-level data structures
Diffstat (limited to 'zenstore/filecas.cpp')
| -rw-r--r-- | zenstore/filecas.cpp | 95 |
1 files changed, 78 insertions, 17 deletions
diff --git a/zenstore/filecas.cpp b/zenstore/filecas.cpp index 5fdf505d4..3314beb7e 100644 --- a/zenstore/filecas.cpp +++ b/zenstore/filecas.cpp @@ -10,6 +10,7 @@ #include <zencore/string.h> #include <zencore/thread.h> #include <zencore/uid.h> +#include <zenstore/basicfile.h> #include <gsl/gsl-lite.hpp> @@ -17,6 +18,7 @@ #include <functional> #include <unordered_map> +// clang-format off #include <zencore/prewindows.h> struct IUnknown; // Workaround for "combaseapi.h(229): error C2187: syntax error: 'identifier' was unexpected here" when using /permissive- @@ -24,13 +26,19 @@ struct IUnknown; // Workaround for "combaseapi.h(229): error C2187: syntax erro #include <zencore/postwindows.h> // clang-format on -// -////////////////////////////////////////////////////////////////////////// namespace zen { using namespace fmt::literals; +FileCasStrategy::FileCasStrategy(const CasStoreConfiguration& Config) : m_Config(Config) +{ +} + +FileCasStrategy::~FileCasStrategy() +{ +} + WideStringBuilderBase& FileCasStrategy::MakeShardedPath(WideStringBuilderBase& ShardedPath, const IoHash& ChunkHash, size_t& OutShard2len) { @@ -56,7 +64,7 @@ FileCasStrategy::MakeShardedPath(WideStringBuilderBase& ShardedPath, const IoHas OutShard2len = ShardedPath.Size(); ShardedPath.Append('\\'); - ShardedPath.AppendAsciiRange(str + 6, str + 64); + ShardedPath.AppendAsciiRange(str + 5, str + 64); return ShardedPath; } @@ -259,12 +267,9 @@ FileCasStrategy::InsertChunk(const void* const ChunkData, const size_t ChunkSize } // We cannot rely on RAII to close the file handle since it would be closed - // *after* the lock is released due to the initialization order. 
+ // *after* the lock is released due to the initialization order PayloadFile.Close(); - AtomicIncrement(m_Stats.PutCount); - AtomicAdd(m_Stats.PutBytes, ChunkSize); - return {.New = true}; } @@ -279,15 +284,7 @@ FileCasStrategy::FindChunk(const IoHash& ChunkHash) RwLock::SharedLockScope _(LockForHash(ChunkHash)); - auto Chunk = IoBufferBuilder::MakeFromFile(ShardedPath.c_str()); - - if (Chunk) - { - AtomicIncrement(m_Stats.GetCount); - AtomicAdd(m_Stats.GetBytes, Chunk.Size()); - } - - return Chunk; + return IoBufferBuilder::MakeFromFile(ShardedPath.c_str()); } bool @@ -338,6 +335,56 @@ FileCasStrategy::FilterChunks(CasChunkSet& InOutChunks) } void +FileCasStrategy::IterateChunks(std::function<void(const IoHash& Hash, BasicFile& PayloadFile)>&& Callback) +{ + struct Visitor : public FileSystemTraversal::TreeVisitor + { + Visitor(const std::filesystem::path& RootDir) : RootDirectory(RootDir) {} + virtual void VisitFile(const std::filesystem::path& Parent, const std::wstring_view& File, uint64_t FileSize) override + { + std::filesystem::path RelPath = std::filesystem::relative(Parent, RootDirectory); + + std::wstring PathString = RelPath.native(); + + if ((PathString.size() == (3 + 2 + 1)) && (File.size() == (40 - 3 - 2))) + { + if (PathString.at(3) == std::filesystem::path::preferred_separator) + { + PathString.erase(3, 1); + } + PathString.append(File); + + StringBuilder<64> Utf8; + WideToUtf8(PathString, Utf8); + + // TODO: should validate that we're actually dealing with a valid hex string here + + IoHash NameHash = IoHash::FromHexString({Utf8.Data(), Utf8.Size()}); + + BasicFile PayloadFile; + std::error_code Ec; + PayloadFile.Open(Parent / File, false, Ec); + + if (!Ec) + { + Callback(NameHash, PayloadFile); + } + } + } + + virtual bool VisitDirectory(const std::filesystem::path& Parent, const std::wstring_view& DirectoryName) { return true; } + + const std::filesystem::path& RootDirectory; + std::function<void(const IoHash& Hash, BasicFile& PayloadFile)> 
Callback; + } CasVisitor{m_Config.RootDirectory}; + + CasVisitor.Callback = std::move(Callback); + + FileSystemTraversal Traversal; + Traversal.TraverseFileSystem(m_Config.RootDirectory, CasVisitor); +} + +void FileCasStrategy::Flush() { // Since we don't keep files open after writing there's nothing specific @@ -353,8 +400,22 @@ FileCasStrategy::Flush() } void -FileCasStrategy::Scrub() +FileCasStrategy::Scrub(ScrubContext& Ctx) { + std::vector<IoHash> BadHashes; + + IterateChunks([&](const IoHash& Hash, BasicFile& Payload) { + IoHashStream Hasher; + Payload.StreamFile([&](const void* Data, size_t Size) { Hasher.Append(Data, Size); }); + IoHash ComputedHash = Hasher.GetHash(); + + if (ComputedHash != Hash) + { + BadHashes.push_back(Hash); + } + }); + + Ctx.ReportBadChunks(BadHashes); } void |