aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorStefan Boberg <[email protected]>2021-08-26 16:15:30 +0200
committerStefan Boberg <[email protected]>2021-08-26 16:15:30 +0200
commit647b43f4195bb6c0f172614bde8bb71e259c9cf3 (patch)
tree5b9f989c6da4dab08c5bbcf9bb56171b4bbfe90a
parentAdded GetLastErrorAsString() to retrieve error string for last Windows API er... (diff)
downloadzen-647b43f4195bb6c0f172614bde8bb71e259c9cf3.tar.xz
zen-647b43f4195bb6c0f172614bde8bb71e259c9cf3.zip
Added missing lock to side channel InsertChunk() implementation
Previously, this could cause file contention as two threads would try to create the same chunk file
-rw-r--r--zenstore/filecas.cpp45
-rw-r--r--zenstore/filecas.h1
-rw-r--r--zenstore/include/zenstore/CAS.h9
3 files changed, 49 insertions, 6 deletions
diff --git a/zenstore/filecas.cpp b/zenstore/filecas.cpp
index 80b59ea90..845206741 100644
--- a/zenstore/filecas.cpp
+++ b/zenstore/filecas.cpp
@@ -13,17 +13,21 @@
#include <spdlog/spdlog.h>
#include <gsl/gsl-lite.hpp>
+#include <filesystem>
#include <functional>
#include <unordered_map>
+// clang-format off
+#include <zencore/prewindows.h>
+
struct IUnknown; // Workaround for "combaseapi.h(229): error C2187: syntax error: 'identifier' was unexpected here" when using /permissive-
#include <atlfile.h>
-#include <filesystem>
-
-// Used for getting My Documents for default CAS
-#include <ShlObj.h>
+#include <ShlObj.h> // Used for getting My Documents for default CAS
#pragma comment(lib, "shell32.lib")
+#include <zencore/postwindows.h>
+// clang-format on
+//
//////////////////////////////////////////////////////////////////////////
namespace zen {
@@ -76,6 +80,7 @@ FileCasStrategy::InsertChunk(IoBuffer Chunk, const IoHash& ChunkHash)
MakeShardedPath(ShardedPath, ChunkHash, /* out */ Shard2len);
auto DeletePayloadFileOnClose = [&] {
+ // This will cause the file to be deleted when the last handle to it is closed
FILE_DISPOSITION_INFO Fdi{};
Fdi.DeleteFile = TRUE;
BOOL Success = SetFileInformationByHandle(FileRef.FileHandle, FileDispositionInfo, &Fdi, sizeof Fdi);
@@ -90,6 +95,8 @@ FileCasStrategy::InsertChunk(IoBuffer Chunk, const IoHash& ChunkHash)
//
// Future improvement: maintain Bloom filter to avoid expensive file system probes?
+ RwLock::ExclusiveLockScope _(LockForHash(ChunkHash));
+
{
CAtlFile PayloadFile;
@@ -128,6 +135,9 @@ FileCasStrategy::InsertChunk(IoBuffer Chunk, const IoHash& ChunkHash)
if (!Success)
{
+ // The rename/move could fail because the target directory does not yet exist. This code attempts
+ // to create it
+
CAtlFile DirHandle;
auto InternalCreateDirectoryHandle = [&] {
@@ -138,6 +148,10 @@ FileCasStrategy::InsertChunk(IoBuffer Chunk, const IoHash& ChunkHash)
FILE_FLAG_BACKUP_SEMANTICS);
};
+ // It's possible for several threads to enter this logic trying to create the same
+ // directory. Only one will create the directory of course, but all threads will
+ // make it through okay
+
HRESULT hRes = InternalCreateDirectoryHandle();
if (FAILED(hRes))
@@ -164,7 +178,9 @@ FileCasStrategy::InsertChunk(IoBuffer Chunk, const IoHash& ChunkHash)
return CasStore::InsertResult{.New = true};
}
- spdlog::warn("rename of CAS payload file failed, falling back to regular write for {}", ChunkHash);
+ spdlog::warn("rename of CAS payload file failed ('{}'), falling back to regular write for insert of {}",
+ GetLastErrorAsString(),
+ ChunkHash);
DeletePayloadFileOnClose();
}
@@ -302,6 +318,14 @@ FileCasStrategy::HaveChunk(const IoHash& ChunkHash)
void
FileCasStrategy::FilterChunks(CasChunkSet& InOutChunks)
{
+ // NOTE: it's not a problem now, but in the future if a GC should happen while this
+ // is in flight, the result could be wrong since chunks could go away in the meantime.
+ //
+ // It would be good to have a pinning mechanism to make this less likely but
+ // given that chunks could go away at any point after the results are returned to
+ // a caller, this is something which needs to be taken into account by anyone consuming
+ // this functionality in any case
+
std::unordered_set<IoHash> HaveSet;
for (const IoHash& Hash : InOutChunks.GetChunkSet())
@@ -326,7 +350,16 @@ FileCasStrategy::Flush()
//
// Depending on what semantics we want Flush() to provide, it could be
// argued that this should just flush the volume which we are using to
- // store the CAS files on here?
+ // store the CAS files on here, to ensure metadata is flushed along
+ // with file data
+ //
+ // Related: to facilitate more targeted validation during recovery we could
+ // maintain a log of when chunks were created
+}
+
+void
+FileCasStrategy::GarbageCollect(GcContext& GcCtx)
+{
}
/**
diff --git a/zenstore/filecas.h b/zenstore/filecas.h
index 0213b52c3..448d1a05f 100644
--- a/zenstore/filecas.h
+++ b/zenstore/filecas.h
@@ -21,6 +21,7 @@ struct FileCasStrategy
bool HaveChunk(const IoHash& ChunkHash);
void FilterChunks(CasChunkSet& InOutChunks);
void Flush();
+ void GarbageCollect(GcContext& GcCtx);
private:
const CasStoreConfiguration& m_Config;
diff --git a/zenstore/include/zenstore/CAS.h b/zenstore/include/zenstore/CAS.h
index ec594af8b..b4de533dd 100644
--- a/zenstore/include/zenstore/CAS.h
+++ b/zenstore/include/zenstore/CAS.h
@@ -28,6 +28,15 @@ struct CasStoreConfiguration
uint64_t HugeValueThreshold = 1024 * 1024;
};
+/** Garbage Collection context object
+ */
+
+class GcContext
+{
+public:
+private:
+};
+
class CasChunkSet
{
public: