aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorStefan Boberg <[email protected]>2022-04-25 11:59:45 +0200
committerStefan Boberg <[email protected]>2022-04-25 11:59:45 +0200
commit21c9ec50b57821d150a3ba61ccff51a55383e031 (patch)
tree2acf72b77c32cc3dc3e3544c1767953acd2848f2
parentAdded option to use Catch2 framework for tests (diff)
parentCompute tweaks (#78) (diff)
downloadzen-21c9ec50b57821d150a3ba61ccff51a55383e031.tar.xz
zen-21c9ec50b57821d150a3ba61ccff51a55383e031.zip
Merge from main
-rw-r--r--.github/workflows/self_host_build.yml134
-rw-r--r--.github/workflows/update_release.yml124
-rw-r--r--.pre-commit-config.yaml28
-rw-r--r--.unreal-header1
-rw-r--r--CODING.md6
-rw-r--r--README.md7
-rw-r--r--docs/cpp-coding/00-Table_of_Contents.md15
-rw-r--r--docs/cpp-coding/01-Preface.md16
-rw-r--r--docs/cpp-coding/02-Use_the_Tools_Available.md415
-rw-r--r--docs/cpp-coding/03-Style.md457
-rw-r--r--docs/cpp-coding/04-Considering_Safety.md145
-rw-r--r--docs/cpp-coding/05-Considering_Maintainability.md58
-rw-r--r--docs/cpp-coding/06-Considering_Portability.md21
-rw-r--r--docs/cpp-coding/07-Considering_Threadability.md30
-rw-r--r--docs/cpp-coding/08-Considering_Performance.md356
-rw-r--r--docs/cpp-coding/09-Considering_Correctness.md30
-rw-r--r--docs/cpp-coding/10-Enable_Scripting.md12
-rw-r--r--docs/cpp-coding/11-Further_Reading.md29
-rw-r--r--docs/cpp-coding/12-Final_Thoughts.md4
-rw-r--r--docs/cpp-coding/SUMMARY.md15
-rw-r--r--prepare_commit.bat1
-rw-r--r--scripts/formatcode.py138
-rw-r--r--scripts/remote_build.py3
-rw-r--r--zen/zen.cpp2
-rw-r--r--zencore-test/zencore-test.cpp2
-rw-r--r--zencore/compactbinary.cpp19
-rw-r--r--zencore/filesystem.cpp40
-rw-r--r--zencore/include/zencore/compactbinary.h1
-rw-r--r--zencore/include/zencore/filesystem.h4
-rw-r--r--zencore/include/zencore/string.h105
-rw-r--r--zencore/iobuffer.cpp5
-rw-r--r--zenserver-test/zenserver-test.cpp2
-rw-r--r--zenserver/cache/structuredcachestore.cpp60
-rw-r--r--zenserver/compute/function.cpp23
-rw-r--r--zenserver/config.cpp73
-rw-r--r--zenserver/config.h9
-rw-r--r--zenserver/projectstore.cpp58
-rw-r--r--zenserver/testing/launch.cpp8
-rw-r--r--zenserver/upstream/hordecompute.cpp1374
-rw-r--r--zenserver/upstream/jupiter.cpp46
-rw-r--r--zenserver/upstream/jupiter.h7
-rw-r--r--zenserver/upstream/upstreamapply.cpp1321
-rw-r--r--zenserver/upstream/upstreamapply.h45
-rw-r--r--zenserver/zenserver.cpp25
-rw-r--r--zenstore-test/zenstore-test.cpp8
-rw-r--r--zenstore/basicfile.cpp169
-rw-r--r--zenstore/blockstore.cpp242
-rw-r--r--zenstore/cas.cpp8
-rw-r--r--zenstore/caslog.cpp46
-rw-r--r--zenstore/cidstore.cpp38
-rw-r--r--zenstore/compactcas.cpp2456
-rw-r--r--zenstore/compactcas.h90
-rw-r--r--zenstore/filecas.cpp73
-rw-r--r--zenstore/gc.cpp414
-rw-r--r--zenstore/include/zenstore/basicfile.h17
-rw-r--r--zenstore/include/zenstore/blockstore.h104
-rw-r--r--zenstore/include/zenstore/cas.h2
-rw-r--r--zenstore/include/zenstore/caslog.h40
-rw-r--r--zenstore/include/zenstore/gc.h4
-rw-r--r--zenstore/zenstore.cpp2
-rw-r--r--zenutil/zenserverprocess.cpp4
61 files changed, 5370 insertions, 3621 deletions
diff --git a/.github/workflows/self_host_build.yml b/.github/workflows/self_host_build.yml
new file mode 100644
index 000000000..2645f9738
--- /dev/null
+++ b/.github/workflows/self_host_build.yml
@@ -0,0 +1,134 @@
+name: Validate Build
+
+on:
+ pull_request:
+ types: [opened, reopened]
+ branches: [ main ]
+
+jobs:
+ clang-format:
+ name: Check clang-format
+ runs-on: [self-hosted, linux, x64]
+ strategy:
+ matrix:
+ path:
+ - 'zen'
+ - 'zencore'
+ - 'zencore-test'
+ - 'zenhttp'
+ - 'zenserver-test'
+ - 'zenstore'
+ - 'zenstore-test'
+ - 'zentest-appstub'
+ - 'zenutil'
+ - 'zenserver'
+ steps:
+ - uses: actions/checkout@v2
+ - name: clang-format ${{ matrix.path }}
+ uses: jidicula/[email protected]
+ with:
+ clang-format-version: '13'
+ check-path: ${{ matrix.path }}
+
+ windows-build:
+ name: Build Windows
+ needs: clang-format
+ runs-on: [self-hosted, windows, x64]
+ strategy:
+ matrix:
+ config:
+ - 'debug'
+ - 'release'
+ arch:
+ - 'x64'
+ env:
+ VCPKG_VERSION: 2022.03.10
+
+ steps:
+ - uses: actions/checkout@v2
+
+ - name: Setup xmake
+ uses: xmake-io/github-action-setup-xmake@v1
+ with:
+ xmake-version: 2.6.4
+
+ - name: Installing vcpkg
+ run: |
+ git clone -b ${{env.VCPKG_VERSION}} --single-branch https://github.com/Microsoft/vcpkg.git .vcpkg
+ cd .vcpkg
+ .\bootstrap-vcpkg.bat
+ .\vcpkg.exe integrate install
+ cd ..
+
+ - name: Cache vcpkg
+ uses: actions/cache@v2
+ with:
+ path: |
+ ${{ github.workspace }}\.vcpkg\installed
+ key: ${{ runner.os }}-${{ matrix.config }}-${{env.VCPKG_VERSION}}-${{ hashFiles('xmake.lua') }}-${{ matrix.arch }}-v5
+
+ - name: Config
+ run: |
+ xmake config -v -y -m ${{ matrix.config }} --arch=${{ matrix.arch }}
+ env:
+ VCPKG_ROOT: ${{ github.workspace }}/.vcpkg
+
+ - name: Build & Test
+ run: |
+ xmake test -v -y
+ env:
+ VCPKG_ROOT: ${{ github.workspace }}/.vcpkg
+
+ linux-build:
+ name: Build Linux
+ needs: clang-format
+ runs-on: [self-hosted, linux, x64]
+ strategy:
+ matrix:
+ config:
+ - 'debug'
+ - 'release'
+ arch:
+ - 'x86_64'
+ env:
+ VCPKG_VERSION: 2022.03.10
+
+ steps:
+ - uses: actions/checkout@v2
+
+ - name: Set up GCC 11
+ uses: egor-tensin/setup-gcc@v1
+ with:
+ version: 11
+ platform: x64
+
+ - name: Setup xmake
+ uses: xmake-io/github-action-setup-xmake@v1
+ with:
+ xmake-version: 2.6.4
+
+ - name: Installing vcpkg
+ run: |
+ git clone -b ${{env.VCPKG_VERSION}} --single-branch https://github.com/Microsoft/vcpkg.git .vcpkg
+ cd .vcpkg
+ ./bootstrap-vcpkg.sh
+ cd ..
+
+ - name: Cache vcpkg
+ uses: actions/cache@v2
+ with:
+ path: |
+ ${{ github.workspace }}/.vcpkg/installed
+ key: ${{ runner.os }}-${{ matrix.config }}-${{env.VCPKG_VERSION}}-${{ hashFiles('xmake.lua') }}-${{ matrix.arch }}-v5
+
+ - name: Config
+ run: |
+ xmake config -v -y -m ${{ matrix.config }} --arch=${{ matrix.arch }}
+ env:
+ VCPKG_ROOT: ${{ github.workspace }}/.vcpkg
+
+ - name: Build & Test
+ run: |
+ xmake test -v -y
+ env:
+ VCPKG_ROOT: ${{ github.workspace }}/.vcpkg
diff --git a/.github/workflows/update_release.yml b/.github/workflows/update_release.yml
new file mode 100644
index 000000000..62568d1c0
--- /dev/null
+++ b/.github/workflows/update_release.yml
@@ -0,0 +1,124 @@
+name: Build release
+
+on:
+ # push
+ pull_request:
+ types: [closed]
+ branches: [ main ]
+
+jobs:
+ windows-build:
+ # if: github.event.pull_request.merged == true
+ name: Build Windows
+ runs-on: [self-hosted, windows, x64]
+ strategy:
+ matrix:
+ config:
+ - 'release'
+ arch:
+ - 'x64'
+ env:
+ VCPKG_VERSION: 2022.03.10
+
+ steps:
+ - uses: actions/checkout@v2
+
+ - name: Setup xmake
+ uses: xmake-io/github-action-setup-xmake@v1
+ with:
+ xmake-version: 2.6.4
+
+ - name: Installing vcpkg
+ run: |
+ git clone -b ${{env.VCPKG_VERSION}} --single-branch https://github.com/Microsoft/vcpkg.git .vcpkg
+ cd .vcpkg
+ .\bootstrap-vcpkg.bat
+ .\vcpkg.exe integrate install
+ cd ..
+
+ - name: Cache vcpkg
+ uses: actions/cache@v2
+ with:
+ path: |
+ ${{ github.workspace }}\.vcpkg\installed
+ key: ${{ runner.os }}-${{ matrix.config }}-${{env.VCPKG_VERSION}}-${{ hashFiles('xmake.lua') }}-${{ matrix.arch }}-v5
+
+ - name: Config
+ run: |
+ xmake config -v -y -m ${{ matrix.config }} --arch=${{ matrix.arch }}
+ env:
+ VCPKG_ROOT: ${{ github.workspace }}/.vcpkg
+
+ - name: Build
+ run: |
+ xmake build -v -y
+ env:
+ VCPKG_ROOT: ${{ github.workspace }}/.vcpkg
+
+ # - name: Create Archive
+ # run: |
+ # cd .\build\windows\${{ matrix.arch }}\${{ matrix.config }}
+ # C:\'Program Files'\7-Zip\7z.exe a -r ..\..\..\..\windows-${{ matrix.arch }}-${{ matrix.config }}.zip *
+ # cd ..\..\..\..
+
+ - name: Create Archive
+ run: |
+ cd .\build\windows\${{ matrix.arch }}\${{ matrix.config }}
+ C:\'Program Files'\7-Zip\7z.exe a -r ..\..\..\..\zenserver-win64.zip zenserver.exe
+ cd ..\..\..\..
+
+ - name: Get current release version info
+ run: |
+ $repo = "EpicGames/zen"
+ $releases = "https://api.github.com/repos/$repo/releases/latest"
+ Write-Host Determining latest release
+ $latest = (Invoke-WebRequest -Headers @{"Accept"="application/vnd.github.v3+json";"Authorization"="token ${{ secrets.GITHUB_TOKEN }}"} $releases | ConvertFrom-Json)[0]
+ $current_version_tag = [version]$latest.tag_name.replace('v','')
+ echo "Current version" $current_version_tag
+ $new_version_tag = [version]::New($current_version_tag.Major,$current_version_tag.Minor,$current_version_tag.Build,$current_version_tag.Revision+1).toString()
+ echo $new_version_tag
+ echo "new_version_tag=$new_version_tag" | Out-File -FilePath $env:GITHUB_ENV -Encoding utf8 -Append
+
+ - name: Create Release
+ id: create_release
+ uses: actions/create-release@v1
+ env:
+ GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+ with:
+ tag_name: v${{ env.new_version_tag }}
+ release_name: Release
+ draft: false
+ prerelease: false
+
+ # - name: Create Release
+ # id: create_release
+ # uses: actions/create-release@v1
+ # env:
+ # GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+ # with:
+ # tag_name: ${{ github.ref_name }}
+ # release_name: Release ${{ github.head_ref }}
+ # draft: false
+ # prerelease: false
+
+ # - name: Upload Release Asset
+ # id: upload-release-asset
+ # uses: actions/upload-release-asset@v1
+ # env:
+ # GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+ # with:
+ # upload_url: ${{ steps.create_release.outputs.upload_url }} # This pulls from the CREATE RELEASE step above, referencing it's ID to get its outputs object, which include a `upload_url`. See this blog post for more info: https://jasonet.co/posts/new-features-of-github-actions/#passing-data-to-future-steps
+ # asset_path: .\windows-${{ matrix.arch }}-${{ matrix.config }}.zip
+ # asset_name: windows-${{ matrix.arch }}-${{ matrix.config }}
+ # asset_content_type: application/zip
+ - name: Upload Release Asset
+ id: upload-release-asset
+ uses: actions/upload-release-asset@v1
+ env:
+ GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+ with:
+ upload_url: ${{ steps.create_release.outputs.upload_url }}
+ asset_path: .\zenserver-win64.zip
+ asset_name: zenserver-win64.zip
+ asset_content_type: application/zip
+
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
new file mode 100644
index 000000000..98535dd94
--- /dev/null
+++ b/.pre-commit-config.yaml
@@ -0,0 +1,28 @@
+# See https://pre-commit.com for more information
+# See https://pre-commit.com/hooks.html for more hooks
+repos:
+- repo: https://github.com/pre-commit/pre-commit-hooks
+ rev: v3.2.0
+ hooks:
+ #- id: end-of-file-fixer
+ #- id: trailing-whitespace
+ #- id: check-added-large-files
+ - id: mixed-line-ending
+ - id: check-yaml
+ - id: check-case-conflict
+- repo: https://github.com/Lucas-C/pre-commit-hooks
+ rev: v1.1.13
+ hooks:
+ - id: insert-license
+ exclude: sha1\.(cpp|h)$
+ files: \.(cpp|h)$
+ args:
+ - --license-filepath
+ - .unreal-header # defaults to: LICENSE.txt
+ - --comment-style
+ - // # defaults to: #
+- repo: https://github.com/pre-commit/mirrors-clang-format
+ rev: v13.0.1
+ hooks:
+ - id: clang-format
+exclude: ^thirdparty/
diff --git a/.unreal-header b/.unreal-header
new file mode 100644
index 000000000..473471916
--- /dev/null
+++ b/.unreal-header
@@ -0,0 +1 @@
+Copyright Epic Games, Inc. All Rights Reserved. \ No newline at end of file
diff --git a/CODING.md b/CODING.md
index d94d9d665..54774590f 100644
--- a/CODING.md
+++ b/CODING.md
@@ -12,4 +12,8 @@ Those who are familiar with the UE coding standards will note that we do not req
To ensure consistent formatting we rely on `clang-format` to automatically format source code. This leads to consistent formatting which should lead to less surprises and more straightforward merging.
-Formatting is triggered via `prepare_commit` which should be used ahead of commit. We do not currently reject commits which have not been formatted, but we probably should at some point in the future.
+Formatting is ensured by using [pre-commit](https://pre-commit.com/)
+- [Install pre-commit](https://pre-commit.com/#installation) so it is available in PATH
+- Run pre-commit manually on staged files `pre-commit run`
+- Run pre-commit manually on all files `pre-commit run --all-files`
+- Install git commit hooks `pre-commit install`, which will automatically run before every commit.
diff --git a/README.md b/README.md
index 5b85d86f9..f8cc6779a 100644
--- a/README.md
+++ b/README.md
@@ -3,12 +3,13 @@
This is the implementation of the local storage service for UE5. It is intended to be deployed on
user machines either as a daemon or launched ad hoc as required during editor/cooker/game startup.
+Zen is currently EXPERIMENTAL and not intended to be used in production. We will make breaking changes to storage and application interfaces as well as significant performance improvements in the upcoming months to make it ready for general adoption.
+
Zen can also be deployed as a shared instance for use as a shared cache. It also supports upstream
connectivity to cloud storage services as well as other Zen server instances.
-We currently only support building and running the server on Windows. Linux and Mac support is in progress
-
All platforms require [xmake](xmake.io)
+
Download the latest release [here](https://github.com/xmake-io/xmake/releases)
## Building on Windows
@@ -213,7 +214,7 @@ is incredibly handy. When that is installed you may enable auto-attach to child
The tests are implemented using [doctest](https://github.com/onqtam/doctest), which is similar to Catch in usage.
-# Adding a http.sys URL reservation
+# Adding a http.sys URL reservation (Windows only)
Registering a handler for an HTTP endpoint requires either process elevation (i.e running Zen as admin) or a one-time URL reservation. An URL reservation can be added by issuing a command like
diff --git a/docs/cpp-coding/00-Table_of_Contents.md b/docs/cpp-coding/00-Table_of_Contents.md
deleted file mode 100644
index 67008eba9..000000000
--- a/docs/cpp-coding/00-Table_of_Contents.md
+++ /dev/null
@@ -1,15 +0,0 @@
-
- 1. [Preface](01-Preface.md)
- 2. [Use the Tools Available](02-Use_the_Tools_Available.md)
- 3. [Style](03-Style.md)
- 4. [Considering Safety](04-Considering_Safety.md)
- 5. [Considering Maintainability](05-Considering_Maintainability.md)
- 6. [Considering Portability](06-Considering_Portability.md)
- 7. [Considering Threadability](07-Considering_Threadability.md)
- 8. [Considering Performance](08-Considering_Performance.md)
- 9. [Considering Correctness](09-Considering_Correctness.md)
- 10. [Enable Scripting](10-Enable_Scripting.md)
- 11. [Further Reading](11-Further_Reading.md)
- 12. [Final Thoughts](12-Final_Thoughts.md)
-
-
diff --git a/docs/cpp-coding/01-Preface.md b/docs/cpp-coding/01-Preface.md
deleted file mode 100644
index fac2b8109..000000000
--- a/docs/cpp-coding/01-Preface.md
+++ /dev/null
@@ -1,16 +0,0 @@
-# Preface
-
-C++ Best Practices: A Forkable Coding Standards Document
-
-This document is meant to be a collaborative discussion of the best practices in C++. It complements books such as *Effective C++* (Meyers) and *C++ Coding Standards* (Alexandrescu, Sutter). We fill in some of the lower level details that they don't discuss and provide specific stylistic recommendations while also discussing how to ensure overall code quality.
-
-In all cases brevity and succinctness is preferred. Examples are preferred for making the case for why one option is preferred over another. If necessary, words will be used.
-
-
-<a rel="license" href="http://creativecommons.org/licenses/by-nc/4.0/"><img alt="Creative Commons License" style="border-width:0" src="https://i.creativecommons.org/l/by-nc/4.0/88x31.png" /></a><br /><span xmlns:dct="http://purl.org/dc/terms/" href="http://purl.org/dc/dcmitype/Text" property="dct:title" rel="dct:type">C++ Best Practices</span> by <a xmlns:cc="http://creativecommons.org/ns#" href="http://cppbestpractices.com" property="cc:attributionName" rel="cc:attributionURL">Jason Turner</a> is licensed under a <a rel="license" href="http://creativecommons.org/licenses/by-nc/4.0/">Creative Commons Attribution-NonCommercial 4.0 International License</a>.
-
-*Disclaimer*
-
-This document is based on my personal experiences. You are not supposed to agree with it 100%. It exists as a book on [GitHub](https://github.com/lefticus/cppbestpractices) so that you can fork it for your own uses or submit back proposed changes for everyone to share.
-
-This book has inspired an O'Reilly video: [Learning C++ Best Practices](http://shop.oreilly.com/product/0636920049814.do)
diff --git a/docs/cpp-coding/02-Use_the_Tools_Available.md b/docs/cpp-coding/02-Use_the_Tools_Available.md
deleted file mode 100644
index 15c4df76d..000000000
--- a/docs/cpp-coding/02-Use_the_Tools_Available.md
+++ /dev/null
@@ -1,415 +0,0 @@
-# Use The Tools Available
-
-An automated framework for executing these tools should be established very early in the development process. It should not take more than 2-3 commands to checkout the source code, build, and execute the tests. Once the tests are done executing, you should have an almost complete picture of the state and quality of the code.
-
-## Source Control
-
-Source control is an absolute necessity for any software development project. If you are not using one yet, start using one.
-
- * [GitHub](https://github.com/) - allows for unlimited public repositories, and unlimited private repositories with up to 3 collaborators.
- * [Bitbucket](https://bitbucket.org/) - allows for unlimited private repositories with up to 5 collaborators, for free.
- * [SourceForge](http://sourceforge.net/) - open source hosting only.
- * [GitLab](https://gitlab.com/) - allows for unlimited public and private repositories, unlimited CI Runners included, for free.
- * [Visual Studio Online](https://visualstudio.com) (http://www.visualstudio.com/what-is-visual-studio-online-vs) - allows for unlimited public repositories, must pay for private repository. Repositories can be git or TFVC. Additionally: Issue tracking, project planning (multiple Agile templates, such as SCRUM), integrated hosted builds, integration of all this into Microsoft Visual Studio. Windows only.
-
-## Build Tool
-
-Use an industry standard widely accepted build tool. This prevents you from reinventing the wheel whenever you discover / link to a new library / package your product / etc. Examples include:
-
- * [Autotools](https://autotools.io) - The traditional GNU build system.
- * [CMake](http://www.cmake.org/)
- * Consider: https://github.com/sakra/cotire/ for build performance
- * Consider: https://github.com/toeb/cmakepp for enhanced usability
- * Utilize: https://cmake.org/cmake/help/v3.6/command/target_compile_features.html for C++ standard flags
- * Consider: https://github.com/cheshirekow/cmake_format for automatic formatting of your CMakeLists.txt
- * See the [Further Reading](11-Further_Reading.md) section for CMake specific best practices
- * `cmake --build` provides a common interface for compiling your project regardless of platform
- * [Waf](https://waf.io/)
- * [FASTBuild](http://www.fastbuild.org/)
- * [Ninja](https://ninja-build.org/) - Can greatly improve the incremental build time of your larger projects. Can be used as a target for CMake.
- * [Bazel](http://bazel.io/) - Fast incremental builds using network artefact caching and remote execution.
- * [Buck](http://buckbuild.com/) - Similar to Bazel, with very good support for iOS and Android.
- * [gyp](https://chromium.googlesource.com/external/gyp/) - Google's build tool for chromium.
- * [maiken](https://github.com/Dekken/maiken) - Crossplatform build tool with Maven-esque configuration style.
- * [Qt Build Suite](http://doc.qt.io/qbs/) - Crossplatform build tool From Qt.
- * [meson](http://mesonbuild.com/index.html) - Open source build system meant to be both extremely fast, and, even more importantly, as user friendly as possible.
- * [premake](https://premake.github.io/)
- * [xmake](https://xmake.io) - A cross-platform build utility based on Lua. Modern C/C++ build tools, Support multi-language hybrid compilation
-
-Remember, it's not just a build tool, it's also a programming language. Try to maintain good clean build scripts and follow the recommended practices for the tool you are using.
-
-## Package Manager
-
-Package management is an important topic in C++, with currently no clear winner. Consider using a package manager to help you keep track of the dependencies for your project and make it easier for new people to get started with the project.
-
- * [Conan](https://www.conan.io/) - a crossplatform dependency manager for C++
- * [hunter](https://github.com/ruslo/hunter) - CMake driven cross-platform package manager for C/C++
- * [C++ Archive Network (CPPAN)](https://cppan.org/) - a crossplatform dependency manager for C++
- * [qpm](https://www.qpm.io/) - Package manager for Qt
- * [build2](https://build2.org/) - cargo-like package management for C++
- * [Buckaroo](https://buckaroo.pm) - Truly decentralized cross-platform dependency manager for C/C++ and more
- * [Vcpkg](https://github.com/microsoft/vcpkg) - Microsoft C++ Library Manager for Windows, Linux, and MacOS - [description](https://docs.microsoft.com/en-us/cpp/build/vcpkg)
-
-## Continuous Integration
-
-Once you have picked your build tool, set up a continuous integration environment.
-
-Continuous Integration (CI) tools automatically build the source code as changes are pushed to the repository. These can be hosted privately or with a CI host.
-
- * [Travis CI](http://travis-ci.org)
- * works well with C++
- * designed for use with GitHub
- * free for public repositories on GitHub
- * [AppVeyor](http://www.appveyor.com/)
- * supports Windows, MSVC and MinGW
- * free for public repositories on GitHub
- * [Hudson CI](http://hudson-ci.org/) / [Jenkins CI](https://jenkins-ci.org/)
- * Java Application Server is required
- * supports Windows, OS X, and Linux
- * extendable with a lot of plugins
- * [TeamCity](https://www.jetbrains.com/teamcity)
- * has a free option for open source projects
- * [Decent CI](https://github.com/lefticus/decent_ci)
- * simple ad-hoc continuous integration that posts results to GitHub
- * supports Windows, OS X, and Linux
- * used by [ChaiScript](http://chaiscript.com/ChaiScript-BuildResults/full_dashboard.html)
- * [Visual Studio Online](https://visualstudio.com) (http://www.visualstudio.com/what-is-visual-studio-online-vs)
- * Tightly integrated with the source repositories from Visual Studio Online
- * Uses MSBuild (Visual Studio's build engine), which is available on Windows, OS X and Linux
- * Provides hosted build agents and also allows for user-provided build agents
- * Can be controlled and monitored from within Microsoft Visual Studio
- * On-Premise installation via Microsoft Team Foundation Server
- * [GitLab](https://gitlab.com)
- * use custom Docker images, so can be used for C++
- * has free shared runners
- * has straightforward processing of coverage-analysis results
-
-If you have an open source, publicly-hosted project on GitHub:
-
- * go enable Travis CI and AppVeyor integration right now. We'll wait for you to come back. For a simple example of how to enable it for your C++ CMake-based application, see here: https://github.com/ChaiScript/ChaiScript/blob/master/.travis.yml
- * enable one of the coverage tools listed below (Codecov or Coveralls)
- * enable [Coverity Scan](https://scan.coverity.com)
-
-These tools are all free and relatively easy to set up. Once they are set up you are getting continuous building, testing, analysis and reporting of your project. For free.
-
-
-## Compilers
-
-Use every available and reasonable set of warning options. Some warning options only work with optimizations enabled, or work better the higher the chosen level of optimization is, for example [`-Wnull-dereference`](https://gcc.gnu.org/onlinedocs/gcc/Warning-Options.html#index-Wnull-dereference-367) with GCC.
-
-You should use as many compilers as you can for your platform(s). Each compiler implements the standard slightly differently and supporting multiple will help ensure the most portable, most reliable code.
-
-### GCC / Clang
-
-`-Wall -Wextra -Wshadow -Wnon-virtual-dtor -pedantic`
-
- * `-Wall -Wextra` reasonable and standard
- * `-Wshadow` warn the user if a variable declaration shadows one from a parent context
- * `-Wnon-virtual-dtor` warn the user if a class with virtual functions has a non-virtual destructor. This helps catch hard to track down memory errors
- * `-Wold-style-cast` warn for c-style casts
- * `-Wcast-align` warn for potential performance problem casts
- * `-Wunused` warn on anything being unused
- * `-Woverloaded-virtual` warn if you overload (not override) a virtual function
- * `-Wpedantic` (all versions of GCC, Clang >= 3.2) warn if non-standard C++ is used
- * `-Wconversion` warn on type conversions that may lose data
- * `-Wsign-conversion` (Clang all versions, GCC >= 4.3) warn on sign conversions
- * `-Wmisleading-indentation` (only in GCC >= 6.0) warn if indentation implies blocks where blocks do not exist
- * `-Wduplicated-cond` (only in GCC >= 6.0) warn if `if` / `else` chain has duplicated conditions
- * `-Wduplicated-branches` (only in GCC >= 7.0) warn if `if` / `else` branches have duplicated code
- * `-Wlogical-op` (only in GCC) warn about logical operations being used where bitwise were probably wanted
- * `-Wnull-dereference` (only in GCC >= 6.0) warn if a null dereference is detected
- * `-Wuseless-cast` (only in GCC >= 4.8) warn if you perform a cast to the same type
- * `-Wdouble-promotion` (GCC >= 4.6, Clang >= 3.8) warn if `float` is implicit promoted to `double`
- * `-Wformat=2` warn on security issues around functions that format output (ie `printf`)
- * `-Wlifetime` (only special branch of Clang currently) shows object lifetime issues
-
-Consider using `-Weverything` and disabling the few warnings you need to on Clang
-
-
-`-Weffc++` warning mode can be too noisy, but if it works for your project, use it also.
-
-### MSVC
-
-`/permissive-` - [Enforces standards conformance](https://docs.microsoft.com/en-us/cpp/build/reference/permissive-standards-conformance).
-
-`/W4 /w14640` - use these and consider the following (see descriptions below)
-
- * `/W4` All reasonable warnings
- * `/w14242` 'identfier': conversion from 'type1' to 'type1', possible loss of data
- * `/w14254` 'operator': conversion from 'type1:field_bits' to 'type2:field_bits', possible loss of data
- * `/w14263` 'function': member function does not override any base class virtual member function
- * `/w14265` 'classname': class has virtual functions, but destructor is not virtual instances of this class may not be destructed correctly
- * `/w14287` 'operator': unsigned/negative constant mismatch
- * `/we4289` nonstandard extension used: 'variable': loop control variable declared in the for-loop is used outside the for-loop scope
- * `/w14296` 'operator': expression is always 'boolean_value'
- * `/w14311` 'variable': pointer truncation from 'type1' to 'type2'
- * `/w14545` expression before comma evaluates to a function which is missing an argument list
- * `/w14546` function call before comma missing argument list
- * `/w14547` 'operator': operator before comma has no effect; expected operator with side-effect
- * `/w14549` 'operator': operator before comma has no effect; did you intend 'operator'?
- * `/w14555` expression has no effect; expected expression with side-effect
- * `/w14619` pragma warning: there is no warning number 'number'
- * `/w14640` Enable warning on thread un-safe static member initialization
- * `/w14826` Conversion from 'type1' to 'type_2' is sign-extended. This may cause unexpected runtime behavior.
- * `/w14905` wide string literal cast to 'LPSTR'
- * `/w14906` string literal cast to 'LPWSTR'
- * `/w14928` illegal copy-initialization; more than one user-defined conversion has been implicitly applied
-
-Not recommended
-
- * `/Wall` - Also warns on files included from the standard library, so it's not very useful and creates too many extra warnings.
-
-
-
-### General
-
-Start with very strict warning settings from the beginning. Trying to raise the warning level after the project is underway can be painful.
-
-Consider using the *treat warnings as errors* setting. `/WX` with MSVC, `-Werror` with GCC / Clang
-
-## LLVM-based tools
-
-LLVM based tools work best with a build system (such as cmake) that can output a compile command database, for example:
-
-```
-$ cmake -DCMAKE_EXPORT_COMPILE_COMMANDS=ON .
-```
-
-If you are not using a build system like that, you can consider [Build EAR](https://github.com/rizsotto/Bear) which will hook into your build system and generate a compile command database for you.
-
-CMake now also comes with built-in support for calling `clang-tidy` during [normal compilation](https://cmake.org/cmake/help/latest/prop_tgt/LANG_CLANG_TIDY.html).
-
- * [include-what-you-use](https://github.com/include-what-you-use), [example results](https://github.com/ChaiScript/ChaiScript/commit/c0bf6ee99dac14a19530179874f6c95255fde173)
- * [clang-modernize](http://clang.llvm.org/extra/clang-modernize.html), [example results](https://github.com/ChaiScript/ChaiScript/commit/6eab8ddfe154a4ebbe956a5165b390ee700fae1b)
- * [clang-check](http://clang.llvm.org/docs/ClangCheck.html)
- * [clang-tidy](http://clang.llvm.org/extra/clang-tidy.html)
-
-## Static Analyzers
-
-The best bet is the static analyzer that you can run as part of your automated build system. Cppcheck and clang meet that requirement for free options.
-
-### Coverity Scan
-
-[Coverity](https://scan.coverity.com/) has a free (for open source) static analysis toolkit that can work on every commit in integration with [Travis CI](http://travis-ci.org) and [AppVeyor](http://www.appveyor.com/).
-
-### PVS-Studio
-
-[PVS-Studio](http://www.viva64.com/en/pvs-studio/) is a tool for bug detection in the source code of programs, written in C, C++ and C#. It is free for personal academic projects, open source non-commercial projects and independent projects of individual developers. It works in Windows and Linux environment.
-
-### Cppcheck
-[Cppcheck](http://cppcheck.sourceforge.net/) is free and open source. It strives for 0 false positives and does a good job at it. Therefore all warnings should be enabled: `--enable=all`
-
-Notes:
-
- * For correct work it requires well formed path for headers, so before usage don't forget to pass: `--check-config`.
- * Finding unused headers does not work with `-j` more than 1.
- * Remember to add `--force` for code with a large number of `#ifdef`s if you need to check all of them.
-
-### cppclean
-
-[cppclean](https://github.com/myint/cppclean) - Open source static analyzer focused on finding problems in C++ source that slow development of large code bases.
-
-
-### CppDepend
-
-[CppDepend](https://www.cppdepend.com/) Simplifies managing a complex C/C++ code base by analyzing and visualizing code dependencies, by defining design rules, by doing impact analysis, and comparing different versions of the code. It's free for OSS contributors.
-
-### Clang's Static Analyzer
-
-Clang's analyzer's default options are good for the respective platform. It can be used directly [from CMake](http://garykramlich.blogspot.com/2011/10/using-scan-build-from-clang-with-cmake.html). They can also be called via clang-check and clang-tidy from the [LLVM-based Tools](#llvm-based-tools).
-
-Also, [CodeChecker](https://github.com/Ericsson/CodeChecker) is available as a front-end to clang's static analysis.
-
-`clang-tidy` can be easily used with Visual Studio via the [Clang Power Tools](https://clangpowertools.com) extension.
-
-### MSVC's Static Analyzer
-
-Can be enabled with the `/analyze` [command line option](http://msdn.microsoft.com/en-us/library/ms173498.aspx). For now we will stick with the default options.
-
-### Flint / Flint++
-
-[Flint](https://github.com/facebook/flint) and [Flint++](https://github.com/L2Program/FlintPlusPlus) are linters that analyze C++ code against Facebook's coding standards.
-
-### OCLint
-
-[OCLint](http://oclint.org/) is a free, libre and open source static code analysis tool for improving quality of C++ code in many different ways.
-
-### ReSharper C++ / CLion
-
-Both of these tools from [JetBrains](https://www.jetbrains.com/cpp/) offer some level of static analysis and automated fixes for common things that can be done better. They have options available for free licenses for open source project leaders.
-
-### Cevelop
-
-The Eclipse based [Cevelop](https://www.cevelop.com/) IDE has various static analysis and refactoring / code fix tools available. For example, you can replace macros with C++ `constexprs`, refactor namespaces (extract/inline `using`, qualify name), and refactor your code to C++11's uniform initialization syntax. Cevelop is free to use.
-
-### Qt Creator
-
-Qt Creator can plug into the clang static analyzer.
-
-### clazy
-
-[clazy](https://github.com/KDE/clazy) is a clang based tool for analyzing Qt usage.
-
-### IKOS
-
-[IKOS](https://ti.arc.nasa.gov/opensource/ikos/) is an open source static analyzer, developed by NASA. It is based on the Abstract Interpretation. It is written in C++ and provides an analyzer for C and C++, using LLVM.
-The source code is [available on Github](https://github.com/NASA-SW-VnV/ikos).
-
-## Runtime Checkers
-
-### Code Coverage Analysis
-
-A coverage analysis tool shall be run when tests are executed to make sure the entire application is being tested. Unfortunately, coverage analysis requires that compiler optimizations be disabled. This can result in significantly longer test execution times.
-
- * [Codecov](https://codecov.io/)
- * integrates with Travis CI and AppVeyor
- * free for open source projects
- * [Coveralls](https://coveralls.io/)
- * integrates with Travis CI and AppVeyor
- * free for open source projects
- * [LCOV](http://ltp.sourceforge.net/coverage/lcov.php)
- * very configurable
- * [Gcovr](http://gcovr.com/)
- * [kcov](http://simonkagstrom.github.io/kcov/index.html)
- * integrates with codecov and coveralls
- * performs code coverage reporting without needing special compiler flags, just by instrumenting debug symbols.
- * [OpenCppCoverage](https://github.com/OpenCppCoverage/OpenCppCoverage) - open source coverage reporting tool for Windows.
-
-
-### Heap profiling
-
- * [Valgrind](http://www.valgrind.org/)
- * Valgrind is a runtime code analyzer that can detect memory leaks, race conditions, and other associated problems. It is supported on various Unix platforms.
- * [Heaptrack](https://github.com/KDE/heaptrack)
- * A profiler created by one of Valgrind's Massif developers. Quite similar to Massif with pros and cons over it, way more intuitive though.
- * [Dr Memory](http://www.drmemory.org)
- * [Memoro](https://epfl-vlsc.github.io/memoro/) - A detailed heap profiler.
-
-### CPU profiling
-
- * [Hotspot](https://github.com/KDAB/hotspot) - An intuitive front-end to visualize data produced by the [perf](https://perf.wiki.kernel.org) CPU profiler.
- * [uftrace](https://github.com/namhyung/uftrace) - Can be used to generate function call graphs of a program execution.
-
-### Reverse engineering tools
-
- * [Cutter](https://cutter.re/) - A front-end for [Radare2](https://www.radare.org/n/radare2.html). It provides tools such as decompiler, disassembly, graph visualizer, hex editor.
-
-### GCC / Clang Sanitizers
-
-These tools provide many of the same features as Valgrind, but built into the compiler. They are easy to use and provide a report of what went wrong.
-
- * AddressSanitizer
- * MemorySanitizer
- * ThreadSanitizer
- * UndefinedBehaviorSanitizer
-
-Be aware of the sanitizer options available, including runtime options. https://kristerw.blogspot.com/2018/06/useful-gcc-address-sanitizer-checks-not.html
-
-### Fuzzy Analyzers
-
-If your project accepts user defined input, consider running a fuzzy input tester.
-
-Both of these tools use coverage reporting to find new code execution paths and try to breed novel inputs for your code. They can find crashes, hangs, and inputs you didn't know were considered valid.
-
- * [american fuzzy lop](http://lcamtuf.coredump.cx/afl/)
- * [LibFuzzer](http://llvm.org/docs/LibFuzzer.html)
- * [KLEE](http://klee.github.io/) - Can be used to fuzz individual functions
-
-#### Continuous Fuzzing
-
-Continuous fuzzing tools exist to run fuzz tests for you with each commit.
-
- * [Fuzzit](https://fuzzit.dev/)
-
-### Mutation Testers
-
-These tools take code executed during unit test runs and mutate the executed code. If the test continues to pass with a mutation in place, then there is likely a flawed test in your suite.
-
- * [Dextool Mutate](https://github.com/joakim-brannstrom/dextool/tree/master/plugin/mutate)
- * [MuCPP](https://neptuno.uca.es/redmine/projects/mucpp-mutation-tool/wiki)
- * [mull](https://github.com/mull-project/mull)
- * [CCMutator](https://github.com/markus-kusano/CCMutator)
-
-### Control Flow Guard
-
-MSVC's [Control Flow Guard](https://msdn.microsoft.com/en-us/library/windows/desktop/mt637065%28v=vs.85%29.aspx?f=255&MSPPError=-2147217396) adds high performance runtime security checks.
-
-### Checked STL Implementations
-
- * `_GLIBCXX_DEBUG` with GCC's implementation libstdc++ implementation. See [Krister's blog article](https://kristerw.blogspot.se/2018/03/detecting-incorrect-c-stl-usage.html).
-
-### Heap Profiling
-
- * [Memoro](https://epfl-vlsc.github.io/memoro/) - A detailed heap profiler
-
-## Ignoring Warnings
-
-If it is determined by team consensus that the compiler or analyzer is warning on something that is either incorrect or unavoidable, the team will disable the specific error in as localized a part of the code as possible.
-
-Be sure to reenable the warning after disabling it for a section of code. You do not want your disabled warnings to [leak into other code](http://www.forwardscattering.org/post/48).
-
-## Testing
-
-CMake, mentioned above, has a built in framework for executing tests. Make sure whatever build system you use has a way to execute tests built in.
-
-To further aid in executing tests, consider a library such as [Google Test](https://github.com/google/googletest), [Catch](https://github.com/philsquared/Catch), [CppUTest](https://github.com/cpputest/cpputest) or [Boost.Test](http://www.boost.org/doc/libs/release/libs/test/) to help you organize the tests.
-
-### Unit Tests
-
-Unit tests are for small chunks of code, individual functions which can be tested standalone.
-
-### Integration Tests
-
-There should be a test enabled for every feature or bug fix that is committed. See also [Code Coverage Analysis](#code-coverage-analysis). These are tests that are higher level than unit tests. They should still be limited in scope to individual features.
-
-### Negative Testing
-
-Don't forget to make sure that your error handling is being tested and works properly as well. This will become obvious if you aim for 100% code coverage.
-
-## Debugging
-
-### GDB
-
-[GDB](https://www.gnu.org/software/gdb/) - The GNU debugger, powerful and widely used. Most IDEs implement an interface to use it.
-
-### rr
-
-[rr](http://rr-project.org/) is a free (open source) reverse debugger that supports C++.
-
-## Other Tools
-
-### Lizard
-
-[Lizard](http://www.lizard.ws/) provides a very simple interface for running complexity analysis against a C++ codebase.
-
-### Metrix++
-
-[Metrix++](http://metrixplusplus.sourceforge.net/) can identify and report on the most complex sections of your code. Reducing complex code helps you and the compiler understand it better and optimize it better.
-
-### ABI Compliance Checker
-
-[ABI Compliance Checker](http://ispras.linuxbase.org/index.php/ABI_compliance_checker) (ACC) can analyze two library versions and generates a detailed compatibility report regarding API and C++ ABI changes. This can help a library developer spot unintentional breaking changes to ensure backward compatibility.
-
-### CNCC
-
-[Customizable Naming Convention Checker](https://github.com/mapbox/cncc) can report on identifiers in your code that do not follow certain naming conventions.
-
-### ClangFormat
-
-[ClangFormat](http://clang.llvm.org/docs/ClangFormat.html) can check and correct code formatting to match organizational conventions automatically. [Multipart series](https://engineering.mongodb.com/post/succeeding-with-clangformat-part-1-pitfalls-and-planning/) on utilizing clang-format.
-
-### SourceMeter
-
-[SourceMeter](https://www.sourcemeter.com/) offers a free version which provides many different metrics for your code and can also call into cppcheck.
-
-### Bloaty McBloatface
-
-[Bloaty McBloatface](https://github.com/google/bloaty) is a binary size analyzer/profiler for unix-like platforms
-
-### pahole
-
-[pahole](https://linux.die.net/man/1/pahole) generates data on holes in the packing of data structures and classes in compiled code. It can also show the size of structures and how they fit within the system's cache lines.
-
-### BinSkim
-
-[BinSkim](https://github.com/Microsoft/binskim) is a binary static analysis tool that provides security and correctness results for Windows Portable Executable and *nix ELF binary formats
diff --git a/docs/cpp-coding/03-Style.md b/docs/cpp-coding/03-Style.md
deleted file mode 100644
index 241f5c733..000000000
--- a/docs/cpp-coding/03-Style.md
+++ /dev/null
@@ -1,457 +0,0 @@
-# Style
-
-Consistency is the most important aspect of style. The second most important aspect is following a style that the average C++ programmer is used to reading.
-
-C++ allows for arbitrary-length identifier names, so there's no reason to be terse when naming things. Use descriptive names, and be consistent in the style.
-
- * `CamelCase`
- * `snake_case`
-
-are common examples. *snake_case* has the advantage that it can also work with spell checkers, if desired.
-
-## Establishing A Style Guideline
-
-Whatever style guidelines you establish, be sure to implement a `.clang-format` file that specifies the style you expect. While this cannot help with naming, it is particularly important for an open source project to maintain a consistent style.
-
-Every IDE and many editors have support for clang-format built in or easily installable with an add-in.
-
- * VSCode: [Microsoft C/C++ extension for VS Code](https://github.com/Microsoft/vscode-cpptools)
- * CLion: https://www.jetbrains.com/help/clion/clangformat-as-alternative-formatter.html
- * VisualStudio https://marketplace.visualstudio.com/items?itemName=LLVMExtensions.ClangFormat#review-details
- * Resharper++: https://www.jetbrains.com/help/resharper/2017.2/Using_Clang_Format.html
- * Vim
- * https://github.com/rhysd/vim-clang-format
- * https://github.com/chiel92/vim-autoformat
- * XCode: https://github.com/travisjeffery/ClangFormat-Xcode
-
-
-
-## Common C++ Naming Conventions
-
- * Types start with upper case: `MyClass`.
- * Functions and variables start with lower case: `myMethod`.
- * Constants are all upper case: `const double PI=3.14159265358979323;`.
-
-C++ Standard Library (and other well-known C++ libraries like [Boost](http://www.boost.org/)) use these guidelines:
-
- * Macro names use upper case with underscores: `INT_MAX`.
- * Template parameter names use camel case: `InputIterator`.
- * All other names use snake case: `unordered_map`.
-
-## Distinguish Private Object Data
-
-Name private data with a `m_` prefix to distinguish it from public data. `m_` stands for "member" data.
-
-## Distinguish Function Parameters
-
-The most important thing is consistency within your codebase; this is one possibility to help with consistency.
-
-Name function parameters with an `t_` prefix. `t_` can be thought of as "the", but the meaning is arbitrary. The point is to distinguish function parameters from other variables in scope while giving us a consistent naming strategy.
-
-Any prefix or postfix can be chosen for your organization. This is just one example. *This suggestion is controversial, for a discussion about it see issue [#11](https://github.com/lefticus/cppbestpractices/issues/11).*
-
-```cpp
-struct Size
-{
- int width;
- int height;
-
- Size(int t_width, int t_height) : width(t_width), height(t_height) {}
-};
-
-// This version might make sense for thread safety or something,
-// but more to the point, sometimes we need to hide data, sometimes we don't.
-class PrivateSize
-{
- public:
- int width() const { return m_width; }
- int height() const { return m_height; }
- PrivateSize(int t_width, int t_height) : m_width(t_width), m_height(t_height) {}
-
- private:
- int m_width;
- int m_height;
-};
-```
-
-
-
-
-## Don't Name Anything Starting With `_`
-
-If you do, you risk colliding with names reserved for compiler and standard library implementation use:
-
-http://stackoverflow.com/questions/228783/what-are-the-rules-about-using-an-underscore-in-a-c-identifier
-
-
-## Well-Formed Example
-
-```cpp
-class MyClass
-{
-public:
- MyClass(int t_data)
- : m_data(t_data)
- {
- }
-
- int getData() const
- {
- return m_data;
- }
-
-private:
- int m_data;
-};
-```
-
-
-
-## Enable Out-of-Source-Directory Builds
-
-Make sure generated files go into an output folder that is separate from the source folder.
-
-
-## Use `nullptr`
-
-C++11 introduces `nullptr` which is a special value denoting a null pointer. This should be used instead of `0` or `NULL` to indicate a null pointer.
-
-## Comments
-
-Comment blocks should use `//`, not `/* */`. Using `//` makes it much easier to comment out a block of code while debugging.
-
-```cpp
-// this function does something
-int myFunc()
-{
-}
-```
-
-To comment out this function block during debugging we might do:
-
-```cpp
-/*
-// this function does something
-int myFunc()
-{
-}
-*/
-```
-
-which would be impossible if the function comment header used `/* */`.
-
-## Never Use `using namespace` in a Header File
-
-This causes the namespace you are `using` to be pulled into the namespace of all files that include the header file.
-It pollutes the namespace and it may lead to name collisions in the future.
-Writing `using namespace` in an implementation file is fine though.
-
-
-## Include Guards
-
-Header files must contain a distinctly-named include guard to avoid problems with including the same header multiple times and to prevent conflicts with headers from other projects.
-
-```cpp
-#ifndef MYPROJECT_MYCLASS_HPP
-#define MYPROJECT_MYCLASS_HPP
-
-namespace MyProject {
- class MyClass {
- };
-}
-
-#endif
-```
-
-You may also consider using the `#pragma once` directive instead which is quasi-standard across many compilers.
-It's short and makes the intent clear.
-
-
-## {} Are Required for Blocks.
-Leaving them off can lead to semantic errors in the code.
-
-```cpp
-// Bad Idea
-// This compiles and does what you want, but can lead to confusing
-// errors if modification are made in the future and close attention
-// is not paid.
-for (int i = 0; i < 15; ++i)
- std::cout << i << std::endl;
-
-// Bad Idea
-// The cout is not part of the loop in this case even though it appears to be.
-int sum = 0;
-for (int i = 0; i < 15; ++i)
- ++sum;
- std::cout << i << std::endl;
-
-
-// Good Idea
-// It's clear which statements are part of the loop (or if block, or whatever).
-int sum = 0;
-for (int i = 0; i < 15; ++i) {
- ++sum;
- std::cout << i << std::endl;
-}
-```
-
-## Keep Lines a Reasonable Length
-
-```cpp
-// Bad Idea
-// hard to follow
-if (x && y && myFunctionThatReturnsBool() && caseNumber3 && (15 > 12 || 2 < 3)) {
-}
-
-// Good Idea
-// Logical grouping, easier to read
-if (x && y && myFunctionThatReturnsBool()
- && caseNumber3
- && (15 > 12 || 2 < 3)) {
-}
-```
-
-Many projects and coding standards have a soft guideline that one should try to use less than about 80 or 100 characters per line.
-Such code is generally easier to read.
-It also makes it possible to have two separate files next to each other on one screen without having a tiny font.
-
-
-## Use "" for Including Local Files
-... `<>` is [reserved for system includes](http://blog2.emptycrate.com/content/when-use-include-verses-include).
-
-```cpp
-// Bad Idea. Requires extra -I directives to the compiler
-// and goes against standards.
-#include <string>
-#include <includes/MyHeader.hpp>
-
-// Worse Idea
-// Requires potentially even more specific -I directives and
-// makes code more difficult to package and distribute.
-#include <string>
-#include <MyHeader.hpp>
-
-
-// Good Idea
-// Requires no extra params and notifies the user that the file
-// is a local file.
-#include <string>
-#include "MyHeader.hpp"
-```
-
-## Initialize Member Variables
-...with the member initializer list.
-
-For POD types, the performance of an initializer list is the same as manual initialization, but for other types there is a clear performance gain, see below.
-
-```cpp
-// Bad Idea
-class MyClass
-{
-public:
- MyClass(int t_value)
- {
- m_value = t_value;
- }
-
-private:
- int m_value;
-};
-
-// Bad Idea
-// This leads to an additional constructor call for m_myOtherClass
-// before the assignment.
-class MyClass
-{
-public:
- MyClass(MyOtherClass t_myOtherClass)
- {
- m_myOtherClass = t_myOtherClass;
- }
-
-private:
- MyOtherClass m_myOtherClass;
-};
-
-// Good Idea
-// There is no performance gain here but the code is cleaner.
-class MyClass
-{
-public:
- MyClass(int t_value)
- : m_value(t_value)
- {
- }
-
-private:
- int m_value;
-};
-
-// Good Idea
-// The default constructor for m_myOtherClass is never called here, so
-// there is a performance gain if MyOtherClass is not is_trivially_default_constructible.
-class MyClass
-{
-public:
- MyClass(MyOtherClass t_myOtherClass)
- : m_myOtherClass(t_myOtherClass)
- {
- }
-
-private:
- MyOtherClass m_myOtherClass;
-};
-```
-
-In C++11 you can assign default values to each member (using `=` or using `{}`).
-
-### Assigning default values with =
-
-```cpp
-// ... //
-private:
- int m_value = 0; // allowed
- unsigned m_value_2 = -1; // narrowing from signed to unsigned allowed
-// ... //
-```
-This ensures that no constructor ever "forgets" to initialize a member object.
-
-### Assigning default values with brace initialization
-
-Using brace initialization does not allow narrowing at compile-time.
-
-```cpp
-// Best Idea
-
-// ... //
-private:
- int m_value{ 0 }; // allowed
- unsigned m_value_2 { -1 }; // narrowing from signed to unsigned not allowed, leads to a compile time error
-// ... //
-```
-
-Prefer `{}` initialization over `=` unless you have a strong reason not to.
-
-Forgetting to initialize a member is a source of undefined behavior bugs which are often extremely hard to find.
-
-If the member variable is not expected to change after the initialization, then mark it `const`.
-
-```cpp
-class MyClass
-{
-public:
- MyClass(int t_value)
- : m_value{t_value}
- {
- }
-
-private:
- const int m_value{0};
-};
-```
-
-Since a const member variable cannot be assigned a new value, such a class may not have a meaningful copy assignment operator.
-
-## Always Use Namespaces
-
-There is almost never a reason to declare an identifier in the global namespace. Instead, functions and classes should exist in an appropriately named namespace or in a class inside of a namespace. Identifiers which are placed in the global namespace risk conflicting with identifiers from other libraries (mostly C, which doesn't have namespaces).
-
-
-## Use the Correct Integer Type for Standard Library Features
-
-The standard library generally uses `std::size_t` for anything related to size. The size of `size_t` is implementation defined.
-
-In general, using `auto` will avoid most of these issues, but not all.
-
-Make sure you stick with the correct integer types and remain consistent with the C++ standard library. It might not warn on the platform you are currently using, but it probably will when you change platforms.
-
-*Note that you can cause integer underflow when performing some operations on unsigned values. For example:*
-
-```cpp
-std::vector<int> v1{2,3,4,5,6,7,8,9};
-std::vector<int> v2{9,8,7,6,5,4,3,2,1};
-const auto s1 = v1.size();
-const auto s2 = v2.size();
-const auto diff = s1 - s2; // diff underflows to a very large number
-```
-
-## Use .hpp and .cpp for Your File Extensions
-
-Ultimately this is a matter of preference, but .hpp and .cpp are widely recognized by various editors and tools. So the choice is pragmatic. Specifically, Visual Studio only automatically recognizes .cpp and .cxx for C++ files, and Vim doesn't necessarily recognize .cc as a C++ file.
-
-One particularly large project ([OpenStudio](https://github.com/NREL/OpenStudio)) uses .hpp and .cpp for user-generated files and .hxx and .cxx for tool-generated files. Both are well recognized and having the distinction is helpful.
-
-## Never Mix Tabs and Spaces
-
-Some editors like to indent with a mixture of tabs and spaces by default. This makes the code unreadable to anyone not using the exact same tab indentation settings. Configure your editor so this does not happen.
-
-## Never Put Code with Side Effects Inside an assert()
-
-```cpp
-assert(registerSomeThing()); // make sure that registerSomeThing() returns true
-```
-
-The above code succeeds when making a debug build, but gets removed by the compiler when making a release build, giving you different behavior between debug and release builds.
-This is because `assert()` is a macro which expands to nothing in release mode.
-
-## Don't Be Afraid of Templates
-
-They can help you stick to [DRY principles](http://en.wikipedia.org/wiki/Don%27t_repeat_yourself).
-They should be preferred to macros, because macros do not honor namespaces, etc.
-
-## Use Operator Overloads Judiciously
-
-Operator overloading was invented to enable expressive syntax. Expressive in the sense that adding two big integers looks like `a + b` and not `a.add(b)`. Another common example is `std::string`, where it is very common to concatenate two strings with `string1 + string2`.
-
-However, you can easily create unreadable expressions using too much or wrong operator overloading. When overloading operators, there are three basic rules to follow as described [on stackoverflow](http://stackoverflow.com/questions/4421706/operator-overloading/4421708#4421708).
-
-Specifically, you should keep these things in mind:
-
-* Overloading `operator=()` when handling resources is a must. See [Consider the Rule of Zero](03-Style.md#consider-the-rule-of-zero) below.
-* For all other operators, only overload them when they are used in a context that is commonly connected to these operators. Typical scenarios are concatenating things with +, negating expressions that can be considered "true" or "false", etc.
-* Always be aware of the [operator precedence](http://en.cppreference.com/w/cpp/language/operator_precedence) and try to circumvent unintuitive constructs.
-* Do not overload exotic operators such as ~ or % unless implementing a numeric type or following a well recognized syntax in specific domain.
-* [Never](http://stackoverflow.com/questions/5602112/when-to-overload-the-comma-operator?answertab=votes#tab-top) overload `operator,()` (the comma operator).
-* Use non-member functions `operator>>()` and `operator<<()` when dealing with streams. For example, you can overload `operator<<(std::ostream &, MyClass const &)` to enable "writing" your class into a stream, such as `std::cout` or an `std::fstream` or `std::stringstream`. The latter is often used to create a string representation of a value.
-* There are more common operators to overload [described here](http://stackoverflow.com/questions/4421706/operator-overloading?answertab=votes#tab-top).
-
-More tips regarding the implementation details of your custom operators can be found [here](http://courses.cms.caltech.edu/cs11/material/cpp/donnie/cpp-ops.html).
-
-## Avoid Implicit Conversions
-
-### Single Parameter Constructors
-
-Single parameter constructors can be applied at compile time to automatically convert between types. This is handy for things like `std::string(const char *)` but should be avoided in general because they can add to accidental runtime overhead.
-
-Instead mark single parameter constructors as `explicit`, which requires them to be explicitly called.
-
-### Conversion Operators
-
-Similarly to single parameter constructors, conversion operators can be called by the compiler and introduce unexpected overhead. They should also be marked as `explicit`.
-
-```cpp
-//bad idea
-struct S {
- operator int() {
- return 2;
- }
-};
-```
-
-```cpp
-//good idea
-struct S {
- explicit operator int() {
- return 2;
- }
-};
-```
-
-## Consider the Rule of Zero
-
-The Rule of Zero states that you do not provide any of the functions that the compiler can provide (copy constructor, copy assignment operator, move constructor, move assignment operator, destructor) unless the class you are constructing does some novel form of ownership.
-
-The goal is to let the compiler provide optimal versions that are automatically maintained when more member variables are added.
-
-[This article](http://www.nirfriedman.com/2015/06/27/cpp-rule-of-zero/) provides a background and explains techniques for implementing nearly 100% of the time.
-
diff --git a/docs/cpp-coding/04-Considering_Safety.md b/docs/cpp-coding/04-Considering_Safety.md
deleted file mode 100644
index 713676743..000000000
--- a/docs/cpp-coding/04-Considering_Safety.md
+++ /dev/null
@@ -1,145 +0,0 @@
-# Considering Safety
-
-
-## Const as Much as Possible
-`const` tells the compiler that a variable or method is immutable. This helps the compiler optimize the code and helps the developer know if a function has a side effect. Also, using `const &` prevents the compiler from copying data unnecessarily. The [comments on `const` from John Carmack](http://kotaku.com/454293019) are also a good read.
-
-```cpp
-// Bad Idea
-class MyClass
-{
-public:
- void do_something(int i);
- void do_something(std::string str);
-};
-
-
-// Good Idea
-class MyClass
-{
-public:
- void do_something(const int i);
- void do_something(const std::string &str);
-};
-
-```
-
-### Carefully Consider Your Return Types
-
- * Getters
- * Returning by `&` or `const &` can have significant performance savings when the normal use of the returned value is for observation
- * Returning by value is better for thread safety and if the normal use of the returned value is to make a copy anyhow, there's no performance lost
- * If your API uses covariant return types, you must return by `&` or `*`
- * Temporaries and local values
- * Always return by value.
-
-
-references: https://github.com/lefticus/cppbestpractices/issues/21 https://twitter.com/lefticus/status/635943577328095232
-
-### Do not pass and return simple types by const ref
-
-```cpp
-// Very Bad Idea
-class MyClass
-{
-public:
- explicit MyClass(const int& t_int_value)
- : m_int_value(t_int_value)
- {
- }
-
- const int& get_int_value() const
- {
- return m_int_value;
- }
-
-private:
- int m_int_value;
-}
-```
-
-Instead, pass and return simple types by value. If you plan not to change passed value, declare them as `const`, but not `const` refs:
-
-```cpp
-// Good Idea
-class MyClass
-{
-public:
- explicit MyClass(const int t_int_value)
- : m_int_value(t_int_value)
- {
- }
-
- int get_int_value() const
- {
- return m_int_value;
- }
-
-private:
- int m_int_value;
-}
-```
-
-Why? Because passing and returning by reference leads to pointer operations, whereas passing simple values in processor registers is much faster.
-
-## Avoid Raw Memory Access
-
-Raw memory access, allocation and deallocation, are difficult to get correct in C++ without [risking memory errors and leaks](http://blog2.emptycrate.com/content/nobody-understands-c-part-6-are-you-still-using-pointers). C++11 provides tools to avoid these problems.
-
-```cpp
-// Bad Idea
-MyClass *myobj = new MyClass;
-
-// ...
-delete myobj;
-
-
-// Good Idea
-auto myobj = std::make_unique<MyClass>(constructor_param1, constructor_param2); // C++14
-auto myobj = std::unique_ptr<MyClass>(new MyClass(constructor_param1, constructor_param2)); // C++11
-auto mybuffer = std::make_unique<char[]>(length); // C++14
-auto mybuffer = std::unique_ptr<char[]>(new char[length]); // C++11
-
-// or for reference counted objects
-auto myobj = std::make_shared<MyClass>();
-
-// ...
-// myobj is automatically freed for you whenever it is no longer used.
-```
-
-## Use `std::array` or `std::vector` Instead of C-style Arrays
-
-Both of these guarantee contiguous memory layout of objects and can (and should) completely replace your usage of C-style arrays for many of the reasons listed for not using bare pointers.
-
-Also, [avoid](http://stackoverflow.com/questions/3266443/can-you-use-a-shared-ptr-for-raii-of-c-style-arrays) using `std::shared_ptr` to hold an array.
-
-## Use Exceptions
-
-Exceptions cannot be ignored. Return values, such as using `boost::optional`, can be ignored and if not checked can cause crashes or memory errors. An exception, on the other hand, can be caught and handled. Potentially all the way up the highest level of the application with a log and automatic restart of the application.
-
-Stroustrup, the original designer of C++, [makes this point](http://www.stroustrup.com/bs_faq2.html#exceptions-why) much better than I ever could.
-
-## Use C++-style cast instead of C-style cast
-Use the C++-style cast (static\_cast<>, dynamic\_cast<> ...) instead of the C-style cast. The C++-style cast allows more compiler checks and is considerably safer.
-
-```cpp
-// Bad Idea
-double x = getX();
-int i = (int) x;
-
-// Not a Bad Idea
-int i = static_cast<int>(x);
-```
-Additionally, the C++ cast style is more visible and easier to search for.
-
-But consider refactoring of program logic (for example, additional checking on overflow and underflow) if you need to cast `double` to `int`. Measure three times and cut 0.9999999999981 times.
-
-## Do not define a variadic function
-Variadic functions can accept a variable number of parameters. The probably best known example is printf(). You have the possibility to define this kind of functions by yourself but this is a possible security risk. The usage of variadic functions is not type safe and the wrong input parameters can cause a program termination with an undefined behavior. This undefined behavior can be exploited to a security problem.
-If you have the possibility to use a compiler that supports C++11, you can use variadic templates instead.
-
-[It is technically possible to make typesafe C-style variadic functions with some compilers](https://github.com/lefticus/cppbestpractices/issues/53)
-
-## Additional Resources
-
-[How to Prevent The Next Heartbleed](http://www.dwheeler.com/essays/heartbleed.html) by David Wheeler is a good analysis of the current state of code safety and how to ensure safe code.
diff --git a/docs/cpp-coding/05-Considering_Maintainability.md b/docs/cpp-coding/05-Considering_Maintainability.md
deleted file mode 100644
index 4547559e5..000000000
--- a/docs/cpp-coding/05-Considering_Maintainability.md
+++ /dev/null
@@ -1,58 +0,0 @@
-# Considering Maintainability
-
-
-## Avoid Compiler Macros
-
-Compiler definitions and macros are replaced by the preprocessor before the compiler is ever run. This can make debugging very difficult because the debugger doesn't know where the source came from.
-
-```cpp
-// Bad Idea
-#define PI 3.14159;
-
-// Good Idea
-namespace my_project {
- class Constants {
- public:
- // if the above macro would be expanded, then the following line would be:
- // static const double 3.14159 = 3.14159;
- // which leads to a compile-time error. Sometimes such errors are hard to understand.
- static constexpr double PI = 3.14159;
- };
-}
-```
-
-## Consider Avoiding Boolean Parameters
-
-They do not provide any additional meaning while reading the code. You can either create a separate function that has a more meaningful name, or pass an enumeration that makes the meaning more clear.
-
-See http://mortoray.com/2015/06/15/get-rid-of-those-boolean-function-parameters/ for more information.
-
-## Avoid Raw Loops
-
-Know and understand the existing C++ standard algorithms and put them to use.
-
- * See [cppreference](https://en.cppreference.com/w/cpp/algorithm)
- * Watch [C++ Seasoning](https://www.youtube.com/watch?v=qH6sSOr-yk8)
-
-Consider a call to `[]` as a potential code smell, indicating that an algorithm was not used where it could have been.
-
-
-## Never Use `assert` With Side Effects
-
-```cpp
-// Bad Idea
-assert(set_value(something));
-
-// Better Idea
-[[maybe_unused]] const auto success = set_value(something);
-assert(success);
-```
-
-The `assert()` will be removed in release builds which will prevent the `set_value` call from ever happening.
-
-So while the second version is uglier, the first version is simply not correct.
-
-
-## Properly Utilize 'override' and 'final'
-
-These keywords make it clear to other developers how virtual functions are being utilized, can catch potential errors if the signature of a virtual function changes, and can possibly [hint to the compiler](http://stackoverflow.com/questions/7538820/how-does-the-compiler-benefit-from-cs-new-final-keyword) of optimizations that can be performed.
diff --git a/docs/cpp-coding/06-Considering_Portability.md b/docs/cpp-coding/06-Considering_Portability.md
deleted file mode 100644
index 5fd89ef10..000000000
--- a/docs/cpp-coding/06-Considering_Portability.md
+++ /dev/null
@@ -1,21 +0,0 @@
-# Considering Portability
-
-## Know Your Types
-
-Most portability issues that generate warnings are because we are not careful about our types. Standard library and arrays are indexed with `size_t`. Standard container sizes are reported in `size_t`. If you get the handling of `size_t` wrong, you can create subtle lurking 64-bit issues that arise only after you start to overflow the indexing of 32-bit integers. char vs unsigned char.
-
-http://www.viva64.com/en/a/0010/
-
-## Use The Standard Library
-
-### `std::filesystem`
-
-C++17 added a new `filesystem` library which provides portable filesystem access across all supporting compilers
-
-### `std::thread`
-
-C++11's threading capabilities should be utilized over `pthread` or `WinThreads`.
-
-## Other Concerns
-
-Most of the other concerns in this document ultimately come back to portability issues. [Avoid statics](07-Considering_Threadability.md#statics) is particularly of note.
diff --git a/docs/cpp-coding/07-Considering_Threadability.md b/docs/cpp-coding/07-Considering_Threadability.md
deleted file mode 100644
index a6b9f3444..000000000
--- a/docs/cpp-coding/07-Considering_Threadability.md
+++ /dev/null
@@ -1,30 +0,0 @@
-# Considering Threadability
-
-## Avoid Global Data
-
-Global data leads to unintended side effects between functions and can make code difficult or impossible to parallelize. Even if the code is not intended today for parallelization, there is no reason to make it impossible for the future.
-
-### Statics
-
-Besides being global data, statics are not always constructed and deconstructed as you would expect. This is particularly true in cross-platform environments. See for example, [this g++ bug](https://gcc.gnu.org/bugzilla/show_bug.cgi?id=66830) regarding the order of destruction of shared static data loaded from dynamic modules.
-
-### Shared Pointers
-
-`std::shared_ptr` is "as good as a global" (http://stackoverflow.com/a/18803611/29975) because it allows multiple pieces of code to interact with the same data.
-
-### Singletons
-
-A singleton is often implemented with a static and/or `shared_ptr`.
-
-## Avoid Heap Operations
-
-Much slower in threaded environments. In many or maybe even most cases, copying data is faster. Plus with move operations and such and things.
-
-## Mutex and mutable go together (M&M rule, C++11)
-For member variables it is good practice to use mutex and mutable together. This applies in both ways:
-* A mutable member variable is presumed to be a shared variable so it should be synchronized with a mutex (or made atomic)
-* If a member variable is itself a mutex, it should be mutable. This is required to use it inside a const member function.
-
-For more information see the following article from Herb Sutter: http://herbsutter.com/2013/05/24/gotw-6a-const-correctness-part-1-3/
-
-See also [related safety discussion](04-Considering_Safety.md#consider-return-by-value-for-mutable-data-const--for-immutable) about `const &` return values
diff --git a/docs/cpp-coding/08-Considering_Performance.md b/docs/cpp-coding/08-Considering_Performance.md
deleted file mode 100644
index 784ca33da..000000000
--- a/docs/cpp-coding/08-Considering_Performance.md
+++ /dev/null
@@ -1,356 +0,0 @@
-# Considering Performance
-
-## Build Time
-
-
-
-### Forward Declare When Possible
-
-This:
-
-```cpp
-// some header file
-class MyClass;
-
-void doSomething(const MyClass &);
-```
-
-instead of:
-
-```cpp
-// some header file
-#include "MyClass.hpp"
-
-void doSomething(const MyClass &);
-```
-
-
-This applies to templates as well:
-
-```cpp
-template<typename T> class MyTemplatedType;
-```
-
-This is a proactive approach to reduce compilation time and rebuilding dependencies.
-
-*Note: forward declaration does prevent more inlining and optimizations. It's recommended to use Link Time Optimization or Link Time Code Generation for release builds.*
-
-### Avoid Unnecessary Template Instantiations
-
-Templates are not free to instantiate. Instantiating many templates, or templates with more code than necessary increases compiled code size and build time.
-
-For more examples see [this article](http://blog2.emptycrate.com/content/template-code-bloat-revisited-smaller-makeshared).
-
-### Avoid Recursive Template Instantiations
-
-Recursive template instantiations can result in a significant load on the compiler and more difficult to understand code.
-
-[Consider using variadic expansions and folds when possible instead.](http://articles.emptycrate.com/2016/05/14/folds_in_cpp11_ish.html)
-
-### Analyze the Build
-
-The tool [Templight](https://github.com/mikael-s-persson/templight) can be used to analyze the build time of your project. It takes some effort to get built, but once you do, it's a drop in replacement for clang++.
-
-After you build using Templight, you will need to analyze the results. The [templight-tools](https://github.com/mikael-s-persson/templight-tools) project provides various methods. (Author's Note: I suggest using the callgrind converter and visualizing the results with kcachegrind).
-
-
-
-### Firewall Frequently Changing Header Files
-
-
-
-#### Don't Unnecessarily Include Headers
-
-The compiler has to do something with each include directive it sees. Even if it stops as soon as it sees the `#ifndef` include guard, it still had to open the file and begin processing it.
-
-[include-what-you-use](https://github.com/include-what-you-use/include-what-you-use) is a tool that can help you identify which headers you need.
-
-#### Reduce the load on the preprocessor
-
-This is a general form of "Firewall Frequently Changing Header Files" and "Don't Unnecessarily Include Headers." Tools like BOOST_PP can be very helpful, but they also put a huge burden on the preprocessor.
-
-### Consider using precompiled headers
-
-The usage of precompiled headers can considerably reduce the compile time in large projects. Selected headers are compiled to an intermediate form (PCH files) that can be faster processed by the compiler. It is recommended to define only frequently used header that changes rarely as precompiled header (e.g. system and library headers) to achieve the compile time reduction.
-But you have to keep in mind, that using precompiled headers has several disadvantages:
-* The usage of precompiled header is not portable.
-* The generated PCH files are machine dependent.
-* The generated PCH files can be quite large.
-* It can break your header dependencies. Because of the precompiled headers, every file has the possibility to include every header that is marked as a precompiled header. In result it can happen, that the build fails if you disable the precompiled headers. This can be an issue if you ship something like a library. Because of this it is highly recommend to build once with precompiled header enabled and a second time without them.
-
-Precompiled headers is supported by the most common compiler, like [GCC](https://gcc.gnu.org/onlinedocs/gcc/Precompiled-Headers.html), [Clang](http://clang.llvm.org/docs/PCHInternals.html) and [Visual Studio](https://msdn.microsoft.com/en-us/library/szfdksca.aspx).
-Tools like [cotire](https://github.com/sakra/cotire/) (a plugin for cmake) can help you to add precompiled headers to your build system.
-
-### Consider Using Tools
-
-These are not meant to supersede good design
-
- * [ccache](https://ccache.samba.org/), compile results caching for unix-like operating systems
- * [clcache](https://github.com/frerich/clcache), compile results caching for cl.exe (MSVC)
- * [warp](https://github.com/facebook/warp), Facebook's preprocessor
-
-### Put tmp on Ramdisk
-
-See [this](https://www.youtube.com/watch?v=t4M3yG1dWho) YouTube video for more details.
-
-### Use the gold linker
-
-If on Linux, consider using the gold linker for GCC.
-
-## Runtime
-
-### Analyze the Code!
-
-There's no real way to know where your bottlenecks are without analyzing the code.
-
- * http://developer.amd.com/tools-and-sdks/opencl-zone/codexl/
- * http://www.codersnotes.com/sleepy
-
-### Simplify the Code
-
-The cleaner, simpler, and easier to read the code is, the better chance the compiler has at implementing it well.
-
-### Use Initializer Lists
-
-```cpp
-// This
-std::vector<ModelObject> mos{mo1, mo2};
-
-// -or-
-auto mos = std::vector<ModelObject>{mo1, mo2};
-```
-
-```cpp
-// Don't do this
-std::vector<ModelObject> mos;
-mos.push_back(mo1);
-mos.push_back(mo2);
-```
-
-Initializer lists are significantly more efficient; reducing object copies and resizing of containers.
-
-### Reduce Temporary Objects
-
-```cpp
-// Instead of
-auto mo1 = getSomeModelObject();
-auto mo2 = getAnotherModelObject();
-
-doSomething(mo1, mo2);
-```
-
-```cpp
-// consider:
-
-doSomething(getSomeModelObject(), getAnotherModelObject());
-```
-
-This sort of code prevents the compiler from performing a move operation...
-
-### Enable move operations
-
-Move operations are one of the most touted features of C++11. They allow the compiler to avoid extra copies by moving temporary objects instead of copying them in certain cases.
-
-Certain coding choices we make (such as declaring our own destructor or assignment operator or copy constructor) prevents the compiler from generating a move constructor.
-
-For most code, a simple
-
-```cpp
-ModelObject(ModelObject &&) = default;
-```
-
-would suffice. However, MSVC2013 doesn't seem to like this code yet.
-
-### Kill `shared_ptr` Copies
-
-`shared_ptr` objects are much more expensive to copy than you'd think they would be. This is because the reference count must be atomic and thread-safe. So this comment just re-enforces the note above: avoid temporaries and too many copies of objects. Just because we are using a pImpl it does not mean our copies are free.
-
-### Reduce Copies and Reassignments as Much as Possible
-
-For more simple cases, the ternary operator can be used:
-
-```cpp
-// Bad Idea
-std::string somevalue;
-
-if (caseA) {
- somevalue = "Value A";
-} else {
- somevalue = "Value B";
-}
-```
-
-```cpp
-// Better Idea
-const std::string somevalue = caseA ? "Value A" : "Value B";
-```
-
-More complex cases can be facilitated with an [immediately-invoked lambda](http://blog2.emptycrate.com/content/complex-object-initialization-optimization-iife-c11).
-
-```cpp
-// Bad Idea
-std::string somevalue;
-
-if (caseA) {
- somevalue = "Value A";
-} else if(caseB) {
- somevalue = "Value B";
-} else {
- somevalue = "Value C";
-}
-```
-
-```cpp
-// Better Idea
-const std::string somevalue = [&](){
- if (caseA) {
- return "Value A";
- } else if (caseB) {
- return "Value B";
- } else {
- return "Value C";
- }
- }();
-```
-
-
-### Avoid Excess Exceptions
-
-Exceptions which are thrown and captured internally during normal processing slow down the application execution. They also destroy the user experience from within a debugger, as debuggers monitor and report on each exception event. It is best to just avoid internal exception processing when possible.
-
-### Get rid of “new”
-
-We already know that we should not be using raw memory access, so we are using `unique_ptr` and `shared_ptr` instead, right?
-Heap allocations are much more expensive than stack allocations, but sometimes we have to use them. To make matters worse, creating a `shared_ptr` actually requires 2 heap allocations.
-
-However, the `make_shared` function reduces this down to just one.
-
-```cpp
-std::shared_ptr<ModelObject_Impl>(new ModelObject_Impl());
-
-// should become
-std::make_shared<ModelObject_Impl>(); // (it's also more readable and concise)
-```
-
-### Prefer `unique_ptr` to `shared_ptr`
-
-If possible use `unique_ptr` instead of `shared_ptr`. The `unique_ptr` does not need to keep track of its copies because it is not copyable. Because of this it is more efficient than the `shared_ptr`. Equivalent to `shared_ptr` and `make_shared` you should use `make_unique` (C++14 or greater) to create the `unique_ptr`:
-
-```cpp
-std::make_unique<ModelObject_Impl>();
-```
-
-Current best practices suggest returning a `unique_ptr` from factory functions as well, then converting the `unique_ptr` to a `shared_ptr` if necessary.
-
-```cpp
-std::unique_ptr<ModelObject_Impl> factory();
-
-auto shared = std::shared_ptr<ModelObject_Impl>(factory());
-```
-
-### Get rid of std::endl
-
-`std::endl` implies a flush operation. It's equivalent to `"\n" << std::flush`.
-
-
-### Limit Variable Scope
-
-Variables should be declared as late as possible, and ideally only when it's possible to initialize the object. Reduced variable scope results in less memory being used, more efficient code in general, and helps the compiler optimize the code further.
-
-```cpp
-// Good Idea
-for (int i = 0; i < 15; ++i)
-{
- MyObject obj(i);
- // do something with obj
-}
-
-// Bad Idea
-MyObject obj; // meaningless object initialization
-for (int i = 0; i < 15; ++i)
-{
- obj = MyObject(i); // unnecessary assignment operation
- // do something with obj
-}
-// obj is still taking up memory for no reason
-```
-
-For C++17 and onwards, consider using init-statement in the `if` and `switch` statements:
-
-```cpp
-if (MyObject obj(index); obj.good()) {
- // do something if obj is good
-} else {
- // do something if obj is not good
-}
-```
-
-[This topic has an associated discussion thread](https://github.com/lefticus/cppbestpractices/issues/52).
-
-### Prefer `double` to `float`, But Test First
-
-Depending on the situation and the compiler's ability to optimize, one may be faster over the other. Choosing `float` will result in lower precision and may be slower due to conversions. On vectorizable operations `float` may be faster if you are able to sacrifice precision.
-
-`double` is the recommended default choice as it is the default type for floating point values in C++.
-
-See this [stackoverflow](http://stackoverflow.com/questions/4584637/double-or-float-which-is-faster) discussion for some more information.
-
-### Prefer `++i` to `i++`
-... when it is semantically correct. Pre-increment is [faster](http://blog2.emptycrate.com/content/why-i-faster-i-c) than post-increment because it does not require a copy of the object to be made.
-
-```cpp
-// Bad Idea
-for (int i = 0; i < 15; i++)
-{
- std::cout << i << '\n';
-}
-
-// Good Idea
-for (int i = 0; i < 15; ++i)
-{
- std::cout << i << '\n';
-}
-```
-
-Even if many modern compilers will optimize these two loops to the same assembly code, it is still good practice to prefer `++i`. There is absolutely no reason not to and you can never be certain that your code will not pass a compiler that does not optimize this.
-You should be also aware that the compiler will not be able optimize this only for integer types and not necessarily for all iterator or other user defined types.
-The bottom line is that it is always easier and recommended to use the pre-increment operator if it is semantically identical to the post-increment operator.
-
-### Char is a char, string is a string
-
-```cpp
-// Bad Idea
-std::cout << someThing() << "\n";
-
-// Good Idea
-std::cout << someThing() << '\n';
-```
-
-This is very minor, but a `"\n"` has to be parsed by the compiler as a `const char *` which has to do a range check for `\0` when writing it to the stream (or appending to a string). A '\n' is known to be a single character and avoids many CPU instructions.
-
-If used inefficiently very many times it might have an impact on your performance, but more importantly thinking about these two usage cases gets you thinking more about what the compiler and runtime has to do to execute your code.
-
-
-### Never Use `std::bind`
-
-`std::bind` is almost always way more overhead (both compile time and runtime) than you need. Instead simply use a lambda.
-
-```cpp
-// Bad Idea
-auto f = std::bind(&my_function, "hello", std::placeholders::_1);
-f("world");
-
-// Good Idea
-auto f = [](const std::string &s) { return my_function("hello", s); };
-f("world");
-```
-
-
-### Know The Standard Library
-
-Properly use the already highly optimized components of the vendor provided standard library.
-
-#### `in_place_t` And Related
-
-Be aware of how to use `in_place_t` and related tags for efficient creation of objects such as `std::tuple`, `std::any` and `std::variant`.
-
diff --git a/docs/cpp-coding/09-Considering_Correctness.md b/docs/cpp-coding/09-Considering_Correctness.md
deleted file mode 100644
index 5bc8b61ec..000000000
--- a/docs/cpp-coding/09-Considering_Correctness.md
+++ /dev/null
@@ -1,30 +0,0 @@
-# Considering Correctness
-
-## Avoid Typeless Interfaces
-
-
-Bad Idea:
-
-```cpp
-std::string find_file(const std::string &base, const std::string &pattern);
-```
-
-Better Idea:
-
-```cpp
-std::filesystem::path find_file(const std::filesystem::path &base, const std::regex &pattern);
-```
-
-The above is better but still suffers from having implicit conversions from `std::string` to `std::filesystem::path` and back.
-
-Consider using a typesafe library like
-
- * https://foonathan.net/type_safe/
- * https://github.com/rollbear/strong_type
- * https://github.com/joboccara/NamedType
-
-Note that stronger typing can also allow for more compiler optimizations.
-
-* [Sorting in C vs C++](Sorting in C vs C++.pdf)
-
-
diff --git a/docs/cpp-coding/10-Enable_Scripting.md b/docs/cpp-coding/10-Enable_Scripting.md
deleted file mode 100644
index e22724b3e..000000000
--- a/docs/cpp-coding/10-Enable_Scripting.md
+++ /dev/null
@@ -1,12 +0,0 @@
-# Enable Scripting
-
-The combination of scripting and compiled languages is very powerful. It gives us the things we've come to love about compiled languages: type safety, performance, thread safety options, consistent memory model while also giving us the flexibility to try something new quickly without a full rebuild.
-
-The VM based compiled languages have learned this already: JRuby, Jython, IronRuby, IronPython
-
- * [ChaiScript](http://chaiscript.com/)
- * [AngelScript](http://www.angelcode.com/angelscript/)
- * [luabind](http://www.rasterbar.com/products/luabind.html)
- * [sol2](https://github.com/ThePhD/sol2) (bindings for Lua)
- * [SWIG](http://www.swig.org/) (simplified wrapper and interface generator)
- * [pybind11](https://pybind11.readthedocs.io/en/stable/) (Python and modern C++ interoperability)
diff --git a/docs/cpp-coding/11-Further_Reading.md b/docs/cpp-coding/11-Further_Reading.md
deleted file mode 100644
index 515e16fc2..000000000
--- a/docs/cpp-coding/11-Further_Reading.md
+++ /dev/null
@@ -1,29 +0,0 @@
-# Further Reading
-
-*Note: This book has now inspired a video series from O'Reilly, [Learning C++ Best Practices](http://shop.oreilly.com/product/0636920049814.do)*
-
-## C++
-
- * https://github.com/isocpp/CppCoreGuidelines The C++ Core Guidelines are a set of tried-and-true guidelines, rules, and best practices about coding in C++
- * https://www.gitbook.com/book/alexastva/the-ultimate-question-of-programming-refactoring-/details - The Ultimate Question of Programming, Refactoring, and Everything
- * http://llvm.org/docs/CodingStandards.html - LLVM Coding Standards - very well written
- * http://geosoft.no/development/cppstyle.html
- * https://google.github.io/styleguide/cppguide.html (Note that Google's standard document makes several recommendations which we will NOT be following. For example, they explicitly forbid the use of exceptions, which makes [RAII](http://blog2.emptycrate.com/content/nobody-understands-c-part-2-raii) impossible.)
- * https://isocpp.org/faq/
- * http://www.cplusplus.com/
- * http://www.gamasutra.com/view/news/128836/InDepth_Static_Code_Analysis.php - Article from John Carmack on the advantages of static analysis
- * https://svn.boost.org/trac/boost/wiki/BestPracticeHandbook - Best Practice Handbook from Nial Douglas
- * http://sourceforge.net/apps/mediawiki/cppcheck/index.php?title=ListOfChecks
- * http://emptycrate.com/
- * http://stackoverflow.com/questions/tagged/c%2b%2b-faq?sort=votes&pageSize=15 - StackOverflow C++ FAQ
- * http://codergears.com/qacenter/ discussion center for C and C++ best practices
- * http://www.viva64.com/en/b/0391/ The Ultimate Question of Programming, Refactoring, and Everything
-
-## CMake
-
- * https://cmake.org/cmake/help/latest/manual/cmake.1.html - Be aware that there are `--warn` command line options for CMake that can catch some issues.
- * https://github.com/Akagi201/learning-cmake
- * https://codingnest.com/basic-cmake/
- * https://gist.github.com/mbinna/c61dbb39bca0e4fb7d1f73b0d66a4fd1 - Effective CMake online book
- * https://pabloariasal.github.io/2018/02/19/its-time-to-do-cmake-right/
- * https://cliutils.gitlab.io/modern-cmake/ - An Introduction to Modern CMake
diff --git a/docs/cpp-coding/12-Final_Thoughts.md b/docs/cpp-coding/12-Final_Thoughts.md
deleted file mode 100644
index e7f711bf3..000000000
--- a/docs/cpp-coding/12-Final_Thoughts.md
+++ /dev/null
@@ -1,4 +0,0 @@
-# Final Thoughts
-
-Expand your horizons and use other programming languages. Other languages have different constructs and expressions. Learning what else is out there will encourage you to be more creative with your C++ and write cleaner, more expressive code.
-
diff --git a/docs/cpp-coding/SUMMARY.md b/docs/cpp-coding/SUMMARY.md
deleted file mode 100644
index 6ecc4de54..000000000
--- a/docs/cpp-coding/SUMMARY.md
+++ /dev/null
@@ -1,15 +0,0 @@
-# Summary
-
-* [Preface](01-Preface.md)
-* [Use the Tools Available](02-Use_the_Tools_Available.md)
-* [Style](03-Style.md)
-* [Considering Safety](04-Considering_Safety.md)
-* [Considering Maintainability](05-Considering_Maintainability.md)
-* [Considering Portability](06-Considering_Portability.md)
-* [Considering Threadability](07-Considering_Threadability.md)
-* [Considering Performance](08-Considering_Performance.md)
-* [Considering Correctness](09-Considering_Correctness.md)
-* [Enable Scripting](10-Enable_Scripting.md)
-* [Further Reading](11-Further_Reading.md)
-* [Final Thoughts](12-Final_Thoughts.md)
-
diff --git a/prepare_commit.bat b/prepare_commit.bat
deleted file mode 100644
index 8e621c96c..000000000
--- a/prepare_commit.bat
+++ /dev/null
@@ -1 +0,0 @@
-python %~dp0scripts\formatcode.py %*
diff --git a/scripts/formatcode.py b/scripts/formatcode.py
deleted file mode 100644
index 49a8753da..000000000
--- a/scripts/formatcode.py
+++ /dev/null
@@ -1,138 +0,0 @@
-import argparse
-import fileinput
-import os
-import pathlib
-import re
-import subprocess
-
-match_expressions = []
-valid_extensions = []
-root_dir = ''
-use_batching = True
-
-def is_header_missing(f):
- with open(f) as reader:
- lines = reader.read().lstrip().splitlines()
- if len(lines) > 0: return not lines[0].startswith("// ")
- return True
-
-def add_headers(files, header):
- for line in fileinput.input(files, inplace=True):
- if fileinput.isfirstline():
- [ print(h) for h in header.splitlines() ]
- print(line, end="")
-
-def scan_tree(root):
- files = []
- header_files = []
- with os.scandir(root) as dirs:
- for entry in dirs:
- if entry.is_dir():
- scan_tree(os.path.join(root, entry.name))
- continue
- full_path = os.path.join(root, entry.name)
- relative_root_path = os.path.relpath(full_path, start=root_dir)
- if is_matching_filename(relative_root_path):
- print("... formatting: {}".format(relative_root_path))
- files.append(full_path)
- if is_header_missing(full_path):
- header_files.append(full_path)
- args = ""
- if files:
- if use_batching:
- os.system("clang-format -i " + " ".join(files))
- else:
- for file in files:
- os.system("clang-format -i " + file)
- if header_files:
- add_headers(header_files, "// Copyright Epic Games, Inc. All Rights Reserved.\n\n")
-
-def scan_zen(root):
- with os.scandir(root) as dirs:
- for entry in dirs:
- if entry.is_dir() and entry.name.startswith("zen"):
- scan_tree(os.path.join(root, entry.name))
-
-def is_matching_filename(relative_root_path):
- global match_expressions
- global root_dir
- global valid_extensions
-
- if os.path.splitext(relative_root_path)[1].lower() not in valid_extensions:
- return False
- if not match_expressions:
- return True
- relative_root_path = relative_root_path.replace('\\', '/')
- for regex in match_expressions:
- if regex.fullmatch(relative_root_path):
- return True
- return False
-
-def parse_match_expressions(wildcards, matches):
- global match_expressions
- global valid_extensions
-
- valid_extensions = ['.cpp', '.h']
-
- for wildcard in wildcards:
- regex = wildcard.replace('*', '%FORMAT_STAR%').replace('\\', '/')
- regex = re.escape(regex)
- regex = '.*' + regex.replace('%FORMAT_STAR%', '.*') + '.*'
- try:
- match_expressions.append(re.compile(regex, re.IGNORECASE))
- except Exception as ex:
- print(f'Could not parse input filename expression \'{wildcard}\': {str(ex)}')
- quit()
- for regex in matches:
- try:
- match_expressions.append(re.compile(regex, re.IGNORECASE))
- except Exception as ex:
- print(f'Could not parse input --match expression \'{regex}\': {str(ex)}')
- quit()
-
-def validate_clang_format():
- vstring = subprocess.check_output("clang-format --version", shell=True).decode().rstrip()
-
- match = re.search(r'(\d+)\.(\d+)(\.(\d+))?$', vstring)
- if not match:
- raise ValueError("invalid version number '%s'" % vstring)
-
- (major, minor, patch) = match.group(1, 2, 4)
-
- if int(major) < 13:
- if int(minor) == 0:
- if int(patch) < 1:
- raise ValueError(f'invalid clang-format version -- we require at least v12.0.1')
-
-def _main():
- global root_dir, use_batching
-
- parser = argparse.ArgumentParser()
- parser.add_argument('filenames', nargs='*', help="Match text for filenames. If fullpath contains text it is a match, " +\
- "* is a wildcard. Directory separators are matched by either / or \\. Case insensitive.")
- parser.add_argument('--match', action='append', default=[], help="Match regular expression for filenames. " +\
- "Relative path from the root zen directory must be a complete match. Directory separators are matched only by /. Case insensitive.")
- parser.add_argument('--batch', dest='use_batching', action='store_true', help="Enable batching calls to clang-format.")
- parser.add_argument('--no-batch', dest='use_batching', action='store_false', help="Disable batching calls to clang-format.")
- parser.set_defaults(use_batching=True)
- options = parser.parse_args()
-
- parse_match_expressions(options.filenames, options.match)
- root_dir = pathlib.Path(__file__).parent.parent.resolve()
- use_batching = options.use_batching
-
- validate_clang_format()
-
- while True:
- if (os.path.isfile(".clang-format")):
- scan_zen(".")
- quit()
- else:
- cwd = os.getcwd()
- if os.path.dirname(cwd) == cwd:
- quit()
- os.chdir("..")
-
-
-if __name__ == '__main__':
- _main() \ No newline at end of file
diff --git a/scripts/remote_build.py b/scripts/remote_build.py
index 70f9cf9bf..d814d4a66 100644
--- a/scripts/remote_build.py
+++ b/scripts/remote_build.py
@@ -108,13 +108,14 @@ def _local(args):
# Validate key file. Git's SSH uses OpenSSL which needs UNIX line-endings
if args.keyfile:
+ """
with open(args.keyfile, "rt") as key_file:
lines = [x.strip() for x in key_file]
with open(args.keyfile, "wb") as key_file:
for line in lines:
key_file.write(line.encode() + b"\n")
-
+ """
identity = ("-i", args.keyfile)
else:
identity = ()
diff --git a/zen/zen.cpp b/zen/zen.cpp
index 0a1136da5..302e8496f 100644
--- a/zen/zen.cpp
+++ b/zen/zen.cpp
@@ -1,3 +1,5 @@
+// Copyright Epic Games, Inc. All Rights Reserved.
+
// Zen command line client utility
//
diff --git a/zencore-test/zencore-test.cpp b/zencore-test/zencore-test.cpp
index 37f309be0..327f2f0b5 100644
--- a/zencore-test/zencore-test.cpp
+++ b/zencore-test/zencore-test.cpp
@@ -1,3 +1,5 @@
+// Copyright Epic Games, Inc. All Rights Reserved.
+
// zencore-test.cpp : Defines the entry point for the console application.
//
diff --git a/zencore/compactbinary.cpp b/zencore/compactbinary.cpp
index 05ae32f2b..aa87fb4cb 100644
--- a/zencore/compactbinary.cpp
+++ b/zencore/compactbinary.cpp
@@ -60,22 +60,27 @@ GetPlatformToDateTimeBiasInSeconds()
return uint64_t(double(PlatformEpochYear - DateTimeEpochYear) * 365.2425) * 86400;
}
-DateTime
-DateTime::Now()
+uint64_t
+DateTime::NowTicks()
{
- static const uint64_t EpochBias = GetPlatformToDateTimeBiasInSeconds();
- static const uint64_t SecsTo100nsTicks = int64_t(10e9 / 100);
+ static constexpr uint64_t EpochBias = GetPlatformToDateTimeBiasInSeconds();
#if ZEN_PLATFORM_WINDOWS
FILETIME SysTime;
- GetSystemTimeAsFileTime(&SysTime);
- return DateTime{(EpochBias * SecsTo100nsTicks) + (uint64_t(SysTime.dwHighDateTime) << 32) | SysTime.dwLowDateTime};
+ GetSystemTimePreciseAsFileTime(&SysTime);
+ return (EpochBias * TimeSpan::TicksPerSecond) + ((uint64_t(SysTime.dwHighDateTime) << 32) | SysTime.dwLowDateTime);
#else
int64_t SecondsSinceUnixEpoch = time(nullptr);
- return DateTime{(EpochBias + SecondsSinceUnixEpoch) * SecsTo100nsTicks};
+ return (EpochBias + SecondsSinceUnixEpoch) * TimeSpan::TicksPerSecond;
#endif
}
+DateTime
+DateTime::Now()
+{
+ return DateTime{NowTicks()};
+}
+
void
DateTime::Set(int Year, int Month, int Day, int Hour, int Minute, int Second, int MilliSecond)
{
diff --git a/zencore/filesystem.cpp b/zencore/filesystem.cpp
index 041abaf1d..437741161 100644
--- a/zencore/filesystem.cpp
+++ b/zencore/filesystem.cpp
@@ -23,6 +23,7 @@
#if ZEN_PLATFORM_LINUX
# include <dirent.h>
# include <fcntl.h>
+# include <sys/resource.h>
# include <sys/stat.h>
# include <unistd.h>
#endif
@@ -31,6 +32,7 @@
# include <dirent.h>
# include <fcntl.h>
# include <libproc.h>
+# include <sys/resource.h>
# include <sys/stat.h>
# include <sys/syslimits.h>
# include <unistd.h>
@@ -921,6 +923,10 @@ PathFromHandle(void* NativeHandle)
}
const DWORD RequiredLengthIncludingNul = GetFinalPathNameByHandleW(NativeHandle, nullptr, 0, FILE_NAME_OPENED);
+ if (RequiredLengthIncludingNul == 0)
+ {
+ ThrowLastError(fmt::format("failed to get path from file handle {}", NativeHandle));
+ }
std::wstring FullPath;
FullPath.resize(RequiredLengthIncludingNul - 1);
@@ -982,6 +988,40 @@ GetRunningExecutablePath()
#endif // ZEN_PLATFORM_WINDOWS
}
/** Raise the soft RLIMIT_NOFILE limit to the hard limit so the process can
 *  keep as many file handles open as the OS allows. No-op on platforms other
 *  than Linux/macOS. Failures are logged but not propagated, since running
 *  with the default limit is still functional.
 */
void
MaximizeOpenFileCount()
{
#if ZEN_PLATFORM_LINUX || ZEN_PLATFORM_MAC
	struct rlimit Limit;
	if (getrlimit(RLIMIT_NOFILE, &Limit) != 0)
	{
		// getrlimit() returns -1 on failure and reports the cause in errno;
		// the return value itself is not an error code
		ZEN_WARN("failed getting rlimit RLIMIT_NOFILE, reason '{}'", zen::MakeErrorCode(errno).message());
		return;
	}

	struct rlimit NewLimit = Limit;
	NewLimit.rlim_cur = NewLimit.rlim_max;
	ZEN_INFO("changing RLIMIT_NOFILE from rlim_cur = {}, rlim_max {} to rlim_cur = {}, rlim_max {}",
	         Limit.rlim_cur,
	         Limit.rlim_max,
	         NewLimit.rlim_cur,
	         NewLimit.rlim_max);

	if (setrlimit(RLIMIT_NOFILE, &NewLimit) != 0)
	{
		// As above, the failure reason lives in errno, not in the return value
		ZEN_WARN("failed to set RLIMIT_NOFILE limits from rlim_cur = {}, rlim_max {} to rlim_cur = {}, rlim_max {}, reason '{}'",
		         Limit.rlim_cur,
		         Limit.rlim_max,
		         NewLimit.rlim_cur,
		         NewLimit.rlim_max,
		         zen::MakeErrorCode(errno).message());
	}
#endif
}
+
//////////////////////////////////////////////////////////////////////////
//
// Testing related code follows...
diff --git a/zencore/include/zencore/compactbinary.h b/zencore/include/zencore/compactbinary.h
index 25fd4a7b2..19f1597dc 100644
--- a/zencore/include/zencore/compactbinary.h
+++ b/zencore/include/zencore/compactbinary.h
@@ -43,6 +43,7 @@ public:
inline uint64_t GetTicks() const { return Ticks; }
+ static uint64_t NowTicks();
static DateTime Now();
int GetYear() const;
diff --git a/zencore/include/zencore/filesystem.h b/zencore/include/zencore/filesystem.h
index d1a5f3e0c..a6e76eaa0 100644
--- a/zencore/include/zencore/filesystem.h
+++ b/zencore/include/zencore/filesystem.h
@@ -34,6 +34,10 @@ ZENCORE_API std::filesystem::path PathFromHandle(void* NativeHandle);
ZENCORE_API std::filesystem::path GetRunningExecutablePath();
+/** Set the max open file handle count to max allowed for the current process on Linux and MacOS
+ */
+ZENCORE_API void MaximizeOpenFileCount();
+
struct FileContents
{
std::vector<IoBuffer> Data;
diff --git a/zencore/include/zencore/string.h b/zencore/include/zencore/string.h
index 4c378730f..027730063 100644
--- a/zencore/include/zencore/string.h
+++ b/zencore/include/zencore/string.h
@@ -9,6 +9,7 @@
#include <string.h>
#include <charconv>
#include <codecvt>
+#include <concepts>
#include <optional>
#include <span>
#include <string_view>
@@ -488,6 +489,26 @@ std::string WideToUtf8(const wchar_t* wstr);
void WideToUtf8(const std::wstring_view& wstr, StringBuilderBase& out);
std::string WideToUtf8(const std::wstring_view Wstr);
/// Convert a single ASCII hex digit to its 4-bit value.
/// Accepts '0'-'9', 'a'-'f' and 'A'-'F'; any other character yields 0xff so
/// callers can detect invalid input by checking the 0x80 bit.
inline uint8_t
Char2Nibble(char c)
{
	if (c >= '0' && c <= '9')
	{
		return uint8_t(c - '0');
	}
	if (c >= 'a' && c <= 'f')
	{
		return uint8_t(c - 'a' + 10);
	}
	if (c >= 'A' && c <= 'F')
	{
		return uint8_t(c - 'A' + 10);
	}
	// Sentinel for "not a hex digit" -- note the removed stray ';' after the
	// closing brace (it formed an empty declaration)
	return uint8_t(0xff);
}
+
+static constexpr const char HexChars[] = "0123456789abcdef";
+
/// <summary>
/// Parse hex string into a byte buffer
/// </summary>
@@ -501,38 +522,56 @@ ParseHexBytes(const char* InputString, size_t CharacterCount, uint8_t* OutPtr)
{
ZEN_ASSERT((CharacterCount & 1) == 0);
- auto char2nibble = [](char c) {
- uint8_t c8 = uint8_t(c - '0');
+ uint8_t allBits = 0;
- if (c8 < 10)
- return c8;
+ while (CharacterCount)
+ {
+ uint8_t n0 = Char2Nibble(InputString[0]);
+ uint8_t n1 = Char2Nibble(InputString[1]);
- c8 -= 'A' - '0' - 10;
+ allBits |= n0 | n1;
- if (c8 < 16)
- return c8;
+ *OutPtr = (n0 << 4) | n1;
- c8 -= 'a' - 'A';
+ OutPtr += 1;
+ InputString += 2;
+ CharacterCount -= 2;
+ }
- if (c8 < 16)
- return c8;
+ return (allBits & 0x80) == 0;
+}
- return uint8_t(0xff);
- };
+inline void
+ToHexBytes(const uint8_t* InputData, size_t ByteCount, char* OutString)
+{
+ while (ByteCount--)
+ {
+ uint8_t byte = *InputData++;
+
+ *OutString++ = HexChars[byte >> 4];
+ *OutString++ = HexChars[byte & 15];
+ }
+}
+
+inline bool
+ParseHexNumber(const char* InputString, size_t CharacterCount, uint8_t* OutPtr)
+{
+ ZEN_ASSERT((CharacterCount & 1) == 0);
uint8_t allBits = 0;
+ InputString += CharacterCount;
while (CharacterCount)
{
- uint8_t n0 = char2nibble(InputString[0]);
- uint8_t n1 = char2nibble(InputString[1]);
+ InputString -= 2;
+ uint8_t n0 = Char2Nibble(InputString[0]);
+ uint8_t n1 = Char2Nibble(InputString[1]);
allBits |= n0 | n1;
*OutPtr = (n0 << 4) | n1;
OutPtr += 1;
- InputString += 2;
CharacterCount -= 2;
}
@@ -540,19 +579,43 @@ ParseHexBytes(const char* InputString, size_t CharacterCount, uint8_t* OutPtr)
}
inline void
-ToHexBytes(const uint8_t* InputData, size_t ByteCount, char* OutString)
+ToHexNumber(const uint8_t* InputData, size_t ByteCount, char* OutString)
{
- const char hexchars[] = "0123456789abcdef";
-
+ InputData += ByteCount;
while (ByteCount--)
{
- uint8_t byte = *InputData++;
+ uint8_t byte = *(--InputData);
- *OutString++ = hexchars[byte >> 4];
- *OutString++ = hexchars[byte & 15];
+ *OutString++ = HexChars[byte >> 4];
+ *OutString++ = HexChars[byte & 15];
}
}
+/// <summary>
+/// Generates a hex number from a pointer to an integer type, this formats the number in the correct order for a hexadecimal number
+/// </summary>
+/// <param name="Value">Integer value type</param>
+/// <param name="outString">Output buffer where resulting string is written</param>
+void
+ToHexNumber(UnsignedIntegral auto Value, char* OutString)
+{
+ ToHexNumber((const uint8_t*)&Value, sizeof(Value), OutString);
+ OutString[sizeof(Value) * 2] = 0;
+}
+
+/// <summary>
+/// Parse hex number string into a value, this formats the number in the correct order for a hexadecimal number
+/// </summary>
+/// <param name="string">Input string</param>
+/// <param name="characterCount">Number of characters in string</param>
+/// <param name="OutValue">Pointer to output value</param>
+/// <returns>true if the input consisted of all valid hexadecimal characters</returns>
+bool
+ParseHexNumber(const std::string HexString, UnsignedIntegral auto& OutValue)
+{
+ return ParseHexNumber(HexString.c_str(), sizeof(OutValue) * 2, (uint8_t*)&OutValue);
+}
+
//////////////////////////////////////////////////////////////////////////
// Format numbers for humans
//
diff --git a/zencore/iobuffer.cpp b/zencore/iobuffer.cpp
index e2aaa3169..8a3ab8427 100644
--- a/zencore/iobuffer.cpp
+++ b/zencore/iobuffer.cpp
@@ -186,7 +186,7 @@ IoBufferExtendedCore::IoBufferExtendedCore(const IoBufferExtendedCore* Outer, ui
, m_FileHandle(Outer->m_FileHandle)
, m_FileOffset(Outer->m_FileOffset + Offset)
{
- m_Flags.fetch_or(kIsOwnedByThis | kIsExtended, std::memory_order_relaxed);
+ m_Flags.fetch_or(kIsExtended, std::memory_order_relaxed);
}
IoBufferExtendedCore::~IoBufferExtendedCore()
@@ -217,10 +217,9 @@ IoBufferExtendedCore::~IoBufferExtendedCore()
int Fd = int(uintptr_t(m_FileHandle));
bool Success = (close(Fd) == 0);
#endif
-
if (!Success)
{
- ZEN_WARN("Error reported on file handle close!");
+ ZEN_WARN("Error reported on file handle close, reason '{}'", GetLastErrorAsString());
}
}
diff --git a/zenserver-test/zenserver-test.cpp b/zenserver-test/zenserver-test.cpp
index 2c5d1d11b..548327feb 100644
--- a/zenserver-test/zenserver-test.cpp
+++ b/zenserver-test/zenserver-test.cpp
@@ -2719,7 +2719,7 @@ TEST_CASE("http.package")
CHECK_EQ(ResponsePackage, TestPackage);
}
-TEST_CASE("websocket.basic")
+TEST_CASE("websocket.basic" * doctest::skip(true))
{
std::filesystem::path TestDir = TestEnv.CreateNewTestDir();
const uint16_t PortNumber = 13337;
diff --git a/zenserver/cache/structuredcachestore.cpp b/zenserver/cache/structuredcachestore.cpp
index 769167433..738e4c1fd 100644
--- a/zenserver/cache/structuredcachestore.cpp
+++ b/zenserver/cache/structuredcachestore.cpp
@@ -59,7 +59,11 @@ SaveCompactBinaryObject(const fs::path& Path, const CbObject& Object)
WriteFile(Path, Object.GetBuffer().AsIoBuffer());
}
-ZenCacheStore::ZenCacheStore(CasGc& Gc, const std::filesystem::path& RootDir) : GcStorage(Gc), GcContributor(Gc), m_DiskLayer(RootDir)
+ZenCacheStore::ZenCacheStore(CasGc& Gc, const std::filesystem::path& RootDir)
+: GcStorage(Gc)
+, GcContributor(Gc)
+, m_RootDir(RootDir)
+, m_DiskLayer(RootDir)
{
ZEN_INFO("initializing structured cache at '{}'", RootDir);
CreateDirectories(RootDir);
@@ -188,6 +192,10 @@ ZenCacheStore::Scrub(ScrubContext& Ctx)
void
ZenCacheStore::GatherReferences(GcContext& GcCtx)
{
+ Stopwatch Timer;
+ const auto Guard = MakeGuard(
+ [this, &Timer] { ZEN_INFO("cache gathered all references from '{}' in {}", m_RootDir, NiceTimeSpanMs(Timer.GetElapsedTimeMs())); });
+
access_tracking::AccessTimes AccessTimes;
m_MemLayer.GatherAccessTimes(AccessTimes);
@@ -476,25 +484,27 @@ ZenCacheDiskLayer::CacheBucket::OpenLog(const fs::path& BucketDir, const bool Is
std::filesystem::path SobsPath{BucketDir / "zen.sobs"};
std::filesystem::path SlogPath{BucketDir / "zen.slog"};
- m_SobsFile.Open(SobsPath, IsNew);
- m_SlogFile.Open(SlogPath, IsNew);
+ m_SobsFile.Open(SobsPath, IsNew ? BasicFile::Mode::kTruncate : BasicFile::Mode::kWrite);
+ m_SlogFile.Open(SlogPath, IsNew ? CasLogFile::Mode::kTruncate : CasLogFile::Mode::kWrite);
- m_SlogFile.Replay([&](const DiskIndexEntry& Entry) {
- if (Entry.Key == IoHash::Zero)
- {
- ++InvalidEntryCount;
- }
- else if (Entry.Location.IsFlagSet(DiskLocation::kTombStone))
- {
- m_TotalSize.fetch_sub(Entry.Location.Size(), std::memory_order::relaxed);
- }
- else
- {
- m_Index.insert_or_assign(Entry.Key, IndexEntry(Entry.Location, GcClock::TickCount()));
- m_TotalSize.fetch_add(Entry.Location.Size(), std::memory_order::relaxed);
- }
- MaxFileOffset = std::max<uint64_t>(MaxFileOffset, Entry.Location.Offset() + Entry.Location.Size());
- });
+ m_SlogFile.Replay(
+ [&](const DiskIndexEntry& Entry) {
+ if (Entry.Key == IoHash::Zero)
+ {
+ ++InvalidEntryCount;
+ }
+ else if (Entry.Location.IsFlagSet(DiskLocation::kTombStone))
+ {
+ m_TotalSize.fetch_sub(Entry.Location.Size(), std::memory_order::relaxed);
+ }
+ else
+ {
+ m_Index.insert_or_assign(Entry.Key, IndexEntry(Entry.Location, GcClock::TickCount()));
+ m_TotalSize.fetch_add(Entry.Location.Size(), std::memory_order::relaxed);
+ }
+ MaxFileOffset = std::max<uint64_t>(MaxFileOffset, Entry.Location.Offset() + Entry.Location.Size());
+ },
+ 0);
if (InvalidEntryCount)
{
@@ -757,6 +767,10 @@ ZenCacheDiskLayer::CacheBucket::GatherReferences(GcContext& GcCtx)
{
ZEN_TRACE_CPU("Z$::DiskLayer::CacheBucket::GatherReferences");
+ Stopwatch Timer;
+ const auto Guard = MakeGuard(
+ [this, &Timer] { ZEN_INFO("gathered references from '{}' in {}", m_BucketDir, NiceTimeSpanMs(Timer.GetElapsedTimeMs())); });
+
const GcClock::TimePoint ExpireTime =
GcCtx.MaxCacheDuration() == GcClock::Duration::max() ? GcClock::TimePoint::min() : GcCtx.Time() - GcCtx.MaxCacheDuration();
@@ -905,8 +919,8 @@ ZenCacheDiskLayer::CacheBucket::CollectGarbage(GcContext& GcCtx)
m_SlogFile.Close();
const bool IsNew = true;
- m_SobsFile.Open(m_BucketDir / "zen.sobs", IsNew);
- m_SlogFile.Open(m_BucketDir / "zen.slog", IsNew);
+ m_SobsFile.Open(m_BucketDir / "zen.sobs", IsNew ? BasicFile::Mode::kTruncate : BasicFile::Mode::kWrite);
+ m_SlogFile.Open(m_BucketDir / "zen.slog", IsNew ? CasLogFile::Mode::kTruncate : CasLogFile::Mode::kWrite);
m_SobsCursor = 0;
m_TotalSize = 0;
@@ -967,8 +981,8 @@ ZenCacheDiskLayer::CacheBucket::CollectGarbage(GcContext& GcCtx)
uint64_t TmpCursor{};
std::vector<uint8_t> Chunk;
- TmpSobs.Open(TmpSobsPath, true);
- TmpLog.Open(TmpSlogPath, true);
+ TmpSobs.Open(TmpSobsPath, BasicFile::Mode::kTruncate);
+ TmpLog.Open(TmpSlogPath, CasLogFile::Mode::kTruncate);
for (const auto& Entry : ValidEntries)
{
diff --git a/zenserver/compute/function.cpp b/zenserver/compute/function.cpp
index 996573573..dd31013ef 100644
--- a/zenserver/compute/function.cpp
+++ b/zenserver/compute/function.cpp
@@ -54,6 +54,16 @@ HttpFunctionService::HttpFunctionService(CasStore& Store,
m_Router.AddPattern("action", "([[:xdigit:]]{40})");
m_Router.RegisterRoute(
+ "ready",
+ [this](HttpRouterRequest& Req) {
+ HttpServerRequest& HttpReq = Req.ServerRequest();
+
+ // Todo: check upstream health
+ return HttpReq.WriteResponse(HttpResponseCode::OK);
+ },
+ HttpVerb::kGet);
+
+ m_Router.RegisterRoute(
"workers/{worker}",
[this](HttpRouterRequest& Req) {
HttpServerRequest& HttpReq = Req.ServerRequest();
@@ -485,8 +495,8 @@ HttpFunctionService::ExecActionUpstream(const WorkerDesc& Worker, CbObject& Obje
HttpResponseCode
HttpFunctionService::ExecActionUpstreamResult(const IoHash& WorkerId, CbObject& Object)
{
- const static IoHash Empty = CbObject().GetHash();
- auto Status = m_UpstreamApply->GetStatus(WorkerId, Empty);
+ const static IoHash Empty = CbObject().GetHash();
+ auto Status = m_UpstreamApply->GetStatus(WorkerId, Empty);
if (!Status.Success)
{
return HttpResponseCode::NotFound;
@@ -532,6 +542,15 @@ HttpFunctionService::ExecActionUpstreamResult(const IoHash& WorkerId, CbObject&
ResultObject.AddString("stdout"sv, Completed.StdOut);
ResultObject.AddString("stderr"sv, Completed.StdErr);
ResultObject.AddInteger("exitcode"sv, Completed.Error.ErrorCode);
+ ResultObject.BeginArray("stats"sv);
+ for (const auto& Timepoint : Completed.Timepoints)
+ {
+ ResultObject.BeginObject();
+ ResultObject.AddString("name"sv, Timepoint.first);
+ ResultObject.AddDateTimeTicks("time"sv, Timepoint.second);
+ ResultObject.EndObject();
+ }
+ ResultObject.EndArray();
ResultObject.BeginArray("files"sv);
for (const auto& File : Completed.OutputFiles)
diff --git a/zenserver/config.cpp b/zenserver/config.cpp
index b7fc18b4e..be91ae4f8 100644
--- a/zenserver/config.cpp
+++ b/zenserver/config.cpp
@@ -389,6 +389,49 @@ ParseCliOptions(int argc, char* argv[], ZenServerOptions& ServerOptions)
options.add_option("compute",
"",
+ "upstream-horde-storage-url",
+ "URL to a Horde Storage instance.",
+ cxxopts::value<std::string>(ServerOptions.UpstreamCacheConfig.HordeConfig.StorageUrl)->default_value(""),
+ "");
+
+ options.add_option("compute",
+ "",
+ "upstream-horde-storage-oauth-url",
+                       "URL to the OAuth provider",
+ cxxopts::value<std::string>(ServerOptions.UpstreamCacheConfig.HordeConfig.StorageOAuthUrl)->default_value(""),
+ "");
+
+ options.add_option("compute",
+ "",
+ "upstream-horde-storage-oauth-clientid",
+ "The OAuth client ID",
+ cxxopts::value<std::string>(ServerOptions.UpstreamCacheConfig.HordeConfig.StorageOAuthClientId)->default_value(""),
+ "");
+
+ options.add_option(
+ "compute",
+ "",
+ "upstream-horde-storage-oauth-clientsecret",
+ "The OAuth client secret",
+ cxxopts::value<std::string>(ServerOptions.UpstreamCacheConfig.HordeConfig.StorageOAuthClientSecret)->default_value(""),
+ "");
+
+ options.add_option("compute",
+ "",
+ "upstream-horde-storage-openid-provider",
+ "Name of a registered Open ID provider",
+ cxxopts::value<std::string>(ServerOptions.UpstreamCacheConfig.HordeConfig.StorageOpenIdProvider)->default_value(""),
+ "");
+
+ options.add_option("compute",
+ "",
+ "upstream-horde-storage-token",
+ "A static authentication token",
+ cxxopts::value<std::string>(ServerOptions.UpstreamCacheConfig.HordeConfig.StorageAccessToken)->default_value(""),
+ "");
+
+ options.add_option("compute",
+ "",
"upstream-horde-cluster",
"The Horde compute cluster id",
cxxopts::value<std::string>(ServerOptions.UpstreamCacheConfig.HordeConfig.Cluster)->default_value(""),
@@ -428,6 +471,13 @@ ParseCliOptions(int argc, char* argv[], ZenServerOptions& ServerOptions)
"Max duration in seconds before Z$ entries get evicted.",
cxxopts::value<int32_t>(ServerOptions.GcConfig.Cache.MaxDurationSeconds)->default_value("86400"),
"");
+
+ options.add_option("gc",
+ "",
+ "disk-reserve-size",
+ "Size of gc disk reserve in bytes.",
+ cxxopts::value<uint64_t>(ServerOptions.GcConfig.DiskReserveSize)->default_value("268435456"),
+ "");
try
{
auto result = options.parse(argc, argv);
@@ -693,12 +743,35 @@ ParseConfigFile(const std::filesystem::path& Path, ZenServerOptions& ServerOptio
std::string_view("namespace"),
ServerOptions.UpstreamCacheConfig.HordeConfig.Namespace);
};
+
+ if (auto StorageConfig = UpstreamConfig->get<sol::optional<sol::table>>("storage"))
+ {
+ UpdateStringValueFromConfig(StorageConfig.value(),
+ std::string_view("url"),
+ ServerOptions.UpstreamCacheConfig.HordeConfig.StorageUrl);
+ UpdateStringValueFromConfig(StorageConfig.value(),
+ std::string_view("oauthprovider"),
+ ServerOptions.UpstreamCacheConfig.HordeConfig.StorageOAuthUrl);
+ UpdateStringValueFromConfig(StorageConfig.value(),
+ std::string_view("oauthclientid"),
+ ServerOptions.UpstreamCacheConfig.HordeConfig.StorageOAuthClientId);
+ UpdateStringValueFromConfig(StorageConfig.value(),
+ std::string_view("oauthclientsecret"),
+ ServerOptions.UpstreamCacheConfig.HordeConfig.StorageOAuthClientSecret);
+ UpdateStringValueFromConfig(StorageConfig.value(),
+ std::string_view("openidprovider"),
+ ServerOptions.UpstreamCacheConfig.HordeConfig.StorageOpenIdProvider);
+ UpdateStringValueFromConfig(StorageConfig.value(),
+ std::string_view("token"),
+ ServerOptions.UpstreamCacheConfig.HordeConfig.StorageAccessToken);
+ };
}
}
if (sol::optional<sol::table> GcConfig = lua["gc"])
{
ServerOptions.GcConfig.IntervalSeconds = GcConfig.value().get_or("intervalseconds", 0);
+ ServerOptions.GcConfig.DiskReserveSize = GcConfig.value().get_or("diskreservesize", uint64_t(1u << 28));
if (sol::optional<sol::table> CacheGcConfig = GcConfig.value()["cache"])
{
diff --git a/zenserver/config.h b/zenserver/config.h
index a61a7f89f..49f039d8d 100644
--- a/zenserver/config.h
+++ b/zenserver/config.h
@@ -38,6 +38,14 @@ struct ZenUpstreamHordeConfig
std::string OAuthClientSecret;
std::string OpenIdProvider;
std::string AccessToken;
+
+ std::string StorageUrl;
+ std::string StorageOAuthUrl;
+ std::string StorageOAuthClientId;
+ std::string StorageOAuthClientSecret;
+ std::string StorageOpenIdProvider;
+ std::string StorageAccessToken;
+
std::string Cluster;
std::string Namespace;
};
@@ -91,6 +99,7 @@ struct ZenGcConfig
int32_t IntervalSeconds = 0;
bool CollectSmallObjects = true;
bool Enabled = true;
+ uint64_t DiskReserveSize = 1ul << 28;
};
struct ZenServerOptions
diff --git a/zenserver/projectstore.cpp b/zenserver/projectstore.cpp
index 58b806989..617f50660 100644
--- a/zenserver/projectstore.cpp
+++ b/zenserver/projectstore.cpp
@@ -8,6 +8,7 @@
#include <zencore/filesystem.h>
#include <zencore/fmtutils.h>
#include <zencore/logging.h>
+#include <zencore/scopeguard.h>
#include <zencore/stream.h>
#include <zencore/string.h>
#include <zencore/testing.h>
@@ -114,10 +115,10 @@ struct ProjectStore::OplogStorage : public RefCounted
CreateDirectories(m_OplogStoragePath);
}
- m_Oplog.Open(m_OplogStoragePath / "ops.zlog", IsCreate);
+ m_Oplog.Open(m_OplogStoragePath / "ops.zlog", IsCreate ? CasLogFile::Mode::kTruncate : CasLogFile::Mode::kWrite);
m_Oplog.Initialize();
- m_OpBlobs.Open(m_OplogStoragePath / "ops.zops", IsCreate);
+ m_OpBlobs.Open(m_OplogStoragePath / "ops.zops", IsCreate ? BasicFile::Mode::kTruncate : BasicFile::Mode::kWrite);
ZEN_ASSERT(IsPow2(m_OpsAlign));
ZEN_ASSERT(!(m_NextOpsOffset & (m_OpsAlign - 1)));
@@ -180,36 +181,39 @@ struct ProjectStore::OplogStorage : public RefCounted
uint64_t InvalidEntries = 0;
- m_Oplog.Replay([&](const zen::OplogEntry& LogEntry) {
- if (LogEntry.OpCoreSize == 0)
- {
- ++InvalidEntries;
+ m_Oplog.Replay(
+ [&](const zen::OplogEntry& LogEntry) {
+ if (LogEntry.OpCoreSize == 0)
+ {
+ ++InvalidEntries;
- return;
- }
+ return;
+ }
- IoBuffer OpBuffer(LogEntry.OpCoreSize);
+ IoBuffer OpBuffer(LogEntry.OpCoreSize);
- const uint64_t OpFileOffset = LogEntry.OpCoreOffset * m_OpsAlign;
+ const uint64_t OpFileOffset = LogEntry.OpCoreOffset * m_OpsAlign;
- m_OpBlobs.Read((void*)OpBuffer.Data(), LogEntry.OpCoreSize, OpFileOffset);
+ m_OpBlobs.Read((void*)OpBuffer.Data(), LogEntry.OpCoreSize, OpFileOffset);
- // Verify checksum, ignore op data if incorrect
- const auto OpCoreHash = uint32_t(XXH3_64bits(OpBuffer.Data(), OpBuffer.Size()) & 0xffffFFFF);
+ // Verify checksum, ignore op data if incorrect
+ const auto OpCoreHash = uint32_t(XXH3_64bits(OpBuffer.Data(), OpBuffer.Size()) & 0xffffFFFF);
- if (OpCoreHash != LogEntry.OpCoreHash)
- {
- ZEN_WARN("skipping oplog entry with bad checksum!");
- return;
- }
+ if (OpCoreHash != LogEntry.OpCoreHash)
+ {
+ ZEN_WARN("skipping oplog entry with bad checksum!");
+ return;
+ }
- CbObject Op(SharedBuffer::MakeView(OpBuffer.Data(), OpBuffer.Size()));
+ CbObject Op(SharedBuffer::MakeView(OpBuffer.Data(), OpBuffer.Size()));
- m_NextOpsOffset = Max(m_NextOpsOffset.load(std::memory_order_relaxed), RoundUp(OpFileOffset + LogEntry.OpCoreSize, m_OpsAlign));
- m_MaxLsn = Max(m_MaxLsn.load(std::memory_order_relaxed), LogEntry.OpLsn);
+ m_NextOpsOffset =
+ Max(m_NextOpsOffset.load(std::memory_order_relaxed), RoundUp(OpFileOffset + LogEntry.OpCoreSize, m_OpsAlign));
+ m_MaxLsn = Max(m_MaxLsn.load(std::memory_order_relaxed), LogEntry.OpLsn);
- Handler(Op, LogEntry);
- });
+ Handler(Op, LogEntry);
+ },
+ 0);
if (InvalidEntries)
{
@@ -653,7 +657,7 @@ ProjectStore::Project::Read()
ZEN_INFO("reading config for project '{}' from {}", Identifier, ProjectStateFilePath);
BasicFile Blob;
- Blob.Open(ProjectStateFilePath, false);
+ Blob.Open(ProjectStateFilePath, BasicFile::Mode::kRead);
IoBuffer Obj = Blob.ReadAll();
CbValidateError ValidationError = ValidateCompactBinary(MemoryView(Obj.Data(), Obj.Size()), CbValidateMode::All);
@@ -693,7 +697,7 @@ ProjectStore::Project::Write()
ZEN_INFO("persisting config for project '{}' to {}", Identifier, ProjectStateFilePath);
BasicFile Blob;
- Blob.Open(ProjectStateFilePath, true);
+ Blob.Open(ProjectStateFilePath, BasicFile::Mode::kTruncate);
Blob.Write(Mem.Data(), Mem.Size(), 0);
Blob.Flush();
}
@@ -970,6 +974,10 @@ ProjectStore::Scrub(ScrubContext& Ctx)
void
ProjectStore::GatherReferences(GcContext& GcCtx)
{
+ Stopwatch Timer;
+ const auto Guard =
+ MakeGuard([this, &Timer] { ZEN_INFO("project store gathered all references in {}", NiceTimeSpanMs(Timer.GetElapsedTimeMs())); });
+
DiscoverProjects();
RwLock::SharedLockScope _(m_ProjectsLock);
diff --git a/zenserver/testing/launch.cpp b/zenserver/testing/launch.cpp
index 0334429b0..1236e6adb 100644
--- a/zenserver/testing/launch.cpp
+++ b/zenserver/testing/launch.cpp
@@ -167,10 +167,10 @@ SandboxedJob::GrantNamedObjectAccess(PWSTR ObjectName, SE_OBJECT_TYPE ObjectType
.grfAccessMode = GRANT_ACCESS,
.grfInheritance = grfInhericance,
.Trustee = {.pMultipleTrustee = nullptr,
- .MultipleTrusteeOperation = NO_MULTIPLE_TRUSTEE,
- .TrusteeForm = TRUSTEE_IS_SID,
- .TrusteeType = TRUSTEE_IS_GROUP,
- .ptstrName = (PWSTR)m_AppContainerSid}};
+ .MultipleTrusteeOperation = NO_MULTIPLE_TRUSTEE,
+ .TrusteeForm = TRUSTEE_IS_SID,
+ .TrusteeType = TRUSTEE_IS_GROUP,
+ .ptstrName = (PWSTR)m_AppContainerSid}};
PACL OldAcl = nullptr;
diff --git a/zenserver/upstream/hordecompute.cpp b/zenserver/upstream/hordecompute.cpp
new file mode 100644
index 000000000..dbf86cc13
--- /dev/null
+++ b/zenserver/upstream/hordecompute.cpp
@@ -0,0 +1,1374 @@
+// Copyright Epic Games, Inc. All Rights Reserved.
+
+#include "upstreamapply.h"
+
+#if ZEN_WITH_COMPUTE_SERVICES
+
+# include "jupiter.h"
+
+# include <zencore/compactbinary.h>
+# include <zencore/compactbinarybuilder.h>
+# include <zencore/compactbinarypackage.h>
+# include <zencore/compactbinaryvalidation.h>
+# include <zencore/fmtutils.h>
+# include <zencore/session.h>
+# include <zencore/stream.h>
+# include <zencore/thread.h>
+# include <zencore/timer.h>
+# include <zencore/workthreadpool.h>
+
+# include <zenstore/cas.h>
+# include <zenstore/cidstore.h>
+
+# include <auth/authmgr.h>
+# include <upstream/upstreamcache.h>
+
+# include "cache/structuredcachestore.h"
+# include "diag/logging.h"
+
+# include <fmt/format.h>
+
+# include <algorithm>
+# include <atomic>
+# include <set>
+# include <stack>
+
+namespace zen {
+
+using namespace std::literals;
+
+static const IoBuffer EmptyBuffer;
+static const IoHash EmptyBufferId = IoHash::HashBuffer(EmptyBuffer);
+
+namespace detail {
+
+ class HordeUpstreamApplyEndpoint final : public UpstreamApplyEndpoint
+ {
+ public:
+ HordeUpstreamApplyEndpoint(const CloudCacheClientOptions& ComputeOptions,
+ const UpstreamAuthConfig& ComputeAuthConfig,
+ const CloudCacheClientOptions& StorageOptions,
+ const UpstreamAuthConfig& StorageAuthConfig,
+ CasStore& CasStore,
+ CidStore& CidStore,
+ AuthMgr& Mgr)
+ : m_Log(logging::Get("upstream-apply"))
+ , m_CasStore(CasStore)
+ , m_CidStore(CidStore)
+ , m_AuthMgr(Mgr)
+ {
+ m_DisplayName = fmt::format("{} - '{}'+'{}'", ComputeOptions.Name, ComputeOptions.ServiceUrl, StorageOptions.ServiceUrl);
+ m_ChannelId = fmt::format("zen-{}", zen::GetSessionIdString());
+
+ {
+ std::unique_ptr<CloudCacheTokenProvider> TokenProvider;
+
+ if (ComputeAuthConfig.OAuthUrl.empty() == false)
+ {
+ TokenProvider =
+ CloudCacheTokenProvider::CreateFromOAuthClientCredentials({.Url = ComputeAuthConfig.OAuthUrl,
+ .ClientId = ComputeAuthConfig.OAuthClientId,
+ .ClientSecret = ComputeAuthConfig.OAuthClientSecret});
+ }
+ else if (ComputeAuthConfig.OpenIdProvider.empty() == false)
+ {
+ TokenProvider =
+ CloudCacheTokenProvider::CreateFromCallback([this, ProviderName = std::string(ComputeAuthConfig.OpenIdProvider)]() {
+ AuthMgr::OpenIdAccessToken Token = m_AuthMgr.GetOpenIdAccessToken(ProviderName);
+ return CloudCacheAccessToken{.Value = Token.AccessToken, .ExpireTime = Token.ExpireTime};
+ });
+ }
+ else
+ {
+ CloudCacheAccessToken AccessToken{.Value = std::string(ComputeAuthConfig.AccessToken),
+ .ExpireTime = CloudCacheAccessToken::TimePoint::max()};
+ TokenProvider = CloudCacheTokenProvider::CreateFromStaticToken(AccessToken);
+ }
+
+ m_Client = new CloudCacheClient(ComputeOptions, std::move(TokenProvider));
+ }
+
+ {
+ std::unique_ptr<CloudCacheTokenProvider> TokenProvider;
+
+ if (StorageAuthConfig.OAuthUrl.empty() == false)
+ {
+ TokenProvider =
+ CloudCacheTokenProvider::CreateFromOAuthClientCredentials({.Url = StorageAuthConfig.OAuthUrl,
+ .ClientId = StorageAuthConfig.OAuthClientId,
+ .ClientSecret = StorageAuthConfig.OAuthClientSecret});
+ }
+ else if (StorageAuthConfig.OpenIdProvider.empty() == false)
+ {
+ TokenProvider =
+ CloudCacheTokenProvider::CreateFromCallback([this, ProviderName = std::string(StorageAuthConfig.OpenIdProvider)]() {
+ AuthMgr::OpenIdAccessToken Token = m_AuthMgr.GetOpenIdAccessToken(ProviderName);
+ return CloudCacheAccessToken{.Value = Token.AccessToken, .ExpireTime = Token.ExpireTime};
+ });
+ }
+ else
+ {
+ CloudCacheAccessToken AccessToken{.Value = std::string(StorageAuthConfig.AccessToken),
+ .ExpireTime = CloudCacheAccessToken::TimePoint::max()};
+ TokenProvider = CloudCacheTokenProvider::CreateFromStaticToken(AccessToken);
+ }
+
+ m_StorageClient = new CloudCacheClient(StorageOptions, std::move(TokenProvider));
+ }
+ }
+
		virtual ~HordeUpstreamApplyEndpoint() = default;

		// First health probe; identical to CheckHealth() so construction-time
		// and periodic checks share one code path
		virtual UpstreamEndpointHealth Initialize() override { return CheckHealth(); }

		// Cheap cached view of the last recorded health state (updated by
		// CheckHealth() and on PostApply() exceptions)
		virtual bool IsHealthy() const override { return m_HealthOk.load(); }
+
		// Probe the compute endpoint by authenticating a fresh session.
		// Updates the cached m_HealthOk flag (read by IsHealthy()) and returns
		// the outcome; never throws -- any exception is converted into an
		// unhealthy result carrying the exception text.
		virtual UpstreamEndpointHealth CheckHealth() override
		{
			try
			{
				CloudCacheSession Session(m_Client);
				CloudCacheResult Result = Session.Authenticate();

				// NOTE(review): the cached flag is derived from ErrorCode == 0
				// while the returned .Ok uses Result.Success -- presumably
				// equivalent; confirm they cannot disagree
				m_HealthOk = Result.ErrorCode == 0;

				return {.Reason = std::move(Result.Reason), .Ok = Result.Success};
			}
			catch (std::exception& Err)
			{
				// NOTE(review): m_HealthOk is left unchanged on exception here,
				// unlike PostApply() which clears it -- confirm intentional
				return {.Reason = Err.what(), .Ok = false};
			}
		}
+
+ virtual std::string_view DisplayName() const override { return m_DisplayName; }
+
+ virtual PostUpstreamApplyResult PostApply(UpstreamApplyRecord ApplyRecord) override
+ {
+ PostUpstreamApplyResult ApplyResult{};
+ ApplyResult.Timepoints.merge(ApplyRecord.Timepoints);
+
+ try
+ {
+ UpstreamData UpstreamData;
+ if (!ProcessApplyKey(ApplyRecord, UpstreamData))
+ {
+ return {.Error{.ErrorCode = -1, .Reason = "Failed to generate task data"}};
+ }
+
+ {
+ ApplyResult.Timepoints["zen-storage-build-ref"] = DateTime::NowTicks();
+ std::scoped_lock Lock(m_TaskMutex);
+ if (m_PendingTasks.contains(UpstreamData.TaskId))
+ {
+ // Pending task is already queued, return success
+ ApplyResult.Success = true;
+ return ApplyResult;
+ }
+ m_PendingTasks[UpstreamData.TaskId] = std::move(ApplyRecord);
+ }
+
+ CloudCacheSession ComputeSession(m_Client);
+ CloudCacheSession StorageSession(m_StorageClient);
+
+ {
+ CloudCacheResult Result = BatchPutBlobsIfMissing(StorageSession, UpstreamData.Blobs);
+ ApplyResult.Bytes += Result.Bytes;
+ ApplyResult.ElapsedSeconds += Result.ElapsedSeconds;
+ ApplyResult.Timepoints["zen-storage-upload-blobs"] = DateTime::NowTicks();
+ if (!Result.Success)
+ {
+ ApplyResult.Error = {.ErrorCode = Result.ErrorCode,
+ .Reason = !Result.Reason.empty() ? std::move(Result.Reason) : "Failed to upload blobs"};
+ return ApplyResult;
+ }
+ UpstreamData.Blobs.clear();
+ }
+
+ {
+ CloudCacheResult Result = BatchPutObjectsIfMissing(StorageSession, UpstreamData.Objects);
+ ApplyResult.Bytes += Result.Bytes;
+ ApplyResult.ElapsedSeconds += Result.ElapsedSeconds;
+ ApplyResult.Timepoints["zen-storage-upload-objects"] = DateTime::NowTicks();
+ if (!Result.Success)
+ {
+ ApplyResult.Error = {.ErrorCode = Result.ErrorCode,
+ .Reason = !Result.Reason.empty() ? std::move(Result.Reason) : "Failed to upload objects"};
+ return ApplyResult;
+ }
+ }
+
+ {
+ PutRefResult RefResult = StorageSession.PutRef("requests"sv,
+ UpstreamData.TaskId,
+ UpstreamData.Objects[UpstreamData.TaskId].GetBuffer().AsIoBuffer(),
+ ZenContentType::kCbObject);
+ Log().debug("Put ref {} Need={} Bytes={} Duration={}s Result={}",
+ UpstreamData.TaskId,
+ RefResult.Needs.size(),
+ RefResult.Bytes,
+ RefResult.ElapsedSeconds,
+ RefResult.Success);
+ ApplyResult.Bytes += RefResult.Bytes;
+ ApplyResult.ElapsedSeconds += RefResult.ElapsedSeconds;
+ ApplyResult.Timepoints["zen-storage-put-ref"] = DateTime::NowTicks();
+
+ if (RefResult.Needs.size() > 0)
+ {
+ Log().error("Failed to add task ref {} due to {} missing blobs", UpstreamData.TaskId, RefResult.Needs.size());
+ for (const auto& Hash : RefResult.Needs)
+ {
+ Log().debug("Task ref {} missing blob {}", UpstreamData.TaskId, Hash);
+ }
+
+ ApplyResult.Error = {.ErrorCode = RefResult.ErrorCode,
+ .Reason = !RefResult.Reason.empty() ? std::move(RefResult.Reason)
+ : "Failed to add task ref due to missing blob"};
+ return ApplyResult;
+ }
+
+ if (!RefResult.Success)
+ {
+ ApplyResult.Error = {.ErrorCode = RefResult.ErrorCode,
+ .Reason = !RefResult.Reason.empty() ? std::move(RefResult.Reason) : "Failed to add task ref"};
+ return ApplyResult;
+ }
+ UpstreamData.Objects.clear();
+ }
+
+ {
+ CbObjectWriter Writer;
+ Writer.AddString("c"sv, m_ChannelId);
+ Writer.AddObjectAttachment("r"sv, UpstreamData.RequirementsId);
+ Writer.BeginArray("t"sv);
+ Writer.AddObjectAttachment(UpstreamData.TaskId);
+ Writer.EndArray();
+ CbObject TasksObject = Writer.Save();
+ IoBuffer TasksData = TasksObject.GetBuffer().AsIoBuffer();
+
+ CloudCacheResult Result = ComputeSession.PostComputeTasks(TasksData);
+ Log().debug("Post compute task {} Bytes={} Duration={}s Result={}",
+ TasksObject.GetHash(),
+ Result.Bytes,
+ Result.ElapsedSeconds,
+ Result.Success);
+ ApplyResult.Bytes += Result.Bytes;
+ ApplyResult.ElapsedSeconds += Result.ElapsedSeconds;
+ ApplyResult.Timepoints["zen-horde-post-task"] = DateTime::NowTicks();
+ if (!Result.Success)
+ {
+ {
+ std::scoped_lock Lock(m_TaskMutex);
+ m_PendingTasks.erase(UpstreamData.TaskId);
+ }
+
+ ApplyResult.Error = {.ErrorCode = Result.ErrorCode,
+ .Reason = !Result.Reason.empty() ? std::move(Result.Reason) : "Failed to post compute task"};
+ return ApplyResult;
+ }
+ }
+
+ Log().info("Task posted {}", UpstreamData.TaskId);
+ ApplyResult.Success = true;
+ return ApplyResult;
+ }
+ catch (std::exception& Err)
+ {
+ m_HealthOk = false;
+ return {.Error{.ErrorCode = -1, .Reason = Err.what()}};
+ }
+ }
+
		// Upload only the blobs the endpoint does not already have: one
		// batched existence query over all keys, then a sequential upload of
		// each missing blob. Byte and timing totals are aggregated into the
		// returned result; the first failed upload aborts the loop, and
		// Success is set only once every missing blob was stored.
		[[nodiscard]] CloudCacheResult BatchPutBlobsIfMissing(CloudCacheSession& Session, const std::map<IoHash, IoBuffer>& Blobs)
		{
			if (Blobs.size() == 0)
			{
				// Nothing to upload -- trivially successful
				return {.Success = true};
			}

			int64_t Bytes{};
			double	ElapsedSeconds{};

			// Batch check for missing blobs
			std::set<IoHash> Keys;
			std::transform(Blobs.begin(), Blobs.end(), std::inserter(Keys, Keys.end()), [](const auto& It) { return It.first; });

			CloudCacheExistsResult ExistsResult = Session.BlobExists(Keys);
			Log().debug("Queried {} missing blobs Need={} Duration={}s Result={}",
						Keys.size(),
						ExistsResult.Needs.size(),
						ExistsResult.ElapsedSeconds,
						ExistsResult.Success);
			ElapsedSeconds += ExistsResult.ElapsedSeconds;
			if (!ExistsResult.Success)
			{
				// Preserve the endpoint's error code when present; -1 marks a
				// failure that reported no code of its own
				return {.Bytes = Bytes,
						.ElapsedSeconds = ElapsedSeconds,
						.ErrorCode = ExistsResult.ErrorCode ? ExistsResult.ErrorCode : -1,
						.Reason = !ExistsResult.Reason.empty() ? std::move(ExistsResult.Reason) : "Failed to check if blobs exist"};
			}

			// Upload each blob the endpoint reported as missing
			for (const auto& Hash : ExistsResult.Needs)
			{
				CloudCacheResult Result = Session.PutBlob(Hash, Blobs.at(Hash));
				Log().debug("Put blob {} Bytes={} Duration={}s Result={}", Hash, Result.Bytes, Result.ElapsedSeconds, Result.Success);
				Bytes += Result.Bytes;
				ElapsedSeconds += Result.ElapsedSeconds;
				if (!Result.Success)
				{
					return {.Bytes = Bytes,
							.ElapsedSeconds = ElapsedSeconds,
							.ErrorCode = Result.ErrorCode ? Result.ErrorCode : -1,
							.Reason = !Result.Reason.empty() ? std::move(Result.Reason) : "Failed to put blobs"};
				}
			}

			return {.Bytes = Bytes, .ElapsedSeconds = ElapsedSeconds, .Success = true};
		}
+
+ // Uploads the subset of 'Objects' that the remote cache reports as missing.
+ // Mirrors BatchPutBlobsIfMissing: one batched existence query, then one
+ // upload per missing object (serialized via its compact-binary buffer).
+ // Stops at the first failure with the service-provided reason when available.
+ [[nodiscard]] CloudCacheResult BatchPutObjectsIfMissing(CloudCacheSession& Session, const std::map<IoHash, CbObject>& Objects)
+ {
+  if (Objects.empty())
+  {
+   return {.Success = true};
+  }
+
+  int64_t Bytes{};
+  double ElapsedSeconds{};
+
+  // Batch check for missing objects
+  std::set<IoHash> Keys;
+  for (const auto& Entry : Objects)
+  {
+   Keys.insert(Entry.first);
+  }
+
+  CloudCacheExistsResult ExistsResult = Session.ObjectExists(Keys);
+  Log().debug("Queried {} missing objects Need={} Duration={}s Result={}",
+     Keys.size(),
+     ExistsResult.Needs.size(),
+     ExistsResult.ElapsedSeconds,
+     ExistsResult.Success);
+  ElapsedSeconds += ExistsResult.ElapsedSeconds;
+  if (!ExistsResult.Success)
+  {
+   return {.Bytes = Bytes,
+     .ElapsedSeconds = ElapsedSeconds,
+     .ErrorCode = ExistsResult.ErrorCode ? ExistsResult.ErrorCode : -1,
+     .Reason = !ExistsResult.Reason.empty() ? std::move(ExistsResult.Reason) : "Failed to check if objects exist"};
+  }
+
+  // Upload only what the service asked for.
+  for (const auto& Hash : ExistsResult.Needs)
+  {
+   CloudCacheResult Result = Session.PutObject(Hash, Objects.at(Hash).GetBuffer().AsIoBuffer());
+   Log().debug("Put object {} Bytes={} Duration={}s Result={}", Hash, Result.Bytes, Result.ElapsedSeconds, Result.Success);
+   Bytes += Result.Bytes;
+   ElapsedSeconds += Result.ElapsedSeconds;
+   if (!Result.Success)
+   {
+    return {.Bytes = Bytes,
+      .ElapsedSeconds = ElapsedSeconds,
+      .ErrorCode = Result.ErrorCode ? Result.ErrorCode : -1,
+      .Reason = !Result.Reason.empty() ? std::move(Result.Reason) : "Failed to put objects"};
+   }
+  }
+
+  return {.Bytes = Bytes, .ElapsedSeconds = ElapsedSeconds, .Success = true};
+ }
+
+ // Lifecycle states reported by Horde for a posted compute task. Values are
+ // parsed from the wire ("s" field in task status updates) — do not renumber.
+ enum class ComputeTaskState : int32_t
+ {
+  Queued = 0,
+  Executing = 1,
+  Complete = 2,
+ };
+
+ // Final outcome codes reported by Horde for a completed task. Values are
+ // parsed from the wire ("o" field in task status updates) — do not renumber.
+ enum class ComputeTaskOutcome : int32_t
+ {
+  Success = 0,
+  Failed = 1,
+  Cancelled = 2,
+  NoResult = 3,
+  Exipred = 4, // [sic] misspelling of "Expired"; renaming needs a coordinated change with ComputeTaskOutcomeToString
+  BlobNotFound = 5,
+  Exception = 6,
+ };
+
+ // Returns a human-readable name for a ComputeTaskState (logging only).
+ // Unrecognized values map to "Unknown" rather than asserting, since the
+ // value arrives from a remote service and may grow new states.
+ [[nodiscard]] static std::string_view ComputeTaskStateToString(const ComputeTaskState State)
+ {
+  switch (State)
+  {
+   case ComputeTaskState::Queued:
+    return "Queued"sv;
+   case ComputeTaskState::Executing:
+    return "Executing"sv;
+   case ComputeTaskState::Complete:
+    return "Complete"sv;
+  }
+  return "Unknown"sv;
+ }
+
+ [[nodiscard]] static std::string_view ComputeTaskOutcomeToString(const ComputeTaskOutcome Outcome)
+ {
+ switch (Outcome)
+ {
+ case ComputeTaskOutcome::Success:
+ return "Success"sv;
+ case ComputeTaskOutcome::Failed:
+ return "Failed"sv;
+ case ComputeTaskOutcome::Cancelled:
+ return "Cancelled"sv;
+ case ComputeTaskOutcome::NoResult:
+ return "NoResult"sv;
+ case ComputeTaskOutcome::Exipred:
+ return "Exipred"sv;
+ case ComputeTaskOutcome::BlobNotFound:
+ return "BlobNotFound"sv;
+ case ComputeTaskOutcome::Exception:
+ return "Exception"sv;
+ };
+ return "Unknown"sv;
+ }
+
+ // Polls Horde for task status updates, schedules processing of completed
+ // tasks on 'ThreadPool', and returns any results finished so far. Transfer
+ // stats accumulate across the poll request; an exception marks the endpoint
+ // unhealthy and is converted into an error result.
+ virtual GetUpstreamApplyUpdatesResult GetUpdates(WorkerThreadPool& ThreadPool) override
+ {
+  int64_t Bytes{};
+  double ElapsedSeconds{};
+
+  {
+   // Fast path: nothing in flight — just drain any already-completed results.
+   std::scoped_lock Lock(m_TaskMutex);
+   if (m_PendingTasks.empty())
+   {
+    if (m_CompletedTasks.empty())
+    {
+     // Nothing to do.
+     return {.Success = true};
+    }
+
+    UpstreamApplyCompleted CompletedTasks;
+    std::swap(CompletedTasks, m_CompletedTasks);
+    return {.Bytes = Bytes, .ElapsedSeconds = ElapsedSeconds, .Completed = std::move(CompletedTasks), .Success = true};
+   }
+  }
+
+  try
+  {
+   CloudCacheSession ComputeSession(m_Client);
+
+   CloudCacheResult UpdatesResult = ComputeSession.GetComputeUpdates(m_ChannelId);
+   Log().debug("Get compute updates Bytes={} Duration={}s Result={}",
+      UpdatesResult.Bytes,
+      UpdatesResult.ElapsedSeconds,
+      UpdatesResult.Success);
+   Bytes += UpdatesResult.Bytes;
+   ElapsedSeconds += UpdatesResult.ElapsedSeconds;
+   if (!UpdatesResult.Success)
+   {
+    return {.Error{.ErrorCode = UpdatesResult.ErrorCode, .Reason = std::move(UpdatesResult.Reason)},
+      .Bytes = Bytes,
+      .ElapsedSeconds = ElapsedSeconds};
+   }
+
+   CbObject TaskStatus = LoadCompactBinaryObject(std::move(UpdatesResult.Response));
+
+   for (auto& It : TaskStatus["u"sv])
+   {
+    CbObjectView Status = It.AsObjectView();
+    IoHash TaskId = Status["h"sv].AsHash();
+    const ComputeTaskState State = static_cast<ComputeTaskState>(Status["s"sv].AsInt32());
+    const ComputeTaskOutcome Outcome = static_cast<ComputeTaskOutcome>(Status["o"sv].AsInt32());
+
+    Log().info("Task {} State={}", TaskId, ComputeTaskStateToString(State));
+
+    // Only completed tasks need to be processed
+    if (State != ComputeTaskState::Complete)
+    {
+     continue;
+    }
+
+    IoHash WorkerId{};
+    IoHash ActionId{};
+    UpstreamApplyType ApplyType{};
+
+    {
+     std::scoped_lock Lock(m_TaskMutex);
+     auto TaskIt = m_PendingTasks.find(TaskId);
+     if (TaskIt != m_PendingTasks.end())
+     {
+      WorkerId = TaskIt->second.WorkerDescriptor.GetHash();
+      ActionId = TaskIt->second.Action.GetHash();
+      ApplyType = TaskIt->second.Type;
+      m_PendingTasks.erase(TaskIt);
+     }
+    }
+
+    if (WorkerId == IoHash::Zero)
+    {
+     // Update for a task we did not post (or already handled) — ignore it.
+     Log().warn("Task {} missing from pending tasks", TaskId);
+     continue;
+    }
+
+    std::map<std::string, uint64_t> Timepoints;
+    ProcessQueueTimings(Status["qs"sv].AsObjectView(), Timepoints);
+    ProcessExecuteTimings(Status["es"sv].AsObjectView(), Timepoints);
+
+    if (Outcome != ComputeTaskOutcome::Success)
+    {
+     // Record the failure directly; no result payload to fetch.
+     const std::string_view Detail = Status["d"sv].AsString();
+     {
+      std::scoped_lock Lock(m_TaskMutex);
+      m_CompletedTasks[WorkerId][ActionId] = {
+       .Error{.ErrorCode = -1, .Reason = fmt::format("Task {} {}", ComputeTaskOutcomeToString(Outcome), Detail)},
+       .Timepoints = std::move(Timepoints)};
+     }
+     continue;
+    }
+
+    // Successful task: fetch and unpack its result payload off-thread.
+    Timepoints["zen-complete-queue-added"] = DateTime::NowTicks();
+    ThreadPool.ScheduleWork([this,
+          ApplyType,
+          ResultHash = Status["r"sv].AsHash(),
+          Timepoints = std::move(Timepoints),
+          TaskId,
+          WorkerId,
+          ActionId]() mutable {
+     Timepoints["zen-complete-queue-dispatched"] = DateTime::NowTicks();
+     GetUpstreamApplyResult Result = ProcessTaskStatus(ApplyType, ResultHash);
+     Timepoints["zen-complete-queue-complete"] = DateTime::NowTicks();
+     Result.Timepoints.merge(Timepoints);
+
+     Log().debug("Task Processed {} Files={} Attachments={} ExitCode={}",
+        TaskId,
+        Result.OutputFiles.size(),
+        Result.OutputPackage.GetAttachments().size(),
+        Result.Error.ErrorCode);
+     {
+      std::scoped_lock Lock(m_TaskMutex);
+      m_CompletedTasks[WorkerId][ActionId] = std::move(Result);
+     }
+    });
+   }
+
+   {
+    std::scoped_lock Lock(m_TaskMutex);
+    if (m_CompletedTasks.empty())
+    {
+     // Nothing to do.
+     return {.Bytes = Bytes, .ElapsedSeconds = ElapsedSeconds, .Success = true};
+    }
+    UpstreamApplyCompleted CompletedTasks;
+    std::swap(CompletedTasks, m_CompletedTasks);
+    return {.Bytes = Bytes, .ElapsedSeconds = ElapsedSeconds, .Completed = std::move(CompletedTasks), .Success = true};
+   }
+  }
+  catch (std::exception& Err)
+  {
+   m_HealthOk = false;
+   return {
+    .Error{.ErrorCode = -1, .Reason = Err.what()},
+    .Bytes = Bytes,
+    .ElapsedSeconds = ElapsedSeconds,
+   };
+  }
+ }
+
+ // Mutable access to this endpoint's transfer statistics.
+ virtual UpstreamApplyEndpointStats& Stats() override { return m_Stats; }
+
+ private:
+ // Convenience accessor so member functions can write Log().debug(...).
+ spdlog::logger& Log() { return m_Log; }
+
+ spdlog::logger& m_Log;
+ CasStore& m_CasStore;  // source of worker CAS chunks (see ProcessFileEntry)
+ CidStore& m_CidStore;  // source of input CID chunks (see ProcessApplyKey)
+ AuthMgr& m_AuthMgr;
+ std::string m_DisplayName;
+ RefPtr<CloudCacheClient> m_Client;         // compute (task) service client
+ RefPtr<CloudCacheClient> m_StorageClient;  // storage (blob/ref) service client
+ UpstreamApplyEndpointStats m_Stats;
+ std::atomic_bool m_HealthOk{false};  // cleared whenever a request throws
+ std::string m_ChannelId;
+
+ // m_TaskMutex guards both task maps; all accesses take a scoped_lock.
+ std::mutex m_TaskMutex;
+ std::unordered_map<IoHash, UpstreamApplyRecord> m_PendingTasks;
+ UpstreamApplyCompleted m_CompletedTasks;
+
+ // Everything that must be uploaded to run one task: raw blobs and
+ // compact-binary objects keyed by hash, plus the task/requirements ids.
+ struct UpstreamData
+ {
+  std::map<IoHash, IoBuffer> Blobs;
+  std::map<IoHash, CbObject> Objects;
+  IoHash TaskId;
+  IoHash RequirementsId;
+ };
+
+ // One node of the input sandbox tree built by BuildDirectoryTree.
+ struct UpstreamDirectory
+ {
+  std::filesystem::path Path;
+  std::map<std::string, UpstreamDirectory> Directories;
+  std::set<std::string> Files;
+ };
+
+ // Converts Horde queue timing stats into named timepoints. "t" holds the
+ // enqueue time in ticks; "s" is an array of per-stage millisecond offsets
+ // which are accumulated into absolute tick values. Entries beyond the known
+ // stages still advance the running tick count but are not recorded.
+ static void ProcessQueueTimings(CbObjectView QueueStats, std::map<std::string, uint64_t>& Timepoints)
+ {
+  uint64_t Ticks = QueueStats["t"sv].AsDateTimeTicks();
+  if (Ticks == 0)
+  {
+   // No timing payload present.
+   return;
+  }
+
+  Timepoints["horde-queue-added"] = Ticks;
+
+  // Stage names in the order Horde reports their offsets.
+  static constexpr const char* StageNames[] = {"horde-queue-dispatched", "horde-queue-complete"};
+  constexpr size_t StageCount = sizeof(StageNames) / sizeof(StageNames[0]);
+
+  size_t Stage = 0;
+  for (auto& Offset : QueueStats["s"sv].AsArrayView())
+  {
+   Ticks += Offset.AsInt32() * TimeSpan::TicksPerMillisecond;
+   if (Stage < StageCount)
+   {
+    Timepoints[StageNames[Stage]] = Ticks;
+   }
+   ++Stage;
+  }
+ }
+
+ // Converts Horde execution timing stats into named timepoints. "t" holds the
+ // execution start in ticks; "s" is an array of per-stage millisecond offsets
+ // which are accumulated into absolute tick values. Entries beyond the known
+ // stages still advance the running tick count but are not recorded.
+ static void ProcessExecuteTimings(CbObjectView ExecutionStats, std::map<std::string, uint64_t>& Timepoints)
+ {
+  uint64_t Ticks = ExecutionStats["t"sv].AsDateTimeTicks();
+  if (Ticks == 0)
+  {
+   // No timing payload present.
+   return;
+  }
+
+  Timepoints["horde-execution-start"] = Ticks;
+
+  // Stage names in the order Horde reports their offsets.
+  static constexpr const char* StageNames[] = {"horde-execution-download-ref",
+            "horde-execution-download-input",
+            "horde-execution-execute",
+            "horde-execution-upload-log",
+            "horde-execution-upload-output",
+            "horde-execution-upload-ref"};
+  constexpr size_t StageCount = sizeof(StageNames) / sizeof(StageNames[0]);
+
+  size_t Stage = 0;
+  for (auto& Offset : ExecutionStats["s"sv].AsArrayView())
+  {
+   Ticks += Offset.AsInt32() * TimeSpan::TicksPerMillisecond;
+   if (Stage < StageCount)
+   {
+    Timepoints[StageNames[Stage]] = Ticks;
+   }
+   ++Stage;
+  }
+ }
+
+ // Fetches and unpacks the result of one completed Horde task.
+ // Downloads the result ref object, every blob it references (plus
+ // stdout/stderr attachments), then converts the payload per 'ApplyType':
+ //  - Simple: resolve the Merkle output tree into OutputFiles/FileData.
+ //  - Asset:  reassemble Build.output and its attachments into a CbPackage.
+ // Exceptions are converted into an error result rather than propagated.
+ [[nodiscard]] GetUpstreamApplyResult ProcessTaskStatus(const UpstreamApplyType ApplyType, const IoHash& ResultHash)
+ {
+  try
+  {
+   CloudCacheSession Session(m_StorageClient);
+
+   GetUpstreamApplyResult ApplyResult{};
+
+   IoHash StdOutHash;
+   IoHash StdErrHash;
+   IoHash OutputHash;
+
+   // Every blob downloaded for this task, keyed by hash.
+   std::map<IoHash, IoBuffer> BinaryData;
+
+   {
+    CloudCacheResult ObjectRefResult = Session.GetRef("responses"sv, ResultHash, ZenContentType::kCbObject);
+    Log().debug("Get ref {} Bytes={} Duration={}s Result={}",
+       ResultHash,
+       ObjectRefResult.Bytes,
+       ObjectRefResult.ElapsedSeconds,
+       ObjectRefResult.Success);
+    ApplyResult.Bytes += ObjectRefResult.Bytes;
+    ApplyResult.ElapsedSeconds += ObjectRefResult.ElapsedSeconds;
+    ApplyResult.Timepoints["zen-storage-get-ref"] = DateTime::NowTicks();
+
+    if (!ObjectRefResult.Success)
+    {
+     ApplyResult.Error.Reason = "Failed to get result object data";
+     return ApplyResult;
+    }
+
+    // "e" carries the remote exit code; the attachments name the streams.
+    CbObject ResultObject = LoadCompactBinaryObject(ObjectRefResult.Response);
+    ApplyResult.Error.ErrorCode = ResultObject["e"sv].AsInt32();
+    StdOutHash = ResultObject["so"sv].AsBinaryAttachment();
+    StdErrHash = ResultObject["se"sv].AsBinaryAttachment();
+    OutputHash = ResultObject["o"sv].AsObjectAttachment();
+   }
+
+   {
+    std::set<IoHash> NeededData;
+    if (OutputHash != IoHash::Zero)
+    {
+     GetObjectReferencesResult ObjectReferenceResult = Session.GetObjectReferences(OutputHash);
+     Log().debug("Get object references {} References={} Bytes={} Duration={}s Result={}",
+        ResultHash,
+        ObjectReferenceResult.References.size(),
+        ObjectReferenceResult.Bytes,
+        ObjectReferenceResult.ElapsedSeconds,
+        ObjectReferenceResult.Success);
+     ApplyResult.Bytes += ObjectReferenceResult.Bytes;
+     ApplyResult.ElapsedSeconds += ObjectReferenceResult.ElapsedSeconds;
+     ApplyResult.Timepoints["zen-storage-get-object-references"] = DateTime::NowTicks();
+
+     if (!ObjectReferenceResult.Success)
+     {
+      ApplyResult.Error.Reason = "Failed to get result object references";
+      return ApplyResult;
+     }
+
+     NeededData = std::move(ObjectReferenceResult.References);
+    }
+
+    // Zero hashes (absent attachments) are filtered in the loop below.
+    NeededData.insert(OutputHash);
+    NeededData.insert(StdOutHash);
+    NeededData.insert(StdErrHash);
+
+    for (const auto& Hash : NeededData)
+    {
+     if (Hash == IoHash::Zero)
+     {
+      continue;
+     }
+     CloudCacheResult BlobResult = Session.GetBlob(Hash);
+     Log().debug("Get blob {} Bytes={} Duration={}s Result={}",
+        Hash,
+        BlobResult.Bytes,
+        BlobResult.ElapsedSeconds,
+        BlobResult.Success);
+     ApplyResult.Bytes += BlobResult.Bytes;
+     ApplyResult.ElapsedSeconds += BlobResult.ElapsedSeconds;
+     if (!BlobResult.Success)
+     {
+      ApplyResult.Error.Reason = "Failed to get blob";
+      return ApplyResult;
+     }
+     BinaryData[Hash] = std::move(BlobResult.Response);
+    }
+    ApplyResult.Timepoints["zen-storage-get-blobs"] = DateTime::NowTicks();
+   }
+
+   ApplyResult.StdOut = StdOutHash != IoHash::Zero
+        ? std::string((const char*)BinaryData[StdOutHash].GetData(), BinaryData[StdOutHash].GetSize())
+        : "";
+   ApplyResult.StdErr = StdErrHash != IoHash::Zero
+        ? std::string((const char*)BinaryData[StdErrHash].GetData(), BinaryData[StdErrHash].GetSize())
+        : "";
+
+   if (OutputHash == IoHash::Zero)
+   {
+    ApplyResult.Error.Reason = "Task completed with no output object";
+    return ApplyResult;
+   }
+
+   CbObject OutputObject = LoadCompactBinaryObject(BinaryData[OutputHash]);
+
+   switch (ApplyType)
+   {
+    case UpstreamApplyType::Simple:
+    {
+     ResolveMerkleTreeDirectory(""sv, OutputHash, BinaryData, ApplyResult.OutputFiles);
+     // Moves the mapped buffers out of BinaryData; the keys remain valid.
+     for (const auto& Pair : BinaryData)
+     {
+      ApplyResult.FileData[Pair.first] = std::move(BinaryData.at(Pair.first));
+     }
+
+     ApplyResult.Success = ApplyResult.Error.ErrorCode == 0;
+     return ApplyResult;
+    }
+    break; // note: unreachable — the case always returns above
+    case UpstreamApplyType::Asset:
+    {
+     if (ApplyResult.Error.ErrorCode != 0)
+     {
+      ApplyResult.Error.Reason = "Task completed with errors";
+      return ApplyResult;
+     }
+
+     // Get build.output
+     IoHash BuildOutputId;
+     IoBuffer BuildOutput;
+     for (auto& It : OutputObject["f"sv])
+     {
+      const CbObjectView FileObject = It.AsObjectView();
+      if (FileObject["n"sv].AsString() == "Build.output"sv)
+      {
+       BuildOutputId = FileObject["h"sv].AsBinaryAttachment();
+       BuildOutput = BinaryData[BuildOutputId];
+       break;
+      }
+     }
+
+     if (BuildOutput.GetSize() == 0)
+     {
+      ApplyResult.Error.Reason = "Build.output file not found in task results";
+      return ApplyResult;
+     }
+
+     // Get Output directory node
+     IoBuffer OutputDirectoryTree;
+     for (auto& It : OutputObject["d"sv])
+     {
+      const CbObjectView DirectoryObject = It.AsObjectView();
+      if (DirectoryObject["n"sv].AsString() == "Outputs"sv)
+      {
+       OutputDirectoryTree = BinaryData[DirectoryObject["h"sv].AsObjectAttachment()];
+       break;
+      }
+     }
+
+     if (OutputDirectoryTree.GetSize() == 0)
+     {
+      ApplyResult.Error.Reason = "Outputs directory not found in task results";
+      return ApplyResult;
+     }
+
+     // load build.output as CbObject
+
+     // Move Outputs from Horde to CbPackage
+
+     // Maps each attachment's uncompressed content id to the hash Horde
+     // stored the compressed bytes under (file name vs. attachment hash).
+     std::unordered_map<IoHash, IoHash> CidToCompressedId;
+     CbPackage OutputPackage;
+     CbObject OutputDirectoryTreeObject = LoadCompactBinaryObject(OutputDirectoryTree);
+
+     for (auto& It : OutputDirectoryTreeObject["f"sv])
+     {
+      CbObjectView FileObject = It.AsObjectView();
+      // Name is the uncompressed hash
+      IoHash DecompressedId = IoHash::FromHexString(FileObject["n"sv].AsString());
+      // Hash is the compressed data hash, and how it is stored in Horde
+      IoHash CompressedId = FileObject["h"sv].AsBinaryAttachment();
+
+      if (!BinaryData.contains(CompressedId))
+      {
+       Log().warn("Object attachment chunk not retrieved from Horde {}", CompressedId);
+       ApplyResult.Error.Reason = "Object attachment chunk not retrieved from Horde";
+       return ApplyResult;
+      }
+      CidToCompressedId[DecompressedId] = CompressedId;
+     }
+
+     // Iterate attachments, verify all chunks exist, and add to CbPackage
+     bool AnyErrors = false;
+     CbObject BuildOutputObject = LoadCompactBinaryObject(BuildOutput);
+     BuildOutputObject.IterateAttachments([&](CbFieldView Field) {
+      const IoHash DecompressedId = Field.AsHash();
+      if (!CidToCompressedId.contains(DecompressedId))
+      {
+       Log().warn("Attachment not found {}", DecompressedId);
+       AnyErrors = true;
+       return;
+      }
+      const IoHash& CompressedId = CidToCompressedId.at(DecompressedId);
+
+      if (!BinaryData.contains(CompressedId))
+      {
+       Log().warn("Missing output {} compressed {} uncompressed", CompressedId, DecompressedId);
+       AnyErrors = true;
+       return;
+      }
+
+      CompressedBuffer AttachmentBuffer =
+       CompressedBuffer::FromCompressed(SharedBuffer(BinaryData[CompressedId]));
+
+      if (!AttachmentBuffer)
+      {
+       Log().warn(
+        "Invalid output encountered (not valid CompressedBuffer format) {} compressed {} uncompressed",
+        CompressedId,
+        DecompressedId);
+       AnyErrors = true;
+       return;
+      }
+
+      ApplyResult.TotalAttachmentBytes += AttachmentBuffer.GetCompressedSize();
+      ApplyResult.TotalRawAttachmentBytes += AttachmentBuffer.GetRawSize();
+
+      CbAttachment Attachment(AttachmentBuffer);
+      OutputPackage.AddAttachment(Attachment);
+     });
+
+     if (AnyErrors)
+     {
+      ApplyResult.Error.Reason = "Failed to get result object attachment data";
+      return ApplyResult;
+     }
+
+     OutputPackage.SetObject(BuildOutputObject);
+     ApplyResult.OutputPackage = std::move(OutputPackage);
+
+     ApplyResult.Success = ApplyResult.Error.ErrorCode == 0;
+     return ApplyResult;
+    }
+    break; // note: unreachable — the case always returns above
+   }
+
+   ApplyResult.Error.Reason = "Unknown apply type";
+   return ApplyResult;
+  }
+  catch (std::exception& Err)
+  {
+   return {.Error{.ErrorCode = -1, .Reason = Err.what()}};
+  }
+ }
+
+ // Translates one apply record into everything needed to post a Horde task:
+ // collects executable/input/dummy-directory files into blobs, builds the
+ // sandbox Merkle tree, the requirements object, and the task object, filling
+ // 'Data' (Blobs, Objects, RequirementsId, TaskId). Returns false (after a
+ // warning log) when any required piece is missing or malformed.
+ [[nodiscard]] bool ProcessApplyKey(const UpstreamApplyRecord& ApplyRecord, UpstreamData& Data)
+ {
+  std::string ExecutablePath;
+  std::string WorkingDirectory;
+  std::vector<std::string> Arguments;
+  std::map<std::string, std::string> Environment;
+  std::set<std::filesystem::path> InputFiles;
+  std::set<std::string> Outputs;
+  std::map<std::filesystem::path, IoHash> InputFileHashes;
+
+  ExecutablePath = ApplyRecord.WorkerDescriptor["path"sv].AsString();
+  if (ExecutablePath.empty())
+  {
+   Log().warn("process apply upstream FAILED, '{}', path missing from worker descriptor",
+      ApplyRecord.WorkerDescriptor.GetHash());
+   return false;
+  }
+
+  WorkingDirectory = ApplyRecord.WorkerDescriptor["workdir"sv].AsString();
+
+  // Worker executables and support files are resolved from the local CAS.
+  for (auto& It : ApplyRecord.WorkerDescriptor["executables"sv])
+  {
+   CbObjectView FileEntry = It.AsObjectView();
+   if (!ProcessFileEntry(FileEntry, InputFiles, InputFileHashes, Data.Blobs))
+   {
+    return false;
+   }
+  }
+
+  for (auto& It : ApplyRecord.WorkerDescriptor["files"sv])
+  {
+   CbObjectView FileEntry = It.AsObjectView();
+   if (!ProcessFileEntry(FileEntry, InputFiles, InputFileHashes, Data.Blobs))
+   {
+    return false;
+   }
+  }
+
+  // The sandbox tree is file-driven, so empty directories are represented by
+  // a placeholder file backed by the shared empty buffer.
+  for (auto& It : ApplyRecord.WorkerDescriptor["dirs"sv])
+  {
+   std::string_view Directory = It.AsString();
+   std::string DummyFile = fmt::format("{}/.zen_empty_file", Directory);
+   InputFiles.insert(DummyFile);
+   Data.Blobs[EmptyBufferId] = EmptyBuffer;
+   InputFileHashes[DummyFile] = EmptyBufferId;
+  }
+
+  if (!WorkingDirectory.empty())
+  {
+   // Ensure the working directory exists in the sandbox as well.
+   std::string DummyFile = fmt::format("{}/.zen_empty_file", WorkingDirectory);
+   InputFiles.insert(DummyFile);
+   Data.Blobs[EmptyBufferId] = EmptyBuffer;
+   InputFileHashes[DummyFile] = EmptyBufferId;
+  }
+
+  // Environment entries arrive as "NAME=value" strings.
+  for (auto& It : ApplyRecord.WorkerDescriptor["environment"sv])
+  {
+   std::string_view Env = It.AsString();
+   auto Index = Env.find('=');
+   if (Index == std::string_view::npos)
+   {
+    Log().warn("process apply upstream FAILED, environment '{}' malformed", Env);
+    return false;
+   }
+
+   Environment[std::string(Env.substr(0, Index))] = Env.substr(Index + 1);
+  }
+
+  switch (ApplyRecord.Type)
+  {
+   case UpstreamApplyType::Simple:
+   {
+    // Simple tasks take arguments and output names straight from the
+    // worker descriptor.
+    for (auto& It : ApplyRecord.WorkerDescriptor["arguments"sv])
+    {
+     Arguments.push_back(std::string(It.AsString()));
+    }
+
+    for (auto& It : ApplyRecord.WorkerDescriptor["outputs"sv])
+    {
+     Outputs.insert(std::string(It.AsString()));
+    }
+   }
+   break;
+   case UpstreamApplyType::Asset:
+   {
+    static const std::filesystem::path BuildActionPath = "Build.action"sv;
+    static const std::filesystem::path InputPath = "Inputs"sv;
+    const IoHash ActionId = ApplyRecord.Action.GetHash();
+
+    Arguments.push_back("-Build=build.action");
+    Outputs.insert("Build.output");
+    Outputs.insert("Outputs");
+
+    // The action object itself is shipped as the Build.action input file.
+    InputFiles.insert(BuildActionPath);
+    InputFileHashes[BuildActionPath] = ActionId;
+    Data.Blobs[ActionId] = IoBufferBuilder::MakeCloneFromMemory(ApplyRecord.Action.GetBuffer().GetData(),
+           ApplyRecord.Action.GetBuffer().GetSize());
+
+    // Each action attachment becomes an Inputs/<cid> file; the blob is
+    // keyed by the hash of the stored (compressed) bytes.
+    bool AnyErrors = false;
+    ApplyRecord.Action.IterateAttachments([&](CbFieldView Field) {
+     const IoHash Cid = Field.AsHash();
+     const std::filesystem::path FilePath = {InputPath / Cid.ToHexString()};
+     IoBuffer DataBuffer = m_CidStore.FindChunkByCid(Cid);
+
+     if (!DataBuffer)
+     {
+      Log().warn("process apply upstream FAILED, input CID chunk '{}' missing", Cid);
+      AnyErrors = true;
+      return;
+     }
+
+     if (InputFiles.contains(FilePath))
+     {
+      // Attachment referenced more than once — already handled.
+      return;
+     }
+
+     const IoHash CompressedId = IoHash::HashBuffer(DataBuffer.GetData(), DataBuffer.GetSize());
+
+     InputFiles.insert(FilePath);
+     InputFileHashes[FilePath] = CompressedId;
+     Data.Blobs[CompressedId] = std::move(DataBuffer);
+    });
+
+    if (AnyErrors)
+    {
+     return false;
+    }
+   }
+   break;
+  }
+
+  const UpstreamDirectory RootDirectory = BuildDirectoryTree(InputFiles);
+
+  CbObject Sandbox = BuildMerkleTreeDirectory(RootDirectory, InputFileHashes, Data.Blobs, Data.Objects);
+  const IoHash SandboxHash = Sandbox.GetHash();
+  Data.Objects[SandboxHash] = std::move(Sandbox);
+
+  {
+   // Build the scheduling requirements from the descriptor's host hints.
+   std::string_view HostPlatform = ApplyRecord.WorkerDescriptor["host"sv].AsString();
+   if (HostPlatform.empty())
+   {
+    Log().warn("process apply upstream FAILED, 'host' platform not provided");
+    return false;
+   }
+
+   int32_t LogicalCores = ApplyRecord.WorkerDescriptor["cores"sv].AsInt32();
+   int64_t Memory = ApplyRecord.WorkerDescriptor["memory"sv].AsInt64();
+   bool Exclusive = ApplyRecord.WorkerDescriptor["exclusive"sv].AsBool();
+
+   std::string Condition = fmt::format("Platform == '{}'", HostPlatform);
+   if (HostPlatform == "Win64")
+   {
+    // TODO
+    // Condition += " && Pool == 'Win-RemoteExec'";
+   }
+
+   std::map<std::string_view, int64_t> Resources;
+   if (LogicalCores > 0)
+   {
+    Resources["LogicalCores"sv] = LogicalCores;
+   }
+   if (Memory > 0)
+   {
+    // Bytes -> whole GiB, with a minimum of 1.
+    Resources["RAM"sv] = std::max(Memory / 1024LL / 1024LL / 1024LL, 1LL);
+   }
+
+   CbObject Requirements = BuildRequirements(Condition, Resources, Exclusive);
+   const IoHash RequirementsId = Requirements.GetHash();
+   Data.Objects[RequirementsId] = std::move(Requirements);
+   Data.RequirementsId = RequirementsId;
+  }
+
+  CbObject Task = BuildTask(ExecutablePath, Arguments, Environment, WorkingDirectory, SandboxHash, Data.RequirementsId, Outputs);
+
+  const IoHash TaskId = Task.GetHash();
+  Data.Objects[TaskId] = std::move(Task);
+  Data.TaskId = TaskId;
+
+  return true;
+ }
+
+ // Resolves one worker-descriptor file entry ("name"/"hash"/"size") against
+ // the local CAS and records it in the input file set, the per-file hash map,
+ // and the upload blob map. Returns false (with a warning) when the chunk is
+ // absent, has an unexpected size, or the file name collides.
+ [[nodiscard]] bool ProcessFileEntry(const CbObjectView& FileEntry,
+     std::set<std::filesystem::path>& InputFiles,
+     std::map<std::filesystem::path, IoHash>& InputFileHashes,
+     std::map<IoHash, IoBuffer>& Blobs)
+ {
+  const std::filesystem::path EntryPath = FileEntry["name"sv].AsString();
+  const IoHash ChunkId = FileEntry["hash"sv].AsHash();
+  const uint64_t ExpectedSize = FileEntry["size"sv].AsUInt64();
+
+  IoBuffer Chunk = m_CasStore.FindChunk(ChunkId);
+  if (!Chunk)
+  {
+   Log().warn("process apply upstream FAILED, worker CAS chunk '{}' missing", ChunkId);
+   return false;
+  }
+
+  if (Chunk.Size() != ExpectedSize)
+  {
+   Log().warn("process apply upstream FAILED, worker CAS chunk '{}' size: {}, action spec expected {}",
+      ChunkId,
+      Chunk.Size(),
+      ExpectedSize);
+   return false;
+  }
+
+  if (InputFiles.contains(EntryPath))
+  {
+   Log().warn("process apply upstream FAILED, worker CAS chunk '{}' size: {} duplicate filename {}", ChunkId, ExpectedSize, EntryPath);
+   return false;
+  }
+
+  InputFiles.insert(EntryPath);
+  InputFileHashes[EntryPath] = ChunkId;
+  Blobs[ChunkId] = std::move(Chunk);
+  return true;
+ }
+
+ // Builds a nested UpstreamDirectory tree from a flat set of file paths.
+ // 'AllDirectories' caches pointers to already-created nodes; those pointers
+ // stay valid while children are added because std::map nodes are stable.
+ // The cache is construction-only state and is discarded on return.
+ [[nodiscard]] UpstreamDirectory BuildDirectoryTree(const std::set<std::filesystem::path>& InputFiles)
+ {
+  static const std::filesystem::path RootPath;
+  std::map<std::filesystem::path, UpstreamDirectory*> AllDirectories;
+  UpstreamDirectory RootDirectory = {.Path = RootPath};
+
+  AllDirectories[RootPath] = &RootDirectory;
+
+  // Build tree from flat list
+  for (const auto& Path : InputFiles)
+  {
+   if (Path.has_parent_path())
+   {
+    if (!AllDirectories.contains(Path.parent_path()))
+    {
+     // Split the parent path into components; deepest is pushed first so
+     // the stack pops root-to-leaf.
+     std::stack<std::string> PathSplit;
+     {
+      std::filesystem::path ParentPath = Path.parent_path();
+      PathSplit.push(ParentPath.filename().string());
+      while (ParentPath.has_parent_path())
+      {
+       ParentPath = ParentPath.parent_path();
+       PathSplit.push(ParentPath.filename().string());
+      }
+     }
+     // Walk down from the root, creating missing intermediate nodes.
+     UpstreamDirectory* ParentPtr = &RootDirectory;
+     while (!PathSplit.empty())
+     {
+      if (!ParentPtr->Directories.contains(PathSplit.top()))
+      {
+       std::filesystem::path NewParentPath = {ParentPtr->Path / PathSplit.top()};
+       ParentPtr->Directories[PathSplit.top()] = {.Path = NewParentPath};
+       AllDirectories[NewParentPath] = &ParentPtr->Directories[PathSplit.top()];
+      }
+      ParentPtr = &ParentPtr->Directories[PathSplit.top()];
+      PathSplit.pop();
+     }
+    }
+
+    AllDirectories[Path.parent_path()]->Files.insert(Path.filename().string());
+   }
+   else
+   {
+    // Top-level file with no parent directory.
+    RootDirectory.Files.insert(Path.filename().string());
+   }
+  }
+
+  return RootDirectory;
+ }
+
+ // Serializes one directory node of the input sandbox as a compact-binary
+ // object: "f" lists files (name, binary-attachment hash, size) and "d" lists
+ // subdirectories (name, object-attachment hash). Child directory objects are
+ // emitted into 'Objects' so they can be uploaded alongside the returned root.
+ [[nodiscard]] CbObject BuildMerkleTreeDirectory(const UpstreamDirectory& RootDirectory,
+     const std::map<std::filesystem::path, IoHash>& InputFileHashes,
+     const std::map<IoHash, IoBuffer>& Blobs,
+     std::map<IoHash, CbObject>& Objects)
+ {
+  CbObjectWriter DirectoryTreeWriter;
+
+  if (!RootDirectory.Files.empty())
+  {
+   DirectoryTreeWriter.BeginArray("f"sv);
+   for (const auto& File : RootDirectory.Files)
+   {
+    // Look up the file's hash by full path; its size comes from the blob.
+    const std::filesystem::path FilePath = {RootDirectory.Path / File};
+    const IoHash& FileHash = InputFileHashes.at(FilePath);
+    const uint64_t FileSize = Blobs.at(FileHash).Size();
+    DirectoryTreeWriter.BeginObject();
+    DirectoryTreeWriter.AddString("n"sv, File);
+    DirectoryTreeWriter.AddBinaryAttachment("h"sv, FileHash);
+    DirectoryTreeWriter.AddInteger("s"sv, FileSize); // Size
+    // DirectoryTreeWriter.AddInteger("a"sv, 0); // Attributes Currently unneeded
+    DirectoryTreeWriter.EndObject();
+   }
+   DirectoryTreeWriter.EndArray();
+  }
+
+  if (!RootDirectory.Directories.empty())
+  {
+   DirectoryTreeWriter.BeginArray("d"sv);
+   for (const auto& Item : RootDirectory.Directories)
+   {
+    // Recurse first so the child's hash is known before referencing it.
+    CbObject Directory = BuildMerkleTreeDirectory(Item.second, InputFileHashes, Blobs, Objects);
+    const IoHash DirectoryHash = Directory.GetHash();
+    Objects[DirectoryHash] = std::move(Directory);
+
+    DirectoryTreeWriter.BeginObject();
+    DirectoryTreeWriter.AddString("n"sv, Item.first);
+    DirectoryTreeWriter.AddObjectAttachment("h"sv, DirectoryHash);
+    DirectoryTreeWriter.EndObject();
+   }
+   DirectoryTreeWriter.EndArray();
+  }
+
+  return DirectoryTreeWriter.Save();
+ }
+
+ // Recursively flattens a Merkle directory object into (path -> blob hash)
+ // entries: "f" entries become files under 'ParentDirectory', and each "d"
+ // entry is resolved by recursing with the extended parent path.
+ void ResolveMerkleTreeDirectory(const std::filesystem::path& ParentDirectory,
+     const IoHash& DirectoryHash,
+     const std::map<IoHash, IoBuffer>& Objects,
+     std::map<std::filesystem::path, IoHash>& OutputFiles)
+ {
+  CbObject Directory = LoadCompactBinaryObject(Objects.at(DirectoryHash));
+
+  // Files: record each entry's binary-attachment hash under its full path.
+  for (auto& FileField : Directory["f"sv])
+  {
+   const CbObjectView Entry = FileField.AsObjectView();
+   OutputFiles[ParentDirectory / Entry["n"sv].AsString()] = Entry["h"sv].AsBinaryAttachment();
+  }
+
+  // Subdirectories: descend with the child's name appended to the path.
+  for (auto& DirField : Directory["d"sv])
+  {
+   const CbObjectView Entry = DirField.AsObjectView();
+   ResolveMerkleTreeDirectory(ParentDirectory / Entry["n"sv].AsString(),
+      Entry["h"sv].AsObjectAttachment(),
+      Objects,
+      OutputFiles);
+  }
+ }
+
+ // Serializes a Horde requirements object: "c" = condition expression,
+ // "r" = nested [name, amount] resource pairs, "e" = exclusive-execution
+ // flag. Field names use ""sv literals for consistency with the other
+ // compact-binary writers in this file.
+ [[nodiscard]] CbObject BuildRequirements(const std::string_view Condition,
+     const std::map<std::string_view, int64_t>& Resources,
+     const bool Exclusive)
+ {
+  CbObjectWriter Writer;
+  Writer.AddString("c"sv, Condition);
+  if (!Resources.empty())
+  {
+   Writer.BeginArray("r"sv);
+   for (const auto& Resource : Resources)
+   {
+    // Each resource is written as a nested [name, amount] array.
+    Writer.BeginArray();
+    Writer.AddString(Resource.first);
+    Writer.AddInteger(Resource.second);
+    Writer.EndArray();
+   }
+   Writer.EndArray();
+  }
+  Writer.AddBool("e"sv, Exclusive);
+  return Writer.Save();
+ }
+
+ // Serializes a Horde task description: executable ("e"), arguments ("a"),
+ // environment pairs ("v"), optional working directory ("w"), sandbox and
+ // requirements object attachments ("s"/"r"), and expected outputs ("o").
+ // NOTE(review): field order presumably affects the serialized bytes, and the
+ // object's hash is used as the TaskId — keep the write order stable.
+ [[nodiscard]] CbObject BuildTask(const std::string_view Executable,
+     const std::vector<std::string>& Arguments,
+     const std::map<std::string, std::string>& Environment,
+     const std::string_view WorkingDirectory,
+     const IoHash& SandboxHash,
+     const IoHash& RequirementsId,
+     const std::set<std::string>& Outputs)
+ {
+  CbObjectWriter TaskWriter;
+  TaskWriter.AddString("e"sv, Executable);
+
+  if (!Arguments.empty())
+  {
+   TaskWriter.BeginArray("a"sv);
+   for (const auto& Argument : Arguments)
+   {
+    TaskWriter.AddString(Argument);
+   }
+   TaskWriter.EndArray();
+  }
+
+  if (!Environment.empty())
+  {
+   TaskWriter.BeginArray("v"sv);
+   for (const auto& Env : Environment)
+   {
+    // Each variable is a nested [name, value] array.
+    TaskWriter.BeginArray();
+    TaskWriter.AddString(Env.first);
+    TaskWriter.AddString(Env.second);
+    TaskWriter.EndArray();
+   }
+   TaskWriter.EndArray();
+  }
+
+  if (!WorkingDirectory.empty())
+  {
+   TaskWriter.AddString("w"sv, WorkingDirectory);
+  }
+
+  TaskWriter.AddObjectAttachment("s"sv, SandboxHash);
+  TaskWriter.AddObjectAttachment("r"sv, RequirementsId);
+
+  // Outputs
+  if (!Outputs.empty())
+  {
+   TaskWriter.BeginArray("o"sv);
+   for (const auto& Output : Outputs)
+   {
+    TaskWriter.AddString(Output);
+   }
+   TaskWriter.EndArray();
+  }
+
+  return TaskWriter.Save();
+ }
+ };
+} // namespace detail
+
+//////////////////////////////////////////////////////////////////////////
+
+// Factory for the Horde-backed upstream apply endpoint. Separate client
+// options/auth are taken for the compute (task) service and the storage
+// (blob) service; the endpoint resolves inputs from the supplied CAS/CID
+// stores and authenticates through 'Mgr'.
+std::unique_ptr<UpstreamApplyEndpoint>
+UpstreamApplyEndpoint::CreateHordeEndpoint(const CloudCacheClientOptions& ComputeOptions,
+                                           const UpstreamAuthConfig& ComputeAuthConfig,
+                                           const CloudCacheClientOptions& StorageOptions,
+                                           const UpstreamAuthConfig& StorageAuthConfig,
+                                           CasStore& CasStore,
+                                           CidStore& CidStore,
+                                           AuthMgr& Mgr)
+{
+ return std::make_unique<detail::HordeUpstreamApplyEndpoint>(ComputeOptions,
+                                                             ComputeAuthConfig,
+                                                             StorageOptions,
+                                                             StorageAuthConfig,
+                                                             CasStore,
+                                                             CidStore,
+                                                             Mgr);
+}
+
+} // namespace zen
+
+#endif // ZEN_WITH_COMPUTE_SERVICES \ No newline at end of file
diff --git a/zenserver/upstream/jupiter.cpp b/zenserver/upstream/jupiter.cpp
index 7eef96556..4bec41a29 100644
--- a/zenserver/upstream/jupiter.cpp
+++ b/zenserver/upstream/jupiter.cpp
@@ -540,6 +540,50 @@ CloudCacheSession::RefExists(std::string_view BucketId, const IoHash& Key)
return {.ElapsedSeconds = Response.elapsed, .Success = Response.status_code == 200};
}
+GetObjectReferencesResult
+CloudCacheSession::GetObjectReferences(const IoHash& Key)
+{
+ ZEN_TRACE_CPU("HordeClient::GetObjectReferences");
+
+ ExtendableStringBuilder<256> Uri;
+ Uri << m_CacheClient->ServiceUrl() << "/api/v1/objects/" << m_CacheClient->BlobStoreNamespace() << "/" << Key.ToHexString()
+ << "/references";
+
+ cpr::Session& Session = GetSession();
+ const CloudCacheAccessToken& AccessToken = GetAccessToken();
+
+ Session.SetOption(cpr::Url{Uri.c_str()});
+ Session.SetOption(cpr::Header{{"Authorization", AccessToken.Value}, {"Accept", "application/x-ue-cb"}});
+ Session.SetOption(cpr::Body{});
+
+ cpr::Response Response = Session.Get();
+ ZEN_DEBUG("GET {}", Response);
+
+ if (Response.error)
+ {
+ return {CloudCacheResult{.ErrorCode = static_cast<int32_t>(Response.error.code), .Reason = Response.error.message}};
+ }
+ else if (!VerifyAccessToken(Response.status_code))
+ {
+ return {CloudCacheResult{.ErrorCode = 401, .Reason = std::string("Invalid access token")}};
+ }
+
+ GetObjectReferencesResult Result{
+ CloudCacheResult{.Bytes = Response.downloaded_bytes, .ElapsedSeconds = Response.elapsed, .Success = Response.status_code == 200}};
+
+ if (Result.Success)
+ {
+ IoBuffer Buffer = IoBuffer(zen::IoBuffer::Wrap, Response.text.data(), Response.text.size());
+ const CbObject ReferencesResponse = LoadCompactBinaryObject(Buffer);
+ for (auto& Item : ReferencesResponse["references"sv])
+ {
+ Result.References.insert(Item.AsHash());
+ }
+ }
+
+ return Result;
+}
+
CloudCacheResult
CloudCacheSession::BlobExists(const IoHash& Key)
{
@@ -603,7 +647,7 @@ CloudCacheSession::PostComputeTasks(IoBuffer TasksData)
return {.ErrorCode = 401, .Reason = std::string("Invalid access token")};
}
- return {.ElapsedSeconds = Response.elapsed, .Success = Response.status_code == 200};
+ return {.Bytes = Response.uploaded_bytes, .ElapsedSeconds = Response.elapsed, .Success = Response.status_code == 200};
}
CloudCacheResult
diff --git a/zenserver/upstream/jupiter.h b/zenserver/upstream/jupiter.h
index bc0d84506..cff9a9ef1 100644
--- a/zenserver/upstream/jupiter.h
+++ b/zenserver/upstream/jupiter.h
@@ -76,6 +76,11 @@ struct CloudCacheExistsResult : CloudCacheResult
std::set<IoHash> Needs;
};
+struct GetObjectReferencesResult : CloudCacheResult
+{
+ std::set<IoHash> References;
+};
+
/**
* Context for performing Jupiter operations
*
@@ -108,6 +113,8 @@ public:
CloudCacheResult RefExists(std::string_view BucketId, const IoHash& Key);
+ GetObjectReferencesResult GetObjectReferences(const IoHash& Key);
+
CloudCacheResult BlobExists(const IoHash& Key);
CloudCacheResult CompressedBlobExists(const IoHash& Key);
CloudCacheResult ObjectExists(const IoHash& Key);
diff --git a/zenserver/upstream/upstreamapply.cpp b/zenserver/upstream/upstreamapply.cpp
index 17a6bb3cf..9758e7565 100644
--- a/zenserver/upstream/upstreamapply.cpp
+++ b/zenserver/upstream/upstreamapply.cpp
@@ -4,1303 +4,26 @@
#if ZEN_WITH_COMPUTE_SERVICES
-# include "jupiter.h"
-# include "zen.h"
-
# include <zencore/compactbinary.h>
# include <zencore/compactbinarybuilder.h>
-# include <zencore/compactbinarypackage.h>
-# include <zencore/compactbinaryvalidation.h>
-# include <zencore/compress.h>
# include <zencore/fmtutils.h>
-# include <zencore/session.h>
-# include <zencore/stats.h>
# include <zencore/stream.h>
-# include <zencore/thread.h>
# include <zencore/timer.h>
# include <zencore/workthreadpool.h>
# include <zenstore/cas.h>
# include <zenstore/cidstore.h>
-# include <auth/authmgr.h>
-# include <upstream/upstreamcache.h>
-
-# include "cache/structuredcachestore.h"
# include "diag/logging.h"
# include <fmt/format.h>
-# include <algorithm>
# include <atomic>
-# include <set>
-# include <stack>
-# include <thread>
-# include <unordered_map>
namespace zen {
using namespace std::literals;
-static const IoBuffer EmptyBuffer;
-static const IoHash EmptyBufferId = IoHash::HashBuffer(EmptyBuffer);
-
-namespace detail {
-
- class HordeUpstreamApplyEndpoint final : public UpstreamApplyEndpoint
- {
- public:
- HordeUpstreamApplyEndpoint(const CloudCacheClientOptions& ComputeOptions,
- const UpstreamAuthConfig& ComputeAuthConfig,
- const CloudCacheClientOptions& StorageOptions,
- const UpstreamAuthConfig& StorageAuthConfig,
- CasStore& CasStore,
- CidStore& CidStore,
- AuthMgr& Mgr)
- : m_Log(logging::Get("upstream-apply"))
- , m_CasStore(CasStore)
- , m_CidStore(CidStore)
- , m_AuthMgr(Mgr)
- {
- m_DisplayName = fmt::format("{} - '{}'+'{}'", ComputeOptions.Name, ComputeOptions.ServiceUrl, StorageOptions.ServiceUrl);
- m_ChannelId = fmt::format("zen-{}", zen::GetSessionIdString());
-
- {
- std::unique_ptr<CloudCacheTokenProvider> TokenProvider;
-
- if (ComputeAuthConfig.OAuthUrl.empty() == false)
- {
- TokenProvider =
- CloudCacheTokenProvider::CreateFromOAuthClientCredentials({.Url = ComputeAuthConfig.OAuthUrl,
- .ClientId = ComputeAuthConfig.OAuthClientId,
- .ClientSecret = ComputeAuthConfig.OAuthClientSecret});
- }
- else if (ComputeAuthConfig.OpenIdProvider.empty() == false)
- {
- TokenProvider =
- CloudCacheTokenProvider::CreateFromCallback([this, ProviderName = std::string(ComputeAuthConfig.OpenIdProvider)]() {
- AuthMgr::OpenIdAccessToken Token = m_AuthMgr.GetOpenIdAccessToken(ProviderName);
- return CloudCacheAccessToken{.Value = Token.AccessToken, .ExpireTime = Token.ExpireTime};
- });
- }
- else
- {
- CloudCacheAccessToken AccessToken{.Value = std::string(ComputeAuthConfig.AccessToken),
- .ExpireTime = CloudCacheAccessToken::TimePoint::max()};
- TokenProvider = CloudCacheTokenProvider::CreateFromStaticToken(AccessToken);
- }
-
- m_Client = new CloudCacheClient(ComputeOptions, std::move(TokenProvider));
- }
-
- {
- std::unique_ptr<CloudCacheTokenProvider> TokenProvider;
-
- if (StorageAuthConfig.OAuthUrl.empty() == false)
- {
- TokenProvider =
- CloudCacheTokenProvider::CreateFromOAuthClientCredentials({.Url = StorageAuthConfig.OAuthUrl,
- .ClientId = StorageAuthConfig.OAuthClientId,
- .ClientSecret = StorageAuthConfig.OAuthClientSecret});
- }
- else if (StorageAuthConfig.OpenIdProvider.empty() == false)
- {
- TokenProvider =
- CloudCacheTokenProvider::CreateFromCallback([this, ProviderName = std::string(StorageAuthConfig.OpenIdProvider)]() {
- AuthMgr::OpenIdAccessToken Token = m_AuthMgr.GetOpenIdAccessToken(ProviderName);
- return CloudCacheAccessToken{.Value = Token.AccessToken, .ExpireTime = Token.ExpireTime};
- });
- }
- else
- {
- CloudCacheAccessToken AccessToken{.Value = std::string(StorageAuthConfig.AccessToken),
- .ExpireTime = CloudCacheAccessToken::TimePoint::max()};
- TokenProvider = CloudCacheTokenProvider::CreateFromStaticToken(AccessToken);
- }
-
- m_StorageClient = new CloudCacheClient(StorageOptions, std::move(TokenProvider));
- }
- }
-
- virtual ~HordeUpstreamApplyEndpoint() = default;
-
- virtual UpstreamEndpointHealth Initialize() override { return CheckHealth(); }
-
- virtual bool IsHealthy() const override { return m_HealthOk.load(); }
-
- virtual UpstreamEndpointHealth CheckHealth() override
- {
- try
- {
- CloudCacheSession Session(m_Client);
- CloudCacheResult Result = Session.Authenticate();
-
- m_HealthOk = Result.ErrorCode == 0;
-
- return {.Reason = std::move(Result.Reason), .Ok = Result.Success};
- }
- catch (std::exception& Err)
- {
- return {.Reason = Err.what(), .Ok = false};
- }
- }
-
- virtual std::string_view DisplayName() const override { return m_DisplayName; }
-
- virtual PostUpstreamApplyResult PostApply(UpstreamApplyRecord ApplyRecord) override
- {
- int64_t Bytes{};
- double ElapsedSeconds{};
-
- try
- {
- UpstreamData UpstreamData;
- if (!ProcessApplyKey(ApplyRecord, UpstreamData))
- {
- return {.Error{.ErrorCode = -1, .Reason = "Failed to generate task data"}};
- }
-
- {
- std::scoped_lock Lock(m_TaskMutex);
- if (m_PendingTasks.contains(UpstreamData.TaskId))
- {
- // Pending task is already queued, return success
- return {.Bytes = Bytes, .ElapsedSeconds = ElapsedSeconds, .Success = true};
- }
- m_PendingTasks[UpstreamData.TaskId] = std::move(ApplyRecord);
- }
-
- CloudCacheSession ComputeSession(m_Client);
- CloudCacheSession StorageSession(m_StorageClient);
-
- {
- CloudCacheResult Result = BatchPutBlobsIfMissing(StorageSession, UpstreamData.Blobs);
- Bytes += Result.Bytes;
- ElapsedSeconds += Result.ElapsedSeconds;
- if (!Result.Success)
- {
- return {.Error{.ErrorCode = Result.ErrorCode ? Result.ErrorCode : -1,
- .Reason = !Result.Reason.empty() ? std::move(Result.Reason) : "Failed to upload blobs"},
- .Bytes = Bytes,
- .ElapsedSeconds = ElapsedSeconds};
- }
- UpstreamData.Blobs.clear();
- }
-
- {
- CloudCacheResult Result = BatchPutObjectsIfMissing(StorageSession, UpstreamData.Objects);
- Bytes += Result.Bytes;
- ElapsedSeconds += Result.ElapsedSeconds;
- if (!Result.Success)
- {
- return {.Error{.ErrorCode = Result.ErrorCode ? Result.ErrorCode : -1,
- .Reason = !Result.Reason.empty() ? std::move(Result.Reason) : "Failed to upload objects"},
- .Bytes = Bytes,
- .ElapsedSeconds = ElapsedSeconds};
- }
-
- PutRefResult RefResult = StorageSession.PutRef("requests"sv,
- UpstreamData.TaskId,
- UpstreamData.Objects[UpstreamData.TaskId].GetBuffer().AsIoBuffer(),
- ZenContentType::kCbObject);
- Log().debug("Put ref {} Need={} Bytes={} Duration={}s Result={}",
- UpstreamData.TaskId,
- RefResult.Needs.size(),
- RefResult.Bytes,
- RefResult.ElapsedSeconds,
- RefResult.Success);
-
- Bytes += RefResult.Bytes;
- ElapsedSeconds += RefResult.ElapsedSeconds;
- if (!RefResult.Success)
- {
- return {.Error{.ErrorCode = RefResult.ErrorCode ? RefResult.ErrorCode : -1,
- .Reason = !RefResult.Reason.empty() ? std::move(RefResult.Reason) : "Failed to add task ref"},
- .Bytes = Bytes,
- .ElapsedSeconds = ElapsedSeconds};
- }
- UpstreamData.Objects.clear();
- }
-
- CbObjectWriter Writer;
- Writer.AddString("c"sv, m_ChannelId);
- Writer.AddObjectAttachment("r"sv, UpstreamData.RequirementsId);
- Writer.BeginArray("t"sv);
- Writer.AddObjectAttachment(UpstreamData.TaskId);
- Writer.EndArray();
- CbObject TasksObject = Writer.Save();
- IoBuffer TasksData = TasksObject.GetBuffer().AsIoBuffer();
-
- CloudCacheResult Result = ComputeSession.PostComputeTasks(TasksData);
- Log().debug("Post compute task {} Bytes={} Duration={}s Result={}",
- TasksObject.GetHash(),
- Result.Bytes,
- Result.ElapsedSeconds,
- Result.Success);
- Bytes += Result.Bytes;
- ElapsedSeconds += Result.ElapsedSeconds;
- if (!Result.Success)
- {
- {
- std::scoped_lock Lock(m_TaskMutex);
- m_PendingTasks.erase(UpstreamData.TaskId);
- }
-
- return {.Error{.ErrorCode = Result.ErrorCode ? Result.ErrorCode : -1,
- .Reason = !Result.Reason.empty() ? std::move(Result.Reason) : "Failed to post compute task"},
- .Bytes = Bytes,
- .ElapsedSeconds = ElapsedSeconds};
- }
-
- Log().info("Task posted {}", UpstreamData.TaskId);
- return {.Bytes = Bytes, .ElapsedSeconds = ElapsedSeconds, .Success = true};
- }
- catch (std::exception& Err)
- {
- m_HealthOk = false;
- return {.Error{.ErrorCode = -1, .Reason = Err.what()}, .Bytes = Bytes, .ElapsedSeconds = ElapsedSeconds};
- }
- }
-
- [[nodiscard]] CloudCacheResult BatchPutBlobsIfMissing(CloudCacheSession& Session, const std::map<IoHash, IoBuffer>& Blobs)
- {
- if (Blobs.size() == 0)
- {
- return {.Success = true};
- }
-
- int64_t Bytes{};
- double ElapsedSeconds{};
-
- // Batch check for missing blobs
- std::set<IoHash> Keys;
- std::transform(Blobs.begin(), Blobs.end(), std::inserter(Keys, Keys.end()), [](const auto& It) { return It.first; });
-
- CloudCacheExistsResult ExistsResult = Session.BlobExists(Keys);
- Log().debug("Queried {} missing blobs Need={} Duration={}s Result={}",
- Keys.size(),
- ExistsResult.Needs.size(),
- ExistsResult.ElapsedSeconds,
- ExistsResult.Success);
- ElapsedSeconds += ExistsResult.ElapsedSeconds;
- if (!ExistsResult.Success)
- {
- return {.Bytes = Bytes,
- .ElapsedSeconds = ElapsedSeconds,
- .ErrorCode = ExistsResult.ErrorCode ? ExistsResult.ErrorCode : -1,
- .Reason = !ExistsResult.Reason.empty() ? std::move(ExistsResult.Reason) : "Failed to check if blobs exist"};
- }
-
- // TODO: Batch upload missing blobs
-
- for (const auto& Hash : ExistsResult.Needs)
- {
- CloudCacheResult Result = Session.PutBlob(Hash, Blobs.at(Hash));
- Log().debug("Put blob {} Bytes={} Duration={}s Result={}", Hash, Result.Bytes, Result.ElapsedSeconds, Result.Success);
- Bytes += Result.Bytes;
- ElapsedSeconds += Result.ElapsedSeconds;
- if (!Result.Success)
- {
- return {.Bytes = Bytes,
- .ElapsedSeconds = ElapsedSeconds,
- .ErrorCode = Result.ErrorCode ? Result.ErrorCode : -1,
- .Reason = !Result.Reason.empty() ? std::move(Result.Reason) : "Failed to put blobs"};
- }
- }
-
- return {.Bytes = Bytes, .ElapsedSeconds = ElapsedSeconds, .Success = true};
- }
-
- [[nodiscard]] CloudCacheResult BatchPutObjectsIfMissing(CloudCacheSession& Session, const std::map<IoHash, CbObject>& Objects)
- {
- if (Objects.size() == 0)
- {
- return {.Success = true};
- }
-
- int64_t Bytes{};
- double ElapsedSeconds{};
-
- // Batch check for missing objects
- std::set<IoHash> Keys;
- std::transform(Objects.begin(), Objects.end(), std::inserter(Keys, Keys.end()), [](const auto& It) { return It.first; });
-
- // Todo: Endpoint doesn't exist for objects
- CloudCacheExistsResult ExistsResult = Session.ObjectExists(Keys);
- Log().debug("Queried {} missing objects Need={} Duration={}s Result={}",
- Keys.size(),
- ExistsResult.Needs.size(),
- ExistsResult.ElapsedSeconds,
- ExistsResult.Success);
- ElapsedSeconds += ExistsResult.ElapsedSeconds;
- if (!ExistsResult.Success)
- {
- return {.Bytes = Bytes,
- .ElapsedSeconds = ElapsedSeconds,
- .ErrorCode = ExistsResult.ErrorCode ? ExistsResult.ErrorCode : -1,
- .Reason = !ExistsResult.Reason.empty() ? std::move(ExistsResult.Reason) : "Failed to check if objects exist"};
- }
-
- // TODO: Batch upload missing objects
-
- for (const auto& Hash : ExistsResult.Needs)
- {
- CloudCacheResult Result = Session.PutObject(Hash, Objects.at(Hash).GetBuffer().AsIoBuffer());
- Log().debug("Put object {} Bytes={} Duration={}s Result={}", Hash, Result.Bytes, Result.ElapsedSeconds, Result.Success);
- Bytes += Result.Bytes;
- ElapsedSeconds += Result.ElapsedSeconds;
- if (!Result.Success)
- {
- return {.Bytes = Bytes,
- .ElapsedSeconds = ElapsedSeconds,
- .ErrorCode = Result.ErrorCode ? Result.ErrorCode : -1,
- .Reason = !Result.Reason.empty() ? std::move(Result.Reason) : "Failed to put objects"};
- }
- }
-
- return {.Bytes = Bytes, .ElapsedSeconds = ElapsedSeconds, .Success = true};
- }
-
- enum class ComputeTaskState : int32_t
- {
- Queued = 0,
- Executing = 1,
- Complete = 2,
- };
-
- enum class ComputeTaskOutcome : int32_t
- {
- Success = 0,
- Failed = 1,
- Cancelled = 2,
- NoResult = 3,
- Exipred = 4,
- BlobNotFound = 5,
- Exception = 6,
- };
-
- [[nodiscard]] static std::string_view ComputeTaskStateToString(const ComputeTaskState Outcome)
- {
- switch (Outcome)
- {
- case ComputeTaskState::Queued:
- return "Queued"sv;
- case ComputeTaskState::Executing:
- return "Executing"sv;
- case ComputeTaskState::Complete:
- return "Complete"sv;
- };
- return "Unknown"sv;
- }
-
- [[nodiscard]] static std::string_view ComputeTaskOutcomeToString(const ComputeTaskOutcome Outcome)
- {
- switch (Outcome)
- {
- case ComputeTaskOutcome::Success:
- return "Success"sv;
- case ComputeTaskOutcome::Failed:
- return "Failed"sv;
- case ComputeTaskOutcome::Cancelled:
- return "Cancelled"sv;
- case ComputeTaskOutcome::NoResult:
- return "NoResult"sv;
- case ComputeTaskOutcome::Exipred:
- return "Exipred"sv;
- case ComputeTaskOutcome::BlobNotFound:
- return "BlobNotFound"sv;
- case ComputeTaskOutcome::Exception:
- return "Exception"sv;
- };
- return "Unknown"sv;
- }
-
- virtual GetUpstreamApplyUpdatesResult GetUpdates(WorkerThreadPool& ThreadPool) override
- {
- int64_t Bytes{};
- double ElapsedSeconds{};
-
- {
- std::scoped_lock Lock(m_TaskMutex);
- if (m_PendingTasks.empty())
- {
- if (m_CompletedTasks.empty())
- {
- // Nothing to do.
- return {.Success = true};
- }
-
- UpstreamApplyCompleted CompletedTasks;
- std::swap(CompletedTasks, m_CompletedTasks);
- return {.Bytes = Bytes, .ElapsedSeconds = ElapsedSeconds, .Completed = std::move(CompletedTasks), .Success = true};
- }
- }
-
- try
- {
- CloudCacheSession ComputeSession(m_Client);
-
- CloudCacheResult UpdatesResult = ComputeSession.GetComputeUpdates(m_ChannelId);
- Log().debug("Get compute updates Bytes={} Duration={}s Result={}",
- UpdatesResult.Bytes,
- UpdatesResult.ElapsedSeconds,
- UpdatesResult.Success);
- Bytes += UpdatesResult.Bytes;
- ElapsedSeconds += UpdatesResult.ElapsedSeconds;
- if (!UpdatesResult.Success)
- {
- return {.Error{.ErrorCode = UpdatesResult.ErrorCode, .Reason = std::move(UpdatesResult.Reason)},
- .Bytes = Bytes,
- .ElapsedSeconds = ElapsedSeconds};
- }
-
- if (!UpdatesResult.Success)
- {
- return {.Error{.ErrorCode = -1, .Reason = "Failed get task updates"}, .Bytes = Bytes, .ElapsedSeconds = ElapsedSeconds};
- }
-
- CbObject TaskStatus = LoadCompactBinaryObject(std::move(UpdatesResult.Response));
-
- for (auto& It : TaskStatus["u"sv])
- {
- CbObjectView Status = It.AsObjectView();
- IoHash TaskId = Status["h"sv].AsHash();
- const ComputeTaskState State = (ComputeTaskState)Status["s"sv].AsInt32();
- const ComputeTaskOutcome Outcome = (ComputeTaskOutcome)Status["o"sv].AsInt32();
-
- Log().info("Task {} State={}", TaskId, ComputeTaskStateToString(State));
-
- // Only completed tasks need to be processed
- if (State != ComputeTaskState::Complete)
- {
- continue;
- }
-
- IoHash WorkerId{};
- IoHash ActionId{};
- UpstreamApplyType ApplyType{};
-
- {
- std::scoped_lock Lock(m_TaskMutex);
- auto TaskIt = m_PendingTasks.find(TaskId);
- if (TaskIt != m_PendingTasks.end())
- {
- WorkerId = TaskIt->second.WorkerDescriptor.GetHash();
- ActionId = TaskIt->second.Action.GetHash();
- ApplyType = TaskIt->second.Type;
- m_PendingTasks.erase(TaskIt);
- }
- }
-
- if (WorkerId == IoHash::Zero)
- {
- Log().warn("Task {} missing from pending tasks", TaskId);
- continue;
- }
-
- if (Outcome != ComputeTaskOutcome::Success)
- {
- const std::string_view Detail = Status["d"sv].AsString();
- {
- std::scoped_lock Lock(m_TaskMutex);
- m_CompletedTasks[WorkerId][ActionId] = {
- .Error{.ErrorCode = -1, .Reason = fmt::format("Task {} {}", ComputeTaskOutcomeToString(Outcome), Detail)}};
- }
- continue;
- }
-
- ThreadPool.ScheduleWork([this,
- ApplyType,
- ResultHash = Status["r"sv].AsHash(),
- TaskId = std::move(TaskId),
- WorkerId = std::move(WorkerId),
- ActionId = std::move(ActionId)]() {
- GetUpstreamApplyResult Result = ProcessTaskStatus(ApplyType, ResultHash);
- Log().debug("Task Processed {} Files={} Attachments={} ExitCode={}",
- TaskId,
- Result.OutputFiles.size(),
- Result.OutputPackage.GetAttachments().size(),
- Result.Error.ErrorCode);
- {
- std::scoped_lock Lock(m_TaskMutex);
- m_CompletedTasks[WorkerId][ActionId] = std::move(Result);
- }
- });
- }
-
- {
- std::scoped_lock Lock(m_TaskMutex);
- if (m_CompletedTasks.empty())
- {
- // Nothing to do.
- return {.Bytes = Bytes, .ElapsedSeconds = ElapsedSeconds, .Success = true};
- }
- UpstreamApplyCompleted CompletedTasks;
- std::swap(CompletedTasks, m_CompletedTasks);
- return {.Bytes = Bytes, .ElapsedSeconds = ElapsedSeconds, .Completed = std::move(CompletedTasks), .Success = true};
- }
- }
- catch (std::exception& Err)
- {
- m_HealthOk = false;
- return {
- .Error{.ErrorCode = -1, .Reason = Err.what()},
- .Bytes = Bytes,
- .ElapsedSeconds = ElapsedSeconds,
- };
- }
- }
-
- virtual UpstreamApplyEndpointStats& Stats() override { return m_Stats; }
-
- private:
- spdlog::logger& Log() { return m_Log; }
-
- spdlog::logger& m_Log;
- CasStore& m_CasStore;
- CidStore& m_CidStore;
- AuthMgr& m_AuthMgr;
- std::string m_DisplayName;
- RefPtr<CloudCacheClient> m_Client;
- RefPtr<CloudCacheClient> m_StorageClient;
- UpstreamApplyEndpointStats m_Stats;
- std::atomic_bool m_HealthOk{false};
- std::string m_ChannelId;
-
- std::mutex m_TaskMutex;
- std::unordered_map<IoHash, UpstreamApplyRecord> m_PendingTasks;
- UpstreamApplyCompleted m_CompletedTasks;
-
- struct UpstreamData
- {
- std::map<IoHash, IoBuffer> Blobs;
- std::map<IoHash, CbObject> Objects;
- IoHash TaskId;
- IoHash RequirementsId;
- };
-
- struct UpstreamDirectory
- {
- std::filesystem::path Path;
- std::map<std::string, UpstreamDirectory> Directories;
- std::set<std::string> Files;
- };
-
- [[nodiscard]] GetUpstreamApplyResult ProcessTaskStatus(const UpstreamApplyType ApplyType, const IoHash& ResultHash)
- {
- try
- {
- CloudCacheSession Session(m_StorageClient);
-
- int64_t Bytes{};
- double ElapsedSeconds{};
-
- // Get Result object and all Object Attachments + Binary Attachment IDs
- CloudCacheResult ObjectRefResult = Session.GetRef("responses"sv, ResultHash, ZenContentType::kCbObject);
- Log().debug("Get ref {} Bytes={} Duration={}s Result={}",
- ResultHash,
- ObjectRefResult.Bytes,
- ObjectRefResult.ElapsedSeconds,
- ObjectRefResult.Success);
- Bytes += ObjectRefResult.Bytes;
- ElapsedSeconds += ObjectRefResult.ElapsedSeconds;
-
- if (!ObjectRefResult.Success)
- {
- return {.Error{.ErrorCode = -1, .Reason = "Failed to get result object data"},
- .Bytes = Bytes,
- .ElapsedSeconds = ElapsedSeconds};
- }
-
- std::vector<IoHash> ObjectsToIterate;
- std::map<IoHash, IoBuffer> ObjectData;
- std::map<IoHash, IoBuffer> BinaryData;
-
- ObjectData[ResultHash] = ObjectRefResult.Response;
- CbObject Object = LoadCompactBinaryObject(ObjectData[ResultHash]);
- Object.IterateAttachments([&](CbFieldView Field) {
- if (Field.IsObjectAttachment())
- {
- const IoHash AttachmentHash = Field.AsObjectAttachment();
- if (!ObjectData.contains(AttachmentHash))
- {
- ObjectsToIterate.push_back(AttachmentHash);
- }
- }
- else if (Field.IsBinaryAttachment())
- {
- const IoHash AttachmentHash = Field.AsBinaryAttachment();
- BinaryData[AttachmentHash] = {};
- }
- });
-
- while (!ObjectsToIterate.empty())
- {
- const IoHash Hash = ObjectsToIterate.back();
- ObjectsToIterate.pop_back();
-
- CloudCacheResult ObjectResult = Session.GetObject(Hash);
- Log().debug("Get object {} Bytes={} Duration={}s Result={}",
- Hash,
- ObjectResult.Bytes,
- ObjectResult.ElapsedSeconds,
- ObjectResult.Success);
- Bytes += ObjectRefResult.Bytes;
- ElapsedSeconds += ObjectRefResult.ElapsedSeconds;
- if (!ObjectResult.Success)
- {
- return {.Error{.ErrorCode = ObjectResult.ErrorCode, .Reason = std::move(ObjectResult.Reason)},
- .Bytes = Bytes,
- .ElapsedSeconds = ElapsedSeconds};
- }
- ObjectData[Hash] = std::move(ObjectResult.Response);
-
- CbObject IterateObject = LoadCompactBinaryObject(ObjectData[Hash]);
- IterateObject.IterateAttachments([&](CbFieldView Field) {
- if (Field.IsObjectAttachment())
- {
- const IoHash AttachmentHash = Field.AsObjectAttachment();
- if (!ObjectData.contains(AttachmentHash))
- {
- ObjectsToIterate.push_back(AttachmentHash);
- }
- }
- else if (Field.IsBinaryAttachment())
- {
- const IoHash AttachmentHash = Field.AsBinaryAttachment();
- BinaryData[AttachmentHash] = {};
- }
- });
- }
-
- // Batch load all binary data
- for (auto& It : BinaryData)
- {
- CloudCacheResult BlobResult = Session.GetBlob(It.first);
- Log().debug("Get blob {} Bytes={} Duration={}s Result={}",
- It.first,
- BlobResult.Bytes,
- BlobResult.ElapsedSeconds,
- BlobResult.Success);
- Bytes += ObjectRefResult.Bytes;
- ElapsedSeconds += ObjectRefResult.ElapsedSeconds;
- if (!BlobResult.Success)
- {
- return {.Error{.ErrorCode = BlobResult.ErrorCode, .Reason = std::move(BlobResult.Reason)},
- .Bytes = Bytes,
- .ElapsedSeconds = ElapsedSeconds};
- }
- It.second = std::move(BlobResult.Response);
- }
-
- CbObject ResultObject = LoadCompactBinaryObject(ObjectData[ResultHash]);
- int32_t ExitCode = ResultObject["e"sv].AsInt32();
- IoHash StdOutHash = ResultObject["so"sv].AsBinaryAttachment();
- IoHash StdErrHash = ResultObject["se"sv].AsBinaryAttachment();
- IoHash OutputHash = ResultObject["o"sv].AsObjectAttachment();
-
- std::string StdOut = std::string((const char*)BinaryData[StdOutHash].GetData(), BinaryData[StdOutHash].GetSize());
- std::string StdErr = std::string((const char*)BinaryData[StdErrHash].GetData(), BinaryData[StdErrHash].GetSize());
-
- if (OutputHash == IoHash::Zero)
- {
- return {.Error{.ErrorCode = ExitCode, .Reason = "Task completed with no output object"},
- .Bytes = Bytes,
- .ElapsedSeconds = ElapsedSeconds,
- .StdOut = std::move(StdOut),
- .StdErr = std::move(StdErr)};
- }
-
- CbObject OutputObject = LoadCompactBinaryObject(ObjectData[OutputHash]);
-
- switch (ApplyType)
- {
- case UpstreamApplyType::Simple:
- {
- std::map<std::filesystem::path, IoHash> OutputFiles;
-
- ResolveMerkleTreeDirectory(""sv, OutputHash, ObjectData, OutputFiles);
-
- return {.OutputFiles = std::move(OutputFiles),
- .FileData = std::move(BinaryData),
- .Bytes = Bytes,
- .ElapsedSeconds = ElapsedSeconds,
- .StdOut = std::move(StdOut),
- .StdErr = std::move(StdErr),
- .Success = true};
- }
- break;
- case UpstreamApplyType::Asset:
- {
- if (ExitCode != 0)
- {
- return {.Error{.ErrorCode = ExitCode, .Reason = "Task completed with errors"},
- .Bytes = Bytes,
- .ElapsedSeconds = ElapsedSeconds,
- .StdOut = std::move(StdOut),
- .StdErr = std::move(StdErr)};
- }
-
- // Get build.output
- IoHash BuildOutputId;
- IoBuffer BuildOutput;
- for (auto& It : OutputObject["f"sv])
- {
- const CbObjectView FileObject = It.AsObjectView();
- if (FileObject["n"sv].AsString() == "Build.output"sv)
- {
- BuildOutputId = FileObject["h"sv].AsBinaryAttachment();
- BuildOutput = BinaryData[BuildOutputId];
- break;
- }
- }
-
- if (BuildOutput.GetSize() == 0)
- {
- return {.Error{.ErrorCode = ExitCode, .Reason = "Build.output file not found in task results"},
- .Bytes = Bytes,
- .ElapsedSeconds = ElapsedSeconds,
- .StdOut = std::move(StdOut),
- .StdErr = std::move(StdErr)};
- }
-
- // Get Output directory node
- IoBuffer OutputDirectoryTree;
- for (auto& It : OutputObject["d"sv])
- {
- const CbObjectView DirectoryObject = It.AsObjectView();
- if (DirectoryObject["n"sv].AsString() == "Outputs"sv)
- {
- OutputDirectoryTree = ObjectData[DirectoryObject["h"sv].AsObjectAttachment()];
- break;
- }
- }
-
- if (OutputDirectoryTree.GetSize() == 0)
- {
- return {.Error{.ErrorCode = ExitCode, .Reason = "Outputs directory not found in task results"},
- .Bytes = Bytes,
- .ElapsedSeconds = ElapsedSeconds,
- .StdOut = std::move(StdOut),
- .StdErr = std::move(StdErr)};
- }
-
- // load build.output as CbObject
-
- // Move Outputs from Horde to CbPackage
-
- std::unordered_map<IoHash, IoHash> CidToCompressedId;
- CbPackage OutputPackage;
- CbObject OutputDirectoryTreeObject = LoadCompactBinaryObject(OutputDirectoryTree);
- int64_t TotalAttachmentBytes = 0;
- int64_t TotalRawAttachmentBytes = 0;
-
- for (auto& It : OutputDirectoryTreeObject["f"sv])
- {
- CbObjectView FileObject = It.AsObjectView();
- // Name is the uncompressed hash
- IoHash DecompressedId = IoHash::FromHexString(FileObject["n"sv].AsString());
- // Hash is the compressed data hash, and how it is stored in Horde
- IoHash CompressedId = FileObject["h"sv].AsBinaryAttachment();
-
- if (!BinaryData.contains(CompressedId))
- {
- Log().warn("Object attachment chunk not retrieved from Horde {}", CompressedId);
- return {.Error{.ErrorCode = -1, .Reason = "Object attachment chunk not retrieved from Horde"},
- .Bytes = Bytes,
- .ElapsedSeconds = ElapsedSeconds};
- }
- CidToCompressedId[DecompressedId] = CompressedId;
- }
-
- // Iterate attachments, verify all chunks exist, and add to CbPackage
- bool AnyErrors = false;
- CbObject BuildOutputObject = LoadCompactBinaryObject(BuildOutput);
- BuildOutputObject.IterateAttachments([&](CbFieldView Field) {
- const IoHash DecompressedId = Field.AsHash();
- if (!CidToCompressedId.contains(DecompressedId))
- {
- Log().warn("Attachment not found {}", DecompressedId);
- AnyErrors = true;
- return;
- }
- const IoHash& CompressedId = CidToCompressedId.at(DecompressedId);
-
- if (!BinaryData.contains(CompressedId))
- {
- Log().warn("Missing output {} compressed {} uncompressed", CompressedId, DecompressedId);
- AnyErrors = true;
- return;
- }
-
- CompressedBuffer AttachmentBuffer =
- CompressedBuffer::FromCompressed(SharedBuffer(BinaryData[CompressedId]));
-
- if (!AttachmentBuffer)
- {
- Log().warn(
- "Invalid output encountered (not valid CompressedBuffer format) {} compressed {} uncompressed",
- CompressedId,
- DecompressedId);
- AnyErrors = true;
- return;
- }
-
- TotalAttachmentBytes += AttachmentBuffer.GetCompressedSize();
- TotalRawAttachmentBytes += AttachmentBuffer.GetRawSize();
-
- CbAttachment Attachment(AttachmentBuffer);
- OutputPackage.AddAttachment(Attachment);
- });
-
- if (AnyErrors)
- {
- return {.Error{.ErrorCode = -1, .Reason = "Failed to get result object attachment data"},
- .Bytes = Bytes,
- .ElapsedSeconds = ElapsedSeconds,
- .StdOut = std::move(StdOut),
- .StdErr = std::move(StdErr)};
- }
-
- OutputPackage.SetObject(BuildOutputObject);
-
- return {.OutputPackage = std::move(OutputPackage),
- .TotalAttachmentBytes = TotalAttachmentBytes,
- .TotalRawAttachmentBytes = TotalRawAttachmentBytes,
- .Bytes = Bytes,
- .ElapsedSeconds = ElapsedSeconds,
- .StdOut = std::move(StdOut),
- .StdErr = std::move(StdErr),
- .Success = true};
- }
- break;
- }
-
- return {.Error{.ErrorCode = ExitCode, .Reason = "Unknown apply type"},
- .Bytes = Bytes,
- .ElapsedSeconds = ElapsedSeconds,
- .StdOut = std::move(StdOut),
- .StdErr = std::move(StdErr)};
- }
- catch (std::exception& Err)
- {
- return {.Error{.ErrorCode = -1, .Reason = Err.what()}};
- }
- }
-
- [[nodiscard]] bool ProcessApplyKey(const UpstreamApplyRecord& ApplyRecord, UpstreamData& Data)
- {
- std::string ExecutablePath;
- std::string WorkingDirectory;
- std::vector<std::string> Arguments;
- std::map<std::string, std::string> Environment;
- std::set<std::filesystem::path> InputFiles;
- std::set<std::string> Outputs;
- std::map<std::filesystem::path, IoHash> InputFileHashes;
-
- ExecutablePath = ApplyRecord.WorkerDescriptor["path"sv].AsString();
- if (ExecutablePath.empty())
- {
- Log().warn("process apply upstream FAILED, '{}', path missing from worker descriptor",
- ApplyRecord.WorkerDescriptor.GetHash());
- return false;
- }
-
- WorkingDirectory = ApplyRecord.WorkerDescriptor["workdir"sv].AsString();
-
- for (auto& It : ApplyRecord.WorkerDescriptor["executables"sv])
- {
- CbObjectView FileEntry = It.AsObjectView();
- if (!ProcessFileEntry(FileEntry, InputFiles, InputFileHashes, Data.Blobs))
- {
- return false;
- }
- }
-
- for (auto& It : ApplyRecord.WorkerDescriptor["files"sv])
- {
- CbObjectView FileEntry = It.AsObjectView();
- if (!ProcessFileEntry(FileEntry, InputFiles, InputFileHashes, Data.Blobs))
- {
- return false;
- }
- }
-
- for (auto& It : ApplyRecord.WorkerDescriptor["dirs"sv])
- {
- std::string_view Directory = It.AsString();
- std::string DummyFile = fmt::format("{}/.zen_empty_file", Directory);
- InputFiles.insert(DummyFile);
- Data.Blobs[EmptyBufferId] = EmptyBuffer;
- InputFileHashes[DummyFile] = EmptyBufferId;
- }
-
- for (auto& It : ApplyRecord.WorkerDescriptor["environment"sv])
- {
- std::string_view Env = It.AsString();
- auto Index = Env.find('=');
- if (Index == std::string_view::npos)
- {
- Log().warn("process apply upstream FAILED, environment '{}' malformed", Env);
- return false;
- }
-
- Environment[std::string(Env.substr(0, Index))] = Env.substr(Index + 1);
- }
-
- switch (ApplyRecord.Type)
- {
- case UpstreamApplyType::Simple:
- {
- for (auto& It : ApplyRecord.WorkerDescriptor["arguments"sv])
- {
- Arguments.push_back(std::string(It.AsString()));
- }
-
- for (auto& It : ApplyRecord.WorkerDescriptor["outputs"sv])
- {
- Outputs.insert(std::string(It.AsString()));
- }
- }
- break;
- case UpstreamApplyType::Asset:
- {
- static const std::filesystem::path BuildActionPath = "Build.action"sv;
- static const std::filesystem::path InputPath = "Inputs"sv;
- const IoHash ActionId = ApplyRecord.Action.GetHash();
-
- Arguments.push_back("-Build=build.action");
- Outputs.insert("Build.output");
- Outputs.insert("Outputs");
-
- InputFiles.insert(BuildActionPath);
- InputFileHashes[BuildActionPath] = ActionId;
- Data.Blobs[ActionId] = IoBufferBuilder::MakeCloneFromMemory(ApplyRecord.Action.GetBuffer().GetData(),
- ApplyRecord.Action.GetBuffer().GetSize());
-
- bool AnyErrors = false;
- ApplyRecord.Action.IterateAttachments([&](CbFieldView Field) {
- const IoHash Cid = Field.AsHash();
- const std::filesystem::path FilePath = {InputPath / Cid.ToHexString()};
- IoBuffer DataBuffer = m_CidStore.FindChunkByCid(Cid);
-
- if (!DataBuffer)
- {
- Log().warn("process apply upstream FAILED, input CID chunk '{}' missing", Cid);
- AnyErrors = true;
- return;
- }
-
- if (InputFiles.contains(FilePath))
- {
- return;
- }
-
- const IoHash CompressedId = IoHash::HashBuffer(DataBuffer.GetData(), DataBuffer.GetSize());
-
- InputFiles.insert(FilePath);
- InputFileHashes[FilePath] = CompressedId;
- Data.Blobs[CompressedId] = std::move(DataBuffer);
- });
-
- if (AnyErrors)
- {
- return false;
- }
- }
- break;
- }
-
- const UpstreamDirectory RootDirectory = BuildDirectoryTree(InputFiles);
-
- CbObject Sandbox = BuildMerkleTreeDirectory(RootDirectory, InputFileHashes, Data.Blobs, Data.Objects);
- const IoHash SandboxHash = Sandbox.GetHash();
- Data.Objects[SandboxHash] = std::move(Sandbox);
-
- {
- std::string_view HostPlatform = ApplyRecord.WorkerDescriptor["host"sv].AsString();
- if (HostPlatform.empty())
- {
- Log().warn("process apply upstream FAILED, 'host' platform not provided");
- return false;
- }
-
- int32_t LogicalCores = ApplyRecord.WorkerDescriptor["cores"sv].AsInt32();
- int64_t Memory = ApplyRecord.WorkerDescriptor["memory"sv].AsInt64();
- bool Exclusive = ApplyRecord.WorkerDescriptor["exclusive"sv].AsBool();
-
- std::string Condition = fmt::format("Platform == '{}'", HostPlatform);
- if (HostPlatform == "Win64")
- {
- // TODO
- // Condition += " && Pool == 'Win-RemoteExec'";
- }
-
- std::map<std::string_view, int64_t> Resources;
- if (LogicalCores > 0)
- {
- Resources["LogicalCores"sv] = LogicalCores;
- }
- if (Memory > 0)
- {
- Resources["RAM"sv] = std::max(Memory / 1024LL / 1024LL / 1024LL, 1LL);
- }
-
- CbObject Requirements = BuildRequirements(Condition, Resources, Exclusive);
- const IoHash RequirementsId = Requirements.GetHash();
- Data.Objects[RequirementsId] = std::move(Requirements);
- Data.RequirementsId = RequirementsId;
- }
-
- CbObject Task = BuildTask(ExecutablePath, Arguments, Environment, WorkingDirectory, SandboxHash, Data.RequirementsId, Outputs);
-
- const IoHash TaskId = Task.GetHash();
- Data.Objects[TaskId] = std::move(Task);
- Data.TaskId = TaskId;
-
- return true;
- }
-
- [[nodiscard]] bool ProcessFileEntry(const CbObjectView& FileEntry,
- std::set<std::filesystem::path>& InputFiles,
- std::map<std::filesystem::path, IoHash>& InputFileHashes,
- std::map<IoHash, IoBuffer>& Blobs)
- {
- const std::filesystem::path FilePath = FileEntry["name"sv].AsString();
- const IoHash ChunkId = FileEntry["hash"sv].AsHash();
- const uint64_t Size = FileEntry["size"sv].AsUInt64();
- IoBuffer DataBuffer = m_CasStore.FindChunk(ChunkId);
-
- if (!DataBuffer)
- {
- Log().warn("process apply upstream FAILED, worker CAS chunk '{}' missing", ChunkId);
- return false;
- }
-
- if (DataBuffer.Size() != Size)
- {
- Log().warn("process apply upstream FAILED, worker CAS chunk '{}' size: {}, action spec expected {}",
- ChunkId,
- DataBuffer.Size(),
- Size);
- return false;
- }
-
- if (InputFiles.contains(FilePath))
- {
- Log().warn("process apply upstream FAILED, worker CAS chunk '{}' size: {} duplicate filename {}", ChunkId, Size, FilePath);
- return false;
- }
-
- InputFiles.insert(FilePath);
- InputFileHashes[FilePath] = ChunkId;
- Blobs[ChunkId] = std::move(DataBuffer);
- return true;
- }
-
- [[nodiscard]] UpstreamDirectory BuildDirectoryTree(const std::set<std::filesystem::path>& InputFiles)
- {
- static const std::filesystem::path RootPath;
- std::map<std::filesystem::path, UpstreamDirectory*> AllDirectories;
- UpstreamDirectory RootDirectory = {.Path = RootPath};
-
- AllDirectories[RootPath] = &RootDirectory;
-
- // Build tree from flat list
- for (const auto& Path : InputFiles)
- {
- if (Path.has_parent_path())
- {
- if (!AllDirectories.contains(Path.parent_path()))
- {
- std::stack<std::string> PathSplit;
- {
- std::filesystem::path ParentPath = Path.parent_path();
- PathSplit.push(ParentPath.filename().string());
- while (ParentPath.has_parent_path())
- {
- ParentPath = ParentPath.parent_path();
- PathSplit.push(ParentPath.filename().string());
- }
- }
- UpstreamDirectory* ParentPtr = &RootDirectory;
- while (!PathSplit.empty())
- {
- if (!ParentPtr->Directories.contains(PathSplit.top()))
- {
- std::filesystem::path NewParentPath = {ParentPtr->Path / PathSplit.top()};
- ParentPtr->Directories[PathSplit.top()] = {.Path = NewParentPath};
- AllDirectories[NewParentPath] = &ParentPtr->Directories[PathSplit.top()];
- }
- ParentPtr = &ParentPtr->Directories[PathSplit.top()];
- PathSplit.pop();
- }
- }
-
- AllDirectories[Path.parent_path()]->Files.insert(Path.filename().string());
- }
- else
- {
- RootDirectory.Files.insert(Path.filename().string());
- }
- }
-
- return RootDirectory;
- }
-
- [[nodiscard]] CbObject BuildMerkleTreeDirectory(const UpstreamDirectory& RootDirectory,
- const std::map<std::filesystem::path, IoHash>& InputFileHashes,
- const std::map<IoHash, IoBuffer>& Blobs,
- std::map<IoHash, CbObject>& Objects)
- {
- CbObjectWriter DirectoryTreeWriter;
-
- if (!RootDirectory.Files.empty())
- {
- DirectoryTreeWriter.BeginArray("f"sv);
- for (const auto& File : RootDirectory.Files)
- {
- const std::filesystem::path FilePath = {RootDirectory.Path / File};
- const IoHash& FileHash = InputFileHashes.at(FilePath);
- const uint64_t FileSize = Blobs.at(FileHash).Size();
- DirectoryTreeWriter.BeginObject();
- DirectoryTreeWriter.AddString("n"sv, File);
- DirectoryTreeWriter.AddBinaryAttachment("h"sv, FileHash);
- DirectoryTreeWriter.AddInteger("s"sv, FileSize); // Size
- // DirectoryTreeWriter.AddInteger("a"sv, 0); // Attributes Currently unneeded
- DirectoryTreeWriter.EndObject();
- }
- DirectoryTreeWriter.EndArray();
- }
-
- if (!RootDirectory.Directories.empty())
- {
- DirectoryTreeWriter.BeginArray("d"sv);
- for (const auto& Item : RootDirectory.Directories)
- {
- CbObject Directory = BuildMerkleTreeDirectory(Item.second, InputFileHashes, Blobs, Objects);
- const IoHash DirectoryHash = Directory.GetHash();
- Objects[DirectoryHash] = std::move(Directory);
-
- DirectoryTreeWriter.BeginObject();
- DirectoryTreeWriter.AddString("n"sv, Item.first);
- DirectoryTreeWriter.AddObjectAttachment("h"sv, DirectoryHash);
- DirectoryTreeWriter.EndObject();
- }
- DirectoryTreeWriter.EndArray();
- }
-
- return DirectoryTreeWriter.Save();
- }
-
- void ResolveMerkleTreeDirectory(const std::filesystem::path& ParentDirectory,
- const IoHash& DirectoryHash,
- const std::map<IoHash, IoBuffer>& Objects,
- std::map<std::filesystem::path, IoHash>& OutputFiles)
- {
- CbObject Directory = LoadCompactBinaryObject(Objects.at(DirectoryHash));
-
- for (auto& It : Directory["f"sv])
- {
- const CbObjectView FileObject = It.AsObjectView();
- const std::filesystem::path Path = ParentDirectory / FileObject["n"sv].AsString();
-
- OutputFiles[Path] = FileObject["h"sv].AsBinaryAttachment();
- }
-
- for (auto& It : Directory["d"sv])
- {
- const CbObjectView DirectoryObject = It.AsObjectView();
-
- ResolveMerkleTreeDirectory(ParentDirectory / DirectoryObject["n"sv].AsString(),
- DirectoryObject["h"sv].AsObjectAttachment(),
- Objects,
- OutputFiles);
- }
- }
-
- [[nodiscard]] CbObject BuildRequirements(const std::string_view Condition,
- const std::map<std::string_view, int64_t>& Resources,
- const bool Exclusive)
- {
- CbObjectWriter Writer;
- Writer.AddString("c", Condition);
- if (!Resources.empty())
- {
- Writer.BeginArray("r");
- for (const auto& Resource : Resources)
- {
- Writer.BeginArray();
- Writer.AddString(Resource.first);
- Writer.AddInteger(Resource.second);
- Writer.EndArray();
- }
- Writer.EndArray();
- }
- Writer.AddBool("e", Exclusive);
- return Writer.Save();
- }
-
- [[nodiscard]] CbObject BuildTask(const std::string_view Executable,
- const std::vector<std::string>& Arguments,
- const std::map<std::string, std::string>& Environment,
- const std::string_view WorkingDirectory,
- const IoHash& SandboxHash,
- const IoHash& RequirementsId,
- const std::set<std::string>& Outputs)
- {
- CbObjectWriter TaskWriter;
- TaskWriter.AddString("e"sv, Executable);
-
- if (!Arguments.empty())
- {
- TaskWriter.BeginArray("a"sv);
- for (const auto& Argument : Arguments)
- {
- TaskWriter.AddString(Argument);
- }
- TaskWriter.EndArray();
- }
-
- if (!Environment.empty())
- {
- TaskWriter.BeginArray("v"sv);
- for (const auto& Env : Environment)
- {
- TaskWriter.BeginArray();
- TaskWriter.AddString(Env.first);
- TaskWriter.AddString(Env.second);
- TaskWriter.EndArray();
- }
- TaskWriter.EndArray();
- }
-
- if (!WorkingDirectory.empty())
- {
- TaskWriter.AddString("w"sv, WorkingDirectory);
- }
-
- TaskWriter.AddObjectAttachment("s"sv, SandboxHash);
- TaskWriter.AddObjectAttachment("r"sv, RequirementsId);
-
- // Outputs
- if (!Outputs.empty())
- {
- TaskWriter.BeginArray("o"sv);
- for (const auto& Output : Outputs)
- {
- TaskWriter.AddString(Output);
- }
- TaskWriter.EndArray();
- }
-
- return TaskWriter.Save();
- }
- };
-} // namespace detail
-
-//////////////////////////////////////////////////////////////////////////
-
struct UpstreamApplyStats
{
static constexpr uint64_t MaxSampleCount = 1000ull;
@@ -1360,7 +83,8 @@ public:
, m_CasStore(CasStore)
, m_CidStore(CidStore)
, m_Stats(Options.StatsEnabled)
- , m_AsyncWorkPool(Options.ThreadCount)
+ , m_UpstreamAsyncWorkPool(Options.UpstreamThreadCount)
+ , m_DownstreamAsyncWorkPool(Options.DownstreamThreadCount)
{
}
@@ -1423,7 +147,9 @@ public:
m_ApplyTasks[WorkerId][ActionId] = {.State = UpstreamApplyState::Queued, .Result{}, .ExpireTime = std::move(ExpireTime)};
}
- m_AsyncWorkPool.ScheduleWork([this, ApplyRecord = std::move(ApplyRecord)]() { ProcessApplyRecord(std::move(ApplyRecord)); });
+ ApplyRecord.Timepoints["zen-queue-added"] = DateTime::NowTicks();
+ m_UpstreamAsyncWorkPool.ScheduleWork(
+ [this, ApplyRecord = std::move(ApplyRecord)]() { ProcessApplyRecord(std::move(ApplyRecord)); });
return {.ApplyId = ActionId, .Success = true};
}
@@ -1447,8 +173,10 @@ public:
virtual void GetStatus(CbObjectWriter& Status) override
{
- Status << "worker_threads" << m_Options.ThreadCount;
- Status << "queue_count" << m_AsyncWorkPool.PendingWork();
+ Status << "upstream_worker_threads" << m_Options.UpstreamThreadCount;
+ Status << "upstream_queue_count" << m_UpstreamAsyncWorkPool.PendingWork();
+ Status << "downstream_worker_threads" << m_Options.DownstreamThreadCount;
+ Status << "downstream_queue_count" << m_DownstreamAsyncWorkPool.PendingWork();
Status.BeginArray("endpoints");
for (const auto& Ep : m_Endpoints)
@@ -1501,11 +229,14 @@ private:
{
if (Endpoint->IsHealthy())
{
- PostUpstreamApplyResult Result = Endpoint->PostApply(std::move(ApplyRecord));
+ ApplyRecord.Timepoints["zen-queue-dispatched"] = DateTime::NowTicks();
+ PostUpstreamApplyResult Result = Endpoint->PostApply(std::move(ApplyRecord));
{
std::scoped_lock Lock(m_ApplyTasksMutex);
if (auto Status = FindStatus(WorkerId, ActionId); Status != nullptr)
{
+ Status->Timepoints.merge(Result.Timepoints);
+
if (Result.Success)
{
Status->State = UpstreamApplyState::Executing;
@@ -1553,7 +284,7 @@ private:
{
if (Endpoint->IsHealthy())
{
- GetUpstreamApplyUpdatesResult Result = Endpoint->GetUpdates(m_AsyncWorkPool);
+ GetUpstreamApplyUpdatesResult Result = Endpoint->GetUpdates(m_DownstreamAsyncWorkPool);
m_Stats.Add(*Endpoint, Result);
if (!Result.Success)
@@ -1572,6 +303,9 @@ private:
{
Status->State = UpstreamApplyState::Complete;
Status->Result = std::move(It2.second);
+ Status->Result.Timepoints.merge(Status->Timepoints);
+ Status->Result.Timepoints["zen-queue-complete"] = DateTime::NowTicks();
+ Status->Timepoints.clear();
}
}
}
@@ -1686,7 +420,8 @@ private:
std::mutex m_ApplyTasksMutex;
std::vector<std::unique_ptr<UpstreamApplyEndpoint>> m_Endpoints;
Event m_ShutdownEvent;
- WorkerThreadPool m_AsyncWorkPool;
+ WorkerThreadPool m_UpstreamAsyncWorkPool;
+ WorkerThreadPool m_DownstreamAsyncWorkPool;
std::thread m_UpstreamUpdatesThread;
std::thread m_EndpointMonitorThread;
RunState m_RunState;
@@ -1700,24 +435,6 @@ UpstreamApply::Create(const UpstreamApplyOptions& Options, CasStore& CasStore, C
return std::make_unique<UpstreamApplyImpl>(Options, CasStore, CidStore);
}
-std::unique_ptr<UpstreamApplyEndpoint>
-UpstreamApplyEndpoint::CreateHordeEndpoint(const CloudCacheClientOptions& ComputeOptions,
- const UpstreamAuthConfig& ComputeAuthConfig,
- const CloudCacheClientOptions& StorageOptions,
- const UpstreamAuthConfig& StorageAuthConfig,
- CasStore& CasStore,
- CidStore& CidStore,
- AuthMgr& Mgr)
-{
- return std::make_unique<detail::HordeUpstreamApplyEndpoint>(ComputeOptions,
- ComputeAuthConfig,
- StorageOptions,
- StorageAuthConfig,
- CasStore,
- CidStore,
- Mgr);
-}
-
} // namespace zen
#endif // ZEN_WITH_COMPUTE_SERVICES
diff --git a/zenserver/upstream/upstreamapply.h b/zenserver/upstream/upstreamapply.h
index 9cea88c56..c6e38142c 100644
--- a/zenserver/upstream/upstreamapply.h
+++ b/zenserver/upstream/upstreamapply.h
@@ -10,10 +10,8 @@
# include <zencore/stats.h>
# include <zencore/zencore.h>
-# include <atomic>
# include <chrono>
# include <map>
-# include <memory>
# include <unordered_map>
# include <unordered_set>
@@ -44,17 +42,19 @@ enum class UpstreamApplyType
struct UpstreamApplyRecord
{
- CbObject WorkerDescriptor;
- CbObject Action;
- UpstreamApplyType Type;
+ CbObject WorkerDescriptor;
+ CbObject Action;
+ UpstreamApplyType Type;
+ std::map<std::string, uint64_t> Timepoints{};
};
struct UpstreamApplyOptions
{
std::chrono::seconds HealthCheckInterval{5};
std::chrono::seconds UpdatesInterval{5};
- uint32_t ThreadCount = 4;
- bool StatsEnabled = false;
+ uint32_t UpstreamThreadCount = 4;
+ uint32_t DownstreamThreadCount = 4;
+ bool StatsEnabled = false;
};
struct UpstreamApplyError
@@ -67,31 +67,33 @@ struct UpstreamApplyError
struct PostUpstreamApplyResult
{
- UpstreamApplyError Error{};
- int64_t Bytes{};
- double ElapsedSeconds{};
- bool Success = false;
+ UpstreamApplyError Error{};
+ int64_t Bytes{};
+ double ElapsedSeconds{};
+ std::map<std::string, uint64_t> Timepoints{};
+ bool Success = false;
};
struct GetUpstreamApplyResult
{
// UpstreamApplyType::Simple
- std::map<std::filesystem::path, IoHash> OutputFiles;
- std::map<IoHash, IoBuffer> FileData;
+ std::map<std::filesystem::path, IoHash> OutputFiles{};
+ std::map<IoHash, IoBuffer> FileData{};
// UpstreamApplyType::Asset
CbPackage OutputPackage{};
int64_t TotalAttachmentBytes{};
int64_t TotalRawAttachmentBytes{};
- UpstreamApplyError Error{};
- int64_t Bytes{};
- double ElapsedSeconds{};
- std::string StdOut{};
- std::string StdErr{};
- std::string Agent{};
- std::string Detail{};
- bool Success = false;
+ UpstreamApplyError Error{};
+ int64_t Bytes{};
+ double ElapsedSeconds{};
+ std::string StdOut{};
+ std::string StdErr{};
+ std::string Agent{};
+ std::string Detail{};
+ std::map<std::string, uint64_t> Timepoints{};
+ bool Success = false;
};
using UpstreamApplyCompleted = std::unordered_map<IoHash, std::unordered_map<IoHash, GetUpstreamApplyResult>>;
@@ -110,6 +112,7 @@ struct UpstreamApplyStatus
UpstreamApplyState State{};
GetUpstreamApplyResult Result{};
std::chrono::steady_clock::time_point ExpireTime{};
+ std::map<std::string, uint64_t> Timepoints{};
};
using UpstreamApplyTasks = std::unordered_map<IoHash, std::unordered_map<IoHash, UpstreamApplyStatus>>;
diff --git a/zenserver/zenserver.cpp b/zenserver/zenserver.cpp
index 500b57e73..b0598d75b 100644
--- a/zenserver/zenserver.cpp
+++ b/zenserver/zenserver.cpp
@@ -368,6 +368,7 @@ public:
.MaxCacheDuration = std::chrono::seconds(ServerOptions.GcConfig.Cache.MaxDurationSeconds),
.CollectSmallObjects = ServerOptions.GcConfig.CollectSmallObjects,
.Enabled = ServerOptions.GcConfig.Enabled,
+ .DiskReserveSize = ServerOptions.GcConfig.DiskReserveSize,
};
m_GcScheduler.Initialize(GcConfig);
@@ -848,7 +849,7 @@ ZenServer::InitializeCompute(const ZenServerOptions& ServerOptions)
const ZenUpstreamCacheConfig& UpstreamConfig = ServerOptions.UpstreamCacheConfig;
// Horde compute upstream
- if (UpstreamConfig.HordeConfig.Url.empty() == false && UpstreamConfig.JupiterConfig.Url.empty() == false)
+ if (UpstreamConfig.HordeConfig.Url.empty() == false && UpstreamConfig.HordeConfig.StorageUrl.empty() == false)
{
ZEN_INFO("instantiating compute service");
@@ -859,8 +860,7 @@ ZenServer::InitializeCompute(const ZenServerOptions& ServerOptions)
.ServiceUrl = UpstreamConfig.HordeConfig.Url,
.ComputeCluster = UpstreamConfig.HordeConfig.Cluster,
.ConnectTimeout = std::chrono::milliseconds(UpstreamConfig.ConnectTimeoutMilliseconds),
- .Timeout = std::chrono::milliseconds(UpstreamConfig.TimeoutMilliseconds),
- .UseLegacyDdc = false};
+ .Timeout = std::chrono::milliseconds(UpstreamConfig.TimeoutMilliseconds)};
auto ComputeAuthConfig = zen::UpstreamAuthConfig{.OAuthUrl = UpstreamConfig.HordeConfig.OAuthUrl,
.OAuthClientId = UpstreamConfig.HordeConfig.OAuthClientId,
@@ -870,16 +870,16 @@ ZenServer::InitializeCompute(const ZenServerOptions& ServerOptions)
auto StorageOptions =
zen::CloudCacheClientOptions{.Name = EndpointName,
- .ServiceUrl = UpstreamConfig.JupiterConfig.Url,
+ .ServiceUrl = UpstreamConfig.HordeConfig.StorageUrl,
.BlobStoreNamespace = UpstreamConfig.HordeConfig.Namespace,
.ConnectTimeout = std::chrono::milliseconds(UpstreamConfig.ConnectTimeoutMilliseconds),
.Timeout = std::chrono::milliseconds(UpstreamConfig.TimeoutMilliseconds)};
- auto StorageAuthConfig = zen::UpstreamAuthConfig{.OAuthUrl = UpstreamConfig.JupiterConfig.OAuthUrl,
- .OAuthClientId = UpstreamConfig.JupiterConfig.OAuthClientId,
- .OAuthClientSecret = UpstreamConfig.JupiterConfig.OAuthClientSecret,
- .OpenIdProvider = UpstreamConfig.JupiterConfig.OpenIdProvider,
- .AccessToken = UpstreamConfig.JupiterConfig.AccessToken};
+ auto StorageAuthConfig = zen::UpstreamAuthConfig{.OAuthUrl = UpstreamConfig.HordeConfig.StorageOAuthUrl,
+ .OAuthClientId = UpstreamConfig.HordeConfig.StorageOAuthClientId,
+ .OAuthClientSecret = UpstreamConfig.HordeConfig.StorageOAuthClientSecret,
+ .OpenIdProvider = UpstreamConfig.HordeConfig.StorageOpenIdProvider,
+ .AccessToken = UpstreamConfig.HordeConfig.StorageAccessToken};
m_HttpFunctionService = std::make_unique<zen::HttpFunctionService>(*m_CasStore,
*m_CidStore,
@@ -891,7 +891,7 @@ ZenServer::InitializeCompute(const ZenServerOptions& ServerOptions)
}
else
{
- ZEN_INFO("NOT instantiating compute service (missing Horde or Jupiter config)");
+ ZEN_INFO("NOT instantiating compute service (missing Horde or Storage config)");
}
}
#endif // ZEN_WITH_COMPUTE_SERVICES
@@ -1016,6 +1016,8 @@ ZenEntryPoint::Run()
InitializeLogging(ServerOptions);
+ MaximizeOpenFileCount();
+
ZEN_INFO(ZEN_APP_NAME " - using lock file at '{}'", LockFilePath);
ZEN_INFO(ZEN_APP_NAME " - starting on port {}, version '{}'", ServerOptions.BasePort, ZEN_CFG_VERSION_BUILD_STRING_FULL);
@@ -1154,9 +1156,10 @@ test_main(int argc, char** argv)
zen::z$_forcelink();
zen::logging::InitializeLogging();
-
spdlog::set_level(spdlog::level::debug);
+ zen::MaximizeOpenFileCount();
+
return ZEN_RUN_TESTS(argc, argv);
}
#endif
diff --git a/zenstore-test/zenstore-test.cpp b/zenstore-test/zenstore-test.cpp
index 587cf4a05..8b3d6c648 100644
--- a/zenstore-test/zenstore-test.cpp
+++ b/zenstore-test/zenstore-test.cpp
@@ -1,9 +1,16 @@
// Copyright Epic Games, Inc. All Rights Reserved.
+#include <zencore/filesystem.h>
#include <zencore/logging.h>
#include <zencore/zencore.h>
#include <zenstore/zenstore.h>
+#if ZEN_PLATFORM_LINUX || ZEN_PLATFORM_MAC
+# include <sys/time.h>
+# include <sys/resource.h>
+# include <zencore/except.h>
+#endif
+
#if ZEN_WITH_TESTS
# define ZEN_TEST_WITH_RUNNER 1
# include <zencore/testing.h>
@@ -16,6 +23,7 @@ main([[maybe_unused]] int argc, [[maybe_unused]] char* argv[])
zen::zenstore_forcelinktests();
zen::logging::InitializeLogging();
+ zen::MaximizeOpenFileCount();
return ZEN_RUN_TESTS(argc, argv);
#else
diff --git a/zenstore/basicfile.cpp b/zenstore/basicfile.cpp
index 895db6cee..8eb172a1c 100644
--- a/zenstore/basicfile.cpp
+++ b/zenstore/basicfile.cpp
@@ -29,10 +29,10 @@ BasicFile::~BasicFile()
}
void
-BasicFile::Open(std::filesystem::path FileName, bool IsCreate)
+BasicFile::Open(const std::filesystem::path& FileName, Mode Mode)
{
std::error_code Ec;
- Open(FileName, IsCreate, Ec);
+ Open(FileName, Mode, Ec);
if (Ec)
{
@@ -41,22 +41,41 @@ BasicFile::Open(std::filesystem::path FileName, bool IsCreate)
}
void
-BasicFile::Open(std::filesystem::path FileName, bool IsCreate, std::error_code& Ec)
+BasicFile::Open(const std::filesystem::path& FileName, Mode Mode, std::error_code& Ec)
{
Ec.clear();
#if ZEN_PLATFORM_WINDOWS
- const DWORD dwCreationDisposition = IsCreate ? CREATE_ALWAYS : OPEN_EXISTING;
- DWORD dwDesiredAccess = GENERIC_READ | GENERIC_WRITE;
- const DWORD dwShareMode = FILE_SHARE_READ;
- const DWORD dwFlagsAndAttributes = FILE_ATTRIBUTE_NORMAL;
- HANDLE hTemplateFile = nullptr;
-
- if (IsCreate)
+ DWORD dwCreationDisposition = 0;
+ DWORD dwDesiredAccess = 0;
+ switch (Mode)
{
- dwDesiredAccess |= DELETE;
+ case Mode::kRead:
+ dwCreationDisposition |= OPEN_EXISTING;
+ dwDesiredAccess |= GENERIC_READ;
+ break;
+ case Mode::kWrite:
+ dwCreationDisposition |= OPEN_ALWAYS;
+ dwDesiredAccess |= (GENERIC_READ | GENERIC_WRITE);
+ break;
+ case Mode::kDelete:
+ dwCreationDisposition |= OPEN_ALWAYS;
+ dwDesiredAccess |= (GENERIC_READ | GENERIC_WRITE | DELETE);
+ break;
+ case Mode::kTruncate:
+ dwCreationDisposition |= CREATE_ALWAYS;
+ dwDesiredAccess |= (GENERIC_READ | GENERIC_WRITE);
+ break;
+ case Mode::kTruncateDelete:
+ dwCreationDisposition |= CREATE_ALWAYS;
+ dwDesiredAccess |= (GENERIC_READ | GENERIC_WRITE | DELETE);
+ break;
}
+ const DWORD dwShareMode = FILE_SHARE_READ;
+ const DWORD dwFlagsAndAttributes = FILE_ATTRIBUTE_NORMAL;
+ HANDLE hTemplateFile = nullptr;
+
HANDLE FileHandle = CreateFile(FileName.c_str(),
dwDesiredAccess,
dwShareMode,
@@ -67,21 +86,34 @@ BasicFile::Open(std::filesystem::path FileName, bool IsCreate, std::error_code&
if (FileHandle == INVALID_HANDLE_VALUE)
{
- Ec = zen::MakeErrorCodeFromLastError();
+ Ec = MakeErrorCodeFromLastError();
return;
}
#else
- int OpenFlags = O_RDWR | O_CLOEXEC;
- OpenFlags |= IsCreate ? O_CREAT | O_TRUNC : 0;
+ int OpenFlags = O_CLOEXEC;
+ switch (Mode)
+ {
+ case Mode::kRead:
+ OpenFlags |= O_RDONLY;
+ break;
+ case Mode::kWrite:
+ case Mode::kDelete:
+ OpenFlags |= (O_RDWR | O_CREAT);
+ break;
+ case Mode::kTruncate:
+ case Mode::kTruncateDelete:
+ OpenFlags |= (O_RDWR | O_CREAT | O_TRUNC);
+ break;
+ }
int Fd = open(FileName.c_str(), OpenFlags, 0666);
if (Fd < 0)
{
- Ec = zen::MakeErrorCodeFromLastError();
+ Ec = MakeErrorCodeFromLastError();
return;
}
- if (IsCreate)
+ if (Mode != Mode::kRead)
{
fchmod(Fd, 0666);
}
@@ -268,7 +300,14 @@ BasicFile::FileSize()
#if ZEN_PLATFORM_WINDOWS
ULARGE_INTEGER liFileSize;
liFileSize.LowPart = ::GetFileSize(m_FileHandle, &liFileSize.HighPart);
-
+ if (liFileSize.LowPart == INVALID_FILE_SIZE)
+ {
+ int Error = zen::GetLastError();
+ if (Error)
+ {
+ ThrowSystemError(Error, fmt::format("Failed to get file size from file '{}'", PathFromHandle(m_FileHandle)));
+ }
+ }
return uint64_t(liFileSize.QuadPart);
#else
int Fd = int(uintptr_t(m_FileHandle));
@@ -279,6 +318,94 @@ BasicFile::FileSize()
#endif
}
+void
+BasicFile::SetFileSize(uint64_t FileSize)
+{
+#if ZEN_PLATFORM_WINDOWS
+ LARGE_INTEGER liFileSize;
+ liFileSize.QuadPart = FileSize;
+ BOOL OK = ::SetFilePointerEx(m_FileHandle, liFileSize, 0, FILE_BEGIN);
+ if (OK == FALSE)
+ {
+ int Error = zen::GetLastError();
+ if (Error)
+ {
+ ThrowSystemError(Error, fmt::format("Failed to set file pointer to {} for file {}", FileSize, PathFromHandle(m_FileHandle)));
+ }
+ }
+ OK = ::SetEndOfFile(m_FileHandle);
+ if (OK == FALSE)
+ {
+ int Error = zen::GetLastError();
+ if (Error)
+ {
+ ThrowSystemError(Error, fmt::format("Failed to set end of file to {} for file {}", FileSize, PathFromHandle(m_FileHandle)));
+ }
+ }
+#elif ZEN_PLATFORM_MAC
+ int Fd = int(intptr_t(m_FileHandle));
+ if (ftruncate(Fd, (off_t)FileSize) < 0)
+ {
+ int Error = zen::GetLastError();
+ if (Error)
+ {
+ ThrowSystemError(Error, fmt::format("Failed to set truncate file to {} for file {}", FileSize, PathFromHandle(m_FileHandle)));
+ }
+ }
+#else
+ int Fd = int(intptr_t(m_FileHandle));
+ if (ftruncate64(Fd, (off64_t)FileSize) < 0)
+ {
+ int Error = zen::GetLastError();
+ if (Error)
+ {
+ ThrowSystemError(Error, fmt::format("Failed to set truncate file to {} for file {}", FileSize, PathFromHandle(m_FileHandle)));
+ }
+ }
+ if (FileSize > 0)
+ {
+ int Error = posix_fallocate64(Fd, 0, (off64_t)FileSize);
+ if (Error)
+ {
+ ThrowSystemError(Error, fmt::format("Failed to allocate space of {} for file {}", FileSize, PathFromHandle(m_FileHandle)));
+ }
+ }
+#endif
+}
+
+void
+BasicFile::MarkAsDeleteOnClose(std::error_code& Ec)
+{
+ Ec.clear();
+#if ZEN_PLATFORM_WINDOWS
+ FILE_DISPOSITION_INFO Fdi{};
+ Fdi.DeleteFile = TRUE;
+ BOOL Success = SetFileInformationByHandle(m_FileHandle, FileDispositionInfo, &Fdi, sizeof Fdi);
+ if (!Success)
+ {
+ Ec = MakeErrorCodeFromLastError();
+ }
+#elif ZEN_PLATFORM_LINUX || ZEN_PLATFORM_MAC
+ std::filesystem::path SourcePath = PathFromHandle(m_FileHandle);
+ if (unlink(SourcePath.c_str()) < 0)
+ {
+ int UnlinkError = zen::GetLastError();
+ if (UnlinkError != ENOENT)
+ {
+ Ec = MakeErrorCode(UnlinkError);
+ }
+ }
+#endif
+}
+
+void*
+BasicFile::Detach()
+{
+ void* FileHandle = m_FileHandle;
+ m_FileHandle = 0;
+ return FileHandle;
+}
+
//////////////////////////////////////////////////////////////////////////
TemporaryFile::~TemporaryFile()
@@ -314,9 +441,7 @@ TemporaryFile::CreateTemporary(std::filesystem::path TempDirName, std::error_cod
m_TempPath = TempDirName / TempName.c_str();
- const bool IsCreate = true;
-
- Open(m_TempPath, IsCreate, Ec);
+ Open(m_TempPath, BasicFile::Mode::kTruncateDelete, Ec);
}
void
@@ -416,8 +541,8 @@ TEST_CASE("BasicFile")
ScopedCurrentDirectoryChange _;
BasicFile File1;
- CHECK_THROWS(File1.Open("zonk", false));
- CHECK_NOTHROW(File1.Open("zonk", true));
+ CHECK_THROWS(File1.Open("zonk", BasicFile::Mode::kRead));
+ CHECK_NOTHROW(File1.Open("zonk", BasicFile::Mode::kTruncate));
CHECK_NOTHROW(File1.Write("abcd", 4, 0));
CHECK(File1.FileSize() == 4);
{
diff --git a/zenstore/blockstore.cpp b/zenstore/blockstore.cpp
new file mode 100644
index 000000000..1eb859d5a
--- /dev/null
+++ b/zenstore/blockstore.cpp
@@ -0,0 +1,242 @@
+// Copyright Epic Games, Inc. All Rights Reserved.
+
+#include "compactcas.h"
+
+#include <zenstore/blockstore.h>
+
+#if ZEN_WITH_TESTS
+# include <zencore/compactbinarybuilder.h>
+# include <zencore/testing.h>
+# include <zencore/testutils.h>
+# include <algorithm>
+# include <random>
+#endif
+
+//////////////////////////////////////////////////////////////////////////
+
+namespace zen {
+
+//////////////////////////////////////////////////////////////////////////
+
+BlockStoreFile::BlockStoreFile(const std::filesystem::path& BlockPath) : m_Path(BlockPath)
+{
+}
+
+BlockStoreFile::~BlockStoreFile()
+{
+ m_IoBuffer = IoBuffer();
+ m_File.Detach();
+}
+
+const std::filesystem::path&
+BlockStoreFile::GetPath() const
+{
+ return m_Path;
+}
+
+void
+BlockStoreFile::Open()
+{
+ m_File.Open(m_Path, BasicFile::Mode::kDelete);
+ void* FileHandle = m_File.Handle();
+ m_IoBuffer = IoBuffer(IoBuffer::File, FileHandle, 0, m_File.FileSize());
+}
+
+void
+BlockStoreFile::Create(uint64_t InitialSize)
+{
+ auto ParentPath = m_Path.parent_path();
+ if (!std::filesystem::is_directory(ParentPath))
+ {
+ CreateDirectories(ParentPath);
+ }
+
+ m_File.Open(m_Path, BasicFile::Mode::kTruncateDelete);
+ if (InitialSize > 0)
+ {
+ m_File.SetFileSize(InitialSize);
+ }
+ void* FileHandle = m_File.Handle();
+ m_IoBuffer = IoBuffer(IoBuffer::File, FileHandle, 0, InitialSize);
+}
+
+uint64_t
+BlockStoreFile::FileSize()
+{
+ return m_File.FileSize();
+}
+
+void
+BlockStoreFile::MarkAsDeleteOnClose(std::error_code& Ec)
+{
+ m_File.MarkAsDeleteOnClose(Ec);
+}
+
+IoBuffer
+BlockStoreFile::GetChunk(uint64_t Offset, uint64_t Size)
+{
+ return IoBuffer(m_IoBuffer, Offset, Size);
+}
+
+void
+BlockStoreFile::Read(void* Data, uint64_t Size, uint64_t FileOffset)
+{
+ m_File.Read(Data, Size, FileOffset);
+}
+
+void
+BlockStoreFile::Write(const void* Data, uint64_t Size, uint64_t FileOffset)
+{
+ m_File.Write(Data, Size, FileOffset);
+}
+
+void
+BlockStoreFile::Truncate(uint64_t Size)
+{
+ m_File.SetFileSize(Size);
+}
+
+void
+BlockStoreFile::Flush()
+{
+ m_File.Flush();
+}
+
+void
+BlockStoreFile::StreamByteRange(uint64_t FileOffset, uint64_t Size, std::function<void(const void* Data, uint64_t Size)>&& ChunkFun)
+{
+ m_File.StreamByteRange(FileOffset, Size, std::move(ChunkFun));
+}
+
+#if ZEN_WITH_TESTS
+
+static bool
+operator==(const BlockStoreLocation& Lhs, const BlockStoreLocation& Rhs)
+{
+ return Lhs.BlockIndex == Rhs.BlockIndex && Lhs.Offset == Rhs.Offset && Lhs.Size == Rhs.Size;
+}
+
+TEST_CASE("blockstore.blockstoredisklocation")
+{
+ BlockStoreLocation Zero = BlockStoreLocation{.BlockIndex = 0, .Offset = 0, .Size = 0};
+ CHECK(Zero == BlockStoreDiskLocation(Zero, 4).Get(4));
+
+ BlockStoreLocation MaxBlockIndex = BlockStoreLocation{.BlockIndex = BlockStoreDiskLocation::MaxBlockIndex, .Offset = 0, .Size = 0};
+ CHECK(MaxBlockIndex == BlockStoreDiskLocation(MaxBlockIndex, 4).Get(4));
+
+ BlockStoreLocation MaxOffset = BlockStoreLocation{.BlockIndex = 0, .Offset = BlockStoreDiskLocation::MaxOffset * 4, .Size = 0};
+ CHECK(MaxOffset == BlockStoreDiskLocation(MaxOffset, 4).Get(4));
+
+ BlockStoreLocation MaxSize = BlockStoreLocation{.BlockIndex = 0, .Offset = 0, .Size = std::numeric_limits<uint32_t>::max()};
+ CHECK(MaxSize == BlockStoreDiskLocation(MaxSize, 4).Get(4));
+
+ BlockStoreLocation MaxBlockIndexAndOffset =
+ BlockStoreLocation{.BlockIndex = BlockStoreDiskLocation::MaxBlockIndex, .Offset = BlockStoreDiskLocation::MaxOffset * 4, .Size = 0};
+ CHECK(MaxBlockIndexAndOffset == BlockStoreDiskLocation(MaxBlockIndexAndOffset, 4).Get(4));
+
+ BlockStoreLocation MaxAll = BlockStoreLocation{.BlockIndex = BlockStoreDiskLocation::MaxBlockIndex,
+ .Offset = BlockStoreDiskLocation::MaxOffset * 4,
+ .Size = std::numeric_limits<uint32_t>::max()};
+ CHECK(MaxAll == BlockStoreDiskLocation(MaxAll, 4).Get(4));
+
+ BlockStoreLocation MaxAll4096 = BlockStoreLocation{.BlockIndex = BlockStoreDiskLocation::MaxBlockIndex,
+ .Offset = BlockStoreDiskLocation::MaxOffset * 4096,
+ .Size = std::numeric_limits<uint32_t>::max()};
+ CHECK(MaxAll4096 == BlockStoreDiskLocation(MaxAll4096, 4096).Get(4096));
+
+ BlockStoreLocation Middle = BlockStoreLocation{.BlockIndex = (BlockStoreDiskLocation::MaxBlockIndex) / 2,
+ .Offset = ((BlockStoreDiskLocation::MaxOffset) / 2) * 4,
+ .Size = std::numeric_limits<uint32_t>::max() / 2};
+ CHECK(Middle == BlockStoreDiskLocation(Middle, 4).Get(4));
+}
+
+TEST_CASE("blockstore.blockfile")
+{
+ ScopedTemporaryDirectory TempDir;
+ auto RootDirectory = TempDir.Path() / "blocks";
+ CreateDirectories(RootDirectory);
+
+ {
+ BlockStoreFile File1(RootDirectory / "1");
+ File1.Create(16384);
+ CHECK(File1.FileSize() == 16384);
+ File1.Write("data", 5, 0);
+ IoBuffer DataChunk = File1.GetChunk(0, 5);
+ File1.Write("boop", 5, 5);
+ IoBuffer BoopChunk = File1.GetChunk(5, 5);
+ const char* Data = static_cast<const char*>(DataChunk.GetData());
+ CHECK(std::string(Data) == "data");
+ const char* Boop = static_cast<const char*>(BoopChunk.GetData());
+ CHECK(std::string(Boop) == "boop");
+ File1.Flush();
+ }
+ {
+ BlockStoreFile File1(RootDirectory / "1");
+ File1.Open();
+
+ char DataRaw[5];
+ File1.Read(DataRaw, 5, 0);
+ CHECK(std::string(DataRaw) == "data");
+ IoBuffer DataChunk = File1.GetChunk(0, 5);
+
+ char BoopRaw[5];
+ File1.Read(BoopRaw, 5, 5);
+ CHECK(std::string(BoopRaw) == "boop");
+
+ IoBuffer BoopChunk = File1.GetChunk(5, 5);
+ const char* Data = static_cast<const char*>(DataChunk.GetData());
+ CHECK(std::string(Data) == "data");
+ const char* Boop = static_cast<const char*>(BoopChunk.GetData());
+ CHECK(std::string(Boop) == "boop");
+ }
+
+ {
+ IoBuffer DataChunk;
+ IoBuffer BoopChunk;
+
+ {
+ BlockStoreFile File1(RootDirectory / "1");
+ File1.Open();
+ DataChunk = File1.GetChunk(0, 5);
+ BoopChunk = File1.GetChunk(5, 5);
+ }
+
+ CHECK(std::filesystem::exists(RootDirectory / "1"));
+
+ const char* Data = static_cast<const char*>(DataChunk.GetData());
+ CHECK(std::string(Data) == "data");
+ const char* Boop = static_cast<const char*>(BoopChunk.GetData());
+ CHECK(std::string(Boop) == "boop");
+ }
+ CHECK(std::filesystem::exists(RootDirectory / "1"));
+
+ {
+ IoBuffer DataChunk;
+ IoBuffer BoopChunk;
+
+ {
+ BlockStoreFile File1(RootDirectory / "1");
+ File1.Open();
+ std::error_code Ec;
+ File1.MarkAsDeleteOnClose(Ec);
+ CHECK(!Ec);
+ DataChunk = File1.GetChunk(0, 5);
+ BoopChunk = File1.GetChunk(5, 5);
+ }
+
+ const char* Data = static_cast<const char*>(DataChunk.GetData());
+ CHECK(std::string(Data) == "data");
+ const char* Boop = static_cast<const char*>(BoopChunk.GetData());
+ CHECK(std::string(Boop) == "boop");
+ }
+ CHECK(!std::filesystem::exists(RootDirectory / "1"));
+}
+
+#endif
+
+void
+blockstore_forcelink()
+{
+}
+
+} // namespace zen
diff --git a/zenstore/cas.cpp b/zenstore/cas.cpp
index a90e45c04..0e1d5b242 100644
--- a/zenstore/cas.cpp
+++ b/zenstore/cas.cpp
@@ -150,8 +150,8 @@ CasImpl::Initialize(const CasStoreConfiguration& InConfig)
// Initialize payload storage
m_LargeStrategy.Initialize(IsNewStore);
- m_TinyStrategy.Initialize("tobs", 16, IsNewStore);
- m_SmallStrategy.Initialize("sobs", 4096, IsNewStore);
+ m_TinyStrategy.Initialize("tobs", 1u << 28, 16, IsNewStore); // 256 Mb per block
+ m_SmallStrategy.Initialize("sobs", 1u << 30, 4096, IsNewStore); // 1 Gb per block
}
bool
@@ -164,7 +164,7 @@ CasImpl::OpenOrCreateManifest()
std::error_code Ec;
BasicFile ManifestFile;
- ManifestFile.Open(ManifestPath.c_str(), /* IsCreate */ false, Ec);
+ ManifestFile.Open(ManifestPath.c_str(), BasicFile::Mode::kRead, Ec);
bool ManifestIsOk = false;
@@ -236,7 +236,7 @@ CasImpl::UpdateManifest()
ZEN_TRACE("Writing new manifest to '{}'", ManifestPath);
BasicFile Marker;
- Marker.Open(ManifestPath.c_str(), /* IsCreate */ true);
+ Marker.Open(ManifestPath.c_str(), BasicFile::Mode::kTruncate);
Marker.Write(m_ManifestObject.GetBuffer(), 0);
}
diff --git a/zenstore/caslog.cpp b/zenstore/caslog.cpp
index 055e3feda..03a56f010 100644
--- a/zenstore/caslog.cpp
+++ b/zenstore/caslog.cpp
@@ -39,13 +39,23 @@ CasLogFile::~CasLogFile()
}
void
-CasLogFile::Open(std::filesystem::path FileName, size_t RecordSize, bool IsCreate)
+CasLogFile::Open(std::filesystem::path FileName, size_t RecordSize, Mode Mode)
{
m_RecordSize = RecordSize;
std::error_code Ec;
- m_File.Open(FileName, IsCreate, Ec);
+ BasicFile::Mode FileMode = BasicFile::Mode::kRead;
+ switch (Mode)
+ {
+ case Mode::kWrite:
+ FileMode = BasicFile::Mode::kWrite;
+ break;
+ case Mode::kTruncate:
+ FileMode = BasicFile::Mode::kTruncate;
+ break;
+ }
+ m_File.Open(FileName, FileMode, Ec);
if (Ec)
{
throw std::system_error(Ec, fmt::format("Failed to open log file '{}'", FileName));
@@ -53,8 +63,12 @@ CasLogFile::Open(std::filesystem::path FileName, size_t RecordSize, bool IsCreat
uint64_t AppendOffset = 0;
- if (IsCreate || (m_File.FileSize() < sizeof(FileHeader)))
+ if ((Mode == Mode::kTruncate) || (m_File.FileSize() < sizeof(FileHeader)))
{
+ if (Mode == Mode::kRead)
+ {
+ throw std::runtime_error(fmt::format("Mangled log header (file to small) in '{}'", FileName));
+ }
// Initialize log by writing header
FileHeader Header = {.RecordSize = gsl::narrow<uint32_t>(RecordSize), .LogId = Oid::NewOid(), .ValidatedTail = 0};
memcpy(Header.Magic, FileHeader::MagicSequence, sizeof Header.Magic);
@@ -106,20 +120,36 @@ CasLogFile::GetLogSize()
return m_File.FileSize();
}
+uint64_t
+CasLogFile::GetLogCount()
+{
+ uint64_t LogFileSize = m_AppendOffset.load(std::memory_order_acquire);
+ if (LogFileSize < sizeof(FileHeader))
+ {
+ return 0;
+ }
+ const uint64_t LogBaseOffset = sizeof(FileHeader);
+ const size_t LogEntryCount = (LogFileSize - LogBaseOffset) / m_RecordSize;
+ return LogEntryCount;
+}
+
void
-CasLogFile::Replay(std::function<void(const void*)>&& Handler)
+CasLogFile::Replay(std::function<void(const void*)>&& Handler, uint64_t SkipEntryCount)
{
uint64_t LogFileSize = m_File.FileSize();
// Ensure we end up on a clean boundary
- const uint64_t LogBaseOffset = sizeof(FileHeader);
- const size_t LogEntryCount = (LogFileSize - LogBaseOffset) / m_RecordSize;
+ uint64_t LogBaseOffset = sizeof(FileHeader);
+ size_t LogEntryCount = (LogFileSize - LogBaseOffset) / m_RecordSize;
- if (LogEntryCount == 0)
+ if (LogEntryCount <= SkipEntryCount)
{
return;
}
+ LogBaseOffset += SkipEntryCount * m_RecordSize;
+ LogEntryCount -= SkipEntryCount;
+
// This should really be streaming the data rather than just
// reading it into memory, though we don't tend to get very
// large logs so it may not matter
@@ -142,7 +172,7 @@ CasLogFile::Replay(std::function<void(const void*)>&& Handler)
void
CasLogFile::Append(const void* DataPointer, uint64_t DataSize)
{
- ZEN_ASSERT(DataSize == m_RecordSize);
+ ZEN_ASSERT((DataSize % m_RecordSize) == 0);
uint64_t AppendOffset = m_AppendOffset.fetch_add(DataSize);
diff --git a/zenstore/cidstore.cpp b/zenstore/cidstore.cpp
index 8b53a8304..509d21abe 100644
--- a/zenstore/cidstore.cpp
+++ b/zenstore/cidstore.cpp
@@ -127,35 +127,37 @@ struct CidStore::Impl
bool IsNew = !std::filesystem::exists(SlogPath);
- m_LogFile.Open(SlogPath, IsNew);
+ m_LogFile.Open(SlogPath, IsNew ? CasLogFile::Mode::kTruncate : CasLogFile::Mode::kWrite);
ZEN_DEBUG("Initializing index from '{}' ({})", SlogPath, NiceBytes(m_LogFile.GetLogSize()));
uint64_t TombstoneCount = 0;
uint64_t InvalidCount = 0;
- m_LogFile.Replay([&](const IndexEntry& Entry) {
- if (Entry.Compressed != IoHash::Zero)
- {
- // Update
- m_CidMap.insert_or_assign(Entry.Uncompressed, Entry.Compressed);
- }
- else
- {
- if (Entry.Uncompressed != IoHash::Zero)
+ m_LogFile.Replay(
+ [&](const IndexEntry& Entry) {
+ if (Entry.Compressed != IoHash::Zero)
{
- // Tombstone
- m_CidMap.erase(Entry.Uncompressed);
- ++TombstoneCount;
+ // Update
+ m_CidMap.insert_or_assign(Entry.Uncompressed, Entry.Compressed);
}
else
{
- // Completely uninitialized entry with both hashes set to zero indicates a
- // problem. Might be an unwritten page due to BSOD or some other problem
- ++InvalidCount;
+ if (Entry.Uncompressed != IoHash::Zero)
+ {
+ // Tombstone
+ m_CidMap.erase(Entry.Uncompressed);
+ ++TombstoneCount;
+ }
+ else
+ {
+ // Completely uninitialized entry with both hashes set to zero indicates a
+ // problem. Might be an unwritten page due to BSOD or some other problem
+ ++InvalidCount;
+ }
}
- }
- });
+ },
+ 0);
ZEN_INFO("CID index initialized: {} entries found ({} tombstones, {} invalid)", m_CidMap.size(), TombstoneCount, InvalidCount);
}
diff --git a/zenstore/compactcas.cpp b/zenstore/compactcas.cpp
index 3bf0c70df..920ed965f 100644
--- a/zenstore/compactcas.cpp
+++ b/zenstore/compactcas.cpp
@@ -1,28 +1,24 @@
// Copyright Epic Games, Inc. All Rights Reserved.
-#include <zenstore/cas.h>
-
#include "compactcas.h"
-#include <zencore/compactbinarybuilder.h>
+#include <zenstore/cas.h>
+
#include <zencore/except.h>
#include <zencore/filesystem.h>
#include <zencore/fmtutils.h>
#include <zencore/logging.h>
-#include <zencore/memory.h>
-#include <zencore/string.h>
-#include <zencore/testing.h>
-#include <zencore/testutils.h>
-#include <zencore/thread.h>
-#include <zencore/uid.h>
-
-#include <zenstore/gc.h>
-
-#include <filesystem>
-#include <functional>
+#include <zencore/scopeguard.h>
#include <gsl/gsl-lite.hpp>
+#include <xxhash.h>
+
#if ZEN_WITH_TESTS
+# include <zencore/compactbinarybuilder.h>
+# include <zencore/testing.h>
+# include <zencore/testutils.h>
+# include <zencore/workthreadpool.h>
+# include <zenstore/cidstore.h>
# include <algorithm>
# include <random>
#endif
@@ -31,6 +27,211 @@
namespace zen {
+struct CasDiskIndexHeader
+{
+ static constexpr uint32_t ExpectedMagic = 0x75696478; // 'uidx';
+ static constexpr uint32_t CurrentVersion = 1;
+
+ uint32_t Magic = ExpectedMagic;
+ uint32_t Version = CurrentVersion;
+ uint64_t EntryCount = 0;
+ uint64_t LogPosition = 0;
+ uint32_t PayloadAlignment = 0;
+ uint32_t Checksum = 0;
+
+ static uint32_t ComputeChecksum(const CasDiskIndexHeader& Header)
+ {
+ return XXH32(&Header.Magic, sizeof(CasDiskIndexHeader) - sizeof(uint32_t), 0xC0C0'BABA);
+ }
+};
+
+static_assert(sizeof(CasDiskIndexHeader) == 32);
+
+namespace {
+ std::vector<CasDiskIndexEntry> MakeCasDiskEntries(const std::unordered_map<IoHash, BlockStoreDiskLocation>& MovedChunks,
+ const std::vector<IoHash>& DeletedChunks)
+ {
+ std::vector<CasDiskIndexEntry> result;
+ result.reserve(MovedChunks.size());
+ for (const auto& MovedEntry : MovedChunks)
+ {
+ result.push_back({.Key = MovedEntry.first, .Location = MovedEntry.second});
+ }
+ for (const IoHash& ChunkHash : DeletedChunks)
+ {
+ result.push_back({.Key = ChunkHash, .Flags = CasDiskIndexEntry::kTombstone});
+ }
+ return result;
+ }
+
+ const char* IndexExtension = ".uidx";
+ const char* LogExtension = ".ulog";
+ const char* DataExtension = ".ucas";
+
+ std::filesystem::path GetBasePath(const std::filesystem::path& RootPath, const std::string& ContainerBaseName)
+ {
+ return RootPath / ContainerBaseName;
+ }
+
+ std::filesystem::path GetIndexPath(const std::filesystem::path& RootPath, const std::string& ContainerBaseName)
+ {
+ return GetBasePath(RootPath, ContainerBaseName) / (ContainerBaseName + IndexExtension);
+ }
+
+ std::filesystem::path GetTempIndexPath(const std::filesystem::path& RootPath, const std::string& ContainerBaseName)
+ {
+ return GetBasePath(RootPath, ContainerBaseName) / (ContainerBaseName + ".tmp" + LogExtension);
+ }
+
+ std::filesystem::path GetLogPath(const std::filesystem::path& RootPath, const std::string& ContainerBaseName)
+ {
+ return GetBasePath(RootPath, ContainerBaseName) / (ContainerBaseName + LogExtension);
+ }
+
+ std::filesystem::path GetBlocksBasePath(const std::filesystem::path& RootPath, const std::string& ContainerBaseName)
+ {
+ return GetBasePath(RootPath, ContainerBaseName) / "blocks";
+ }
+
+ std::filesystem::path GetBlockPath(const std::filesystem::path& BlocksBasePath, const uint32_t BlockIndex)
+ {
+ ExtendablePathBuilder<256> Path;
+
+ char BlockHexString[9];
+ ToHexNumber(BlockIndex, BlockHexString);
+
+ Path.Append(BlocksBasePath);
+ Path.AppendSeparator();
+ Path.AppendAsciiRange(BlockHexString, BlockHexString + 4);
+ Path.AppendSeparator();
+ Path.Append(BlockHexString);
+ Path.Append(DataExtension);
+ return Path.ToPath();
+ }
+
+ std::filesystem::path GetLegacyLogPath(const std::filesystem::path& RootPath, const std::string& ContainerBaseName)
+ {
+ return RootPath / (ContainerBaseName + LogExtension);
+ }
+
+ std::filesystem::path GetLegacyDataPath(const std::filesystem::path& RootPath, const std::string& ContainerBaseName)
+ {
+ return RootPath / (ContainerBaseName + DataExtension);
+ }
+
+ std::filesystem::path GetLegacyIndexPath(const std::filesystem::path& RootPath, const std::string& ContainerBaseName)
+ {
+ return RootPath / (ContainerBaseName + IndexExtension);
+ }
+
+ struct LegacyCasDiskLocation
+ {
+ LegacyCasDiskLocation(uint64_t InOffset, uint64_t InSize)
+ {
+ ZEN_ASSERT(InOffset <= 0xff'ffff'ffff);
+ ZEN_ASSERT(InSize <= 0xff'ffff'ffff);
+
+ memcpy(&m_Offset[0], &InOffset, sizeof m_Offset);
+ memcpy(&m_Size[0], &InSize, sizeof m_Size);
+ }
+
+ LegacyCasDiskLocation() = default;
+
+ inline uint64_t GetOffset() const
+ {
+ uint64_t Offset = 0;
+ memcpy(&Offset, &m_Offset, sizeof m_Offset);
+ return Offset;
+ }
+
+ inline uint64_t GetSize() const
+ {
+ uint64_t Size = 0;
+ memcpy(&Size, &m_Size, sizeof m_Size);
+ return Size;
+ }
+
+ private:
+ uint8_t m_Offset[5];
+ uint8_t m_Size[5];
+ };
+
+ struct LegacyCasDiskIndexEntry
+ {
+ static const uint8_t kTombstone = 0x01;
+
+ IoHash Key;
+ LegacyCasDiskLocation Location;
+ ZenContentType ContentType = ZenContentType::kUnknownContentType;
+ uint8_t Flags = 0;
+ };
+
+ bool ValidateLegacyEntry(const LegacyCasDiskIndexEntry& Entry, std::string& OutReason)
+ {
+ if (Entry.Key == IoHash::Zero)
+ {
+ OutReason = fmt::format("Invalid hash key {}", Entry.Key.ToHexString());
+ return false;
+ }
+ if ((Entry.Flags & ~LegacyCasDiskIndexEntry::kTombstone) != 0)
+ {
+ OutReason = fmt::format("Invalid flags {} for entry {}", Entry.Flags, Entry.Key.ToHexString());
+ return false;
+ }
+ if (Entry.Flags & LegacyCasDiskIndexEntry::kTombstone)
+ {
+ return true;
+ }
+ if (Entry.ContentType != ZenContentType::kUnknownContentType)
+ {
+ OutReason =
+ fmt::format("Invalid content type {} for entry {}", static_cast<uint8_t>(Entry.ContentType), Entry.Key.ToHexString());
+ return false;
+ }
+ uint64_t Size = Entry.Location.GetSize();
+ if (Size == 0)
+ {
+ OutReason = fmt::format("Invalid size {} for entry {}", Size, Entry.Key.ToHexString());
+ return false;
+ }
+ return true;
+ }
+
+ bool ValidateEntry(const CasDiskIndexEntry& Entry, std::string& OutReason)
+ {
+ if (Entry.Key == IoHash::Zero)
+ {
+ OutReason = fmt::format("Invalid hash key {}", Entry.Key.ToHexString());
+ return false;
+ }
+ if ((Entry.Flags & ~CasDiskIndexEntry::kTombstone) != 0)
+ {
+ OutReason = fmt::format("Invalid flags {} for entry {}", Entry.Flags, Entry.Key.ToHexString());
+ return false;
+ }
+ if (Entry.Flags & CasDiskIndexEntry::kTombstone)
+ {
+ return true;
+ }
+ if (Entry.ContentType != ZenContentType::kUnknownContentType)
+ {
+ OutReason =
+ fmt::format("Invalid content type {} for entry {}", static_cast<uint8_t>(Entry.ContentType), Entry.Key.ToHexString());
+ return false;
+ }
+ uint64_t Size = Entry.Location.GetSize();
+ if (Size == 0)
+ {
+ OutReason = fmt::format("Invalid size {} for entry {}", Size, Entry.Key.ToHexString());
+ return false;
+ }
+ return true;
+ }
+
+} // namespace
+
+//////////////////////////////////////////////////////////////////////////
+
CasContainerStrategy::CasContainerStrategy(const CasStoreConfiguration& Config, CasGc& Gc)
: GcStorage(Gc)
, m_Config(Config)
@@ -43,13 +244,16 @@ CasContainerStrategy::~CasContainerStrategy()
}
void
-CasContainerStrategy::Initialize(const std::string_view ContainerBaseName, uint64_t Alignment, bool IsNewStore)
+CasContainerStrategy::Initialize(const std::string_view ContainerBaseName, uint32_t MaxBlockSize, uint64_t Alignment, bool IsNewStore)
{
ZEN_ASSERT(IsPow2(Alignment));
ZEN_ASSERT(!m_IsInitialized);
+ ZEN_ASSERT(MaxBlockSize > 0);
m_ContainerBaseName = ContainerBaseName;
m_PayloadAlignment = Alignment;
+ m_MaxBlockSize = MaxBlockSize;
+ m_BlocksBasePath = GetBlocksBasePath(m_Config.RootDirectory, m_ContainerBaseName);
OpenContainer(IsNewStore);
@@ -59,36 +263,79 @@ CasContainerStrategy::Initialize(const std::string_view ContainerBaseName, uint6
CasStore::InsertResult
CasContainerStrategy::InsertChunk(const void* ChunkData, size_t ChunkSize, const IoHash& ChunkHash)
{
+ uint32_t WriteBlockIndex;
+ Ref<BlockStoreFile> WriteBlock;
+ uint64_t InsertOffset;
{
- RwLock::SharedLockScope _(m_LocationMapLock);
- auto KeyIt = m_LocationMap.find(ChunkHash);
+ RwLock::ExclusiveLockScope _(m_InsertLock);
- if (KeyIt != m_LocationMap.end())
{
- return CasStore::InsertResult{.New = false};
+ RwLock::SharedLockScope __(m_LocationMapLock);
+ if (m_LocationMap.contains(ChunkHash))
+ {
+ return CasStore::InsertResult{.New = false};
+ }
}
- }
-
- // New entry
-
- RwLock::ExclusiveLockScope _(m_InsertLock);
- const uint64_t InsertOffset = m_CurrentInsertOffset;
- m_SmallObjectFile.Write(ChunkData, ChunkSize, InsertOffset);
+ // New entry
- m_CurrentInsertOffset = (m_CurrentInsertOffset + ChunkSize + m_PayloadAlignment - 1) & ~(m_PayloadAlignment - 1);
-
- RwLock::ExclusiveLockScope __(m_LocationMapLock);
-
- const CasDiskLocation Location{InsertOffset, ChunkSize};
-
- m_LocationMap[ChunkHash] = Location;
-
- CasDiskIndexEntry IndexEntry{.Key = ChunkHash, .Location = Location};
+ WriteBlockIndex = m_WriteBlockIndex.load(std::memory_order_acquire);
+ bool IsWriting = m_WriteBlock != nullptr;
+ if (!IsWriting || (m_CurrentInsertOffset + ChunkSize) > m_MaxBlockSize)
+ {
+ if (m_WriteBlock)
+ {
+ m_WriteBlock = nullptr;
+ }
+ {
+ RwLock::ExclusiveLockScope __(m_LocationMapLock);
+ if (m_ChunkBlocks.size() == BlockStoreDiskLocation::MaxBlockIndex)
+ {
+ throw std::runtime_error(
+ fmt::format("unable to allocate a new block in '{}'", m_Config.RootDirectory / m_ContainerBaseName));
+ }
+ WriteBlockIndex += IsWriting ? 1 : 0;
+ while (m_ChunkBlocks.contains(WriteBlockIndex))
+ {
+ WriteBlockIndex = (WriteBlockIndex + 1) & BlockStoreDiskLocation::MaxBlockIndex;
+ }
+ std::filesystem::path BlockPath = GetBlockPath(m_BlocksBasePath, WriteBlockIndex);
+ m_WriteBlock = new BlockStoreFile(BlockPath);
+ m_ChunkBlocks[WriteBlockIndex] = m_WriteBlock;
+ m_WriteBlockIndex.store(WriteBlockIndex, std::memory_order_release);
+ }
+ m_CurrentInsertOffset = 0;
+ m_WriteBlock->Create(m_MaxBlockSize);
+ }
+ InsertOffset = m_CurrentInsertOffset;
+ m_CurrentInsertOffset = RoundUp(InsertOffset + ChunkSize, m_PayloadAlignment);
+ WriteBlock = m_WriteBlock;
+ }
- m_TotalSize.fetch_add(static_cast<uint64_t>(ChunkSize));
+ // We can end up in a situation that InsertChunk writes the same chunk data in
+ // different locations.
+ // We release the insert lock once we have the correct WriteBlock ready and we know
+ // where to write the data. If a new InsertChunk request for the same chunk hash/data
+ // comes in before we update m_LocationMap below we will have a race.
+ // The outcome of that is that we will write the chunk data in more than one location
+ // but the chunk hash will only point to one of the chunks.
+ // We will in that case waste space until the next GC operation.
+ //
+ // This should be a rare occasion and the current flow reduces the time we block for
+ // reads, insert and GC.
+
+ BlockStoreDiskLocation Location({.BlockIndex = WriteBlockIndex, .Offset = InsertOffset, .Size = ChunkSize}, m_PayloadAlignment);
+ const CasDiskIndexEntry IndexEntry{.Key = ChunkHash, .Location = Location};
+
+ WriteBlock->Write(ChunkData, ChunkSize, InsertOffset);
m_CasLog.Append(IndexEntry);
+ m_TotalSize.fetch_add(static_cast<uint64_t>(ChunkSize), std::memory_order_seq_cst);
+ {
+ RwLock::ExclusiveLockScope __(m_LocationMapLock);
+ m_LocationMap.emplace(ChunkHash, Location);
+ }
+
return CasStore::InsertResult{.New = true};
}
@@ -101,31 +348,28 @@ CasContainerStrategy::InsertChunk(IoBuffer Chunk, const IoHash& ChunkHash)
IoBuffer
CasContainerStrategy::FindChunk(const IoHash& ChunkHash)
{
- RwLock::SharedLockScope _(m_LocationMapLock);
-
- if (auto KeyIt = m_LocationMap.find(ChunkHash); KeyIt != m_LocationMap.end())
+ Ref<BlockStoreFile> ChunkBlock;
+ BlockStoreLocation Location;
{
- const CasDiskLocation& Location = KeyIt->second;
-
- return IoBufferBuilder::MakeFromFileHandle(m_SmallObjectFile.Handle(), Location.GetOffset(), Location.GetSize());
+ RwLock::SharedLockScope _(m_LocationMapLock);
+ if (auto KeyIt = m_LocationMap.find(ChunkHash); KeyIt != m_LocationMap.end())
+ {
+ Location = KeyIt->second.Get(m_PayloadAlignment);
+ ChunkBlock = m_ChunkBlocks[Location.BlockIndex];
+ }
+ else
+ {
+ return IoBuffer();
+ }
}
-
- // Not found
-
- return IoBuffer();
+ return ChunkBlock->GetChunk(Location.Offset, Location.Size);
}
bool
CasContainerStrategy::HaveChunk(const IoHash& ChunkHash)
{
RwLock::SharedLockScope _(m_LocationMapLock);
-
- if (auto KeyIt = m_LocationMap.find(ChunkHash); KeyIt != m_LocationMap.end())
- {
- return true;
- }
-
- return false;
+ return m_LocationMap.contains(ChunkHash);
}
void
@@ -144,20 +388,23 @@ CasContainerStrategy::FilterChunks(CasChunkSet& InOutChunks)
void
CasContainerStrategy::Flush()
{
- m_CasLog.Flush();
- m_SmallObjectIndex.Flush();
- m_SmallObjectFile.Flush();
+ {
+ RwLock::ExclusiveLockScope _(m_InsertLock);
+ if (m_CurrentInsertOffset > 0)
+ {
+ uint32_t WriteBlockIndex = m_WriteBlockIndex.load(std::memory_order_acquire);
+ WriteBlockIndex = (WriteBlockIndex + 1) & BlockStoreDiskLocation::MaxBlockIndex;
+ m_WriteBlock = nullptr;
+ m_WriteBlockIndex.store(WriteBlockIndex, std::memory_order_release);
+ m_CurrentInsertOffset = 0;
+ }
+ }
+ MakeIndexSnapshot();
}
void
CasContainerStrategy::Scrub(ScrubContext& Ctx)
{
- const uint64_t WindowSize = 4 * 1024 * 1024;
- uint64_t WindowStart = 0;
- uint64_t WindowEnd = WindowSize;
- const uint64_t FileSize = m_SmallObjectFile.FileSize();
-
- std::vector<CasDiskIndexEntry> BigChunks;
std::vector<CasDiskIndexEntry> BadChunks;
// We do a read sweep through the payloads file and validate
@@ -166,62 +413,73 @@ CasContainerStrategy::Scrub(ScrubContext& Ctx)
// pass. An alternative strategy would be to use memory mapping.
{
- IoBuffer ReadBuffer{WindowSize};
- void* BufferBase = ReadBuffer.MutableData();
+ std::vector<CasDiskIndexEntry> BigChunks;
+ const uint64_t WindowSize = 4 * 1024 * 1024;
+ IoBuffer ReadBuffer{WindowSize};
+ void* BufferBase = ReadBuffer.MutableData();
- RwLock::SharedLockScope _(m_LocationMapLock);
+ RwLock::SharedLockScope _(m_InsertLock); // TODO: Refactor so we don't have to keep m_InsertLock all the time?
+ RwLock::SharedLockScope __(m_LocationMapLock);
- do
+ for (const auto& Block : m_ChunkBlocks)
{
- const uint64_t ChunkSize = Min(WindowSize, FileSize - WindowStart);
- m_SmallObjectFile.Read(BufferBase, ChunkSize, WindowStart);
+ uint64_t WindowStart = 0;
+ uint64_t WindowEnd = WindowSize;
+ const Ref<BlockStoreFile>& BlockFile = Block.second;
+ BlockFile->Open();
+ const uint64_t FileSize = BlockFile->FileSize();
- for (auto& Entry : m_LocationMap)
+ do
{
- const uint64_t EntryOffset = Entry.second.GetOffset();
+ const uint64_t ChunkSize = Min(WindowSize, FileSize - WindowStart);
+ BlockFile->Read(BufferBase, ChunkSize, WindowStart);
- if ((EntryOffset >= WindowStart) && (EntryOffset < WindowEnd))
+ for (auto& Entry : m_LocationMap)
{
- const uint64_t EntryEnd = EntryOffset + Entry.second.GetSize();
+ const BlockStoreLocation Location = Entry.second.Get(m_PayloadAlignment);
+ const uint64_t EntryOffset = Location.Offset;
- if (EntryEnd >= WindowEnd)
+ if ((EntryOffset >= WindowStart) && (EntryOffset < WindowEnd))
{
- BigChunks.push_back({.Key = Entry.first, .Location = Entry.second});
+ const uint64_t EntryEnd = EntryOffset + Location.Size;
- continue;
- }
+ if (EntryEnd >= WindowEnd)
+ {
+ BigChunks.push_back({.Key = Entry.first, .Location = Entry.second});
- const IoHash ComputedHash =
- IoHash::HashBuffer(reinterpret_cast<uint8_t*>(BufferBase) + Entry.second.GetOffset() - WindowStart,
- Entry.second.GetSize());
+ continue;
+ }
- if (Entry.first != ComputedHash)
- {
- // Hash mismatch
+ const IoHash ComputedHash =
+ IoHash::HashBuffer(reinterpret_cast<uint8_t*>(BufferBase) + Location.Offset - WindowStart, Location.Size);
- BadChunks.push_back({.Key = Entry.first, .Location = Entry.second});
+ if (Entry.first != ComputedHash)
+ {
+ // Hash mismatch
+ BadChunks.push_back({.Key = Entry.first, .Location = Entry.second, .Flags = CasDiskIndexEntry::kTombstone});
+ }
}
}
- }
- WindowStart += WindowSize;
- WindowEnd += WindowSize;
- } while (WindowStart < FileSize);
- }
-
- // Deal with large chunks
+ WindowStart += WindowSize;
+ WindowEnd += WindowSize;
+ } while (WindowStart < FileSize);
+ }
- for (const CasDiskIndexEntry& Entry : BigChunks)
- {
- IoHashStream Hasher;
- m_SmallObjectFile.StreamByteRange(Entry.Location.GetOffset(), Entry.Location.GetSize(), [&](const void* Data, uint64_t Size) {
- Hasher.Append(Data, Size);
- });
- IoHash ComputedHash = Hasher.GetHash();
+ // Deal with large chunks
- if (Entry.Key != ComputedHash)
+ for (const CasDiskIndexEntry& Entry : BigChunks)
{
- BadChunks.push_back(Entry);
+ IoHashStream Hasher;
+ const BlockStoreLocation Location = Entry.Location.Get(m_PayloadAlignment);
+ const Ref<BlockStoreFile>& BlockFile = m_ChunkBlocks[Location.BlockIndex];
+ BlockFile->StreamByteRange(Location.Offset, Location.Size, [&](const void* Data, uint64_t Size) { Hasher.Append(Data, Size); });
+ IoHash ComputedHash = Hasher.GetHash();
+
+ if (Entry.Key != ComputedHash)
+ {
+ BadChunks.push_back({.Key = Entry.Key, .Location = Entry.Location, .Flags = CasDiskIndexEntry::kTombstone});
+ }
}
}
@@ -230,17 +488,21 @@ CasContainerStrategy::Scrub(ScrubContext& Ctx)
return;
}
- ZEN_ERROR("Scrubbing found {} bad chunks in '{}'", BadChunks.size(), m_ContainerBaseName);
+ ZEN_ERROR("Scrubbing found {} bad chunks in '{}'", BadChunks.size(), m_Config.RootDirectory / m_ContainerBaseName);
// Deal with bad chunks by removing them from our lookup map
std::vector<IoHash> BadChunkHashes;
+ BadChunkHashes.reserve(BadChunks.size());
- for (const CasDiskIndexEntry& Entry : BadChunks)
+ m_CasLog.Append(BadChunks);
{
- BadChunkHashes.push_back(Entry.Key);
- m_CasLog.Append({.Key = Entry.Key, .Location = Entry.Location, .Flags = CasDiskIndexEntry::kTombstone});
- m_LocationMap.erase(Entry.Key);
+ RwLock::ExclusiveLockScope _(m_LocationMapLock);
+ for (const CasDiskIndexEntry& Entry : BadChunks)
+ {
+ BadChunkHashes.push_back(Entry.Key);
+ m_LocationMap.erase(Entry.Key);
+ }
}
// Let whomever it concerns know about the bad chunks. This could
@@ -253,243 +515,1106 @@ CasContainerStrategy::Scrub(ScrubContext& Ctx)
void
CasContainerStrategy::CollectGarbage(GcContext& GcCtx)
{
- namespace fs = std::filesystem;
-
- // A naive garbage collection implementation that just copies evicted chunks
- // into a new container file. We probably need to partition the container file
- // into several parts to prevent needing to keep the entire container file during GC.
+ // It collects all the blocks that we want to delete chunks from. For each such
+ // block we keep a list of chunks to retain and a list of chunks to delete.
+ //
+ // If there is a block that we are currently writing to, that block is omitted
+ // from the garbage collection.
+ //
+ // Next it will iterate over all blocks that we want to remove chunks from.
+ // If the block is empty after removal of chunks we mark the block as pending
+ // delete - we want to delete it as soon as there are no IoBuffers using the
+ // block file.
+ // Once complete we update the m_LocationMap by removing the chunks.
+ //
+ // If the block is non-empty we write out the chunks we want to keep to a new
+ // block file (creating new block files as needed).
+ //
+ // We update the index as we complete each new block file. This makes it possible
+ // to break the GC if we want to limit time for execution.
+ //
+    // GC can run fairly parallel to regular operation - it will block while taking
+ // a snapshot of the current m_LocationMap state.
+ //
+ // While moving blocks it will do a blocking operation and update the m_LocationMap
+ // after each new block is written and figuring out the path to the next new block.
ZEN_INFO("collecting garbage from '{}'", m_Config.RootDirectory / m_ContainerBaseName);
+ uint64_t WriteBlockTimeUs = 0;
+ uint64_t WriteBlockLongestTimeUs = 0;
+ uint64_t ReadBlockTimeUs = 0;
+ uint64_t ReadBlockLongestTimeUs = 0;
+ uint64_t TotalChunkCount = 0;
+ uint64_t DeletedSize = 0;
+ uint64_t OldTotalSize = m_TotalSize.load(std::memory_order::relaxed);
+
+ std::vector<IoHash> DeletedChunks;
+ uint64_t MovedCount = 0;
+
+ Stopwatch TotalTimer;
+ const auto _ = MakeGuard([this,
+ &TotalTimer,
+ &WriteBlockTimeUs,
+ &WriteBlockLongestTimeUs,
+ &ReadBlockTimeUs,
+ &ReadBlockLongestTimeUs,
+ &TotalChunkCount,
+ &DeletedChunks,
+ &MovedCount,
+ &DeletedSize,
+ OldTotalSize] {
+ ZEN_INFO(
+ "garbage collect for '{}' DONE after {}, write lock: {} ({}), read lock: {} ({}), collected {} bytes, deleted #{} and moved "
+ "#{} "
+ "of #{} "
+ "chunks ({}).",
+ m_Config.RootDirectory / m_ContainerBaseName,
+ NiceTimeSpanMs(TotalTimer.GetElapsedTimeMs()),
+ NiceLatencyNs(WriteBlockTimeUs),
+ NiceLatencyNs(WriteBlockLongestTimeUs),
+ NiceLatencyNs(ReadBlockTimeUs),
+ NiceLatencyNs(ReadBlockLongestTimeUs),
+ NiceBytes(DeletedSize),
+ DeletedChunks.size(),
+ MovedCount,
+ TotalChunkCount,
+ NiceBytes(OldTotalSize));
+ });
+
+ LocationMap_t LocationMap;
+ size_t BlockCount;
+ uint64_t ExcludeBlockIndex = 0x800000000ull;
+ {
+ RwLock::SharedLockScope __(m_InsertLock);
+ RwLock::SharedLockScope ___(m_LocationMapLock);
+ {
+ Stopwatch Timer;
+ const auto ____ = MakeGuard([&Timer, &WriteBlockTimeUs, &WriteBlockLongestTimeUs] {
+ uint64_t ElapsedUs = Timer.GetElapsedTimeUs();
+ WriteBlockTimeUs += ElapsedUs;
+ WriteBlockLongestTimeUs = std::max(ElapsedUs, WriteBlockLongestTimeUs);
+ });
+ if (m_WriteBlock)
+ {
+ ExcludeBlockIndex = m_WriteBlockIndex.load(std::memory_order_acquire);
+ }
+ __.ReleaseNow();
+ }
+ LocationMap = m_LocationMap;
+ BlockCount = m_ChunkBlocks.size();
+ }
- RwLock::ExclusiveLockScope _(m_LocationMapLock);
+ if (LocationMap.empty())
+ {
+ ZEN_INFO("garbage collect SKIPPED, for '{}', container is empty", m_Config.RootDirectory / m_ContainerBaseName);
+ return;
+ }
- Flush();
+ TotalChunkCount = LocationMap.size();
- std::vector<IoHash> Candidates;
- std::vector<IoHash> ChunksToKeep;
- std::vector<IoHash> ChunksToDelete;
- const uint64_t ChunkCount = m_LocationMap.size();
- uint64_t TotalSize{};
+ std::unordered_map<uint32_t, size_t> BlockIndexToChunkMapIndex;
+ std::vector<std::vector<IoHash>> KeepChunks;
+ std::vector<std::vector<IoHash>> DeleteChunks;
- Candidates.reserve(m_LocationMap.size());
+ BlockIndexToChunkMapIndex.reserve(BlockCount);
+ KeepChunks.reserve(BlockCount);
+ DeleteChunks.reserve(BlockCount);
+ size_t GuesstimateCountPerBlock = TotalChunkCount / BlockCount / 2;
- for (auto& Entry : m_LocationMap)
+ std::vector<IoHash> TotalChunkHashes;
+ TotalChunkHashes.reserve(TotalChunkCount);
+ for (const auto& Entry : LocationMap)
{
- Candidates.push_back(Entry.first);
- TotalSize += Entry.second.GetSize();
+ TotalChunkHashes.push_back(Entry.first);
}
- ChunksToKeep.reserve(Candidates.size());
- GcCtx.FilterCas(Candidates, [&ChunksToKeep, &ChunksToDelete](const IoHash& Hash, bool Keep) {
+ uint64_t DeleteCount = 0;
+
+ uint64_t NewTotalSize = 0;
+ GcCtx.FilterCas(TotalChunkHashes, [&](const IoHash& ChunkHash, bool Keep) {
+ auto KeyIt = LocationMap.find(ChunkHash);
+ const BlockStoreDiskLocation& Location = KeyIt->second;
+ uint32_t BlockIndex = Location.GetBlockIndex();
+
+ if (static_cast<uint64_t>(BlockIndex) == ExcludeBlockIndex)
+ {
+ return;
+ }
+
+ auto BlockIndexPtr = BlockIndexToChunkMapIndex.find(BlockIndex);
+ size_t ChunkMapIndex = 0;
+ if (BlockIndexPtr == BlockIndexToChunkMapIndex.end())
+ {
+ ChunkMapIndex = KeepChunks.size();
+ BlockIndexToChunkMapIndex[BlockIndex] = ChunkMapIndex;
+ KeepChunks.resize(ChunkMapIndex + 1);
+ KeepChunks.back().reserve(GuesstimateCountPerBlock);
+ DeleteChunks.resize(ChunkMapIndex + 1);
+ DeleteChunks.back().reserve(GuesstimateCountPerBlock);
+ }
+ else
+ {
+ ChunkMapIndex = BlockIndexPtr->second;
+ }
if (Keep)
{
- ChunksToKeep.push_back(Hash);
+ std::vector<IoHash>& ChunkMap = KeepChunks[ChunkMapIndex];
+ ChunkMap.push_back(ChunkHash);
+ NewTotalSize += Location.GetSize();
}
else
{
- ChunksToDelete.push_back(Hash);
+ std::vector<IoHash>& ChunkMap = DeleteChunks[ChunkMapIndex];
+ ChunkMap.push_back(ChunkHash);
+ DeleteCount++;
}
});
- if (m_LocationMap.empty() || ChunksToKeep.size() == m_LocationMap.size())
+ std::unordered_set<uint32_t> BlocksToReWrite;
+ BlocksToReWrite.reserve(BlockIndexToChunkMapIndex.size());
+ for (const auto& Entry : BlockIndexToChunkMapIndex)
+ {
+ uint32_t BlockIndex = Entry.first;
+ size_t ChunkMapIndex = Entry.second;
+ const std::vector<IoHash>& ChunkMap = DeleteChunks[ChunkMapIndex];
+ if (ChunkMap.empty())
+ {
+ continue;
+ }
+ BlocksToReWrite.insert(BlockIndex);
+ }
+
+ const bool PerformDelete = GcCtx.IsDeletionMode() && GcCtx.CollectSmallObjects();
+ if (!PerformDelete)
{
- ZEN_INFO("garbage collect DONE, scanned #{} {} chunks from '{}', nothing to delete",
- ChunkCount,
- NiceBytes(TotalSize),
- m_Config.RootDirectory / m_ContainerBaseName);
+ uint64_t TotalSize = m_TotalSize.load(std::memory_order_relaxed);
+ ZEN_INFO("garbage collect for '{}' DISABLED, found #{} {} chunks of total #{} {}",
+ m_Config.RootDirectory / m_ContainerBaseName,
+ DeleteCount,
+ NiceBytes(TotalSize - NewTotalSize),
+ TotalChunkCount,
+ NiceBytes(TotalSize));
return;
}
- const uint64_t NewChunkCount = ChunksToKeep.size();
- uint64_t NewTotalSize = 0;
+ // Move all chunks in blocks that have chunks removed to new blocks
- for (const IoHash& Key : ChunksToKeep)
+ Ref<BlockStoreFile> NewBlockFile;
+ uint64_t WriteOffset = 0;
+ uint32_t NewBlockIndex = 0;
+ DeletedChunks.reserve(DeleteCount);
+
+ auto UpdateLocations = [this](const std::span<CasDiskIndexEntry>& Entries) {
+ for (const CasDiskIndexEntry& Entry : Entries)
+ {
+ if (Entry.Flags & CasDiskIndexEntry::kTombstone)
+ {
+ auto KeyIt = m_LocationMap.find(Entry.Key);
+ uint64_t ChunkSize = KeyIt->second.GetSize();
+ m_TotalSize.fetch_sub(ChunkSize);
+ m_LocationMap.erase(KeyIt);
+ continue;
+ }
+ m_LocationMap[Entry.Key] = Entry.Location;
+ }
+ };
+
+ std::unordered_map<IoHash, BlockStoreDiskLocation> MovedBlockChunks;
+ for (uint32_t BlockIndex : BlocksToReWrite)
{
- const CasDiskLocation& Loc = m_LocationMap[Key];
- NewTotalSize += Loc.GetSize();
+ const size_t ChunkMapIndex = BlockIndexToChunkMapIndex[BlockIndex];
+
+ Ref<BlockStoreFile> OldBlockFile;
+ {
+ RwLock::SharedLockScope _i(m_LocationMapLock);
+ OldBlockFile = m_ChunkBlocks[BlockIndex];
+ }
+
+ const std::vector<IoHash>& KeepMap = KeepChunks[ChunkMapIndex];
+ if (KeepMap.empty())
+ {
+ const std::vector<IoHash>& DeleteMap = DeleteChunks[ChunkMapIndex];
+ std::vector<CasDiskIndexEntry> LogEntries = MakeCasDiskEntries({}, DeleteMap);
+ m_CasLog.Append(LogEntries);
+ m_CasLog.Flush();
+ {
+ RwLock::ExclusiveLockScope _i(m_LocationMapLock);
+ Stopwatch Timer;
+ const auto __ = MakeGuard([&Timer, &ReadBlockTimeUs, &ReadBlockLongestTimeUs] {
+ uint64_t ElapsedUs = Timer.GetElapsedTimeUs();
+ ReadBlockTimeUs += ElapsedUs;
+ ReadBlockLongestTimeUs = std::max(ElapsedUs, ReadBlockLongestTimeUs);
+ });
+ UpdateLocations(LogEntries);
+ m_ChunkBlocks[BlockIndex] = nullptr;
+ }
+ DeletedChunks.insert(DeletedChunks.end(), DeleteMap.begin(), DeleteMap.end());
+ ZEN_DEBUG("marking cas store file in '{}' for delete , block #{}, '{}'",
+ m_ContainerBaseName,
+ BlockIndex,
+ OldBlockFile->GetPath());
+ std::error_code Ec;
+ OldBlockFile->MarkAsDeleteOnClose(Ec);
+ if (Ec)
+ {
+ ZEN_WARN("Failed to flag file '{}' for deletion: '{}'", OldBlockFile->GetPath(), Ec.message());
+ }
+ continue;
+ }
+
+ std::vector<uint8_t> Chunk;
+ for (const IoHash& ChunkHash : KeepMap)
+ {
+ auto KeyIt = LocationMap.find(ChunkHash);
+ const BlockStoreLocation ChunkLocation = KeyIt->second.Get(m_PayloadAlignment);
+ Chunk.resize(ChunkLocation.Size);
+ OldBlockFile->Read(Chunk.data(), Chunk.size(), ChunkLocation.Offset);
+
+ if (!NewBlockFile || (WriteOffset + Chunk.size() > m_MaxBlockSize))
+ {
+ uint32_t NextBlockIndex = m_WriteBlockIndex.load(std::memory_order_relaxed);
+ std::vector<CasDiskIndexEntry> LogEntries = MakeCasDiskEntries(MovedBlockChunks, {});
+ m_CasLog.Append(LogEntries);
+ m_CasLog.Flush();
+
+ if (NewBlockFile)
+ {
+ NewBlockFile->Truncate(WriteOffset);
+ NewBlockFile->Flush();
+ }
+ {
+ RwLock::ExclusiveLockScope __(m_LocationMapLock);
+ Stopwatch Timer;
+ const auto ___ = MakeGuard([&Timer, &ReadBlockTimeUs, &ReadBlockLongestTimeUs] {
+ uint64_t ElapsedUs = Timer.GetElapsedTimeUs();
+ ReadBlockTimeUs += ElapsedUs;
+ ReadBlockLongestTimeUs = std::max(ElapsedUs, ReadBlockLongestTimeUs);
+ });
+ UpdateLocations(LogEntries);
+ if (m_ChunkBlocks.size() == BlockStoreDiskLocation::MaxBlockIndex)
+ {
+ ZEN_ERROR("unable to allocate a new block in '{}', count limit {} exeeded",
+ m_Config.RootDirectory / m_ContainerBaseName,
+ static_cast<uint64_t>(std::numeric_limits<uint32_t>::max()) + 1);
+ return;
+ }
+ while (m_ChunkBlocks.contains(NextBlockIndex))
+ {
+ NextBlockIndex = (NextBlockIndex + 1) & BlockStoreDiskLocation::MaxBlockIndex;
+ }
+ std::filesystem::path NewBlockPath = GetBlockPath(m_BlocksBasePath, NextBlockIndex);
+ NewBlockFile = new BlockStoreFile(NewBlockPath);
+ m_ChunkBlocks[NextBlockIndex] = NewBlockFile;
+ }
+
+ MovedCount += MovedBlockChunks.size();
+ MovedBlockChunks.clear();
+
+ std::error_code Error;
+ DiskSpace Space = DiskSpaceInfo(m_Config.RootDirectory, Error);
+ if (Error)
+ {
+ ZEN_ERROR("get disk space in '{}' FAILED, reason: '{}'", m_Config.RootDirectory, Error.message());
+ return;
+ }
+ if (Space.Free < m_MaxBlockSize)
+ {
+ uint64_t ReclaimedSpace = GcCtx.ClaimGCReserve();
+ if (Space.Free + ReclaimedSpace < m_MaxBlockSize)
+ {
+ ZEN_WARN("garbage collect for '{}' FAILED, required disk space {}, free {}",
+ m_Config.RootDirectory / m_ContainerBaseName,
+ m_MaxBlockSize,
+ NiceBytes(Space.Free + ReclaimedSpace));
+ RwLock::ExclusiveLockScope _l(m_LocationMapLock);
+ Stopwatch Timer;
+ const auto __ = MakeGuard([&Timer, &ReadBlockTimeUs, &ReadBlockLongestTimeUs] {
+ uint64_t ElapsedUs = Timer.GetElapsedTimeUs();
+ ReadBlockTimeUs += ElapsedUs;
+ ReadBlockLongestTimeUs = std::max(ElapsedUs, ReadBlockLongestTimeUs);
+ });
+ m_ChunkBlocks.erase(NextBlockIndex);
+ return;
+ }
+
+ ZEN_INFO("using gc reserve for '{}', reclaimed {}, disk free {}",
+ m_Config.RootDirectory / m_ContainerBaseName,
+ ReclaimedSpace,
+ NiceBytes(Space.Free + ReclaimedSpace));
+ }
+ NewBlockFile->Create(m_MaxBlockSize);
+ NewBlockIndex = NextBlockIndex;
+ WriteOffset = 0;
+ }
+
+ NewBlockFile->Write(Chunk.data(), Chunk.size(), WriteOffset);
+ MovedBlockChunks.emplace(
+ ChunkHash,
+ BlockStoreDiskLocation({.BlockIndex = NewBlockIndex, .Offset = WriteOffset, .Size = Chunk.size()}, m_PayloadAlignment));
+ WriteOffset = RoundUp(WriteOffset + Chunk.size(), m_PayloadAlignment);
+ }
+ Chunk.clear();
+ if (NewBlockFile)
+ {
+ NewBlockFile->Truncate(WriteOffset);
+ NewBlockFile->Flush();
+ NewBlockFile = {};
+ }
+
+ const std::vector<IoHash>& DeleteMap = DeleteChunks[ChunkMapIndex];
+ std::vector<CasDiskIndexEntry> LogEntries = MakeCasDiskEntries(MovedBlockChunks, DeleteMap);
+ m_CasLog.Append(LogEntries);
+ m_CasLog.Flush();
+ {
+ RwLock::ExclusiveLockScope __(m_LocationMapLock);
+ Stopwatch Timer;
+ const auto ___ = MakeGuard([&Timer, &ReadBlockTimeUs, &ReadBlockLongestTimeUs] {
+ uint64_t ElapsedUs = Timer.GetElapsedTimeUs();
+ ReadBlockTimeUs += ElapsedUs;
+ ReadBlockLongestTimeUs = std::max(ElapsedUs, ReadBlockLongestTimeUs);
+ });
+ UpdateLocations(LogEntries);
+ m_ChunkBlocks[BlockIndex] = nullptr;
+ }
+ MovedCount += MovedBlockChunks.size();
+ DeletedChunks.insert(DeletedChunks.end(), DeleteMap.begin(), DeleteMap.end());
+ MovedBlockChunks.clear();
+
+ ZEN_DEBUG("marking cas store file in '{}' for delete , block #{}, '{}'", m_ContainerBaseName, BlockIndex, OldBlockFile->GetPath());
+ std::error_code Ec;
+ OldBlockFile->MarkAsDeleteOnClose(Ec);
+ if (Ec)
+ {
+ ZEN_WARN("Failed to flag file '{}' for deletion: '{}'", OldBlockFile->GetPath(), Ec.message());
+ }
+ OldBlockFile = nullptr;
}
- std::error_code Error;
- DiskSpace Space = DiskSpaceInfo(m_Config.RootDirectory, Error);
- if (Error)
+ for (const IoHash& ChunkHash : DeletedChunks)
{
- ZEN_ERROR("get disk space FAILED, reason '{}'", Error.message());
- return;
+ DeletedSize += LocationMap[ChunkHash].GetSize();
}
- if (Space.Free < NewTotalSize + (64 << 20))
- {
- ZEN_INFO("garbage collect from '{}' FAILED, required disk space {}, free {}",
+ GcCtx.DeletedCas(DeletedChunks);
+}
+
+void
+CasContainerStrategy::MakeIndexSnapshot()
+{
+ ZEN_INFO("write store snapshot for '{}'", m_Config.RootDirectory / m_ContainerBaseName);
+ uint64_t EntryCount = 0;
+ Stopwatch Timer;
+ const auto _ = MakeGuard([this, &EntryCount, &Timer] {
+ ZEN_INFO("wrote store snapshot for '{}' containing #{} entries in {}",
m_Config.RootDirectory / m_ContainerBaseName,
- NiceBytes(NewTotalSize),
- NiceBytes(Space.Free));
- return;
- }
+ EntryCount,
+ NiceTimeSpanMs(Timer.GetElapsedTimeMs()));
+ });
+
+ namespace fs = std::filesystem;
- const bool CollectSmallObjects = GcCtx.IsDeletionMode() && GcCtx.CollectSmallObjects();
+ fs::path IndexPath = GetIndexPath(m_Config.RootDirectory, m_ContainerBaseName);
+ fs::path TempIndexPath = GetTempIndexPath(m_Config.RootDirectory, m_ContainerBaseName);
- if (!CollectSmallObjects)
+    // Move the existing index aside; it is kept so it can be restored if something goes wrong
+ if (fs::is_regular_file(TempIndexPath))
{
- ZEN_INFO("garbage collect from '{}' DISABLED, found #{} {} chunks of total #{} {}",
- m_Config.RootDirectory / m_ContainerBaseName,
- ChunkCount - NewChunkCount,
- NiceBytes(TotalSize - NewTotalSize),
- ChunkCount,
- NiceBytes(TotalSize));
- return;
+ fs::remove(TempIndexPath);
+ }
+ if (fs::is_regular_file(IndexPath))
+ {
+ fs::rename(IndexPath, TempIndexPath);
}
- fs::path TmpSobsPath = m_Config.RootDirectory / (m_ContainerBaseName + ".gc.ucas");
- fs::path TmpSlogPath = m_Config.RootDirectory / (m_ContainerBaseName + ".gc.ulog");
-
+ try
{
- ZEN_DEBUG("creating temporary container cas '{}'...", TmpSobsPath);
+ m_CasLog.Flush();
- TCasLogFile<CasDiskIndexEntry> TmpLog;
- BasicFile TmpObjectFile;
- bool IsNew = true;
+        // Write the current state of the location map to a new index snapshot
+ uint64_t LogCount = 0;
+ std::vector<CasDiskIndexEntry> Entries;
- TmpLog.Open(TmpSlogPath, IsNew);
- TmpObjectFile.Open(TmpSobsPath, IsNew);
+ {
+ RwLock::SharedLockScope __(m_InsertLock);
+ RwLock::SharedLockScope ___(m_LocationMapLock);
+ Entries.resize(m_LocationMap.size());
- std::vector<uint8_t> Chunk;
- uint64_t NextInsertOffset{};
+ uint64_t EntryIndex = 0;
+ for (auto& Entry : m_LocationMap)
+ {
+ CasDiskIndexEntry& IndexEntry = Entries[EntryIndex++];
+ IndexEntry.Key = Entry.first;
+ IndexEntry.Location = Entry.second;
+ }
- for (const IoHash& Key : ChunksToKeep)
- {
- const auto Entry = m_LocationMap.find(Key);
- const auto& Loc = Entry->second;
+ LogCount = m_CasLog.GetLogCount();
+ }
- Chunk.resize(Loc.GetSize());
- m_SmallObjectFile.Read(Chunk.data(), Chunk.size(), Loc.GetOffset());
+ BasicFile ObjectIndexFile;
+ ObjectIndexFile.Open(IndexPath, BasicFile::Mode::kTruncate);
+ CasDiskIndexHeader Header = {.EntryCount = Entries.size(),
+ .LogPosition = LogCount,
+ .PayloadAlignment = gsl::narrow<uint32_t>(m_PayloadAlignment)};
- const uint64_t InsertOffset = NextInsertOffset;
- TmpObjectFile.Write(Chunk.data(), Chunk.size(), InsertOffset);
- TmpLog.Append({.Key = Key, .Location = {InsertOffset, Chunk.size()}});
+ Header.Checksum = CasDiskIndexHeader::ComputeChecksum(Header);
- NextInsertOffset = (NextInsertOffset + Chunk.size() + m_PayloadAlignment - 1) & ~(m_PayloadAlignment - 1);
+ ObjectIndexFile.Write(&Header, sizeof(CasDiskIndexEntry), 0);
+ ObjectIndexFile.Write(Entries.data(), Entries.size() * sizeof(CasDiskIndexEntry), sizeof(CasDiskIndexEntry));
+ ObjectIndexFile.Flush();
+ ObjectIndexFile.Close();
+ EntryCount = Entries.size();
+ }
+ catch (std::exception& Err)
+ {
+ ZEN_ERROR("snapshot FAILED, reason: '{}'", Err.what());
+
+ // Restore any previous snapshot
+
+ if (fs::is_regular_file(TempIndexPath))
+ {
+ fs::remove(IndexPath);
+ fs::rename(TempIndexPath, IndexPath);
}
}
+ if (fs::is_regular_file(TempIndexPath))
+ {
+ fs::remove(TempIndexPath);
+ }
+}
- try
+uint64_t
+CasContainerStrategy::ReadIndexFile()
+{
+ std::vector<CasDiskIndexEntry> Entries;
+ std::filesystem::path IndexPath = GetIndexPath(m_Config.RootDirectory, m_ContainerBaseName);
+ if (std::filesystem::is_regular_file(IndexPath))
{
- CloseContainer();
+ Stopwatch Timer;
+ const auto _ = MakeGuard([this, &Entries, &Timer] {
+ ZEN_INFO("read store '{}' index containing #{} entries in {}",
+ m_Config.RootDirectory / m_ContainerBaseName,
+ Entries.size(),
+ NiceTimeSpanMs(Timer.GetElapsedTimeMs()));
+ });
- fs::path SobsPath = m_Config.RootDirectory / (m_ContainerBaseName + ".ucas");
- fs::path SidxPath = m_Config.RootDirectory / (m_ContainerBaseName + ".uidx");
- fs::path SlogPath = m_Config.RootDirectory / (m_ContainerBaseName + ".ulog");
+ BasicFile ObjectIndexFile;
+ ObjectIndexFile.Open(IndexPath, BasicFile::Mode::kRead);
+ uint64_t Size = ObjectIndexFile.FileSize();
+ if (Size >= sizeof(CasDiskIndexHeader))
+ {
+ uint64_t ExpectedEntryCount = (Size - sizeof(sizeof(CasDiskIndexHeader))) / sizeof(CasDiskIndexEntry);
+ CasDiskIndexHeader Header;
+ ObjectIndexFile.Read(&Header, sizeof(Header), 0);
+ if ((Header.Magic == CasDiskIndexHeader::ExpectedMagic) && (Header.Version == CasDiskIndexHeader::CurrentVersion) &&
+ (Header.Checksum == CasDiskIndexHeader::ComputeChecksum(Header)) && (Header.PayloadAlignment > 0) &&
+ (Header.EntryCount <= ExpectedEntryCount))
+ {
+ Entries.resize(Header.EntryCount);
+ ObjectIndexFile.Read(Entries.data(), Header.EntryCount * sizeof(CasDiskIndexEntry), sizeof(CasDiskIndexHeader));
+ m_PayloadAlignment = Header.PayloadAlignment;
- fs::remove(SobsPath);
- fs::remove(SidxPath);
- fs::remove(SlogPath);
+ std::string InvalidEntryReason;
+ for (const CasDiskIndexEntry& Entry : Entries)
+ {
+ if (!ValidateEntry(Entry, InvalidEntryReason))
+ {
+ ZEN_WARN("skipping invalid entry in '{}', reason: '{}'", IndexPath, InvalidEntryReason);
+ continue;
+ }
+ m_LocationMap[Entry.Key] = Entry.Location;
+ }
- fs::rename(TmpSobsPath, SobsPath);
- fs::rename(TmpSlogPath, SlogPath);
+ return Header.LogPosition;
+ }
+ else
+ {
+ ZEN_WARN("skipping invalid index file '{}'", IndexPath);
+ }
+ }
+ }
+ return 0;
+}
+uint64_t
+CasContainerStrategy::ReadLog(uint64_t SkipEntryCount)
+{
+ std::vector<CasDiskIndexEntry> Entries;
+ std::filesystem::path LogPath = GetLogPath(m_Config.RootDirectory, m_ContainerBaseName);
+ if (std::filesystem::is_regular_file(LogPath))
+ {
+ Stopwatch Timer;
+ const auto _ = MakeGuard([this, &Entries, &Timer] {
+ ZEN_INFO("read store '{}' log containing #{} entries in {}",
+ m_Config.RootDirectory / m_ContainerBaseName,
+ Entries.size(),
+ NiceTimeSpanMs(Timer.GetElapsedTimeMs()));
+ });
+
+ TCasLogFile<CasDiskIndexEntry> CasLog;
+ CasLog.Open(LogPath, CasLogFile::Mode::kRead);
+ if (CasLog.Initialize())
{
- // Create a new empty index file
- BasicFile SidxFile;
- SidxFile.Open(SidxPath, true);
+ uint64_t EntryCount = CasLog.GetLogCount();
+ if (EntryCount < SkipEntryCount)
+ {
+ ZEN_WARN("reading full log at '{}', reason: Log position from index snapshot is out of range", LogPath);
+ SkipEntryCount = 0;
+ }
+ uint64_t ReadCount = EntryCount - SkipEntryCount;
+ Entries.reserve(ReadCount);
+ CasLog.Replay(
+ [&](const CasDiskIndexEntry& Record) {
+ std::string InvalidEntryReason;
+ if (Record.Flags & CasDiskIndexEntry::kTombstone)
+ {
+ m_LocationMap.erase(Record.Key);
+ return;
+ }
+ if (!ValidateEntry(Record, InvalidEntryReason))
+ {
+ ZEN_WARN("skipping invalid entry in '{}', reason: '{}'", LogPath, InvalidEntryReason);
+ return;
+ }
+ m_LocationMap[Record.Key] = Record.Location;
+ },
+ SkipEntryCount);
+ return ReadCount;
}
+ }
+ return 0;
+}
+
+uint64_t
+CasContainerStrategy::MigrateLegacyData(bool CleanSource)
+{
+ std::filesystem::path LegacyLogPath = GetLegacyLogPath(m_Config.RootDirectory, m_ContainerBaseName);
+
+ if (!std::filesystem::is_regular_file(LegacyLogPath) || std::filesystem::file_size(LegacyLogPath) == 0)
+ {
+ return 0;
+ }
- OpenContainer(false /* IsNewStore */);
+ ZEN_INFO("migrating store '{}'", m_Config.RootDirectory / m_ContainerBaseName);
- GcCtx.DeletedCas(ChunksToDelete);
+ std::filesystem::path LegacyDataPath = GetLegacyDataPath(m_Config.RootDirectory, m_ContainerBaseName);
+ std::filesystem::path LegacyIndexPath = GetLegacyIndexPath(m_Config.RootDirectory, m_ContainerBaseName);
- ZEN_INFO("garbage collect from '{}' DONE, collected #{} {} chunks of total #{} {}",
+ uint64_t MigratedChunkCount = 0;
+ uint32_t MigratedBlockCount = 0;
+ Stopwatch MigrationTimer;
+ uint64_t TotalSize = 0;
+ const auto _ = MakeGuard([this, &MigrationTimer, &MigratedChunkCount, &MigratedBlockCount, &TotalSize] {
+ ZEN_INFO("migrated store '{}' to #{} chunks in #{} blocks in {} ({})",
m_Config.RootDirectory / m_ContainerBaseName,
- ChunkCount - NewChunkCount,
- NiceBytes(TotalSize - NewTotalSize),
- ChunkCount,
+ MigratedChunkCount,
+ MigratedBlockCount,
+ NiceTimeSpanMs(MigrationTimer.GetElapsedTimeMs()),
NiceBytes(TotalSize));
+ });
+
+ uint32_t WriteBlockIndex = 0;
+ while (std::filesystem::exists(GetBlockPath(m_BlocksBasePath, WriteBlockIndex)))
+ {
+ ++WriteBlockIndex;
}
- catch (std::exception& Err)
+
+ std::error_code Error;
+ DiskSpace Space = DiskSpaceInfo(m_Config.RootDirectory, Error);
+ if (Error)
+ {
+ ZEN_ERROR("get disk space in {} FAILED, reason: '{}'", m_Config.RootDirectory, Error.message());
+ return 0;
+ }
+
+ if (Space.Free < m_MaxBlockSize)
{
- ZEN_ERROR("garbage collection FAILED, reason '{}'", Err.what());
+ ZEN_ERROR("legacy store migration from '{}' FAILED, required disk space {}, free {}",
+ m_Config.RootDirectory / m_ContainerBaseName,
+ m_MaxBlockSize,
+ NiceBytes(Space.Free));
+ return 0;
+ }
+
+ BasicFile BlockFile;
+ BlockFile.Open(LegacyDataPath, CleanSource ? BasicFile::Mode::kWrite : BasicFile::Mode::kRead);
- // Something went wrong, try create a new container
- OpenContainer(true /* IsNewStore */);
+ std::unordered_map<IoHash, LegacyCasDiskIndexEntry, IoHash::Hasher> LegacyDiskIndex;
+ uint64_t InvalidEntryCount = 0;
+
+ TCasLogFile<LegacyCasDiskIndexEntry> LegacyCasLog;
+ LegacyCasLog.Open(LegacyLogPath, CleanSource ? CasLogFile::Mode::kWrite : CasLogFile::Mode::kRead);
+ {
+ Stopwatch Timer;
+ const auto __ = MakeGuard([this, &LegacyDiskIndex, &Timer] {
+ ZEN_INFO("read store '{}' legacy log containing #{} entries in {}",
+ m_Config.RootDirectory / m_ContainerBaseName,
+ LegacyDiskIndex.size(),
+ NiceTimeSpanMs(Timer.GetElapsedTimeMs()));
+ });
+ if (LegacyCasLog.Initialize())
+ {
+ LegacyDiskIndex.reserve(LegacyCasLog.GetLogCount());
+ LegacyCasLog.Replay(
+ [&](const LegacyCasDiskIndexEntry& Record) {
+ std::string InvalidEntryReason;
+ if (Record.Flags & LegacyCasDiskIndexEntry::kTombstone)
+ {
+ LegacyDiskIndex.erase(Record.Key);
+ return;
+ }
+ if (!ValidateLegacyEntry(Record, InvalidEntryReason))
+ {
+ ZEN_WARN("skipping invalid entry in '{}', reason: '{}'", LegacyLogPath, InvalidEntryReason);
+ InvalidEntryCount++;
+ return;
+ }
+ LegacyDiskIndex.insert_or_assign(Record.Key, Record);
+ },
+ 0);
- GcCtx.DeletedCas(ChunksToDelete);
- GcCtx.DeletedCas(ChunksToKeep);
+ std::vector<IoHash> BadEntries;
+ uint64_t BlockFileSize = BlockFile.FileSize();
+ for (const auto& Entry : LegacyDiskIndex)
+ {
+ const LegacyCasDiskIndexEntry& Record(Entry.second);
+ if (Record.Location.GetOffset() + Record.Location.GetSize() <= BlockFileSize)
+ {
+ continue;
+ }
+ ZEN_WARN("skipping invalid entry in '{}', reason: location is outside of file", LegacyLogPath);
+ BadEntries.push_back(Entry.first);
+ }
+ for (const IoHash& BadHash : BadEntries)
+ {
+ LegacyDiskIndex.erase(BadHash);
+ }
+ InvalidEntryCount += BadEntries.size();
+ }
}
-}
-void
-CasContainerStrategy::MakeSnapshot()
-{
- RwLock::SharedLockScope _(m_LocationMapLock);
+ if (InvalidEntryCount)
+ {
+ ZEN_WARN("found #{} invalid entries in '{}'", InvalidEntryCount, m_Config.RootDirectory / m_ContainerBaseName);
+ }
- std::vector<CasDiskIndexEntry> Entries{m_LocationMap.size()};
+ if (LegacyDiskIndex.empty())
+ {
+ BlockFile.Close();
+ LegacyCasLog.Close();
+ if (CleanSource)
+ {
+            // Older versions of CasContainerStrategy expect the legacy files to exist if they can find
+            // a CAS manifest, and crash on startup if they don't.
+            // In order to not break startup when switching back to an older version, let's just reset
+            // the legacy data files to zero length.
+
+ BasicFile LegacyLog;
+ LegacyLog.Open(LegacyLogPath, BasicFile::Mode::kTruncate);
+ BasicFile LegacySobs;
+ LegacySobs.Open(LegacyDataPath, BasicFile::Mode::kTruncate);
+ BasicFile LegacySidx;
+ LegacySidx.Open(LegacyIndexPath, BasicFile::Mode::kTruncate);
+ }
+ return 0;
+ }
- uint64_t EntryIndex = 0;
- for (auto& Entry : m_LocationMap)
+ for (const auto& Entry : LegacyDiskIndex)
{
- CasDiskIndexEntry& IndexEntry = Entries[EntryIndex++];
- IndexEntry.Key = Entry.first;
- IndexEntry.Location = Entry.second;
+ const LegacyCasDiskIndexEntry& Record(Entry.second);
+ TotalSize += Record.Location.GetSize();
}
- m_SmallObjectIndex.Write(Entries.data(), Entries.size() * sizeof(CasDiskIndexEntry), 0);
-}
+ uint64_t RequiredDiskSpace = TotalSize + ((m_PayloadAlignment - 1) * LegacyDiskIndex.size());
+ uint64_t MaxRequiredBlockCount = RoundUp(RequiredDiskSpace, m_MaxBlockSize) / m_MaxBlockSize;
+ if (MaxRequiredBlockCount > BlockStoreDiskLocation::MaxBlockIndex)
+ {
+ ZEN_ERROR("legacy store migration from '{}' FAILED, required block count {}, possible {}",
+ m_Config.RootDirectory / m_ContainerBaseName,
+ MaxRequiredBlockCount,
+ BlockStoreDiskLocation::MaxBlockIndex);
+ return 0;
+ }
-void
-CasContainerStrategy::OpenContainer(bool IsNewStore)
-{
- std::filesystem::path SobsPath = m_Config.RootDirectory / (m_ContainerBaseName + ".ucas");
- std::filesystem::path SidxPath = m_Config.RootDirectory / (m_ContainerBaseName + ".uidx");
- std::filesystem::path SlogPath = m_Config.RootDirectory / (m_ContainerBaseName + ".ulog");
+ constexpr const uint64_t DiskReserve = 1ul << 28;
- m_SmallObjectFile.Open(SobsPath, IsNewStore);
- m_SmallObjectIndex.Open(SidxPath, IsNewStore);
- m_CasLog.Open(SlogPath, IsNewStore);
+ if (CleanSource)
+ {
+ if (Space.Free < (m_MaxBlockSize + DiskReserve))
+ {
+ ZEN_INFO("legacy store migration from '{}' aborted, not enough disk space available {} ({})",
+ m_Config.RootDirectory / m_ContainerBaseName,
+ NiceBytes(m_MaxBlockSize + DiskReserve),
+ NiceBytes(Space.Free));
+ return 0;
+ }
+ }
+ else
+ {
+ if (Space.Free < (RequiredDiskSpace + DiskReserve))
+ {
+ ZEN_INFO("legacy store migration from '{}' aborted, not enough disk space available {} ({})",
+ m_Config.RootDirectory / m_ContainerBaseName,
+ NiceBytes(RequiredDiskSpace + DiskReserve),
+ NiceBytes(Space.Free));
+ return 0;
+ }
+ }
- // TODO: should validate integrity of container files here
+ std::filesystem::path LogPath = GetLogPath(m_Config.RootDirectory, m_ContainerBaseName);
+ CreateDirectories(LogPath.parent_path());
+ TCasLogFile<CasDiskIndexEntry> CasLog;
+ CasLog.Open(LogPath, CasLogFile::Mode::kWrite);
- m_CurrentInsertOffset = 0;
- m_CurrentIndexOffset = 0;
- m_TotalSize = 0;
+ if (CleanSource && (MaxRequiredBlockCount < 2))
+ {
+ std::vector<CasDiskIndexEntry> LogEntries;
+ LogEntries.reserve(LegacyDiskIndex.size());
- m_LocationMap.clear();
+        // We can use the legacy data file as a block as-is: just rename it and append its chunks to our new log
+ for (auto& Entry : LegacyDiskIndex)
+ {
+ const LegacyCasDiskIndexEntry& Record(Entry.second);
- uint64_t MaxFileOffset = 0;
+ BlockStoreLocation NewChunkLocation{WriteBlockIndex, Record.Location.GetOffset(), Record.Location.GetSize()};
+ BlockStoreDiskLocation NewLocation(NewChunkLocation, m_PayloadAlignment);
+ LogEntries.push_back(
+ {.Key = Entry.second.Key, .Location = NewLocation, .ContentType = Record.ContentType, .Flags = Record.Flags});
+ }
+ std::filesystem::path BlockPath = GetBlockPath(m_BlocksBasePath, WriteBlockIndex);
+ CreateDirectories(BlockPath.parent_path());
+ BlockFile.Close();
+ std::filesystem::rename(LegacyDataPath, BlockPath);
+ CasLog.Append(LogEntries);
+ for (const CasDiskIndexEntry& Entry : LogEntries)
+ {
+ m_LocationMap.insert_or_assign(Entry.Key, Entry.Location);
+ }
- m_CasLog.Replay([&](const CasDiskIndexEntry& Record) {
- if (Record.Flags & CasDiskIndexEntry::kTombstone)
+ MigratedChunkCount += LogEntries.size();
+ MigratedBlockCount++;
+ }
+ else
+ {
+ std::vector<IoHash> ChunkHashes;
+ ChunkHashes.reserve(LegacyDiskIndex.size());
+ for (const auto& Entry : LegacyDiskIndex)
{
- m_TotalSize.fetch_sub(Record.Location.GetSize());
+ ChunkHashes.push_back(Entry.first);
}
- else
+
+ std::sort(begin(ChunkHashes), end(ChunkHashes), [&](IoHash Lhs, IoHash Rhs) {
+ auto LhsKeyIt = LegacyDiskIndex.find(Lhs);
+ auto RhsKeyIt = LegacyDiskIndex.find(Rhs);
+ return LhsKeyIt->second.Location.GetOffset() < RhsKeyIt->second.Location.GetOffset();
+ });
+
+ uint64_t BlockSize = 0;
+ uint64_t BlockOffset = 0;
+ std::vector<BlockStoreLocation> NewLocations;
+ struct BlockData
{
- m_TotalSize.fetch_add(Record.Location.GetSize());
- m_LocationMap[Record.Key] = Record.Location;
- MaxFileOffset = std::max<uint64_t>(MaxFileOffset, Record.Location.GetOffset() + Record.Location.GetSize());
+ std::vector<std::pair<IoHash, BlockStoreLocation>> Chunks;
+ uint64_t BlockOffset;
+ uint64_t BlockSize;
+ uint32_t BlockIndex;
+ };
+
+ std::vector<BlockData> BlockRanges;
+ std::vector<std::pair<IoHash, BlockStoreLocation>> Chunks;
+ BlockRanges.reserve(MaxRequiredBlockCount);
+ for (const IoHash& ChunkHash : ChunkHashes)
+ {
+ const LegacyCasDiskIndexEntry& LegacyEntry = LegacyDiskIndex[ChunkHash];
+ const LegacyCasDiskLocation& LegacyChunkLocation = LegacyEntry.Location;
+
+ uint64_t ChunkOffset = LegacyChunkLocation.GetOffset();
+ uint64_t ChunkSize = LegacyChunkLocation.GetSize();
+ uint64_t ChunkEnd = ChunkOffset + ChunkSize;
+
+ if (BlockSize == 0)
+ {
+ BlockOffset = ChunkOffset;
+ }
+ if ((ChunkEnd - BlockOffset) > m_MaxBlockSize)
+ {
+ BlockData BlockRange{.BlockOffset = BlockOffset, .BlockSize = BlockSize, .BlockIndex = WriteBlockIndex};
+ BlockRange.Chunks.swap(Chunks);
+ BlockRanges.push_back(BlockRange);
+
+ WriteBlockIndex++;
+ while (std::filesystem::exists(GetBlockPath(m_BlocksBasePath, WriteBlockIndex)))
+ {
+ ++WriteBlockIndex;
+ }
+ BlockOffset = ChunkOffset;
+ BlockSize = 0;
+ }
+ BlockSize = RoundUp(BlockSize, m_PayloadAlignment);
+ BlockStoreLocation ChunkLocation = {.BlockIndex = WriteBlockIndex, .Offset = ChunkOffset - BlockOffset, .Size = ChunkSize};
+ Chunks.push_back({ChunkHash, ChunkLocation});
+ BlockSize = ChunkEnd - BlockOffset;
}
- });
+ if (BlockSize > 0)
+ {
+ BlockRanges.push_back(
+ {.Chunks = std::move(Chunks), .BlockOffset = BlockOffset, .BlockSize = BlockSize, .BlockIndex = WriteBlockIndex});
+ }
+ Stopwatch WriteBlockTimer;
+
+ std::reverse(BlockRanges.begin(), BlockRanges.end());
+ std::vector<std::uint8_t> Buffer(1 << 28);
+ for (size_t Idx = 0; Idx < BlockRanges.size(); ++Idx)
+ {
+ const BlockData& BlockRange = BlockRanges[Idx];
+ if (Idx > 0)
+ {
+ uint64_t Remaining = BlockRange.BlockOffset + BlockRange.BlockSize;
+ uint64_t Completed = BlockOffset + BlockSize - Remaining;
+ uint64_t ETA = (WriteBlockTimer.GetElapsedTimeMs() * Remaining) / Completed;
+
+ ZEN_INFO("migrating store '{}' {}/{} blocks, remaining {} ({}) ETA: {}",
+ m_Config.RootDirectory / m_ContainerBaseName,
+ Idx,
+ BlockRanges.size(),
+ NiceBytes(BlockRange.BlockOffset + BlockRange.BlockSize),
+ NiceBytes(BlockOffset + BlockSize),
+ NiceTimeSpanMs(ETA));
+ }
+
+ std::filesystem::path BlockPath = GetBlockPath(m_BlocksBasePath, BlockRange.BlockIndex);
+ BlockStoreFile ChunkBlock(BlockPath);
+ ChunkBlock.Create(BlockRange.BlockSize);
+ uint64_t Offset = 0;
+ while (Offset < BlockRange.BlockSize)
+ {
+ uint64_t Size = BlockRange.BlockSize - Offset;
+ if (Size > Buffer.size())
+ {
+ Size = Buffer.size();
+ }
+ BlockFile.Read(Buffer.data(), Size, BlockRange.BlockOffset + Offset);
+ ChunkBlock.Write(Buffer.data(), Size, Offset);
+ Offset += Size;
+ }
+ ChunkBlock.Truncate(Offset);
+ ChunkBlock.Flush();
- m_CurrentInsertOffset = (MaxFileOffset + m_PayloadAlignment - 1) & ~(m_PayloadAlignment - 1);
- m_CurrentIndexOffset = m_SmallObjectIndex.FileSize();
+ std::vector<CasDiskIndexEntry> LogEntries;
+ LogEntries.reserve(BlockRange.Chunks.size());
+ for (const auto& Entry : BlockRange.Chunks)
+ {
+ const LegacyCasDiskIndexEntry& LegacyEntry = LegacyDiskIndex[Entry.first];
+ BlockStoreDiskLocation Location(Entry.second, m_PayloadAlignment);
+ LogEntries.push_back(
+ {.Key = Entry.first, .Location = Location, .ContentType = LegacyEntry.ContentType, .Flags = LegacyEntry.Flags});
+ }
+ CasLog.Append(LogEntries);
+ for (const CasDiskIndexEntry& Entry : LogEntries)
+ {
+ m_LocationMap.insert_or_assign(Entry.Key, Entry.Location);
+ }
+ MigratedChunkCount += LogEntries.size();
+ MigratedBlockCount++;
+
+ if (CleanSource)
+ {
+ std::vector<LegacyCasDiskIndexEntry> LegacyLogEntries;
+ LegacyLogEntries.reserve(BlockRange.Chunks.size());
+ for (const auto& Entry : BlockRange.Chunks)
+ {
+ LegacyLogEntries.push_back({.Key = Entry.first, .Flags = LegacyCasDiskIndexEntry::kTombstone});
+ }
+ LegacyCasLog.Append(LegacyLogEntries);
+ BlockFile.SetFileSize(BlockRange.BlockOffset);
+ }
+ }
+ }
+
+ BlockFile.Close();
+ LegacyCasLog.Close();
+ CasLog.Close();
+
+ if (CleanSource)
+ {
+        // Older versions of CasContainerStrategy expect the legacy files to exist if they can find
+        // a CAS manifest, and crash on startup if they don't.
+        // In order to not break startup when switching back to an older version, let's just reset
+        // the legacy data files to zero length.
+
+ BasicFile LegacyLog;
+ LegacyLog.Open(LegacyLogPath, BasicFile::Mode::kTruncate);
+ BasicFile LegacySobs;
+ LegacySobs.Open(LegacyDataPath, BasicFile::Mode::kTruncate);
+ BasicFile LegacySidx;
+ LegacySidx.Open(LegacyIndexPath, BasicFile::Mode::kTruncate);
+ }
+ return MigratedChunkCount;
}
void
-CasContainerStrategy::CloseContainer()
+CasContainerStrategy::OpenContainer(bool IsNewStore)
{
- m_SmallObjectFile.Close();
- m_SmallObjectIndex.Close();
- m_CasLog.Close();
+    // TODO: Add a .running file and delete it on clean close to detect unclean termination
+ m_TotalSize = 0;
+
+ m_LocationMap.clear();
+
+ std::filesystem::path BasePath = GetBasePath(m_Config.RootDirectory, m_ContainerBaseName);
+
+ if (IsNewStore)
+ {
+ std::filesystem::path LegacyDataPath = GetLegacyDataPath(m_Config.RootDirectory, m_ContainerBaseName);
+ std::filesystem::path LegacyLogPath = GetLegacyLogPath(m_Config.RootDirectory, m_ContainerBaseName);
+
+ std::filesystem::remove(LegacyLogPath);
+ std::filesystem::remove(LegacyDataPath);
+ std::filesystem::remove_all(BasePath);
+ }
+
+ uint64_t LogPosition = ReadIndexFile();
+ uint64_t LogEntryCount = ReadLog(LogPosition);
+ uint64_t LegacyLogEntryCount = MigrateLegacyData(true);
+
+ CreateDirectories(BasePath);
+
+ std::filesystem::path LogPath = GetLogPath(m_Config.RootDirectory, m_ContainerBaseName);
+ m_CasLog.Open(LogPath, CasLogFile::Mode::kWrite);
+
+ std::unordered_set<uint32_t> KnownBlocks;
+ for (const auto& Entry : m_LocationMap)
+ {
+ const BlockStoreDiskLocation& Location = Entry.second;
+ m_TotalSize.fetch_add(Location.GetSize(), std::memory_order_seq_cst);
+ KnownBlocks.insert(Location.GetBlockIndex());
+ }
+
+ if (std::filesystem::is_directory(m_BlocksBasePath))
+ {
+ std::vector<std::filesystem::path> FoldersToScan;
+ FoldersToScan.push_back(m_BlocksBasePath);
+ size_t FolderOffset = 0;
+ while (FolderOffset < FoldersToScan.size())
+ {
+ for (const std::filesystem::directory_entry& Entry : std::filesystem::directory_iterator(FoldersToScan[FolderOffset]))
+ {
+ if (Entry.is_directory())
+ {
+ FoldersToScan.push_back(Entry.path());
+ continue;
+ }
+ if (Entry.is_regular_file())
+ {
+ const std::filesystem::path Path = Entry.path();
+ if (Path.extension() != DataExtension)
+ {
+ continue;
+ }
+ std::string FileName = Path.stem().string();
+ uint32_t BlockIndex;
+ bool OK = ParseHexNumber(FileName, BlockIndex);
+ if (!OK)
+ {
+ continue;
+ }
+ if (!KnownBlocks.contains(BlockIndex))
+ {
+                    // The block file exists on disk but no location map entry references it;
+                    // delete it to clear out unused blocks
+ ZEN_INFO("removing unused block for '{}' at '{}'", m_ContainerBaseName, Path);
+ std::error_code Ec;
+ std::filesystem::remove(Path, Ec);
+ if (Ec)
+ {
+ ZEN_WARN("Failed to delete file '{}' reason: '{}'", Path, Ec.message());
+ }
+ continue;
+ }
+ Ref<BlockStoreFile> BlockFile = new BlockStoreFile(Path);
+ BlockFile->Open();
+ m_ChunkBlocks[BlockIndex] = BlockFile;
+ }
+ }
+ ++FolderOffset;
+ }
+ }
+ else
+ {
+ CreateDirectories(m_BlocksBasePath);
+ }
+
+ if (IsNewStore || ((LogEntryCount + LegacyLogEntryCount) > 0))
+ {
+ MakeIndexSnapshot();
+ }
+
+ // TODO: should validate integrity of container files here
}
//////////////////////////////////////////////////////////////////////////
#if ZEN_WITH_TESTS
-TEST_CASE("cas.compact.gc")
+namespace {
+ static IoBuffer CreateChunk(uint64_t Size)
+ {
+ static std::random_device rd;
+ static std::mt19937 g(rd());
+
+ std::vector<uint8_t> Values;
+ Values.resize(Size);
+ for (size_t Idx = 0; Idx < Size; ++Idx)
+ {
+ Values[Idx] = static_cast<uint8_t>(Idx);
+ }
+ std::shuffle(Values.begin(), Values.end(), g);
+
+ return IoBufferBuilder::MakeCloneFromMemory(Values.data(), Values.size());
+ }
+} // namespace
+
+TEST_CASE("compactcas.hex")
+{
+ uint32_t Value;
+ std::string HexString;
+ CHECK(!ParseHexNumber("", Value));
+ char Hex[9];
+
+ ToHexNumber(0u, Hex);
+ HexString = std::string(Hex);
+ CHECK(ParseHexNumber(HexString, Value));
+ CHECK(Value == 0u);
+
+ ToHexNumber(std::numeric_limits<std::uint32_t>::max(), Hex);
+ HexString = std::string(Hex);
+ CHECK(HexString == "ffffffff");
+ CHECK(ParseHexNumber(HexString, Value));
+ CHECK(Value == std::numeric_limits<std::uint32_t>::max());
+
+ ToHexNumber(0xadf14711u, Hex);
+ HexString = std::string(Hex);
+ CHECK(HexString == "adf14711");
+ CHECK(ParseHexNumber(HexString, Value));
+ CHECK(Value == 0xadf14711u);
+
+ ToHexNumber(0x80000000u, Hex);
+ HexString = std::string(Hex);
+ CHECK(HexString == "80000000");
+ CHECK(ParseHexNumber(HexString, Value));
+ CHECK(Value == 0x80000000u);
+
+ ToHexNumber(0x718293a4u, Hex);
+ HexString = std::string(Hex);
+ CHECK(HexString == "718293a4");
+ CHECK(ParseHexNumber(HexString, Value));
+ CHECK(Value == 0x718293a4u);
+}
+
+TEST_CASE("compactcas.compact.gc")
{
ScopedTemporaryDirectory TempDir;
CasStoreConfiguration CasConfig;
CasConfig.RootDirectory = TempDir.Path();
-
CreateDirectories(CasConfig.RootDirectory);
const int kIterationCount = 1000;
@@ -499,7 +1624,7 @@ TEST_CASE("cas.compact.gc")
{
CasGc Gc;
CasContainerStrategy Cas(CasConfig, Gc);
- Cas.Initialize("test", 16, true);
+ Cas.Initialize("test", 65536, 16, true);
for (int i = 0; i < kIterationCount; ++i)
{
@@ -533,7 +1658,7 @@ TEST_CASE("cas.compact.gc")
{
CasGc Gc;
CasContainerStrategy Cas(CasConfig, Gc);
- Cas.Initialize("test", 16, false);
+ Cas.Initialize("test", 65536, 16, false);
for (int i = 0; i < kIterationCount; ++i)
{
@@ -545,67 +1670,854 @@ TEST_CASE("cas.compact.gc")
CHECK_EQ(Value["id"].AsInt32(), i);
}
-
- GcContext Ctx;
- Cas.CollectGarbage(Ctx);
}
}
-TEST_CASE("cas.compact.totalsize")
+TEST_CASE("compactcas.compact.totalsize")
{
std::random_device rd;
std::mt19937 g(rd());
- const auto CreateChunk = [&](uint64_t Size) -> IoBuffer {
- const size_t Count = static_cast<size_t>(Size / sizeof(uint32_t));
- std::vector<uint32_t> Values;
- Values.resize(Count);
- for (size_t Idx = 0; Idx < Count; ++Idx)
+ // for (uint32_t i = 0; i < 100; ++i)
+ {
+ ScopedTemporaryDirectory TempDir;
+
+ CasStoreConfiguration CasConfig;
+ CasConfig.RootDirectory = TempDir.Path();
+
+ CreateDirectories(CasConfig.RootDirectory);
+
+ const uint64_t kChunkSize = 1024;
+ const int32_t kChunkCount = 16;
+
{
- Values[Idx] = static_cast<uint32_t>(Idx);
+ CasGc Gc;
+ CasContainerStrategy Cas(CasConfig, Gc);
+ Cas.Initialize("test", 65536, 16, true);
+
+ for (int32_t Idx = 0; Idx < kChunkCount; ++Idx)
+ {
+ IoBuffer Chunk = CreateChunk(kChunkSize);
+ const IoHash Hash = HashBuffer(Chunk);
+ CasStore::InsertResult InsertResult = Cas.InsertChunk(Chunk, Hash);
+ ZEN_ASSERT(InsertResult.New);
+ }
+
+ const uint64_t TotalSize = Cas.StorageSize().DiskSize;
+ CHECK_EQ(kChunkSize * kChunkCount, TotalSize);
}
- std::shuffle(Values.begin(), Values.end(), g);
- return IoBufferBuilder::MakeCloneFromMemory(Values.data(), Values.size() * sizeof(uint32_t));
- };
+ {
+ CasGc Gc;
+ CasContainerStrategy Cas(CasConfig, Gc);
+ Cas.Initialize("test", 65536, 16, false);
+
+ const uint64_t TotalSize = Cas.StorageSize().DiskSize;
+ CHECK_EQ(kChunkSize * kChunkCount, TotalSize);
+ }
+
+ // Re-open again, this time we should have a snapshot
+ {
+ CasGc Gc;
+ CasContainerStrategy Cas(CasConfig, Gc);
+ Cas.Initialize("test", 65536, 16, false);
+
+ const uint64_t TotalSize = Cas.StorageSize().DiskSize;
+ CHECK_EQ(kChunkSize * kChunkCount, TotalSize);
+ }
+ }
+}
+TEST_CASE("compactcas.gc.basic")
+{
ScopedTemporaryDirectory TempDir;
CasStoreConfiguration CasConfig;
CasConfig.RootDirectory = TempDir.Path();
+ CreateDirectories(CasConfig.RootDirectory);
+
+ CasGc Gc;
+ CasContainerStrategy Cas(CasConfig, Gc);
+ Cas.Initialize("cb", 65536, 1 << 4, true);
+
+ IoBuffer Chunk = CreateChunk(128);
+ IoHash ChunkHash = IoHash::HashBuffer(Chunk);
+
+ const CasStore::InsertResult InsertResult = Cas.InsertChunk(Chunk, ChunkHash);
+ CHECK(InsertResult.New);
+ Cas.Flush();
+
+ GcContext GcCtx;
+ GcCtx.CollectSmallObjects(true);
+
+ Cas.CollectGarbage(GcCtx);
+ CHECK(!Cas.HaveChunk(ChunkHash));
+}
+
+TEST_CASE("compactcas.gc.removefile")
+{
+ ScopedTemporaryDirectory TempDir;
+
+ CasStoreConfiguration CasConfig;
+ CasConfig.RootDirectory = TempDir.Path();
CreateDirectories(CasConfig.RootDirectory);
- const uint64_t kChunkSize = 1024;
- const int32_t kChunkCount = 16;
+ IoBuffer Chunk = CreateChunk(128);
+ IoHash ChunkHash = IoHash::HashBuffer(Chunk);
+ {
+ CasGc Gc;
+ CasContainerStrategy Cas(CasConfig, Gc);
+ Cas.Initialize("cb", 65536, 1 << 4, true);
+
+ const CasStore::InsertResult InsertResult = Cas.InsertChunk(Chunk, ChunkHash);
+ CHECK(InsertResult.New);
+ const CasStore::InsertResult InsertResultDup = Cas.InsertChunk(Chunk, ChunkHash);
+ CHECK(!InsertResultDup.New);
+ Cas.Flush();
+ }
+
+ CasGc Gc;
+ CasContainerStrategy Cas(CasConfig, Gc);
+ Cas.Initialize("cb", 65536, 1 << 4, false);
+
+ GcContext GcCtx;
+ GcCtx.CollectSmallObjects(true);
+
+ Cas.CollectGarbage(GcCtx);
+
+ CHECK(!Cas.HaveChunk(ChunkHash));
+}
+TEST_CASE("compactcas.gc.compact")
+{
+ // for (uint32_t i = 0; i < 100; ++i)
{
+ ScopedTemporaryDirectory TempDir;
+
+ CasStoreConfiguration CasConfig;
+ CasConfig.RootDirectory = TempDir.Path();
+ CreateDirectories(CasConfig.RootDirectory);
+
CasGc Gc;
CasContainerStrategy Cas(CasConfig, Gc);
- Cas.Initialize("test", 16, true);
+ Cas.Initialize("cb", 2048, 1 << 4, true);
- for (int32_t Idx = 0; Idx < kChunkCount; ++Idx)
+ uint64_t ChunkSizes[9] = {128, 541, 1023, 781, 218, 37, 4, 997, 5};
+ std::vector<IoBuffer> Chunks;
+ Chunks.reserve(9);
+ for (uint64_t Size : ChunkSizes)
{
- IoBuffer Chunk = CreateChunk(kChunkSize);
- const IoHash Hash = HashBuffer(Chunk);
- auto InsertResult = Cas.InsertChunk(Chunk, Hash);
- ZEN_ASSERT(InsertResult.New);
+ Chunks.push_back(CreateChunk(Size));
}
- const uint64_t TotalSize = Cas.StorageSize().DiskSize;
- CHECK_EQ(kChunkSize * kChunkCount, TotalSize);
+ std::vector<IoHash> ChunkHashes;
+ ChunkHashes.reserve(9);
+ for (const IoBuffer& Chunk : Chunks)
+ {
+ ChunkHashes.push_back(IoHash::HashBuffer(Chunk.Data(), Chunk.Size()));
+ }
+
+ CHECK(Cas.InsertChunk(Chunks[0], ChunkHashes[0]).New);
+ CHECK(Cas.InsertChunk(Chunks[1], ChunkHashes[1]).New);
+ CHECK(Cas.InsertChunk(Chunks[2], ChunkHashes[2]).New);
+ CHECK(Cas.InsertChunk(Chunks[3], ChunkHashes[3]).New);
+ CHECK(Cas.InsertChunk(Chunks[4], ChunkHashes[4]).New);
+ CHECK(Cas.InsertChunk(Chunks[5], ChunkHashes[5]).New);
+ CHECK(Cas.InsertChunk(Chunks[6], ChunkHashes[6]).New);
+ CHECK(Cas.InsertChunk(Chunks[7], ChunkHashes[7]).New);
+ CHECK(Cas.InsertChunk(Chunks[8], ChunkHashes[8]).New);
+
+ CHECK(Cas.HaveChunk(ChunkHashes[0]));
+ CHECK(Cas.HaveChunk(ChunkHashes[1]));
+ CHECK(Cas.HaveChunk(ChunkHashes[2]));
+ CHECK(Cas.HaveChunk(ChunkHashes[3]));
+ CHECK(Cas.HaveChunk(ChunkHashes[4]));
+ CHECK(Cas.HaveChunk(ChunkHashes[5]));
+ CHECK(Cas.HaveChunk(ChunkHashes[6]));
+ CHECK(Cas.HaveChunk(ChunkHashes[7]));
+ CHECK(Cas.HaveChunk(ChunkHashes[8]));
+
+ uint64_t InitialSize = Cas.StorageSize().DiskSize;
+
+ // Keep first and last
+ {
+ GcContext GcCtx;
+ GcCtx.CollectSmallObjects(true);
+
+ std::vector<IoHash> KeepChunks;
+ KeepChunks.push_back(ChunkHashes[0]);
+ KeepChunks.push_back(ChunkHashes[8]);
+ GcCtx.ContributeCas(KeepChunks);
+
+ Cas.Flush();
+ Cas.CollectGarbage(GcCtx);
+
+ CHECK(Cas.HaveChunk(ChunkHashes[0]));
+ CHECK(!Cas.HaveChunk(ChunkHashes[1]));
+ CHECK(!Cas.HaveChunk(ChunkHashes[2]));
+ CHECK(!Cas.HaveChunk(ChunkHashes[3]));
+ CHECK(!Cas.HaveChunk(ChunkHashes[4]));
+ CHECK(!Cas.HaveChunk(ChunkHashes[5]));
+ CHECK(!Cas.HaveChunk(ChunkHashes[6]));
+ CHECK(!Cas.HaveChunk(ChunkHashes[7]));
+ CHECK(Cas.HaveChunk(ChunkHashes[8]));
+
+ CHECK(ChunkHashes[0] == IoHash::HashBuffer(Cas.FindChunk(ChunkHashes[0])));
+ CHECK(ChunkHashes[8] == IoHash::HashBuffer(Cas.FindChunk(ChunkHashes[8])));
+ }
+
+ Cas.InsertChunk(Chunks[1], ChunkHashes[1]);
+ Cas.InsertChunk(Chunks[2], ChunkHashes[2]);
+ Cas.InsertChunk(Chunks[3], ChunkHashes[3]);
+ Cas.InsertChunk(Chunks[4], ChunkHashes[4]);
+ Cas.InsertChunk(Chunks[5], ChunkHashes[5]);
+ Cas.InsertChunk(Chunks[6], ChunkHashes[6]);
+ Cas.InsertChunk(Chunks[7], ChunkHashes[7]);
+
+ // Keep last
+ {
+ GcContext GcCtx;
+ GcCtx.CollectSmallObjects(true);
+ std::vector<IoHash> KeepChunks;
+ KeepChunks.push_back(ChunkHashes[8]);
+ GcCtx.ContributeCas(KeepChunks);
+
+ Cas.Flush();
+ Cas.CollectGarbage(GcCtx);
+
+ CHECK(!Cas.HaveChunk(ChunkHashes[0]));
+ CHECK(!Cas.HaveChunk(ChunkHashes[1]));
+ CHECK(!Cas.HaveChunk(ChunkHashes[2]));
+ CHECK(!Cas.HaveChunk(ChunkHashes[3]));
+ CHECK(!Cas.HaveChunk(ChunkHashes[4]));
+ CHECK(!Cas.HaveChunk(ChunkHashes[5]));
+ CHECK(!Cas.HaveChunk(ChunkHashes[6]));
+ CHECK(!Cas.HaveChunk(ChunkHashes[7]));
+ CHECK(Cas.HaveChunk(ChunkHashes[8]));
+
+ CHECK(ChunkHashes[8] == IoHash::HashBuffer(Cas.FindChunk(ChunkHashes[8])));
+
+ Cas.InsertChunk(Chunks[1], ChunkHashes[1]);
+ Cas.InsertChunk(Chunks[2], ChunkHashes[2]);
+ Cas.InsertChunk(Chunks[3], ChunkHashes[3]);
+ Cas.InsertChunk(Chunks[4], ChunkHashes[4]);
+ Cas.InsertChunk(Chunks[5], ChunkHashes[5]);
+ Cas.InsertChunk(Chunks[6], ChunkHashes[6]);
+ Cas.InsertChunk(Chunks[7], ChunkHashes[7]);
+ }
+
+ // Keep mixed
+ {
+ GcContext GcCtx;
+ GcCtx.CollectSmallObjects(true);
+ std::vector<IoHash> KeepChunks;
+ KeepChunks.push_back(ChunkHashes[1]);
+ KeepChunks.push_back(ChunkHashes[4]);
+ KeepChunks.push_back(ChunkHashes[7]);
+ GcCtx.ContributeCas(KeepChunks);
+
+ Cas.Flush();
+ Cas.CollectGarbage(GcCtx);
+
+ CHECK(!Cas.HaveChunk(ChunkHashes[0]));
+ CHECK(Cas.HaveChunk(ChunkHashes[1]));
+ CHECK(!Cas.HaveChunk(ChunkHashes[2]));
+ CHECK(!Cas.HaveChunk(ChunkHashes[3]));
+ CHECK(Cas.HaveChunk(ChunkHashes[4]));
+ CHECK(!Cas.HaveChunk(ChunkHashes[5]));
+ CHECK(!Cas.HaveChunk(ChunkHashes[6]));
+ CHECK(Cas.HaveChunk(ChunkHashes[7]));
+ CHECK(!Cas.HaveChunk(ChunkHashes[8]));
+
+ CHECK(ChunkHashes[1] == IoHash::HashBuffer(Cas.FindChunk(ChunkHashes[1])));
+ CHECK(ChunkHashes[4] == IoHash::HashBuffer(Cas.FindChunk(ChunkHashes[4])));
+ CHECK(ChunkHashes[7] == IoHash::HashBuffer(Cas.FindChunk(ChunkHashes[7])));
+
+ Cas.InsertChunk(Chunks[0], ChunkHashes[0]);
+ Cas.InsertChunk(Chunks[2], ChunkHashes[2]);
+ Cas.InsertChunk(Chunks[3], ChunkHashes[3]);
+ Cas.InsertChunk(Chunks[5], ChunkHashes[5]);
+ Cas.InsertChunk(Chunks[6], ChunkHashes[6]);
+ Cas.InsertChunk(Chunks[8], ChunkHashes[8]);
+ }
+
+ // Keep multiple at end
+ {
+ GcContext GcCtx;
+ GcCtx.CollectSmallObjects(true);
+ std::vector<IoHash> KeepChunks;
+ KeepChunks.push_back(ChunkHashes[6]);
+ KeepChunks.push_back(ChunkHashes[7]);
+ KeepChunks.push_back(ChunkHashes[8]);
+ GcCtx.ContributeCas(KeepChunks);
+
+ Cas.Flush();
+ Cas.CollectGarbage(GcCtx);
+
+ CHECK(!Cas.HaveChunk(ChunkHashes[0]));
+ CHECK(!Cas.HaveChunk(ChunkHashes[1]));
+ CHECK(!Cas.HaveChunk(ChunkHashes[2]));
+ CHECK(!Cas.HaveChunk(ChunkHashes[3]));
+ CHECK(!Cas.HaveChunk(ChunkHashes[4]));
+ CHECK(!Cas.HaveChunk(ChunkHashes[5]));
+ CHECK(Cas.HaveChunk(ChunkHashes[6]));
+ CHECK(Cas.HaveChunk(ChunkHashes[7]));
+ CHECK(Cas.HaveChunk(ChunkHashes[8]));
+
+ CHECK(ChunkHashes[6] == IoHash::HashBuffer(Cas.FindChunk(ChunkHashes[6])));
+ CHECK(ChunkHashes[7] == IoHash::HashBuffer(Cas.FindChunk(ChunkHashes[7])));
+ CHECK(ChunkHashes[8] == IoHash::HashBuffer(Cas.FindChunk(ChunkHashes[8])));
+
+ Cas.InsertChunk(Chunks[0], ChunkHashes[0]);
+ Cas.InsertChunk(Chunks[1], ChunkHashes[1]);
+ Cas.InsertChunk(Chunks[2], ChunkHashes[2]);
+ Cas.InsertChunk(Chunks[3], ChunkHashes[3]);
+ Cas.InsertChunk(Chunks[4], ChunkHashes[4]);
+ Cas.InsertChunk(Chunks[5], ChunkHashes[5]);
+ }
+
+ // Keep every other
+ {
+ GcContext GcCtx;
+ GcCtx.CollectSmallObjects(true);
+ std::vector<IoHash> KeepChunks;
+ KeepChunks.push_back(ChunkHashes[0]);
+ KeepChunks.push_back(ChunkHashes[2]);
+ KeepChunks.push_back(ChunkHashes[4]);
+ KeepChunks.push_back(ChunkHashes[6]);
+ KeepChunks.push_back(ChunkHashes[8]);
+ GcCtx.ContributeCas(KeepChunks);
+
+ Cas.Flush();
+ Cas.CollectGarbage(GcCtx);
+
+ CHECK(Cas.HaveChunk(ChunkHashes[0]));
+ CHECK(!Cas.HaveChunk(ChunkHashes[1]));
+ CHECK(Cas.HaveChunk(ChunkHashes[2]));
+ CHECK(!Cas.HaveChunk(ChunkHashes[3]));
+ CHECK(Cas.HaveChunk(ChunkHashes[4]));
+ CHECK(!Cas.HaveChunk(ChunkHashes[5]));
+ CHECK(Cas.HaveChunk(ChunkHashes[6]));
+ CHECK(!Cas.HaveChunk(ChunkHashes[7]));
+ CHECK(Cas.HaveChunk(ChunkHashes[8]));
+
+ CHECK(ChunkHashes[0] == IoHash::HashBuffer(Cas.FindChunk(ChunkHashes[0])));
+ CHECK(ChunkHashes[2] == IoHash::HashBuffer(Cas.FindChunk(ChunkHashes[2])));
+ CHECK(ChunkHashes[4] == IoHash::HashBuffer(Cas.FindChunk(ChunkHashes[4])));
+ CHECK(ChunkHashes[6] == IoHash::HashBuffer(Cas.FindChunk(ChunkHashes[6])));
+ CHECK(ChunkHashes[8] == IoHash::HashBuffer(Cas.FindChunk(ChunkHashes[8])));
+
+ Cas.InsertChunk(Chunks[1], ChunkHashes[1]);
+ Cas.InsertChunk(Chunks[3], ChunkHashes[3]);
+ Cas.InsertChunk(Chunks[5], ChunkHashes[5]);
+ Cas.InsertChunk(Chunks[7], ChunkHashes[7]);
+ }
+
+ // Verify that we nicely appended blocks even after all GC operations
+ CHECK(ChunkHashes[0] == IoHash::HashBuffer(Cas.FindChunk(ChunkHashes[0])));
+ CHECK(ChunkHashes[1] == IoHash::HashBuffer(Cas.FindChunk(ChunkHashes[1])));
+ CHECK(ChunkHashes[2] == IoHash::HashBuffer(Cas.FindChunk(ChunkHashes[2])));
+ CHECK(ChunkHashes[3] == IoHash::HashBuffer(Cas.FindChunk(ChunkHashes[3])));
+ CHECK(ChunkHashes[4] == IoHash::HashBuffer(Cas.FindChunk(ChunkHashes[4])));
+ CHECK(ChunkHashes[5] == IoHash::HashBuffer(Cas.FindChunk(ChunkHashes[5])));
+ CHECK(ChunkHashes[6] == IoHash::HashBuffer(Cas.FindChunk(ChunkHashes[6])));
+ CHECK(ChunkHashes[7] == IoHash::HashBuffer(Cas.FindChunk(ChunkHashes[7])));
+ CHECK(ChunkHashes[8] == IoHash::HashBuffer(Cas.FindChunk(ChunkHashes[8])));
+
+ uint64_t FinalSize = Cas.StorageSize().DiskSize;
+ CHECK(InitialSize == FinalSize);
}
+}
+
+TEST_CASE("compactcas.gc.deleteblockonopen")
+{
+ ScopedTemporaryDirectory TempDir;
+ uint64_t ChunkSizes[20] = {128, 541, 311, 181, 218, 37, 4, 397, 5, 92, 551, 721, 31, 92, 16, 99, 131, 41, 541, 84};
+ std::vector<IoBuffer> Chunks;
+ Chunks.reserve(20);
+ for (uint64_t Size : ChunkSizes)
+ {
+ Chunks.push_back(CreateChunk(Size));
+ }
+
+ std::vector<IoHash> ChunkHashes;
+ ChunkHashes.reserve(20);
+ for (const IoBuffer& Chunk : Chunks)
+ {
+ ChunkHashes.push_back(IoHash::HashBuffer(Chunk.Data(), Chunk.Size()));
+ }
+
+ CasStoreConfiguration CasConfig;
+ CasConfig.RootDirectory = TempDir.Path();
+ CreateDirectories(CasConfig.RootDirectory);
{
CasGc Gc;
CasContainerStrategy Cas(CasConfig, Gc);
- Cas.Initialize("test", 16, false);
+ Cas.Initialize("test", 1024, 16, true);
+
+ for (size_t i = 0; i < 20; i++)
+ {
+ CHECK(Cas.InsertChunk(Chunks[i], ChunkHashes[i]).New);
+ }
+ // GC every other block
+ {
+ GcContext GcCtx;
+ GcCtx.CollectSmallObjects(true);
+ std::vector<IoHash> KeepChunks;
+ for (size_t i = 0; i < 20; i += 2)
+ {
+ KeepChunks.push_back(ChunkHashes[i]);
+ }
+ GcCtx.ContributeCas(KeepChunks);
+
+ Cas.Flush();
+ Cas.CollectGarbage(GcCtx);
+
+ for (size_t i = 0; i < 20; i += 2)
+ {
+ CHECK(Cas.HaveChunk(ChunkHashes[i]));
+ CHECK(!Cas.HaveChunk(ChunkHashes[i + 1]));
+ CHECK(ChunkHashes[i] == IoHash::HashBuffer(Cas.FindChunk(ChunkHashes[i])));
+ }
+ }
+ }
+ {
+ // Re-open
+ CasGc Gc;
+ CasContainerStrategy Cas(CasConfig, Gc);
+ Cas.Initialize("test", 1024, 16, false);
+
+ for (size_t i = 0; i < 20; i += 2)
+ {
+ CHECK(Cas.HaveChunk(ChunkHashes[i]));
+ CHECK(!Cas.HaveChunk(ChunkHashes[i + 1]));
+ CHECK(ChunkHashes[i] == IoHash::HashBuffer(Cas.FindChunk(ChunkHashes[i])));
+ }
+ }
+}
+
+TEST_CASE("compactcas.gc.handleopeniobuffer")
+{
+ ScopedTemporaryDirectory TempDir;
+
+ uint64_t ChunkSizes[20] = {128, 541, 311, 181, 218, 37, 4, 397, 5, 92, 551, 721, 31, 92, 16, 99, 131, 41, 541, 84};
+ std::vector<IoBuffer> Chunks;
+ Chunks.reserve(20);
+ for (const uint64_t& Size : ChunkSizes)
+ {
+ Chunks.push_back(CreateChunk(Size));
+ }
+
+ std::vector<IoHash> ChunkHashes;
+ ChunkHashes.reserve(20);
+ for (const IoBuffer& Chunk : Chunks)
+ {
+ ChunkHashes.push_back(IoHash::HashBuffer(Chunk.Data(), Chunk.Size()));
+ }
+
+ CasStoreConfiguration CasConfig;
+ CasConfig.RootDirectory = TempDir.Path();
+ CreateDirectories(CasConfig.RootDirectory);
+
+ CasGc Gc;
+ CasContainerStrategy Cas(CasConfig, Gc);
+ Cas.Initialize("test", 1024, 16, true);
+
+ for (size_t i = 0; i < 20; i++)
+ {
+ CHECK(Cas.InsertChunk(Chunks[i], ChunkHashes[i]).New);
+ }
+
+ IoBuffer RetainChunk = Cas.FindChunk(ChunkHashes[5]);
+ Cas.Flush();
+
+ // GC everything
+ GcContext GcCtx;
+ GcCtx.CollectSmallObjects(true);
+ Cas.CollectGarbage(GcCtx);
+
+ for (size_t i = 0; i < 20; i++)
+ {
+ CHECK(!Cas.HaveChunk(ChunkHashes[i]));
+ }
+
+ CHECK(ChunkHashes[5] == IoHash::HashBuffer(RetainChunk));
+}
+
+TEST_CASE("compactcas.legacyconversion")
+{
+ ScopedTemporaryDirectory TempDir;
+
+ uint64_t ChunkSizes[] = {2041, 1123, 1223, 1239, 341, 1412, 912, 774, 341, 431, 554, 1098, 2048, 339, 561, 16, 16, 2048, 2048};
+ size_t ChunkCount = sizeof(ChunkSizes) / sizeof(uint64_t);
+ size_t SingleBlockSize = 0;
+ std::vector<IoBuffer> Chunks;
+ Chunks.reserve(ChunkCount);
+ for (uint64_t Size : ChunkSizes)
+ {
+ Chunks.push_back(CreateChunk(Size));
+ SingleBlockSize += Size;
+ }
+
+ std::vector<IoHash> ChunkHashes;
+ ChunkHashes.reserve(ChunkCount);
+ for (const IoBuffer& Chunk : Chunks)
+ {
+ ChunkHashes.push_back(IoHash::HashBuffer(Chunk.Data(), Chunk.Size()));
+ }
+
+ CasStoreConfiguration CasConfig;
+ CasConfig.RootDirectory = TempDir.Path();
+ CreateDirectories(CasConfig.RootDirectory);
+
+ {
+ CasGc Gc;
+ CasContainerStrategy Cas(CasConfig, Gc);
+ Cas.Initialize("test", gsl::narrow<uint32_t>(SingleBlockSize * 2), 16, true);
+
+ for (size_t i = 0; i < ChunkCount; i++)
+ {
+ CHECK(Cas.InsertChunk(Chunks[i], ChunkHashes[i]).New);
+ }
+
+ std::vector<IoHash> KeepChunks;
+ for (size_t i = 0; i < ChunkCount; i += 2)
+ {
+ KeepChunks.push_back(ChunkHashes[i]);
+ }
+ GcContext GcCtx;
+ GcCtx.CollectSmallObjects(true);
+ GcCtx.ContributeCas(KeepChunks);
+ Cas.Flush();
+ Gc.CollectGarbage(GcCtx);
+ }
+
+ std::filesystem::path BlockPath = GetBlockPath(GetBlocksBasePath(CasConfig.RootDirectory, "test"), 1);
+ std::filesystem::path LegacyDataPath = GetLegacyDataPath(CasConfig.RootDirectory, "test");
+ std::filesystem::rename(BlockPath, LegacyDataPath);
+
+ std::vector<CasDiskIndexEntry> LogEntries;
+ std::filesystem::path IndexPath = GetIndexPath(CasConfig.RootDirectory, "test");
+ if (std::filesystem::is_regular_file(IndexPath))
+ {
+ BasicFile ObjectIndexFile;
+ ObjectIndexFile.Open(IndexPath, BasicFile::Mode::kRead);
+ uint64_t Size = ObjectIndexFile.FileSize();
+ if (Size >= sizeof(CasDiskIndexHeader))
+ {
+ uint64_t ExpectedEntryCount = (Size - sizeof(sizeof(CasDiskIndexHeader))) / sizeof(CasDiskIndexEntry);
+ CasDiskIndexHeader Header;
+ ObjectIndexFile.Read(&Header, sizeof(Header), 0);
+ if (Header.Magic == CasDiskIndexHeader::ExpectedMagic && Header.Version == CasDiskIndexHeader::CurrentVersion &&
+ Header.PayloadAlignment > 0 && Header.EntryCount == ExpectedEntryCount)
+ {
+ LogEntries.resize(Header.EntryCount);
+ ObjectIndexFile.Read(LogEntries.data(), Header.EntryCount * sizeof(CasDiskIndexEntry), sizeof(CasDiskIndexHeader));
+ }
+ }
+ ObjectIndexFile.Close();
+ std::filesystem::remove(IndexPath);
+ }
+
+ std::filesystem::path LogPath = GetLogPath(CasConfig.RootDirectory, "test");
+ {
+ TCasLogFile<CasDiskIndexEntry> CasLog;
+ CasLog.Open(LogPath, CasLogFile::Mode::kRead);
+ LogEntries.reserve(CasLog.GetLogCount());
+ CasLog.Replay([&](const CasDiskIndexEntry& Record) { LogEntries.push_back(Record); }, 0);
+ }
+ TCasLogFile<LegacyCasDiskIndexEntry> LegacyCasLog;
+ std::filesystem::path LegacylogPath = GetLegacyLogPath(CasConfig.RootDirectory, "test");
+ LegacyCasLog.Open(LegacylogPath, CasLogFile::Mode::kTruncate);
+
+ for (const CasDiskIndexEntry& Entry : LogEntries)
+ {
+ BlockStoreLocation Location = Entry.Location.Get(16);
+ LegacyCasDiskLocation LegacyLocation(Location.Offset, Location.Size);
+ LegacyCasDiskIndexEntry LegacyEntry = {.Key = Entry.Key,
+ .Location = LegacyLocation,
+ .ContentType = Entry.ContentType,
+ .Flags = Entry.Flags};
+ LegacyCasLog.Append(LegacyEntry);
+ }
+ LegacyCasLog.Close();
+
+ std::filesystem::remove_all(CasConfig.RootDirectory / "test");
+
+ {
+ CasGc Gc;
+ CasContainerStrategy Cas(CasConfig, Gc);
+ Cas.Initialize("test", 2048, 16, false);
+
+ for (size_t i = 0; i < ChunkCount; i += 2)
+ {
+ CHECK(Cas.HaveChunk(ChunkHashes[i]));
+ CHECK(!Cas.HaveChunk(ChunkHashes[i + 1]));
+ CHECK(ChunkHashes[i] == IoHash::HashBuffer(Cas.FindChunk(ChunkHashes[i])));
+ }
+ }
+}
+
+TEST_CASE("compactcas.threadedinsert") // * doctest::skip(true))
+{
+ // for (uint32_t i = 0; i < 100; ++i)
+ {
+ ScopedTemporaryDirectory TempDir;
+
+ CasStoreConfiguration CasConfig;
+ CasConfig.RootDirectory = TempDir.Path();
+
+ CreateDirectories(CasConfig.RootDirectory);
+
+ const uint64_t kChunkSize = 1048;
+ const int32_t kChunkCount = 4096;
+ uint64_t ExpectedSize = 0;
+
+ std::unordered_map<IoHash, IoBuffer, IoHash::Hasher> Chunks;
+ Chunks.reserve(kChunkCount);
+
+ for (int32_t Idx = 0; Idx < kChunkCount; ++Idx)
+ {
+ while (true)
+ {
+ IoBuffer Chunk = CreateChunk(kChunkSize);
+ IoHash Hash = HashBuffer(Chunk);
+ if (Chunks.contains(Hash))
+ {
+ continue;
+ }
+ Chunks[Hash] = Chunk;
+ ExpectedSize += Chunk.Size();
+ break;
+ }
+ }
+
+ std::atomic<size_t> WorkCompleted = 0;
+ WorkerThreadPool ThreadPool(4);
+ CasGc Gc;
+ CasContainerStrategy Cas(CasConfig, Gc);
+ Cas.Initialize("test", 32768, 16, true);
+ {
+ for (const auto& Chunk : Chunks)
+ {
+ const IoHash& Hash = Chunk.first;
+ const IoBuffer& Buffer = Chunk.second;
+ ThreadPool.ScheduleWork([&Cas, &WorkCompleted, Buffer, Hash]() {
+ CasStore::InsertResult InsertResult = Cas.InsertChunk(Buffer, Hash);
+ ZEN_ASSERT(InsertResult.New);
+ WorkCompleted.fetch_add(1);
+ });
+ }
+ while (WorkCompleted < Chunks.size())
+ {
+ Sleep(1);
+ }
+ }
+
+ WorkCompleted = 0;
const uint64_t TotalSize = Cas.StorageSize().DiskSize;
- CHECK_EQ(kChunkSize * kChunkCount, TotalSize);
+ CHECK_EQ(ExpectedSize, TotalSize);
+
+ {
+ for (const auto& Chunk : Chunks)
+ {
+ ThreadPool.ScheduleWork([&Cas, &WorkCompleted, &Chunk]() {
+ IoHash ChunkHash = Chunk.first;
+ IoBuffer Buffer = Cas.FindChunk(ChunkHash);
+ IoHash Hash = IoHash::HashBuffer(Buffer);
+ CHECK(ChunkHash == Hash);
+ WorkCompleted.fetch_add(1);
+ });
+ }
+ while (WorkCompleted < Chunks.size())
+ {
+ Sleep(1);
+ }
+ }
+
+ std::unordered_set<IoHash, IoHash::Hasher> GcChunkHashes;
+ GcChunkHashes.reserve(Chunks.size());
+ for (const auto& Chunk : Chunks)
+ {
+ GcChunkHashes.insert(Chunk.first);
+ }
+ {
+ WorkCompleted = 0;
+ std::unordered_map<IoHash, IoBuffer, IoHash::Hasher> NewChunks;
+ NewChunks.reserve(kChunkCount);
+
+ for (int32_t Idx = 0; Idx < kChunkCount; ++Idx)
+ {
+ IoBuffer Chunk = CreateChunk(kChunkSize);
+ IoHash Hash = HashBuffer(Chunk);
+ NewChunks[Hash] = Chunk;
+ }
+
+ std::atomic_uint32_t AddedChunkCount;
+
+ for (const auto& Chunk : NewChunks)
+ {
+ ThreadPool.ScheduleWork([&Cas, &WorkCompleted, Chunk, &AddedChunkCount]() {
+ Cas.InsertChunk(Chunk.second, Chunk.first);
+ AddedChunkCount.fetch_add(1);
+ WorkCompleted.fetch_add(1);
+ });
+ }
+ for (const auto& Chunk : Chunks)
+ {
+ ThreadPool.ScheduleWork([&Cas, &WorkCompleted, Chunk]() {
+ IoHash ChunkHash = Chunk.first;
+ IoBuffer Buffer = Cas.FindChunk(ChunkHash);
+ if (Buffer)
+ {
+ CHECK(ChunkHash == IoHash::HashBuffer(Buffer));
+ }
+ WorkCompleted.fetch_add(1);
+ });
+ }
+
+ while (AddedChunkCount.load() < NewChunks.size())
+ {
+ // Need to be careful since we might GC blocks we don't know outside of RwLock::ExclusiveLockScope
+ for (const auto& Chunk : NewChunks)
+ {
+ if (Cas.HaveChunk(Chunk.first))
+ {
+ GcChunkHashes.emplace(Chunk.first);
+ }
+ }
+ std::vector<IoHash> KeepHashes(GcChunkHashes.begin(), GcChunkHashes.end());
+ size_t C = 0;
+ while (C < KeepHashes.size())
+ {
+ if (C % 155 == 0)
+ {
+ if (C < KeepHashes.size() - 1)
+ {
+ KeepHashes[C] = KeepHashes[KeepHashes.size() - 1];
+ KeepHashes.pop_back();
+ }
+ if (C + 3 < KeepHashes.size() - 1)
+ {
+ KeepHashes[C + 3] = KeepHashes[KeepHashes.size() - 1];
+ KeepHashes.pop_back();
+ }
+ }
+ C++;
+ }
+
+ GcContext GcCtx;
+ GcCtx.CollectSmallObjects(true);
+ GcCtx.ContributeCas(KeepHashes);
+ Cas.CollectGarbage(GcCtx);
+ CasChunkSet& Deleted = GcCtx.DeletedCas();
+ Deleted.IterateChunks([&GcChunkHashes](const IoHash& ChunkHash) { GcChunkHashes.erase(ChunkHash); });
+ }
+
+ while (WorkCompleted < NewChunks.size() + Chunks.size())
+ {
+ Sleep(1);
+ }
+
+ // Need to be careful since we might GC blocks we don't know outside of RwLock::ExclusiveLockScope
+ for (const auto& Chunk : NewChunks)
+ {
+ if (Cas.HaveChunk(Chunk.first))
+ {
+ GcChunkHashes.emplace(Chunk.first);
+ }
+ }
+ std::vector<IoHash> KeepHashes(GcChunkHashes.begin(), GcChunkHashes.end());
+ size_t C = 0;
+ while (C < KeepHashes.size())
+ {
+ if (C % 155 == 0)
+ {
+ if (C < KeepHashes.size() - 1)
+ {
+ KeepHashes[C] = KeepHashes[KeepHashes.size() - 1];
+ KeepHashes.pop_back();
+ }
+ if (C + 3 < KeepHashes.size() - 1)
+ {
+ KeepHashes[C + 3] = KeepHashes[KeepHashes.size() - 1];
+ KeepHashes.pop_back();
+ }
+ }
+ C++;
+ }
+
+ GcContext GcCtx;
+ GcCtx.CollectSmallObjects(true);
+ GcCtx.ContributeCas(KeepHashes);
+ Cas.CollectGarbage(GcCtx);
+ CasChunkSet& Deleted = GcCtx.DeletedCas();
+ Deleted.IterateChunks([&GcChunkHashes](const IoHash& ChunkHash) { GcChunkHashes.erase(ChunkHash); });
+ }
+ {
+ WorkCompleted = 0;
+ for (const IoHash& ChunkHash : GcChunkHashes)
+ {
+ ThreadPool.ScheduleWork([&Cas, &WorkCompleted, ChunkHash]() {
+ CHECK(Cas.HaveChunk(ChunkHash));
+ CHECK(ChunkHash == IoHash::HashBuffer(Cas.FindChunk(ChunkHash)));
+ WorkCompleted.fetch_add(1);
+ });
+ }
+ while (WorkCompleted < GcChunkHashes.size())
+ {
+ Sleep(1);
+ }
+ }
}
}
+TEST_CASE("compactcas.migrate.large.data" * doctest::skip(true))
+{
+ const char* BigDataPath = "D:\\zen-data\\dc4-zen-cache-t\\cas";
+ std::filesystem::path TobsBasePath = GetBasePath(BigDataPath, "tobs");
+ std::filesystem::path SobsBasePath = GetBasePath(BigDataPath, "sobs");
+ std::filesystem::remove_all(TobsBasePath);
+ std::filesystem::remove_all(SobsBasePath);
+
+ CasStoreConfiguration CasConfig;
+ CasConfig.RootDirectory = BigDataPath;
+ uint64_t TObsSize = 0;
+ {
+ CasGc TobsCasGc;
+ CasContainerStrategy TobsCas(CasConfig, TobsCasGc);
+ TobsCas.Initialize("tobs", 1u << 28, 16, false);
+ TObsSize = TobsCas.StorageSize().DiskSize;
+ CHECK(TObsSize > 0);
+ }
+
+ uint64_t SObsSize = 0;
+ {
+ CasGc SobsCasGc;
+ CasContainerStrategy SobsCas(CasConfig, SobsCasGc);
+ SobsCas.Initialize("sobs", 1u << 30, 4096, false);
+ SObsSize = SobsCas.StorageSize().DiskSize;
+ CHECK(SObsSize > 0);
+ }
+
+ CasGc TobsCasGc;
+ CasContainerStrategy TobsCas(CasConfig, TobsCasGc);
+ TobsCas.Initialize("tobs", 1u << 28, 16, false);
+ GcContext TobsGcCtx;
+ TobsCas.CollectGarbage(TobsGcCtx);
+ CHECK(TobsCas.StorageSize().DiskSize == TObsSize);
+
+ CasGc SobsCasGc;
+ CasContainerStrategy SobsCas(CasConfig, SobsCasGc);
+ SobsCas.Initialize("sobs", 1u << 30, 4096, false);
+ GcContext SobsGcCtx;
+ SobsCas.CollectGarbage(SobsGcCtx);
+ CHECK(SobsCas.StorageSize().DiskSize == SObsSize);
+}
+
#endif
void
diff --git a/zenstore/compactcas.h b/zenstore/compactcas.h
index c039feec9..11da37202 100644
--- a/zenstore/compactcas.h
+++ b/zenstore/compactcas.h
@@ -3,22 +3,13 @@
#pragma once
#include <zencore/zencore.h>
-
-#include <zencore/iobuffer.h>
-#include <zencore/iohash.h>
-#include <zencore/string.h>
-#include <zencore/thread.h>
-#include <zencore/uid.h>
-#include <zenstore/basicfile.h>
+#include <zenstore/blockstore.h>
#include <zenstore/cas.h>
#include <zenstore/caslog.h>
#include <zenstore/gc.h>
-#if ZEN_PLATFORM_WINDOWS
-# include <zencore/windows.h>
-#endif
-
#include <atomic>
+#include <limits>
#include <unordered_map>
namespace spdlog {
@@ -32,46 +23,14 @@ namespace zen {
#pragma pack(push)
#pragma pack(1)
-struct CasDiskLocation
-{
- CasDiskLocation(uint64_t InOffset, uint64_t InSize)
- {
- ZEN_ASSERT(InOffset <= 0xff'ffff'ffff);
- ZEN_ASSERT(InSize <= 0xff'ffff'ffff);
-
- memcpy(&m_Offset[0], &InOffset, sizeof m_Offset);
- memcpy(&m_Size[0], &InSize, sizeof m_Size);
- }
-
- CasDiskLocation() = default;
-
- inline uint64_t GetOffset() const
- {
- uint64_t Offset = 0;
- memcpy(&Offset, &m_Offset, sizeof m_Offset);
- return Offset;
- }
-
- inline uint64_t GetSize() const
- {
- uint64_t Size = 0;
- memcpy(&Size, &m_Size, sizeof m_Size);
- return Size;
- }
-
-private:
- uint8_t m_Offset[5];
- uint8_t m_Size[5];
-};
-
struct CasDiskIndexEntry
{
static const uint8_t kTombstone = 0x01;
- IoHash Key;
- CasDiskLocation Location;
- ZenContentType ContentType = ZenContentType::kUnknownContentType;
- uint8_t Flags = 0;
+ IoHash Key;
+ BlockStoreDiskLocation Location;
+ ZenContentType ContentType = ZenContentType::kUnknownContentType;
+ uint8_t Flags = 0;
};
#pragma pack(pop)
@@ -91,39 +50,46 @@ struct CasContainerStrategy final : public GcStorage
CasContainerStrategy(const CasStoreConfiguration& Config, CasGc& Gc);
~CasContainerStrategy();
- CasStore::InsertResult InsertChunk(const void* ChunkData, size_t ChunkSize, const IoHash& ChunkHash);
CasStore::InsertResult InsertChunk(IoBuffer Chunk, const IoHash& ChunkHash);
IoBuffer FindChunk(const IoHash& ChunkHash);
bool HaveChunk(const IoHash& ChunkHash);
void FilterChunks(CasChunkSet& InOutChunks);
- void Initialize(const std::string_view ContainerBaseName, uint64_t Alignment, bool IsNewStore);
+ void Initialize(const std::string_view ContainerBaseName, uint32_t MaxBlockSize, uint64_t Alignment, bool IsNewStore);
void Flush();
void Scrub(ScrubContext& Ctx);
virtual void CollectGarbage(GcContext& GcCtx) override;
- virtual GcStorageSize StorageSize() const override { return {.DiskSize = m_TotalSize.load(std::memory_order::relaxed)}; }
+ virtual GcStorageSize StorageSize() const override { return {.DiskSize = m_TotalSize.load(std::memory_order::acquire)}; }
private:
- void OpenContainer(bool IsNewStore);
- void CloseContainer();
+ CasStore::InsertResult InsertChunk(const void* ChunkData, size_t ChunkSize, const IoHash& ChunkHash);
+ void MakeIndexSnapshot();
+ uint64_t ReadIndexFile();
+ uint64_t ReadLog(uint64_t SkipEntryCount);
+ uint64_t MigrateLegacyData(bool CleanSource);
+ void OpenContainer(bool IsNewStore);
+
spdlog::logger& Log() { return m_Log; }
const CasStoreConfiguration& m_Config;
spdlog::logger& m_Log;
- uint64_t m_PayloadAlignment = 1 << 4;
+ uint64_t m_PayloadAlignment = 1u << 4;
+ uint64_t m_MaxBlockSize = 1u << 28;
bool m_IsInitialized = false;
- BasicFile m_SmallObjectFile;
- BasicFile m_SmallObjectIndex;
TCasLogFile<CasDiskIndexEntry> m_CasLog;
std::string m_ContainerBaseName;
+ std::filesystem::path m_BlocksBasePath;
+
+ RwLock m_LocationMapLock;
+ typedef std::unordered_map<IoHash, BlockStoreDiskLocation, IoHash::Hasher> LocationMap_t;
+ LocationMap_t m_LocationMap;
+ std::unordered_map<uint32_t, Ref<BlockStoreFile>> m_ChunkBlocks;
- RwLock m_LocationMapLock;
- std::unordered_map<IoHash, CasDiskLocation, IoHash::Hasher> m_LocationMap;
- RwLock m_InsertLock; // used to serialize inserts
- std::atomic_uint64_t m_CurrentInsertOffset{};
- std::atomic_uint64_t m_CurrentIndexOffset{};
- std::atomic_uint64_t m_TotalSize{};
+ RwLock m_InsertLock; // used to serialize inserts
+ Ref<BlockStoreFile> m_WriteBlock;
+ std::uint64_t m_CurrentInsertOffset = 0;
- void MakeSnapshot();
+ std::atomic_uint32_t m_WriteBlockIndex{};
+ std::atomic_uint64_t m_TotalSize{};
};
void compactcas_forcelink();
diff --git a/zenstore/filecas.cpp b/zenstore/filecas.cpp
index 758c0665b..b53cfaa54 100644
--- a/zenstore/filecas.cpp
+++ b/zenstore/filecas.cpp
@@ -12,6 +12,7 @@
#include <zencore/testing.h>
#include <zencore/testutils.h>
#include <zencore/thread.h>
+#include <zencore/timer.h>
#include <zencore/uid.h>
#include <zenstore/basicfile.h>
#include <zenstore/gc.h>
@@ -88,18 +89,37 @@ FileCasStrategy::Initialize(bool IsNewStore)
CreateDirectories(m_Config.RootDirectory);
- m_CasLog.Open(m_Config.RootDirectory / "cas.ulog", IsNewStore);
+ m_CasLog.Open(m_Config.RootDirectory / "cas.ulog", IsNewStore ? CasLogFile::Mode::kTruncate : CasLogFile::Mode::kWrite);
- m_CasLog.Replay([&](const FileCasIndexEntry& Entry) {
- if (Entry.IsFlagSet(FileCasIndexEntry::kTombStone))
- {
- m_TotalSize.fetch_sub(Entry.Size, std::memory_order_relaxed);
- }
- else
- {
- m_TotalSize.fetch_add(Entry.Size, std::memory_order_relaxed);
- }
+ Stopwatch Timer;
+ const auto _ = MakeGuard([this, &Timer] {
+ ZEN_INFO("read log {} containing {}", m_Config.RootDirectory / "cas.ulog", NiceBytes(m_TotalSize.load(std::memory_order::relaxed)));
});
+
+ std::unordered_set<IoHash> FoundEntries;
+ FoundEntries.reserve(10000);
+ m_CasLog.Replay(
+ [&](const FileCasIndexEntry& Entry) {
+ if (Entry.IsFlagSet(FileCasIndexEntry::kTombStone))
+ {
+ if (!FoundEntries.contains(Entry.Key))
+ {
+ return;
+ }
+ m_TotalSize.fetch_sub(Entry.Size, std::memory_order_relaxed);
+ FoundEntries.erase(Entry.Key);
+ }
+ else
+ {
+ if (FoundEntries.contains(Entry.Key))
+ {
+ return;
+ }
+ FoundEntries.insert(Entry.Key);
+ m_TotalSize.fetch_add(Entry.Size, std::memory_order_relaxed);
+ }
+ },
+ 0);
}
CasStore::InsertResult
@@ -565,7 +585,7 @@ FileCasStrategy::IterateChunks(std::function<void(const IoHash& Hash, BasicFile&
BasicFile PayloadFile;
std::error_code Ec;
- PayloadFile.Open(Parent / File, false, Ec);
+ PayloadFile.Open(Parent / File, BasicFile::Mode::kWrite, Ec);
if (!Ec)
{
@@ -668,6 +688,20 @@ FileCasStrategy::CollectGarbage(GcContext& GcCtx)
std::vector<IoHash> CandidateCas;
+ uint64_t DeletedCount = 0;
+ uint64_t OldTotalSize = m_TotalSize.load(std::memory_order::relaxed);
+
+ Stopwatch TotalTimer;
+ const auto _ = MakeGuard([this, &TotalTimer, &DeletedCount, &ChunkCount, OldTotalSize] {
+ ZEN_INFO("garbage collect for '{}' DONE after {}, deleted {} out of {} files, removed {} out of {}",
+ m_Config.RootDirectory,
+ NiceTimeSpanMs(TotalTimer.GetElapsedTimeMs()),
+ DeletedCount,
+ ChunkCount,
+ NiceBytes(OldTotalSize - m_TotalSize.load(std::memory_order::relaxed)),
+ NiceBytes(OldTotalSize));
+ });
+
IterateChunks([&](const IoHash& Hash, BasicFile& Payload) {
bool KeepThis = false;
CandidateCas.clear();
@@ -689,16 +723,17 @@ FileCasStrategy::CollectGarbage(GcContext& GcCtx)
ChunkBytes.fetch_add(FileSize);
});
- ZEN_INFO("file CAS gc scanned: {} chunks ({})", ChunkCount.load(), NiceBytes(ChunkBytes));
-
if (ChunksToDelete.empty())
{
- ZEN_INFO("nothing to delete");
-
+ ZEN_INFO("gc for '{}' SKIPPED, nothing to delete", m_Config.RootDirectory);
return;
}
- ZEN_INFO("deleting file CAS garbage: {} chunks ({})", ChunksToDelete.size(), NiceBytes(ChunksToDeleteBytes));
+ ZEN_INFO("deleting file CAS garbage for '{}': {} out of {} chunks ({})",
+ m_Config.RootDirectory,
+ ChunksToDelete.size(),
+ ChunkCount.load(),
+ NiceBytes(ChunksToDeleteBytes));
if (GcCtx.IsDeletionMode() == false)
{
@@ -716,8 +751,10 @@ FileCasStrategy::CollectGarbage(GcContext& GcCtx)
if (Ec)
{
- ZEN_WARN("failed to delete file for chunk {}: '{}'", Hash, Ec.message());
+ ZEN_WARN("gc for '{}' failed to delete file for chunk {}: '{}'", m_Config.RootDirectory, Hash, Ec.message());
+ continue;
}
+ DeletedCount++;
}
GcCtx.DeletedCas(ChunksToDelete);
@@ -747,7 +784,7 @@ TEST_CASE("cas.file.move")
IoHash ZeroHash = IoHash::HashBuffer(ZeroBytes);
BasicFile PayloadFile;
- PayloadFile.Open(Payload1Path, true);
+ PayloadFile.Open(Payload1Path, BasicFile::Mode::kTruncate);
PayloadFile.Write(ZeroBytes, 0);
PayloadFile.Close();
diff --git a/zenstore/gc.cpp b/zenstore/gc.cpp
index 3b090cae9..856f9af02 100644
--- a/zenstore/gc.cpp
+++ b/zenstore/gc.cpp
@@ -5,9 +5,11 @@
#include <zencore/compactbinary.h>
#include <zencore/compactbinarybuilder.h>
#include <zencore/compactbinaryvalidation.h>
+#include <zencore/except.h>
#include <zencore/filesystem.h>
#include <zencore/fmtutils.h>
#include <zencore/logging.h>
+#include <zencore/scopeguard.h>
#include <zencore/string.h>
#include <zencore/testing.h>
#include <zencore/testutils.h>
@@ -18,6 +20,15 @@
#include <fmt/format.h>
#include <filesystem>
+#if ZEN_PLATFORM_WINDOWS
+# include <zencore/windows.h>
+#else
+# include <fcntl.h>
+# include <sys/file.h>
+# include <sys/stat.h>
+# include <unistd.h>
+#endif
+
#if ZEN_WITH_TESTS
# include <zencore/compress.h>
# include <algorithm>
@@ -31,6 +42,107 @@ namespace fs = std::filesystem;
//////////////////////////////////////////////////////////////////////////
+namespace {
+ std::error_code CreateGCReserve(const std::filesystem::path& Path, uint64_t Size)
+ {
+ if (Size == 0)
+ {
+ std::filesystem::remove(Path);
+ return std::error_code{};
+ }
+ CreateDirectories(Path.parent_path());
+ if (std::filesystem::is_regular_file(Path) && std::filesystem::file_size(Path) == Size)
+ {
+ return std::error_code();
+ }
+#if ZEN_PLATFORM_WINDOWS
+ DWORD dwCreationDisposition = CREATE_ALWAYS;
+ DWORD dwDesiredAccess = GENERIC_READ | GENERIC_WRITE;
+
+ const DWORD dwShareMode = 0;
+ const DWORD dwFlagsAndAttributes = FILE_ATTRIBUTE_NORMAL;
+ HANDLE hTemplateFile = nullptr;
+
+ HANDLE FileHandle = CreateFile(Path.c_str(),
+ dwDesiredAccess,
+ dwShareMode,
+ /* lpSecurityAttributes */ nullptr,
+ dwCreationDisposition,
+ dwFlagsAndAttributes,
+ hTemplateFile);
+
+ if (FileHandle == INVALID_HANDLE_VALUE)
+ {
+ return MakeErrorCodeFromLastError();
+ }
+ bool Keep = true;
+ auto _ = MakeGuard([FileHandle, &Keep, Path]() {
+ ::CloseHandle(FileHandle);
+ if (!Keep)
+ {
+ ::DeleteFile(Path.c_str());
+ }
+ });
+ LARGE_INTEGER liFileSize;
+ liFileSize.QuadPart = Size;
+ BOOL OK = ::SetFilePointerEx(FileHandle, liFileSize, 0, FILE_BEGIN);
+ if (!OK)
+ {
+ return MakeErrorCodeFromLastError();
+ }
+ OK = ::SetEndOfFile(FileHandle);
+ if (!OK)
+ {
+ return MakeErrorCodeFromLastError();
+ }
+ Keep = true;
+#else
+ int OpenFlags = O_CLOEXEC | O_RDWR | O_CREAT;
+ int Fd = open(Path.c_str(), OpenFlags, 0666);
+ if (Fd < 0)
+ {
+ return MakeErrorCodeFromLastError();
+ }
+
+ bool Keep = true;
+ auto _ = MakeGuard([Fd, &Keep, Path]() {
+ close(Fd);
+ if (!Keep)
+ {
+ unlink(Path.c_str());
+ }
+ });
+
+ if (fchmod(Fd, 0666) < 0)
+ {
+ return MakeErrorCodeFromLastError();
+ }
+
+# if ZEN_PLATFORM_MAC
+ if (ftruncate(Fd, (off_t)Size) < 0)
+ {
+ return MakeErrorCodeFromLastError();
+ }
+# else
+ if (ftruncate64(Fd, (off64_t)Size) < 0)
+ {
+ return MakeErrorCodeFromLastError();
+ }
+ int Error = posix_fallocate64(Fd, 0, (off64_t)Size);
+ if (Error)
+ {
+ return MakeErrorCode(Error);
+ }
+# endif
+ Keep = true;
+#endif
+ return std::error_code{};
+ }
+
+} // namespace
+
+//////////////////////////////////////////////////////////////////////////
+
CbObject
LoadCompactBinaryObject(const fs::path& Path)
{
@@ -74,6 +186,8 @@ struct GcContext::GcState
GcClock::Duration m_MaxCacheDuration = std::chrono::hours(24);
bool m_DeletionMode = true;
bool m_CollectSmallObjects = false;
+
+ std::filesystem::path DiskReservePath;
};
GcContext::GcContext(GcClock::TimePoint Time) : m_State(std::make_unique<GcState>())
@@ -194,6 +308,27 @@ GcContext::MaxCacheDuration(GcClock::Duration Duration)
m_State->m_MaxCacheDuration = Duration;
}
+void
+GcContext::DiskReservePath(const std::filesystem::path& Path)
+{
+ m_State->DiskReservePath = Path;
+}
+
+uint64_t
+GcContext::ClaimGCReserve()
+{
+ if (!std::filesystem::is_regular_file(m_State->DiskReservePath))
+ {
+ return 0;
+ }
+ uint64_t ReclaimedSize = std::filesystem::file_size(m_State->DiskReservePath);
+ if (std::filesystem::remove(m_State->DiskReservePath))
+ {
+ return ReclaimedSize;
+ }
+ return 0;
+}
+
//////////////////////////////////////////////////////////////////////////
GcContributor::GcContributor(CasGc& Gc) : m_Gc(Gc)
@@ -262,10 +397,13 @@ CasGc::CollectGarbage(GcContext& GcCtx)
RwLock::SharedLockScope _(m_Lock);
// First gather reference set
-
- for (GcContributor* Contributor : m_GcContribs)
{
- Contributor->GatherReferences(GcCtx);
+ Stopwatch Timer;
+ const auto Guard = MakeGuard([this, &Timer] { ZEN_INFO("gathered references in {}", NiceTimeSpanMs(Timer.GetElapsedTimeMs())); });
+ for (GcContributor* Contributor : m_GcContribs)
+ {
+ Contributor->GatherReferences(GcCtx);
+ }
}
// Cache records reference CAS chunks with the uncompressed
@@ -300,15 +438,22 @@ CasGc::CollectGarbage(GcContext& GcCtx)
// Then trim storage
- for (GcStorage* Storage : m_GcStorage)
{
- Storage->CollectGarbage(GcCtx);
+ Stopwatch Timer;
+ const auto Guard = MakeGuard([this, &Timer] { ZEN_INFO("collected garbage in {}", NiceTimeSpanMs(Timer.GetElapsedTimeMs())); });
+ for (GcStorage* Storage : m_GcStorage)
+ {
+ Storage->CollectGarbage(GcCtx);
+ }
}
// Remove Cid to CAS hash mappings. Scrub?
if (CidStore* CidStore = m_CidStore)
{
+ Stopwatch Timer;
+ const auto Guard =
+ MakeGuard([this, &Timer] { ZEN_INFO("clean up deleted content ids in {}", NiceTimeSpanMs(Timer.GetElapsedTimeMs())); });
CidStore->RemoveCids(GcCtx.DeletedCas());
}
}
@@ -379,6 +524,15 @@ GcScheduler::Initialize(const GcSchedulerConfig& Config)
std::filesystem::create_directories(Config.RootDirectory);
+ std::error_code Ec = CreateGCReserve(m_Config.RootDirectory / "reserve.gc", m_Config.DiskReserveSize);
+ if (Ec)
+ {
+ ZEN_WARN("unable to create GC reserve at '{}' with size {}, reason '{}'",
+ m_Config.RootDirectory / "reserve.gc",
+ NiceBytes(m_Config.DiskReserveSize),
+ Ec.message());
+ }
+
m_LastGcTime = GcClock::Now();
if (CbObject SchedulerState = LoadCompactBinaryObject(Config.RootDirectory / "gc_state"))
@@ -475,7 +629,7 @@ GcScheduler::SchedulerThread()
if (Ec)
{
- ZEN_WARN("get disk space info FAILED, reason '{}'", Ec.message());
+ ZEN_WARN("get disk space info FAILED, reason: '{}'", Ec.message());
}
ZEN_INFO("{} in use, {} of total {} free disk space, {}",
@@ -506,6 +660,7 @@ GcScheduler::SchedulerThread()
GcCtx.SetDeletionMode(true);
GcCtx.CollectSmallObjects(m_Config.CollectSmallObjects);
GcCtx.MaxCacheDuration(m_Config.MaxCacheDuration);
+ GcCtx.DiskReservePath(m_Config.RootDirectory / "reserve.gc");
if (m_TriggerParams)
{
@@ -519,27 +674,37 @@ GcScheduler::SchedulerThread()
}
}
- Stopwatch Timer;
-
ZEN_INFO("garbage collection STARTING, small objects gc {}, max cache duration {}",
GcCtx.CollectSmallObjects() ? "ENABLED"sv : "DISABLED"sv,
NiceTimeSpanMs(uint64_t(std::chrono::duration_cast<std::chrono::milliseconds>(GcCtx.MaxCacheDuration()).count())));
+ {
+ Stopwatch Timer;
+ const auto __ =
+ MakeGuard([this, &Timer] { ZEN_INFO("garbage collection DONE after {}", NiceTimeSpanMs(Timer.GetElapsedTimeMs())); });
- m_CasGc.CollectGarbage(GcCtx);
+ m_CasGc.CollectGarbage(GcCtx);
- m_LastGcTime = GcClock::Now();
- m_NextGcTime = NextGcTime(m_LastGcTime);
- WaitTime = m_Config.MonitorInterval;
+ m_LastGcTime = GcClock::Now();
+ m_NextGcTime = NextGcTime(m_LastGcTime);
+ WaitTime = m_Config.MonitorInterval;
- {
- const fs::path Path = m_Config.RootDirectory / "gc_state";
- ZEN_DEBUG("saving scheduler state to '{}'", Path);
- CbObjectWriter SchedulderState;
- SchedulderState << "LastGcTime"sv << static_cast<int64_t>(m_LastGcTime.time_since_epoch().count());
- SaveCompactBinaryObject(Path, SchedulderState.Save());
- }
+ {
+ const fs::path Path = m_Config.RootDirectory / "gc_state";
+ ZEN_DEBUG("saving scheduler state to '{}'", Path);
+ CbObjectWriter SchedulderState;
+ SchedulderState << "LastGcTime"sv << static_cast<int64_t>(m_LastGcTime.time_since_epoch().count());
+ SaveCompactBinaryObject(Path, SchedulderState.Save());
+ }
- ZEN_INFO("garbage collection DONE after {}", NiceTimeSpanMs(Timer.GetElapsedTimeMs()));
+ std::error_code Ec = CreateGCReserve(m_Config.RootDirectory / "reserve.gc", m_Config.DiskReserveSize);
+ if (Ec)
+ {
+ ZEN_WARN("unable to create GC reserve at '{}' with size {}, reason: '{}'",
+ m_Config.RootDirectory / "reserve.gc",
+ NiceBytes(m_Config.DiskReserveSize),
+ Ec.message());
+ }
+ }
uint32_t RunningState = static_cast<uint32_t>(GcSchedulerStatus::kRunning);
if (!m_Status.compare_exchange_strong(RunningState, static_cast<uint32_t>(GcSchedulerStatus::kIdle)))
@@ -573,16 +738,15 @@ namespace {
static std::random_device rd;
static std::mt19937 g(rd());
- const size_t Count = static_cast<size_t>(Size / sizeof(uint32_t));
- std::vector<uint32_t> Values;
- Values.resize(Count);
- for (size_t Idx = 0; Idx < Count; ++Idx)
+ std::vector<uint8_t> Values;
+ Values.resize(Size);
+ for (size_t Idx = 0; Idx < Size; ++Idx)
{
- Values[Idx] = static_cast<uint32_t>(Idx);
+ Values[Idx] = static_cast<uint8_t>(Idx);
}
std::shuffle(Values.begin(), Values.end(), g);
- return IoBufferBuilder::MakeCloneFromMemory(Values.data(), Values.size() * sizeof(uint32_t));
+ return IoBufferBuilder::MakeCloneFromMemory(Values.data(), Values.size());
}
static CompressedBuffer Compress(IoBuffer Buffer)
@@ -613,11 +777,209 @@ TEST_CASE("gc.basic")
GcContext GcCtx;
GcCtx.CollectSmallObjects(true);
+ CasStore->Flush();
Gc.CollectGarbage(GcCtx);
CHECK(!CidStore.ContainsChunk(InsertResult.DecompressedId));
}
+TEST_CASE("gc.full")
+{
+ ScopedTemporaryDirectory TempDir;
+
+ CasStoreConfiguration CasConfig;
+ CasConfig.RootDirectory = TempDir.Path() / "cas";
+
+ CasGc Gc;
+ std::unique_ptr<CasStore> CasStore = CreateCasStore(Gc);
+
+ CasStore->Initialize(CasConfig);
+
+ uint64_t ChunkSizes[9] = {128, 541, 1023, 781, 218, 37, 4, 997, 5};
+ IoBuffer Chunks[9] = {CreateChunk(ChunkSizes[0]),
+ CreateChunk(ChunkSizes[1]),
+ CreateChunk(ChunkSizes[2]),
+ CreateChunk(ChunkSizes[3]),
+ CreateChunk(ChunkSizes[4]),
+ CreateChunk(ChunkSizes[5]),
+ CreateChunk(ChunkSizes[6]),
+ CreateChunk(ChunkSizes[7]),
+ CreateChunk(ChunkSizes[8])};
+ IoHash ChunkHashes[9] = {
+ IoHash::HashBuffer(Chunks[0].Data(), Chunks[0].Size()),
+ IoHash::HashBuffer(Chunks[1].Data(), Chunks[1].Size()),
+ IoHash::HashBuffer(Chunks[2].Data(), Chunks[2].Size()),
+ IoHash::HashBuffer(Chunks[3].Data(), Chunks[3].Size()),
+ IoHash::HashBuffer(Chunks[4].Data(), Chunks[4].Size()),
+ IoHash::HashBuffer(Chunks[5].Data(), Chunks[5].Size()),
+ IoHash::HashBuffer(Chunks[6].Data(), Chunks[6].Size()),
+ IoHash::HashBuffer(Chunks[7].Data(), Chunks[7].Size()),
+ IoHash::HashBuffer(Chunks[8].Data(), Chunks[8].Size()),
+ };
+
+ CasStore->InsertChunk(Chunks[0], ChunkHashes[0]);
+ CasStore->InsertChunk(Chunks[1], ChunkHashes[1]);
+ CasStore->InsertChunk(Chunks[2], ChunkHashes[2]);
+ CasStore->InsertChunk(Chunks[3], ChunkHashes[3]);
+ CasStore->InsertChunk(Chunks[4], ChunkHashes[4]);
+ CasStore->InsertChunk(Chunks[5], ChunkHashes[5]);
+ CasStore->InsertChunk(Chunks[6], ChunkHashes[6]);
+ CasStore->InsertChunk(Chunks[7], ChunkHashes[7]);
+ CasStore->InsertChunk(Chunks[8], ChunkHashes[8]);
+
+ CasStoreSize InitialSize = CasStore->TotalSize();
+
+ // Keep first and last
+ {
+ GcContext GcCtx;
+ GcCtx.CollectSmallObjects(true);
+
+ std::vector<IoHash> KeepChunks;
+ KeepChunks.push_back(ChunkHashes[0]);
+ KeepChunks.push_back(ChunkHashes[8]);
+ GcCtx.ContributeCas(KeepChunks);
+
+ CasStore->Flush();
+ Gc.CollectGarbage(GcCtx);
+
+ CHECK(CasStore->ContainsChunk(ChunkHashes[0]));
+ CHECK(!CasStore->ContainsChunk(ChunkHashes[1]));
+ CHECK(!CasStore->ContainsChunk(ChunkHashes[2]));
+ CHECK(!CasStore->ContainsChunk(ChunkHashes[3]));
+ CHECK(!CasStore->ContainsChunk(ChunkHashes[4]));
+ CHECK(!CasStore->ContainsChunk(ChunkHashes[5]));
+ CHECK(!CasStore->ContainsChunk(ChunkHashes[6]));
+ CHECK(!CasStore->ContainsChunk(ChunkHashes[7]));
+ CHECK(CasStore->ContainsChunk(ChunkHashes[8]));
+
+ CHECK(ChunkHashes[0] == IoHash::HashBuffer(CasStore->FindChunk(ChunkHashes[0])));
+ CHECK(ChunkHashes[8] == IoHash::HashBuffer(CasStore->FindChunk(ChunkHashes[8])));
+ }
+
+ CasStore->InsertChunk(Chunks[1], ChunkHashes[1]);
+ CasStore->InsertChunk(Chunks[2], ChunkHashes[2]);
+ CasStore->InsertChunk(Chunks[3], ChunkHashes[3]);
+ CasStore->InsertChunk(Chunks[4], ChunkHashes[4]);
+ CasStore->InsertChunk(Chunks[5], ChunkHashes[5]);
+ CasStore->InsertChunk(Chunks[6], ChunkHashes[6]);
+ CasStore->InsertChunk(Chunks[7], ChunkHashes[7]);
+
+ // Keep last
+ {
+ GcContext GcCtx;
+ GcCtx.CollectSmallObjects(true);
+ std::vector<IoHash> KeepChunks;
+ KeepChunks.push_back(ChunkHashes[8]);
+ GcCtx.ContributeCas(KeepChunks);
+
+ CasStore->Flush();
+ Gc.CollectGarbage(GcCtx);
+
+ CHECK(!CasStore->ContainsChunk(ChunkHashes[0]));
+ CHECK(!CasStore->ContainsChunk(ChunkHashes[1]));
+ CHECK(!CasStore->ContainsChunk(ChunkHashes[2]));
+ CHECK(!CasStore->ContainsChunk(ChunkHashes[3]));
+ CHECK(!CasStore->ContainsChunk(ChunkHashes[4]));
+ CHECK(!CasStore->ContainsChunk(ChunkHashes[5]));
+ CHECK(!CasStore->ContainsChunk(ChunkHashes[6]));
+ CHECK(!CasStore->ContainsChunk(ChunkHashes[7]));
+ CHECK(CasStore->ContainsChunk(ChunkHashes[8]));
+
+ CHECK(ChunkHashes[8] == IoHash::HashBuffer(CasStore->FindChunk(ChunkHashes[8])));
+
+ CasStore->InsertChunk(Chunks[1], ChunkHashes[1]);
+ CasStore->InsertChunk(Chunks[2], ChunkHashes[2]);
+ CasStore->InsertChunk(Chunks[3], ChunkHashes[3]);
+ CasStore->InsertChunk(Chunks[4], ChunkHashes[4]);
+ CasStore->InsertChunk(Chunks[5], ChunkHashes[5]);
+ CasStore->InsertChunk(Chunks[6], ChunkHashes[6]);
+ CasStore->InsertChunk(Chunks[7], ChunkHashes[7]);
+ }
+
+ // Keep mixed
+ {
+ GcContext GcCtx;
+ GcCtx.CollectSmallObjects(true);
+ std::vector<IoHash> KeepChunks;
+ KeepChunks.push_back(ChunkHashes[1]);
+ KeepChunks.push_back(ChunkHashes[4]);
+ KeepChunks.push_back(ChunkHashes[7]);
+ GcCtx.ContributeCas(KeepChunks);
+
+ CasStore->Flush();
+ Gc.CollectGarbage(GcCtx);
+
+ CHECK(!CasStore->ContainsChunk(ChunkHashes[0]));
+ CHECK(CasStore->ContainsChunk(ChunkHashes[1]));
+ CHECK(!CasStore->ContainsChunk(ChunkHashes[2]));
+ CHECK(!CasStore->ContainsChunk(ChunkHashes[3]));
+ CHECK(CasStore->ContainsChunk(ChunkHashes[4]));
+ CHECK(!CasStore->ContainsChunk(ChunkHashes[5]));
+ CHECK(!CasStore->ContainsChunk(ChunkHashes[6]));
+ CHECK(CasStore->ContainsChunk(ChunkHashes[7]));
+ CHECK(!CasStore->ContainsChunk(ChunkHashes[8]));
+
+ CHECK(ChunkHashes[1] == IoHash::HashBuffer(CasStore->FindChunk(ChunkHashes[1])));
+ CHECK(ChunkHashes[4] == IoHash::HashBuffer(CasStore->FindChunk(ChunkHashes[4])));
+ CHECK(ChunkHashes[7] == IoHash::HashBuffer(CasStore->FindChunk(ChunkHashes[7])));
+
+ CasStore->InsertChunk(Chunks[0], ChunkHashes[0]);
+ CasStore->InsertChunk(Chunks[2], ChunkHashes[2]);
+ CasStore->InsertChunk(Chunks[3], ChunkHashes[3]);
+ CasStore->InsertChunk(Chunks[5], ChunkHashes[5]);
+ CasStore->InsertChunk(Chunks[6], ChunkHashes[6]);
+ CasStore->InsertChunk(Chunks[8], ChunkHashes[8]);
+ }
+
+ // Keep multiple at end
+ {
+ GcContext GcCtx;
+ GcCtx.CollectSmallObjects(true);
+ std::vector<IoHash> KeepChunks;
+ KeepChunks.push_back(ChunkHashes[6]);
+ KeepChunks.push_back(ChunkHashes[7]);
+ KeepChunks.push_back(ChunkHashes[8]);
+ GcCtx.ContributeCas(KeepChunks);
+
+ CasStore->Flush();
+ Gc.CollectGarbage(GcCtx);
+
+ CHECK(!CasStore->ContainsChunk(ChunkHashes[0]));
+ CHECK(!CasStore->ContainsChunk(ChunkHashes[1]));
+ CHECK(!CasStore->ContainsChunk(ChunkHashes[2]));
+ CHECK(!CasStore->ContainsChunk(ChunkHashes[3]));
+ CHECK(!CasStore->ContainsChunk(ChunkHashes[4]));
+ CHECK(!CasStore->ContainsChunk(ChunkHashes[5]));
+ CHECK(CasStore->ContainsChunk(ChunkHashes[6]));
+ CHECK(CasStore->ContainsChunk(ChunkHashes[7]));
+ CHECK(CasStore->ContainsChunk(ChunkHashes[8]));
+
+ CHECK(ChunkHashes[6] == IoHash::HashBuffer(CasStore->FindChunk(ChunkHashes[6])));
+ CHECK(ChunkHashes[7] == IoHash::HashBuffer(CasStore->FindChunk(ChunkHashes[7])));
+ CHECK(ChunkHashes[8] == IoHash::HashBuffer(CasStore->FindChunk(ChunkHashes[8])));
+
+ CasStore->InsertChunk(Chunks[0], ChunkHashes[0]);
+ CasStore->InsertChunk(Chunks[1], ChunkHashes[1]);
+ CasStore->InsertChunk(Chunks[2], ChunkHashes[2]);
+ CasStore->InsertChunk(Chunks[3], ChunkHashes[3]);
+ CasStore->InsertChunk(Chunks[4], ChunkHashes[4]);
+ CasStore->InsertChunk(Chunks[5], ChunkHashes[5]);
+ }
+
+ // Verify that we nicely appended blocks even after all GC operations
+ CHECK(ChunkHashes[0] == IoHash::HashBuffer(CasStore->FindChunk(ChunkHashes[0])));
+ CHECK(ChunkHashes[1] == IoHash::HashBuffer(CasStore->FindChunk(ChunkHashes[1])));
+ CHECK(ChunkHashes[2] == IoHash::HashBuffer(CasStore->FindChunk(ChunkHashes[2])));
+ CHECK(ChunkHashes[3] == IoHash::HashBuffer(CasStore->FindChunk(ChunkHashes[3])));
+ CHECK(ChunkHashes[4] == IoHash::HashBuffer(CasStore->FindChunk(ChunkHashes[4])));
+ CHECK(ChunkHashes[5] == IoHash::HashBuffer(CasStore->FindChunk(ChunkHashes[5])));
+ CHECK(ChunkHashes[6] == IoHash::HashBuffer(CasStore->FindChunk(ChunkHashes[6])));
+ CHECK(ChunkHashes[7] == IoHash::HashBuffer(CasStore->FindChunk(ChunkHashes[7])));
+ CHECK(ChunkHashes[8] == IoHash::HashBuffer(CasStore->FindChunk(ChunkHashes[8])));
+
+ auto FinalSize = CasStore->TotalSize();
+ CHECK(InitialSize.TinySize == FinalSize.TinySize);
+}
#endif
void
diff --git a/zenstore/include/zenstore/basicfile.h b/zenstore/include/zenstore/basicfile.h
index 2df016c76..5a500c65f 100644
--- a/zenstore/include/zenstore/basicfile.h
+++ b/zenstore/include/zenstore/basicfile.h
@@ -31,8 +31,17 @@ public:
BasicFile(const BasicFile&) = delete;
BasicFile& operator=(const BasicFile&) = delete;
- void Open(std::filesystem::path FileName, bool IsCreate);
- void Open(std::filesystem::path FileName, bool IsCreate, std::error_code& Ec);
+ enum class Mode : uint32_t
+ {
+ kRead = 0, // Opens a existing file for read only
+ kWrite = 1, // Opens (or creates) a file for read and write
+ kTruncate = 2, // Opens (or creates) a file for read and write and sets the size to zero
+ kDelete = 3, // Opens (or creates) a file for read and write enabling MarkAsDeleteOnClose()
+ kTruncateDelete = 4 // Opens (or creates) a file for read and write and sets the size to zero enabling MarkAsDeleteOnClose()
+ };
+
+ void Open(const std::filesystem::path& FileName, Mode Mode);
+ void Open(const std::filesystem::path& FileName, Mode Mode, std::error_code& Ec);
void Close();
void Read(void* Data, uint64_t Size, uint64_t FileOffset);
void StreamFile(std::function<void(const void* Data, uint64_t Size)>&& ChunkFun);
@@ -43,13 +52,17 @@ public:
void Write(const void* Data, uint64_t Size, uint64_t FileOffset, std::error_code& Ec);
void Flush();
uint64_t FileSize();
+ void SetFileSize(uint64_t FileSize);
IoBuffer ReadAll();
void WriteAll(IoBuffer Data, std::error_code& Ec);
+ void MarkAsDeleteOnClose(std::error_code& Ec);
+ void* Detach();
inline void* Handle() { return m_FileHandle; }
protected:
void* m_FileHandle = nullptr; // This is either null or valid
+private:
};
/**
diff --git a/zenstore/include/zenstore/blockstore.h b/zenstore/include/zenstore/blockstore.h
new file mode 100644
index 000000000..424db461a
--- /dev/null
+++ b/zenstore/include/zenstore/blockstore.h
@@ -0,0 +1,104 @@
+// Copyright Epic Games, Inc. All Rights Reserved.
+
+#pragma once
+
+#include <zencore/filesystem.h>
+#include <zencore/zencore.h>
+#include <zenstore/basicfile.h>
+
+namespace zen {
+
+//////////////////////////////////////////////////////////////////////////
+
+struct BlockStoreLocation
+{
+ uint32_t BlockIndex;
+ uint64_t Offset;
+ uint64_t Size;
+};
+
+#pragma pack(push)
+#pragma pack(1)
+
+struct BlockStoreDiskLocation
+{
+ constexpr static uint32_t MaxBlockIndexBits = 20;
+ constexpr static uint32_t MaxOffsetBits = 28;
+ constexpr static uint32_t MaxBlockIndex = (1ul << BlockStoreDiskLocation::MaxBlockIndexBits) - 1ul;
+ constexpr static uint32_t MaxOffset = (1ul << BlockStoreDiskLocation::MaxOffsetBits) - 1ul;
+
+ BlockStoreDiskLocation(const BlockStoreLocation& Location, uint64_t OffsetAlignment)
+ {
+ Init(Location.BlockIndex, Location.Offset / OffsetAlignment, Location.Size);
+ }
+
+ BlockStoreDiskLocation() = default;
+
+ inline BlockStoreLocation Get(uint64_t OffsetAlignment) const
+ {
+ uint64_t PackedOffset = 0;
+ memcpy(&PackedOffset, &m_Offset, sizeof m_Offset);
+ return {.BlockIndex = static_cast<std::uint32_t>(PackedOffset >> MaxOffsetBits),
+ .Offset = (PackedOffset & MaxOffset) * OffsetAlignment,
+ .Size = GetSize()};
+ }
+
+ inline uint32_t GetBlockIndex() const
+ {
+ uint64_t PackedOffset = 0;
+ memcpy(&PackedOffset, &m_Offset, sizeof m_Offset);
+ return static_cast<std::uint32_t>(PackedOffset >> MaxOffsetBits);
+ }
+
+ inline uint64_t GetOffset(uint64_t OffsetAlignment) const
+ {
+ uint64_t PackedOffset = 0;
+ memcpy(&PackedOffset, &m_Offset, sizeof m_Offset);
+ return (PackedOffset & MaxOffset) * OffsetAlignment;
+ }
+
+ inline uint64_t GetSize() const { return m_Size; }
+
+private:
+ inline void Init(uint32_t BlockIndex, uint64_t Offset, uint64_t Size)
+ {
+ ZEN_ASSERT(BlockIndex <= MaxBlockIndex);
+ ZEN_ASSERT(Offset <= MaxOffset);
+ ZEN_ASSERT(Size <= std::numeric_limits<std::uint32_t>::max());
+
+ m_Size = static_cast<uint32_t>(Size);
+ uint64_t PackedOffset = (static_cast<uint64_t>(BlockIndex) << MaxOffsetBits) + Offset;
+ memcpy(&m_Offset[0], &PackedOffset, sizeof m_Offset);
+ }
+
+ uint32_t m_Size;
+ uint8_t m_Offset[6];
+};
+
+#pragma pack(pop)
+
+struct BlockStoreFile : public RefCounted
+{
+ explicit BlockStoreFile(const std::filesystem::path& BlockPath);
+ ~BlockStoreFile();
+ const std::filesystem::path& GetPath() const;
+ void Open();
+ void Create(uint64_t InitialSize);
+ void MarkAsDeleteOnClose(std::error_code& Ec);
+ uint64_t FileSize();
+ IoBuffer GetChunk(uint64_t Offset, uint64_t Size);
+ void Read(void* Data, uint64_t Size, uint64_t FileOffset);
+ void Write(const void* Data, uint64_t Size, uint64_t FileOffset);
+ void Truncate(uint64_t Size);
+ void Flush();
+ void StreamByteRange(uint64_t FileOffset, uint64_t Size, std::function<void(const void* Data, uint64_t Size)>&& ChunkFun);
+
+private:
+ const std::filesystem::path m_Path;
+ IoBuffer m_IoBuffer;
+ BasicFile m_File;
+};
+
+void blockstore_forcelink();
+
+} // namespace zen
diff --git a/zenstore/include/zenstore/cas.h b/zenstore/include/zenstore/cas.h
index 7a7233c1c..5592fbd0a 100644
--- a/zenstore/include/zenstore/cas.h
+++ b/zenstore/include/zenstore/cas.h
@@ -69,7 +69,7 @@ public:
private:
// Q: should we protect this with a lock, or is that a higher level concern?
- std::unordered_set<IoHash> m_ChunkSet;
+ std::unordered_set<IoHash, IoHash::Hasher> m_ChunkSet;
};
/** Context object for data scrubbing
diff --git a/zenstore/include/zenstore/caslog.h b/zenstore/include/zenstore/caslog.h
index 1bd11800c..4b93a708f 100644
--- a/zenstore/include/zenstore/caslog.h
+++ b/zenstore/include/zenstore/caslog.h
@@ -4,18 +4,8 @@
#include "zenstore.h"
-#include <zencore/iobuffer.h>
-#include <zencore/string.h>
-#include <zencore/thread.h>
#include <zencore/uid.h>
#include <zenstore/basicfile.h>
-#include <zenstore/cas.h>
-
-#if ZEN_PLATFORM_WINDOWS
-# include <zencore/windows.h>
-#endif
-
-#include <functional>
namespace zen {
@@ -25,12 +15,20 @@ public:
CasLogFile();
~CasLogFile();
- void Open(std::filesystem::path FileName, size_t RecordSize, bool isCreate);
+ enum class Mode
+ {
+ kRead,
+ kWrite,
+ kTruncate
+ };
+
+ void Open(std::filesystem::path FileName, size_t RecordSize, Mode Mode);
void Append(const void* DataPointer, uint64_t DataSize);
- void Replay(std::function<void(const void*)>&& Handler);
+ void Replay(std::function<void(const void*)>&& Handler, uint64_t SkipEntryCount);
void Flush();
void Close();
uint64_t GetLogSize();
+ uint64_t GetLogCount();
private:
struct FileHeader
@@ -51,6 +49,8 @@ private:
static_assert(sizeof(FileHeader) == 64);
private:
+ void Open(std::filesystem::path FileName, size_t RecordSize, BasicFile::Mode Mode);
+
BasicFile m_File;
FileHeader m_Header;
size_t m_RecordSize = 1;
@@ -61,18 +61,20 @@ template<typename T>
class TCasLogFile : public CasLogFile
{
public:
- void Open(std::filesystem::path FileName, bool IsCreate) { CasLogFile::Open(FileName, sizeof(T), IsCreate); }
+ void Open(std::filesystem::path FileName, Mode Mode) { CasLogFile::Open(FileName, sizeof(T), Mode); }
// This should be called before the Replay() is called to do some basic sanity checking
bool Initialize() { return true; }
- void Replay(Invocable<const T&> auto Handler)
+ void Replay(Invocable<const T&> auto Handler, uint64_t SkipEntryCount)
{
- CasLogFile::Replay([&](const void* VoidPtr) {
- const T& Record = *reinterpret_cast<const T*>(VoidPtr);
+ CasLogFile::Replay(
+ [&](const void* VoidPtr) {
+ const T& Record = *reinterpret_cast<const T*>(VoidPtr);
- Handler(Record);
- });
+ Handler(Record);
+ },
+ SkipEntryCount);
}
void Append(const T& Record)
@@ -82,6 +84,8 @@ public:
CasLogFile::Append(&Record, sizeof Record);
}
+
+ void Append(const std::span<T>& Records) { CasLogFile::Append(Records.data(), sizeof(T) * Records.size()); }
};
} // namespace zen
diff --git a/zenstore/include/zenstore/gc.h b/zenstore/include/zenstore/gc.h
index b8ba338f0..bc8dee9a3 100644
--- a/zenstore/include/zenstore/gc.h
+++ b/zenstore/include/zenstore/gc.h
@@ -78,6 +78,9 @@ public:
GcClock::Duration MaxCacheDuration() const;
void MaxCacheDuration(GcClock::Duration Duration);
+ void DiskReservePath(const std::filesystem::path& Path);
+ uint64_t ClaimGCReserve();
+
inline bool Expired(GcClock::Tick TickCount) { return Time() - GcClock::TimePointFromTick(TickCount) > MaxCacheDuration(); }
private:
@@ -170,6 +173,7 @@ struct GcSchedulerConfig
std::chrono::seconds MaxCacheDuration{86400};
bool CollectSmallObjects = true;
bool Enabled = true;
+ uint64_t DiskReserveSize = 1ul << 28;
};
/**
diff --git a/zenstore/zenstore.cpp b/zenstore/zenstore.cpp
index dbb3dbbf7..5f40b7f60 100644
--- a/zenstore/zenstore.cpp
+++ b/zenstore/zenstore.cpp
@@ -3,6 +3,7 @@
#include "zenstore/zenstore.h"
#include <zenstore/basicfile.h>
+#include <zenstore/blockstore.h>
#include <zenstore/cas.h>
#include <zenstore/gc.h>
#include "compactcas.h"
@@ -16,6 +17,7 @@ zenstore_forcelinktests()
basicfile_forcelink();
CAS_forcelink();
filecas_forcelink();
+ blockstore_forcelink();
compactcas_forcelink();
gc_forcelink();
}
diff --git a/zenutil/zenserverprocess.cpp b/zenutil/zenserverprocess.cpp
index f49d5f6d8..3a4957b76 100644
--- a/zenutil/zenserverprocess.cpp
+++ b/zenutil/zenserverprocess.cpp
@@ -550,8 +550,8 @@ ZenServerInstance::SpawnServer(int BasePort, std::string_view AdditionalServerAr
const std::filesystem::path BaseDir = m_Env.ProgramBaseDir();
const std::filesystem::path Executable = BaseDir / "zenserver" ZEN_EXE_SUFFIX_LITERAL;
CreateProcOptions CreateOptions = {
- .WorkingDirectory = &CurrentDirectory,
- .Flags = CreationFlags,
+ .WorkingDirectory = &CurrentDirectory,
+ .Flags = CreationFlags,
};
CreateProcResult ChildPid = CreateProc(Executable, CommandLine.ToView(), CreateOptions);
#if ZEN_PLATFORM_WINDOWS