aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorsilvanshade <[email protected]>2025-02-08 05:28:20 -0700
committerJack O'Connor <[email protected]>2025-03-09 16:00:11 -0700
commitd9b49df0757f8bdfaa542e7181013fbf1555ff89 (patch)
treeb0b3323bd07e22d39fb032e53b019386be9fb640
parenta31e519869d5751370f50c39a99340660ee95bf7 (diff)
Implement TBB-based parallelism for C lib
-rw-r--r--.github/workflows/ci.yml141
-rw-r--r--README.md5
-rw-r--r--c/CMakeLists.txt84
-rw-r--r--c/README.md126
-rw-r--r--c/blake3.c63
-rw-r--r--c/blake3.h4
-rw-r--r--c/blake3_impl.h29
-rw-r--r--c/blake3_tbb.cpp37
-rw-r--r--c/cmake/BLAKE3/ContinuousIntegration.cmake235
-rw-r--r--c/cmake/BLAKE3/Examples.cmake6
-rw-r--r--c/cmake/BLAKE3/Testing.cmake3
-rw-r--r--c/dependencies/CMakeLists.txt3
-rw-r--r--c/dependencies/tbb/CMakeLists.txt33
13 files changed, 667 insertions, 102 deletions
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 4ce575e..4bb3aaf 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -225,54 +225,48 @@ jobs:
# Currently only on x86.
c_tests:
- name: C Makefile tests
+ name: C tests SIMD=${{ matrix.simd }} TBB=${{ matrix.use_tbb }}
runs-on: ubuntu-latest
-
+ strategy:
+ fail-fast: false
+ matrix:
+ use_tbb: ["OFF", "ON"]
+ simd: ["x86-intrinsics", "amd64-asm"]
steps:
- uses: actions/checkout@v4
- # Test the intrinsics-based implementations.
- - run: make -f Makefile.testing test
- working-directory: ./c
- - run: make -f Makefile.testing clean && rm blake3_sse2.c
- working-directory: ./c
- - run: BLAKE3_NO_SSE2=1 make -f Makefile.testing test
- working-directory: ./c
- - run: make -f Makefile.testing clean && rm blake3_sse41.c
- working-directory: ./c
- - run: BLAKE3_NO_SSE2=1 BLAKE3_NO_SSE41=1 make -f Makefile.testing test
- working-directory: ./c
- - run: make -f Makefile.testing clean && rm blake3_avx2.c
- working-directory: ./c
- - run: BLAKE3_NO_SSE2=1 BLAKE3_NO_SSE41=1 BLAKE3_NO_AVX2=1 make -f Makefile.testing test
- working-directory: ./c
- - run: make -f Makefile.testing clean && rm blake3_avx512.c
- working-directory: ./c
- - run: BLAKE3_NO_SSE2=1 BLAKE3_NO_SSE41=1 BLAKE3_NO_AVX2=1 BLAKE3_NO_AVX512=1 make -f Makefile.testing test
- working-directory: ./c
- # Test the assembly implementations.
- - run: make -f Makefile.testing test_asm
- working-directory: ./c
- - run: make -f Makefile.testing clean && rm blake3_sse2_x86-64_unix.S
- working-directory: ./c
- - run: BLAKE3_NO_SSE2=1 make -f Makefile.testing test_asm
- working-directory: ./c
- - run: make -f Makefile.testing clean && rm blake3_sse41_x86-64_unix.S
- working-directory: ./c
- - run: BLAKE3_NO_SSE2=1 BLAKE3_NO_SSE41=1 make -f Makefile.testing test_asm
- working-directory: ./c
- - run: make -f Makefile.testing clean && rm blake3_avx2_x86-64_unix.S
- working-directory: ./c
- - run: BLAKE3_NO_SSE2=1 BLAKE3_NO_SSE41=1 BLAKE3_NO_AVX2=1 make -f Makefile.testing test_asm
- working-directory: ./c
- - run: make -f Makefile.testing clean && rm blake3_avx512_x86-64_unix.S
- working-directory: ./c
- - run: BLAKE3_NO_SSE2=1 BLAKE3_NO_SSE41=1 BLAKE3_NO_AVX2=1 BLAKE3_NO_AVX512=1 make -f Makefile.testing test_asm
- working-directory: ./c
- # Restore the files we deleted above.
- - run: git checkout .
- # Build the example.
- - run: make -f Makefile.testing example
- working-directory: ./c
+ - run: |
+ sudo apt-get update
+ sudo apt-get install ninja-build libtbb-dev libtbb12
+ # Test the intrinsics-based and assembly-based implementations.
+ - run: |
+ cmake --fresh -S c -B c/build -G Ninja -DBLAKE3_TESTING=ON -DBLAKE3_TESTING_CI=ON "-DBLAKE3_SIMD_TYPE=${{ matrix.simd }}"
+ cmake --build c/build --target test
+ cat c/build/Testing/Temporary/LastTest.log
+ - run: |
+ cmake --fresh -S c -B c/build -G Ninja -DBLAKE3_TESTING=ON -DBLAKE3_TESTING_CI=ON "-DBLAKE3_SIMD_TYPE=${{ matrix.simd }}" -DBLAKE3_NO_SSE2=1
+ cmake --build c/build --target test
+ cat c/build/Testing/Temporary/LastTest.log
+ - run: |
+ cmake --fresh -S c -B c/build -G Ninja -DBLAKE3_TESTING=ON -DBLAKE3_TESTING_CI=ON "-DBLAKE3_SIMD_TYPE=${{ matrix.simd }}" -DBLAKE3_NO_SSE2=1 -DBLAKE3_NO_SSE41=1
+ cmake --build c/build --target test
+ cat c/build/Testing/Temporary/LastTest.log
+ - run: |
+ cmake --fresh -S c -B c/build -G Ninja -DBLAKE3_TESTING=ON -DBLAKE3_TESTING_CI=ON "-DBLAKE3_SIMD_TYPE=${{ matrix.simd }}" "-DBLAKE3_NO_SSE2=1" "-DBLAKE3_NO_SSE41=1" "-DBLAKE3_NO_AVX2=1"
+ cmake --build c/build --target test
+ cat c/build/Testing/Temporary/LastTest.log
+ - run: |
+ cmake --fresh -S c -B c/build -G Ninja -DBLAKE3_TESTING=ON -DBLAKE3_TESTING_CI=ON "-DBLAKE3_SIMD_TYPE=${{ matrix.simd }}" "-DBLAKE3_NO_SSE2=1" "-DBLAKE3_NO_SSE41=1" "-DBLAKE3_NO_AVX2=1" "-DBLAKE3_NO_AVX512=1"
+ cmake --build c/build --target test
+ cat c/build/Testing/Temporary/LastTest.log
+ # Test with TBB disabled/enabled.
+ - run: |
+ cmake --fresh -S c -B c/build -G Ninja -DBLAKE3_TESTING=ON -DBLAKE3_TESTING_CI=ON "-DBLAKE3_USE_TBB=${{ matrix.use_tbb }}"
+ cmake --build c/build --target test
+ cat c/build/Testing/Temporary/LastTest.log
+ # Build the example with TBB disabled/enabled.
+ # BLAKE3_USE_TBB must be forwarded from the matrix; otherwise both matrix legs
+ # build the example with the option's default value.
+ - run: |
+ cmake --fresh -S c -B c/build -G Ninja -DBLAKE3_TESTING=ON -DBLAKE3_TESTING_CI=ON "-DBLAKE3_USE_TBB=${{ matrix.use_tbb }}" -DBLAKE3_EXAMPLES=ON
+ cmake --build c/build --target blake3-example
# Note that this jobs builds AArch64 binaries from an x86_64 host.
build_apple_silicon:
@@ -323,28 +317,59 @@ jobs:
cd /work
~/.cargo/bin/cargo test --features prefer_intrinsics
- # CMake build test (Library only), current macOS/Linux only.
+ # CMake build test (Library only).
cmake_current_build:
- name: CMake ${{ matrix.os }} ${{ matrix.compiler }}
+ name: CMake ${{ matrix.os }} CC=${{ matrix.toolchain.cc }} CXX=${{ matrix.toolchain.cxx }} TBB=${{ matrix.use_tbb }}
runs-on: ${{ matrix.os }}
strategy:
fail-fast: false
matrix:
- os: ["ubuntu-latest", "macOS-latest", "windows-latest"]
- compiler: [gcc, clang, cl]
+ cmakeVersion: [latest]
+ ninjaVersion: [latest]
+ os: [ubuntu-latest, macOS-latest, windows-latest]
+ toolchain: [
+ { cc: cl, cxx: cl },
+ { cc: clang, cxx: clang++ },
+ { cc: clang-cl, cxx: clang-cl },
+ { cc: gcc, cxx: g++ },
+ ]
+ use_tbb: [OFF, ON]
exclude:
- - os: windows-latest
- compiler: gcc
- - os: ubuntu-latest
- compiler: msvc
- os: macOS-latest
- compiler: msvc
+ toolchain: { cc: cl, cxx: cl }
+ - os: macOS-latest
+ toolchain: { cc: clang-cl, cxx: clang-cl }
+ - os: ubuntu-latest
+ toolchain: { cc: cl, cxx: cl }
+ - os: ubuntu-latest
+ toolchain: { cc: clang-cl, cxx: clang-cl }
+ - os: windows-latest
+ toolchain: { cc: clang, cxx: clang++ }
+ use_tbb: ON
+ - os: windows-latest
+ toolchain: { cc: gcc, cxx: g++ }
+ use_tbb: ON
steps:
- uses: actions/checkout@v4
- - name: CMake generation
- run: cmake -S c -B c/build -DCMAKE_INSTALL_PREFIX=${{github.workspace}}/target
- - name: CMake build / install
- run: cmake --build c/build --target install
+ - uses: lukka/get-cmake@5f6e04f5267c8133f1273bf2103583fc72c46b17
+ with:
+ cmakeVersion: ${{ matrix.cmakeVersion }}
+ ninjaVersion: ${{ matrix.ninjaVersion }}
+ - if: matrix.os == 'macOS-latest'
+ name: Install dependencies on macOS
+ run: |
+ brew update
+ brew install tbb
+ - if: matrix.os == 'ubuntu-latest'
+ name: Install dependencies on Linux
+ run: |
+ sudo apt-get update
+ sudo apt-get install libtbb-dev libtbb12
+ # Configure, build and install with the matrix toolchain.
+ # BLAKE3_FETCH_TBB resolves to YES only on windows-latest and to NO on every other runner.
+ - name: CMake generation, build, install
+ run: |
+ ${{ matrix.os != 'windows-latest' || '& "C:/Program Files/Microsoft Visual Studio/2022/Enterprise/Common7/Tools/Launch-VsDevShell.ps1" -Arch amd64 -SkipAutomaticLocation' }}
+ cmake -S c -B c/build -G Ninja -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=${{ github.workspace }}/target -DCMAKE_C_COMPILER=${{ matrix.toolchain.cc }} -DCMAKE_CXX_COMPILER=${{ matrix.toolchain.cxx }} -DBLAKE3_USE_TBB=${{ matrix.use_tbb }} -DBLAKE3_FETCH_TBB=${{ matrix.os == 'windows-latest' && 'YES' || 'NO' }} -DBLAKE3_EXAMPLES=ON
+ cmake --build c/build --target install
cmake_3-9_build:
name: CMake 3.9.6 ubuntu-latest
runs-on: ubuntu-latest
diff --git a/README.md b/README.md
index c1ce961..dcda2f8 100644
--- a/README.md
+++ b/README.md
@@ -44,9 +44,8 @@ This repository is the official implementation of BLAKE3. It includes:
typical desktop hardware.
* The [C implementation](c), which like the Rust implementation includes
- SIMD code and runtime CPU feature detection on x86. Unlike the Rust
- implementation, it's [not currently multithreaded](c#multithreading). See
- [`c/README.md`](c/README.md).
+ SIMD code and runtime CPU feature detection on x86. The `BLAKE3_USE_TBB`
+ CMake option enables multithreading. See [`c/README.md`](c/README.md).
* The [Rust reference implementation](reference_impl/reference_impl.rs),
which is discussed in Section 5.1 of the [BLAKE3
diff --git a/c/CMakeLists.txt b/c/CMakeLists.txt
index 76c8947..ba88ed3 100644
--- a/c/CMakeLists.txt
+++ b/c/CMakeLists.txt
@@ -12,12 +12,20 @@ endif()
project(libblake3
VERSION 1.6.1
DESCRIPTION "BLAKE3 C implementation"
- LANGUAGES C ASM
+ LANGUAGES C CXX ASM
)
+list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake")
+
+option(BLAKE3_USE_TBB "Enable oneTBB parallelism" OFF)
+option(BLAKE3_FETCH_TBB "Allow fetching oneTBB from GitHub if not found on system" OFF)
+
+include(CTest)
include(FeatureSummary)
include(GNUInstallDirs)
+add_subdirectory(dependencies)
+
# architecture lists for which to enable assembly / SIMD sources
set(BLAKE3_AMD64_NAMES amd64 AMD64 x86_64)
set(BLAKE3_X86_NAMES i686 x86 X86)
@@ -138,6 +146,11 @@ set_target_properties(blake3 PROPERTIES
C_EXTENSIONS OFF
)
target_compile_features(blake3 PUBLIC c_std_99)
+if(CMAKE_VERSION VERSION_GREATER_EQUAL 3.12)
+ target_compile_features(blake3 PUBLIC cxx_std_20)
+ # else: add it further below through `BLAKE3_CXXFLAGS_*`
+endif()
+
# ensure C_EXTENSIONS OFF is respected without overriding CMAKE_C_STANDARD
# which may be set by the user or toolchain file
if (NOT POLICY CMP0128 AND NOT DEFINED CMAKE_C_STANDARD)
@@ -204,6 +217,67 @@ else()
message(FATAL_ERROR "BLAKE3_SIMD_TYPE is set to an unknown value: '${BLAKE3_SIMD_TYPE}'")
endif()
+if(BLAKE3_USE_TBB)
+ find_package(TBB 2021.11.0 QUIET)
+ if(NOT TBB_FOUND AND NOT TARGET TBB::tbb)
+ message(WARNING
+ "oneTBB not found; disabling BLAKE3_USE_TBB\n"
+ "Enable BLAKE3_FETCH_TBB to automatically fetch and build oneTBB"
+ )
+ set(BLAKE3_USE_TBB OFF)
+ else()
+ target_sources(blake3
+ PRIVATE
+ blake3_tbb.cpp)
+ target_link_libraries(blake3
+ PUBLIC
+ # Make shared TBB a transitive dependency. The consuming program is technically not required
+ # to link TBB in order for libblake3 to function but we do this in order to prevent the
+ # possibility of multiple separate TBB runtimes being linked into a final program in case
+ # the consuming program also happens to already use TBB.
+ TBB::tbb)
+ target_compile_definitions(blake3
+ PUBLIC
+ BLAKE3_USE_TBB)
+ endif()
+endif()
+
+if(BLAKE3_USE_TBB)
+ # Define some scratch variables for building appropriate flags per compiler
+ if(CMAKE_VERSION VERSION_LESS 3.12)
+ # The `cxx_std_20` compile feature needs CMake 3.12+, so older versions get the
+ # language standard flag spliced into the per-frontend flag lists below.
+ set(BLAKE3_CXX_STANDARD_FLAGS_GNU -std=c++20)
+ set(BLAKE3_CXX_STANDARD_FLAGS_MSVC /std:c++20)
+ endif()
+ # Both frontend families disable C++ exceptions and RTTI for the private C++ sources:
+ # `-fno-exceptions`/`-fno-rtti` for GNU-like, `/EHs-c-`/`/GR-` for MSVC-like frontends.
+ set(BLAKE3_CXXFLAGS_GNU "-fno-exceptions;-fno-rtti;${BLAKE3_CXX_STANDARD_FLAGS_GNU}" CACHE STRING "C++ flags used for compiling private BLAKE3 library components with GNU-like compiler frontends.")
+ set(BLAKE3_CXXFLAGS_MSVC "/EHs-c-;/GR-;${BLAKE3_CXX_STANDARD_FLAGS_MSVC}" CACHE STRING "C++ flags used for compiling private BLAKE3 library components with MSVC-like compiler frontends.")
+ # Get the C++ compiler name without extension
+ get_filename_component(BLAKE3_CMAKE_CXX_COMPILER_NAME "${CMAKE_CXX_COMPILER}" NAME_WE)
+ # Strip any trailing versioning from the C++ compiler name
+ string(REGEX MATCH "^(clang\\+\\+|clang-cl)" BLAKE3_CMAKE_CXX_COMPILER_NAME "${BLAKE3_CMAKE_CXX_COMPILER_NAME}")
+
+ # TODO: Simplify with CMAKE_CXX_COMPILER_FRONTEND_VARIANT once min CMake version is 3.14.
+ if(CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang")
+ target_compile_options(blake3 PRIVATE $<$<COMPILE_LANGUAGE:CXX>:${BLAKE3_CXXFLAGS_GNU}>)
+ elseif(CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
+ # Clang reports the same compiler ID for both of its frontends, so the executable
+ # name decides whether GNU-style or MSVC-style flags are applied.
+ if(BLAKE3_CMAKE_CXX_COMPILER_NAME STREQUAL "clang++")
+ target_compile_options(blake3 PRIVATE $<$<COMPILE_LANGUAGE:CXX>:${BLAKE3_CXXFLAGS_GNU}>)
+ elseif(BLAKE3_CMAKE_CXX_COMPILER_NAME STREQUAL "clang-cl")
+ target_compile_options(blake3 PRIVATE $<$<COMPILE_LANGUAGE:CXX>:${BLAKE3_CXXFLAGS_MSVC}>)
+ endif()
+ elseif(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
+ target_compile_options(blake3 PRIVATE $<$<COMPILE_LANGUAGE:CXX>:${BLAKE3_CXXFLAGS_GNU}>)
+ elseif(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
+ target_compile_options(blake3 PRIVATE $<$<COMPILE_LANGUAGE:CXX>:${BLAKE3_CXXFLAGS_MSVC}>)
+ endif()
+
+ # Undefine scratch variables
+ unset(BLAKE3_CXX_STANDARD_FLAGS_GNU)
+ unset(BLAKE3_CXX_STANDARD_FLAGS_MSVC)
+ unset(BLAKE3_CMAKE_CXX_COMPILER_NAME)
+ unset(BLAKE3_CXXFLAGS_GNU)
+ unset(BLAKE3_CXXFLAGS_MSVC)
+endif()
+
+
# cmake install support
install(FILES blake3.h DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}")
install(TARGETS blake3 EXPORT blake3-targets
@@ -267,4 +341,12 @@ install(FILES "${CMAKE_BINARY_DIR}/libblake3.pc"
add_feature_info("AMD64 assembly" BLAKE3_SIMD_AMD64_ASM "The library uses hand written amd64 SIMD assembly.")
add_feature_info("x86 SIMD intrinsics" BLAKE3_SIMD_X86_INTRINSICS "The library uses x86 SIMD intrinsics.")
add_feature_info("NEON SIMD intrinsics" BLAKE3_SIMD_NEON_INTRINSICS "The library uses NEON SIMD intrinsics.")
+add_feature_info("oneTBB parallelism" BLAKE3_USE_TBB "The library uses oneTBB parallelism.")
feature_summary(WHAT ENABLED_FEATURES)
+
+if(BLAKE3_EXAMPLES)
+ include(BLAKE3/Examples)
+endif()
+if(BLAKE3_TESTING)
+ include(BLAKE3/Testing)
+endif()
diff --git a/c/README.md b/c/README.md
index 965d8c7..c012233 100644
--- a/c/README.md
+++ b/c/README.md
@@ -84,6 +84,8 @@ Initialize a `blake3_hasher` in the default hashing mode.
---
+### Single-threaded update
+
```c
void blake3_hasher_update(
blake3_hasher *self,
@@ -91,7 +93,39 @@ void blake3_hasher_update(
size_t input_len);
```
-Add input to the hasher. This can be called any number of times.
+Add input to the hasher with single-threaded update. This can be called any number of times.
+
+### Multi-threaded update
+
+```c
+void blake3_hasher_update_tbb(
+ blake3_hasher *self,
+ const void *input,
+ size_t input_len);
+```
+
+NOTE: This function is only enabled when the library is compiled with CMake option `BLAKE3_USE_TBB`
+and when the oneTBB library is detected on the host system. See the building instructions for
+further details.
+
+NOTE: macOS and Linux users should probably install TBB through their package manager but Windows
+users, or users with special deployment scenarios, may wish to enable `BLAKE3_FETCH_TBB` in
+order to automatically fetch, build, and install TBB directly alongside BLAKE3.
+
+Add input to the hasher with multi-threaded update. This can be called any number of times.
+
+This update function uses [oneTBB](https://uxlfoundation.github.io/oneTBB/) task groups
+across which the input data is partitioned and dispatched for further processing.
+
+Input buffers as large as possible should be preferred in order to minimize additional
+overhead inherent in coordinating parallel tasks. If the input buffer is too small,
+the performance of this update function may be no faster than the single-threaded
+implementation or may even be slower.
+
+This implementation does not require configuration of thread resources and will use as
+many cores as possible by default. If the update function is used within the context of a
+larger program which uses the [oneTBB](https://uxlfoundation.github.io/oneTBB/) API, more
+fine-grained control of resources is possible.
---
@@ -184,10 +218,7 @@ void blake3_hasher_reset(
Reset the hasher to its initial state, prior to any calls to
`blake3_hasher_update`. Currently this is no different from calling
-`blake3_hasher_init` or similar again. However, if this implementation gains
-multithreading support in the future, and if `blake3_hasher` holds (optional)
-threading resources, this function will reuse those resources. Until then, this
-is mainly for feature compatibility with the Rust implementation.
+`blake3_hasher_init` or similar again.
# Security Notes
@@ -207,12 +238,57 @@ smell](https://en.wikipedia.org/wiki/Design_smell) in any case.
# Building
-This implementation is just C and assembly files. It doesn't include a
-public-facing build system. (The `Makefile` in this directory is only
-for testing.) Instead, the intention is that you can include these files
-in whatever build system you're already using. This section describes
-the commands your build system should execute, or which you can execute
-by hand. Note that these steps may change in future versions.
+The easiest and most complete method of compiling the BLAKE3 library is with CMake.
+
+This is the method described in the next section.
+
+Toward the end of the building section there are more in depth notes about compiling manually and
+things that are useful to understand if you need to adapt the implementation to some pre-existing
+custom build system.
+
+## CMake
+
+The BLAKE3 library requires a minimum version of CMake 3.9.
+
+The following invocation will compile and install `libblake3`:
+
+With recent CMake:
+
+```bash
+cmake -S c -B c/build "-DCMAKE_INSTALL_PREFIX=/usr/local"
+cmake --build c/build --target install
+```
+
+With an older CMake:
+
+```bash
+cd c
+mkdir build
+cd build
+cmake .. "-DCMAKE_INSTALL_PREFIX=/usr/local"
+cmake --build . --target install
+```
+
+The following options are available when compiling with CMake:
+
+- `BLAKE3_USE_TBB`: Enable oneTBB parallelism (Requires a C++20 capable compiler)
+- `BLAKE3_FETCH_TBB`: Allow fetching oneTBB from GitHub (only if not found on system)
+- `BLAKE3_EXAMPLES`: Compile and install example programs
+
+These can be enabled in the following way:
+
+```bash
+cmake -S c -B c/build "-DCMAKE_INSTALL_PREFIX=/usr/local" -DBLAKE3_USE_TBB=1 -DBLAKE3_FETCH_TBB=1
+```
+
+## Building manually
+
+This implementation is mostly C and assembly files with some minor parts in C++ for optional
+features.
+
+The intention is that the implementation is simple enough that it can be easily compiled by hand
+without a build system, or that the sources can be adapted without much difficulty to whatever
+custom build system you may happen to be using.
## x86
@@ -300,6 +376,23 @@ in call to always_inline ‘vaddq_u32’: target specific option mismatch
...then you may need to add something like `-mfpu=neon-vfpv4
-mfloat-abi=hard`.
+## oneTBB-based multi-threading
+
+Optional multi-threading support with performance similar to the Rust Rayon implementation is
+available when using the oneTBB library and compiling the optional C++ support file:
+
+```bash
+g++ -c -O3 -fno-exceptions -fno-rtti -DBLAKE3_USE_TBB $(pkg-config --libs --cflags tbb) -o blake3_tbb.o blake3_tbb.cpp
+gcc -O3 -o example -lstdc++ -DBLAKE3_USE_TBB $(pkg-config --libs --cflags tbb) blake3_tbb.o \
+ example.c blake3.c blake3_dispatch.c blake3_portable.c \
+ blake3_sse2_x86-64_unix.S blake3_sse41_x86-64_unix.S blake3_avx2_x86-64_unix.S blake3_avx512_x86-64_unix.S
+```
+
+NOTE: Compiling `blake3_tbb.cpp` with C++ exceptions _disabled_ is required in order to satisfy the
+behavior that this implementation expects. The public API methods with external C linkage are marked
+`noexcept`. Attempting to compile this file with exceptions _enabled_ will fail and emit a static
+assertion message. Compiling with RTTI disabled is not mandatory but recommended for code size.
+
## Other Platforms
The portable implementation should work on most other architectures. For
@@ -308,14 +401,3 @@ example:
```bash
gcc -shared -O3 -o libblake3.so blake3.c blake3_dispatch.c blake3_portable.c
```
-
-# Multithreading
-
-Unlike the Rust implementation, the C implementation doesn't currently support
-multithreading. A future version of this library could add support by taking an
-optional dependency on OpenMP or similar. Alternatively, we could expose a
-lower-level API to allow callers to implement concurrency themselves. The
-former would be more convenient and less error-prone, but the latter would give
-callers the maximum possible amount of control. The best choice here depends on
-the specific use case, so if you have a use case for multithreaded hashing in
-C, please file a GitHub issue and let us know.
diff --git a/c/blake3.c b/c/blake3.c
index 7e6d01e..f21973e 100644
--- a/c/blake3.c
+++ b/c/blake3.c
@@ -265,11 +265,10 @@ INLINE size_t compress_parents_parallel(const uint8_t *child_chaining_values,
// Why not just have the caller split the input on the first update(), instead
// of implementing this special rule? Because we don't want to limit SIMD or
// multi-threading parallelism for that update().
-static size_t blake3_compress_subtree_wide(const uint8_t *input,
- size_t input_len,
- const uint32_t key[8],
- uint64_t chunk_counter,
- uint8_t flags, uint8_t *out) {
+size_t blake3_compress_subtree_wide(const uint8_t *input, size_t input_len,
+ const uint32_t key[8],
+ uint64_t chunk_counter, uint8_t flags,
+ uint8_t *out, bool use_tbb) {
// Note that the single chunk case does *not* bump the SIMD degree up to 2
// when it is 1. If this implementation adds multi-threading in the future,
// this gives us the option of multi-threading even the 2-chunk case, which
@@ -303,12 +302,24 @@ static size_t blake3_compress_subtree_wide(const uint8_t *input,
}
uint8_t *right_cvs = &cv_array[degree * BLAKE3_OUT_LEN];
- // Recurse! If this implementation adds multi-threading support in the
- // future, this is where it will go.
- size_t left_n = blake3_compress_subtree_wide(input, left_input_len, key,
- chunk_counter, flags, cv_array);
- size_t right_n = blake3_compress_subtree_wide(
- right_input, right_input_len, key, right_chunk_counter, flags, right_cvs);
+ // Recurse!
+ size_t left_n = -1;
+ size_t right_n = -1;
+
+#if defined(BLAKE3_USE_TBB)
+ blake3_compress_subtree_wide_join_tbb(
+ key, flags, use_tbb,
+ // left-hand side
+ input, left_input_len, chunk_counter, cv_array, &left_n,
+ // right-hand side
+ right_input, right_input_len, right_chunk_counter, right_cvs, &right_n);
+#else
+ left_n = blake3_compress_subtree_wide(
+ input, left_input_len, key, chunk_counter, flags, cv_array, use_tbb);
+ right_n = blake3_compress_subtree_wide(right_input, right_input_len, key,
+ right_chunk_counter, flags, right_cvs,
+ use_tbb);
+#endif // BLAKE3_USE_TBB
// The special case again. If simd_degree=1, then we'll have left_n=1 and
// right_n=1. Rather than compressing them into a single output, return
@@ -334,16 +345,18 @@ static size_t blake3_compress_subtree_wide(const uint8_t *input,
//
// As with compress_subtree_wide(), this function is not used on inputs of 1
// chunk or less. That's a different codepath.
-INLINE void compress_subtree_to_parent_node(
- const uint8_t *input, size_t input_len, const uint32_t key[8],
- uint64_t chunk_counter, uint8_t flags, uint8_t out[2 * BLAKE3_OUT_LEN]) {
+INLINE void
+compress_subtree_to_parent_node(const uint8_t *input, size_t input_len,
+ const uint32_t key[8], uint64_t chunk_counter,
+ uint8_t flags, uint8_t out[2 * BLAKE3_OUT_LEN],
+ bool use_tbb) {
#if defined(BLAKE3_TESTING)
assert(input_len > BLAKE3_CHUNK_LEN);
#endif
uint8_t cv_array[MAX_SIMD_DEGREE_OR_2 * BLAKE3_OUT_LEN];
size_t num_cvs = blake3_compress_subtree_wide(input, input_len, key,
- chunk_counter, flags, cv_array);
+ chunk_counter, flags, cv_array, use_tbb);
assert(num_cvs <= MAX_SIMD_DEGREE_OR_2);
// The following loop never executes when MAX_SIMD_DEGREE_OR_2 is 2, because
// as we just asserted, num_cvs will always be <=2 in that case. But GCC
@@ -459,8 +472,8 @@ INLINE void hasher_push_cv(blake3_hasher *self, uint8_t new_cv[BLAKE3_OUT_LEN],
self->cv_stack_len += 1;
}
-void blake3_hasher_update(blake3_hasher *self, const void *input,
- size_t input_len) {
+INLINE void blake3_hasher_update_base(blake3_hasher *self, const void *input,
+ size_t input_len, bool use_tbb) {
// Explicitly checking for zero avoids causing UB by passing a null pointer
// to memcpy. This comes up in practice with things like:
// std::vector<uint8_t> v;
@@ -546,7 +559,7 @@ void blake3_hasher_update(blake3_hasher *self, const void *input,
uint8_t cv_pair[2 * BLAKE3_OUT_LEN];
compress_subtree_to_parent_node(input_bytes, subtree_len, self->key,
self->chunk.chunk_counter,
- self->chunk.flags, cv_pair);
+ self->chunk.flags, cv_pair, use_tbb);
hasher_push_cv(self, cv_pair, self->chunk.chunk_counter);
hasher_push_cv(self, &cv_pair[BLAKE3_OUT_LEN],
self->chunk.chunk_counter + (subtree_chunks / 2));
@@ -568,6 +581,20 @@ void blake3_hasher_update(blake3_hasher *self, const void *input,
}
}
+void blake3_hasher_update(blake3_hasher *self, const void *input,
+ size_t input_len) {
+ bool use_tbb = false;
+ blake3_hasher_update_base(self, input, input_len, use_tbb);
+}
+
+#if defined(BLAKE3_USE_TBB)
+void blake3_hasher_update_tbb(blake3_hasher *self, const void *input,
+ size_t input_len) {
+ bool use_tbb = true;
+ blake3_hasher_update_base(self, input, input_len, use_tbb);
+}
+#endif // BLAKE3_USE_TBB
+
void blake3_hasher_finalize(const blake3_hasher *self, uint8_t *out,
size_t out_len) {
blake3_hasher_finalize_seek(self, 0, out, out_len);
diff --git a/c/blake3.h b/c/blake3.h
index d917503..14fdfdd 100644
--- a/c/blake3.h
+++ b/c/blake3.h
@@ -69,6 +69,10 @@ BLAKE3_API void blake3_hasher_init_derive_key_raw(blake3_hasher *self, const voi
size_t context_len);
BLAKE3_API void blake3_hasher_update(blake3_hasher *self, const void *input,
size_t input_len);
+#if defined(BLAKE3_USE_TBB)
+BLAKE3_API void blake3_hasher_update_tbb(blake3_hasher *self, const void *input,
+ size_t input_len);
+#endif // BLAKE3_USE_TBB
BLAKE3_API void blake3_hasher_finalize(const blake3_hasher *self, uint8_t *out,
size_t out_len);
BLAKE3_API void blake3_hasher_finalize_seek(const blake3_hasher *self, uint64_t seek,
diff --git a/c/blake3_impl.h b/c/blake3_impl.h
index 51d792a..facd599 100644
--- a/c/blake3_impl.h
+++ b/c/blake3_impl.h
@@ -9,6 +9,10 @@
#include "blake3.h"
+#ifdef __cplusplus
+extern "C" {
+#endif
+
// internal flags
enum blake3_flags {
CHUNK_START = 1 << 0,
@@ -28,6 +32,12 @@ enum blake3_flags {
#define INLINE static inline __attribute__((always_inline))
#endif
+#ifdef __cplusplus
+#define NOEXCEPT noexcept
+#else
+#define NOEXCEPT
+#endif
+
#if (defined(__x86_64__) || defined(_M_X64)) && !defined(_M_ARM64EC)
#define IS_X86
#define IS_X86_64
@@ -210,6 +220,22 @@ void blake3_hash_many(const uint8_t *const *inputs, size_t num_inputs,
size_t blake3_simd_degree(void);
+BLAKE3_PRIVATE size_t blake3_compress_subtree_wide(const uint8_t *input, size_t input_len,
+ const uint32_t key[8],
+ uint64_t chunk_counter, uint8_t flags,
+ uint8_t *out, bool use_tbb);
+
+#if defined(BLAKE3_USE_TBB)
+BLAKE3_PRIVATE void blake3_compress_subtree_wide_join_tbb(
+ // shared params
+ const uint32_t key[8], uint8_t flags, bool use_tbb,
+ // left-hand side params
+ const uint8_t *l_input, size_t l_input_len, uint64_t l_chunk_counter,
+ uint8_t *l_cvs, size_t *l_n,
+ // right-hand side params
+ const uint8_t *r_input, size_t r_input_len, uint64_t r_chunk_counter,
+ uint8_t *r_cvs, size_t *r_n) NOEXCEPT;
+#endif
// Declarations for implementation-specific functions.
void blake3_compress_in_place_portable(uint32_t cv[8],
@@ -300,5 +326,8 @@ void blake3_hash_many_neon(const uint8_t *const *inputs, size_t num_inputs,
uint8_t flags_end, uint8_t *out);
#endif
+#ifdef __cplusplus
+}
+#endif
#endif /* BLAKE3_IMPL_H */
diff --git a/c/blake3_tbb.cpp b/c/blake3_tbb.cpp
new file mode 100644
index 0000000..c2bc2db
--- /dev/null
+++ b/c/blake3_tbb.cpp
@@ -0,0 +1,37 @@
+#include <cstddef>
+#include <cstdint>
+
+#include <oneapi/tbb/parallel_invoke.h>
+
+#include "blake3_impl.h"
+
+static_assert(TBB_USE_EXCEPTIONS == 0,
+ "This file should be compiled with C++ exceptions disabled.");
+
+extern "C" void blake3_compress_subtree_wide_join_tbb(
+ // shared params
+ const uint32_t key[8], uint8_t flags, bool use_tbb,
+ // left-hand side params
+ const uint8_t *l_input, size_t l_input_len, uint64_t l_chunk_counter,
+ uint8_t *l_cvs, size_t *l_n,
+ // right-hand side params
+ const uint8_t *r_input, size_t r_input_len, uint64_t r_chunk_counter,
+ uint8_t *r_cvs, size_t *r_n) noexcept {
+ if (!use_tbb) {
+ *l_n = blake3_compress_subtree_wide(l_input, l_input_len, key,
+ l_chunk_counter, flags, l_cvs, use_tbb);
+ *r_n = blake3_compress_subtree_wide(r_input, r_input_len, key,
+ r_chunk_counter, flags, r_cvs, use_tbb);
+ return;
+ }
+
+ oneapi::tbb::parallel_invoke(
+ [=]() {
+ *l_n = blake3_compress_subtree_wide(
+ l_input, l_input_len, key, l_chunk_counter, flags, l_cvs, use_tbb);
+ },
+ [=]() {
+ *r_n = blake3_compress_subtree_wide(
+ r_input, r_input_len, key, r_chunk_counter, flags, r_cvs, use_tbb);
+ });
+}
diff --git a/c/cmake/BLAKE3/ContinuousIntegration.cmake b/c/cmake/BLAKE3/ContinuousIntegration.cmake
new file mode 100644
index 0000000..57a1f18
--- /dev/null
+++ b/c/cmake/BLAKE3/ContinuousIntegration.cmake
@@ -0,0 +1,235 @@
+# Preamble for the CI-only test configuration: declare the minimum CMake
+# version for this file, reject shared-library builds, and load CTest.
+cmake_minimum_required(VERSION 3.13 FATAL_ERROR)
+
+# Shared-library builds are rejected outright in this configuration.
+if(BUILD_SHARED_LIBS)
+ message(FATAL_ERROR "BUILD_SHARED_LIBS is incompatible with BLAKE3_TESTING_CI")
+endif()
+
+# Load the CTest module; the add_test() call at the end of this file registers
+# the CI test with it.
+include(CTest)
+
+# `blake3-testing` is a CI-only twin of the regular `blake3` library target.
+#
+# Keeping it separate lets CI compile many feature combinations without
+# exposing each of them as an option on the main `blake3` target.
+#
+# Only the sources are attached here; the remaining properties are copied
+# over from the regular `blake3` target further down in this file.
+add_library(blake3-testing)
+target_sources(blake3-testing
+  PRIVATE
+    blake3.c
+    blake3_dispatch.c
+    blake3_portable.c
+)
+
+# The TBB-backed join implementation is compiled in only when TBB support was
+# requested and TBB_FOUND is set.
+if(TBB_FOUND AND BLAKE3_USE_TBB)
+  target_sources(blake3-testing PRIVATE blake3_tbb.cpp)
+endif()
+
+# Select the SIMD sources for `blake3-testing` according to BLAKE3_SIMD_TYPE.
+#
+# For the two x86 variants each instruction set can be left out individually
+# with BLAKE3_NO_AVX2 / BLAKE3_NO_AVX512 / BLAKE3_NO_SSE2 / BLAKE3_NO_SSE41.
+if(BLAKE3_SIMD_TYPE STREQUAL "amd64-asm")
+  # Conditionally add amd64 asm files to `blake3-testing` sources. The file
+  # flavour (MSVC .asm, Windows GNU .S, Unix .S) follows the toolchain/platform.
+  if(MSVC)
+    if(NOT BLAKE3_NO_AVX2)
+      list(APPEND BLAKE3_TESTING_AMD64_ASM_SOURCES blake3_avx2_x86-64_windows_msvc.asm)
+    endif()
+    if(NOT BLAKE3_NO_AVX512)
+      list(APPEND BLAKE3_TESTING_AMD64_ASM_SOURCES blake3_avx512_x86-64_windows_msvc.asm)
+    endif()
+    if(NOT BLAKE3_NO_SSE2)
+      list(APPEND BLAKE3_TESTING_AMD64_ASM_SOURCES blake3_sse2_x86-64_windows_msvc.asm)
+    endif()
+    if(NOT BLAKE3_NO_SSE41)
+      list(APPEND BLAKE3_TESTING_AMD64_ASM_SOURCES blake3_sse41_x86-64_windows_msvc.asm)
+    endif()
+  elseif(CMAKE_C_COMPILER_ID STREQUAL "GNU"
+         OR CMAKE_C_COMPILER_ID STREQUAL "Clang"
+         OR CMAKE_C_COMPILER_ID STREQUAL "AppleClang")
+    if(WIN32)
+      if(NOT BLAKE3_NO_AVX2)
+        list(APPEND BLAKE3_TESTING_AMD64_ASM_SOURCES blake3_avx2_x86-64_windows_gnu.S)
+      endif()
+      if(NOT BLAKE3_NO_AVX512)
+        list(APPEND BLAKE3_TESTING_AMD64_ASM_SOURCES blake3_avx512_x86-64_windows_gnu.S)
+      endif()
+      if(NOT BLAKE3_NO_SSE2)
+        list(APPEND BLAKE3_TESTING_AMD64_ASM_SOURCES blake3_sse2_x86-64_windows_gnu.S)
+      endif()
+      if(NOT BLAKE3_NO_SSE41)
+        list(APPEND BLAKE3_TESTING_AMD64_ASM_SOURCES blake3_sse41_x86-64_windows_gnu.S)
+      endif()
+    elseif(UNIX)
+      if(NOT BLAKE3_NO_AVX2)
+        list(APPEND BLAKE3_TESTING_AMD64_ASM_SOURCES blake3_avx2_x86-64_unix.S)
+      endif()
+      if(NOT BLAKE3_NO_AVX512)
+        list(APPEND BLAKE3_TESTING_AMD64_ASM_SOURCES blake3_avx512_x86-64_unix.S)
+      endif()
+      if(NOT BLAKE3_NO_SSE2)
+        list(APPEND BLAKE3_TESTING_AMD64_ASM_SOURCES blake3_sse2_x86-64_unix.S)
+      endif()
+      if(NOT BLAKE3_NO_SSE41)
+        list(APPEND BLAKE3_TESTING_AMD64_ASM_SOURCES blake3_sse41_x86-64_unix.S)
+      endif()
+    endif()
+  endif()
+  # Attach the list assembled above. It used to be built but never consumed:
+  # the sources were read from a differently named variable, so the
+  # BLAKE3_NO_* switches did not influence which assembly files were added.
+  target_sources(blake3-testing PRIVATE ${BLAKE3_TESTING_AMD64_ASM_SOURCES})
+elseif(BLAKE3_SIMD_TYPE STREQUAL "x86-intrinsics")
+  # Conditionally add amd64 C files to `blake3-testing` sources
+  if(NOT DEFINED BLAKE3_CFLAGS_SSE2
+     OR NOT DEFINED BLAKE3_CFLAGS_SSE4.1
+     OR NOT DEFINED BLAKE3_CFLAGS_AVX2
+     OR NOT DEFINED BLAKE3_CFLAGS_AVX512)
+    # Only a warning: the sources below are still added, with whatever (possibly
+    # empty) per-file flags the BLAKE3_CFLAGS_* variables expand to.
+    message(WARNING "BLAKE3_SIMD_TYPE is set to 'x86-intrinsics' but no compiler flags are available for the target architecture.")
+  else()
+    set(BLAKE3_SIMD_X86_INTRINSICS ON)
+  endif()
+
+  # Each intrinsics source is compiled with its own instruction-set flags.
+  if(NOT BLAKE3_NO_AVX2)
+    target_sources(blake3-testing PRIVATE blake3_avx2.c)
+    set_source_files_properties(blake3_avx2.c PROPERTIES COMPILE_FLAGS "${BLAKE3_CFLAGS_AVX2}")
+  endif()
+  if(NOT BLAKE3_NO_AVX512)
+    target_sources(blake3-testing PRIVATE blake3_avx512.c)
+    set_source_files_properties(blake3_avx512.c PROPERTIES COMPILE_FLAGS "${BLAKE3_CFLAGS_AVX512}")
+  endif()
+  if(NOT BLAKE3_NO_SSE2)
+    target_sources(blake3-testing PRIVATE blake3_sse2.c)
+    set_source_files_properties(blake3_sse2.c PROPERTIES COMPILE_FLAGS "${BLAKE3_CFLAGS_SSE2}")
+  endif()
+  if(NOT BLAKE3_NO_SSE41)
+    target_sources(blake3-testing PRIVATE blake3_sse41.c)
+    set_source_files_properties(blake3_sse41.c PROPERTIES COMPILE_FLAGS "${BLAKE3_CFLAGS_SSE4.1}")
+  endif()
+
+elseif(BLAKE3_SIMD_TYPE STREQUAL "neon-intrinsics")
+  # Conditionally add neon C files to `blake3-testing` sources
+
+  target_sources(blake3-testing PRIVATE
+    blake3_neon.c
+  )
+  target_compile_definitions(blake3-testing PRIVATE
+    BLAKE3_USE_NEON=1
+  )
+
+  if(DEFINED BLAKE3_CFLAGS_NEON)
+    set_source_files_properties(blake3_neon.c PROPERTIES COMPILE_FLAGS "${BLAKE3_CFLAGS_NEON}")
+  endif()
+
+elseif(BLAKE3_SIMD_TYPE STREQUAL "none")
+  # Disable neon if simd type is "none". We check for individual amd64 features further below.
+
+  target_compile_definitions(blake3-testing PRIVATE
+    BLAKE3_USE_NEON=0
+  )
+
+endif()
+
+# Mirror every enabled BLAKE3_NO_<feature> switch as a private preprocessor
+# definition of the same name on `blake3-testing`.
+foreach(blake3_testing_feature AVX2 AVX512 SSE2 SSE41)
+  if(BLAKE3_NO_${blake3_testing_feature})
+    target_compile_definitions(blake3-testing
+      PRIVATE BLAKE3_NO_${blake3_testing_feature})
+  endif()
+endforeach()
+
+# BLAKE3_TESTING is public, so it also reaches targets linking this library.
+target_compile_definitions(blake3-testing PUBLIC BLAKE3_TESTING)
+
+# Carry over whatever compile definitions the regular `blake3` target has;
+# nothing is added when that property is unset.
+get_target_property(BLAKE3_COMPILE_DEFINITIONS blake3 COMPILE_DEFINITIONS)
+if(BLAKE3_COMPILE_DEFINITIONS)
+  target_compile_definitions(blake3-testing
+    PUBLIC ${BLAKE3_COMPILE_DEFINITIONS})
+endif()
+
+# Copy build properties from the regular `blake3` target onto `blake3-testing`.
+# Each section reads one property of `blake3` and acts only when the lookup
+# yields a true value.
+
+# Compile options: blake3's own options followed by the CI warning, hardening
+# and sanitizer flags.
+# NOTE(review): the CI-specific flags sit inside the `if`, so they appear to be
+# skipped entirely when `blake3` has no COMPILE_OPTIONS — confirm this is
+# intended.
+get_target_property(BLAKE3_COMPILE_OPTIONS blake3 COMPILE_OPTIONS)
+if(BLAKE3_COMPILE_OPTIONS)
+ target_compile_options(blake3-testing PRIVATE
+ ${BLAKE3_COMPILE_OPTIONS}
+ -O3
+ -Wall
+ -Wextra
+ -pedantic
+ -fstack-protector-strong
+ -D_FORTIFY_SOURCE=2
+ -fPIE
+ -fvisibility=hidden
+ -fsanitize=address,undefined
+ )
+endif()
+
+# Include directories: the fetched property is used only as a gate; the
+# directories actually added are the current source directory (build tree) and
+# CMAKE_INSTALL_INCLUDEDIR (install tree), not the values read from `blake3`.
+get_target_property(BLAKE3_INCLUDE_DIRECTORIES blake3 INCLUDE_DIRECTORIES)
+if(BLAKE3_INCLUDE_DIRECTORIES)
+ target_include_directories(blake3-testing PUBLIC
+ $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}>
+ $<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>
+ )
+endif()
+
+# Link libraries: forwarded as-is from `blake3`.
+get_target_property(BLAKE3_LINK_LIBRARIES blake3 LINK_LIBRARIES)
+if(BLAKE3_LINK_LIBRARIES)
+ target_link_libraries(blake3-testing PRIVATE ${BLAKE3_LINK_LIBRARIES})
+endif()
+
+# Link options: blake3's own options plus the sanitizer, PIE and RELRO flags.
+# NOTE(review): as with the compile options, the extra flags are only applied
+# when `blake3` has LINK_OPTIONS set — confirm this is intended.
+get_target_property(BLAKE3_LINK_OPTIONS blake3 LINK_OPTIONS)
+if(BLAKE3_LINK_OPTIONS)
+ target_link_options(blake3-testing PRIVATE
+ ${BLAKE3_LINK_OPTIONS}
+ -fsanitize=address,undefined
+ -pie
+ -Wl,-z,relro,-z,now
+ )
+endif()
+
+# Test executable built from main.c and linked against `blake3-testing`.
+# The binary is named `blake3` and is emitted into CMAKE_SOURCE_DIR.
+add_executable(blake3-asm-test main.c)
+target_link_libraries(blake3-asm-test PRIVATE blake3-testing)
+target_compile_definitions(blake3-asm-test PRIVATE BLAKE3_TESTING)
+set_property(TARGET blake3-asm-test PROPERTY OUTPUT_NAME blake3)
+set_property(TARGET blake3-asm-test PROPERTY
+  RUNTIME_OUTPUT_DIRECTORY ${CMAKE_SOURCE_DIR})
+
+# Optimisation, warnings, hardening and sanitizer flags for compilation.
+target_compile_options(blake3-asm-test
+  PRIVATE
+    -O3
+    -Wall
+    -Wextra
+    -pedantic
+    -fstack-protector-strong
+    -D_FORTIFY_SOURCE=2
+    -fPIE
+    -fvisibility=hidden
+    -fsanitize=address,undefined
+)
+
+# Matching sanitizer, PIE and RELRO flags for the link step.
+target_link_options(blake3-asm-test
+  PRIVATE
+    -fsanitize=address,undefined
+    -pie
+    -Wl,-z,relro,-z,now
+)
+
+# Register the `blake3-testing` CTest entry.
+#
+# The test invokes ctest in --build-and-test mode on CMAKE_SOURCE_DIR /
+# CMAKE_BINARY_DIR with the current generator and make program, builds the
+# `blake3-asm-test` target, and then runs test.py from CMAKE_SOURCE_DIR.
+# The arguments after --build-options forward the current BLAKE3_* settings
+# (and BUILD_SHARED_LIBS) to the nested configure step.
+# NOTE(review): `--fresh` is understood to need CMake 3.24 or newer, while the
+# top of this file only requires 3.13 — confirm the CI toolchain version.
+add_test(NAME blake3-testing
+ COMMAND "${CMAKE_CTEST_COMMAND}"
+ --verbose
+ --extra-verbose
+ --build-and-test "${CMAKE_SOURCE_DIR}" "${CMAKE_BINARY_DIR}"
+ --build-generator "${CMAKE_GENERATOR}"
+ --build-makeprogram "${CMAKE_MAKE_PROGRAM}"
+ --build-project libblake3
+ --build-target blake3-asm-test
+ --build-options
+ --fresh
+ "-DBUILD_SHARED_LIBS=${BUILD_SHARED_LIBS}"
+ "-DBLAKE3_TESTING=${BLAKE3_TESTING}"
+ "-DBLAKE3_TESTING_CI=${BLAKE3_TESTING_CI}"
+ "-DBLAKE3_USE_TBB=${BLAKE3_USE_TBB}"
+ "-DBLAKE3_SIMD_TYPE=${BLAKE3_SIMD_TYPE}"
+ "-DBLAKE3_NO_SSE2=${BLAKE3_NO_SSE2}"
+ "-DBLAKE3_NO_SSE41=${BLAKE3_NO_SSE41}"
+ "-DBLAKE3_NO_AVX2=${BLAKE3_NO_AVX2}"
+ "-DBLAKE3_NO_AVX512=${BLAKE3_NO_AVX512}"
+ --test-command
+ "${CMAKE_SOURCE_DIR}/test.py"
+ )
diff --git a/c/cmake/BLAKE3/Examples.cmake b/c/cmake/BLAKE3/Examples.cmake
new file mode 100644
index 0000000..8911820
--- /dev/null
+++ b/c/cmake/BLAKE3/Examples.cmake
@@ -0,0 +1,6 @@
+# The example program (example.c linked against `blake3`) is built and
+# installed on every platform except Windows.
+if(NOT WIN32)
+  add_executable(blake3-example example.c)
+  target_link_libraries(blake3-example
+    PRIVATE
+      blake3
+  )
+  install(TARGETS blake3-example)
+endif()
diff --git a/c/cmake/BLAKE3/Testing.cmake b/c/cmake/BLAKE3/Testing.cmake
new file mode 100644
index 0000000..1c22baa
--- /dev/null
+++ b/c/cmake/BLAKE3/Testing.cmake
@@ -0,0 +1,3 @@
+# Load the CI-specific test configuration only when BLAKE3_TESTING_CI is set.
+if(BLAKE3_TESTING_CI)
+  include(BLAKE3/ContinuousIntegration)
+endif()
diff --git a/c/dependencies/CMakeLists.txt b/c/dependencies/CMakeLists.txt
new file mode 100644
index 0000000..4382353
--- /dev/null
+++ b/c/dependencies/CMakeLists.txt
@@ -0,0 +1,3 @@
+# The `tbb` subdirectory is processed only when BLAKE3_USE_TBB is enabled.
+if(NOT BLAKE3_USE_TBB)
+  return()
+endif()
+
+add_subdirectory(tbb)
diff --git a/c/dependencies/tbb/CMakeLists.txt b/c/dependencies/tbb/CMakeLists.txt
new file mode 100644
index 0000000..0f51395
--- /dev/null
+++ b/c/dependencies/tbb/CMakeLists.txt
@@ -0,0 +1,33 @@
+# Locate oneTBB for BLAKE3_USE_TBB builds. An installed package (version
+# 2021.11.0 or newer) is looked up first; when none is found and
+# BLAKE3_FETCH_TBB is enabled, oneTBB is fetched and built from source.
+
+# GNU toolchains on Windows are rejected. FATAL_ERROR stops processing, so no
+# further statement is needed in this branch.
+if(WIN32 AND CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
+  message(FATAL_ERROR "BLAKE3_USE_TBB requires building with Clang or MSVC on Windows")
+endif()
+
+find_package(TBB 2021.11.0 QUIET)
+
+# The FetchContent module exists since CMake 3.11, but
+# FetchContent_MakeAvailable() (used below) was only added in 3.14, so that is
+# the version the fetch path is gated on.
+if(CMAKE_VERSION VERSION_GREATER_EQUAL 3.14)
+  include(FetchContent)
+
+  if(NOT TBB_FOUND AND BLAKE3_FETCH_TBB)
+    # Language standards applied to the fetched oneTBB build in this directory
+    # scope.
+    set(CMAKE_C_STANDARD 99)
+    set(CMAKE_C_EXTENSIONS OFF)
+
+    set(CMAKE_CXX_STANDARD 20)
+    set(CMAKE_CXX_EXTENSIONS ON)
+
+    # Default both options to OFF before the fetched project is configured.
+    # option() takes the help text second and the initial value third.
+    option(TBB_TEST "Enable testing" OFF)
+    option(TBBMALLOC_BUILD "Enable tbbmalloc build" OFF)
+
+    mark_as_advanced(TBB_TEST)
+    mark_as_advanced(TBBMALLOC_BUILD)
+
+    FetchContent_Declare(
+      TBB
+      GIT_REPOSITORY https://github.com/uxlfoundation/oneTBB
+      GIT_TAG        0c0ff192a2304e114bc9e6557582dfba101360ff # v2022.0.0
+      GIT_SHALLOW    TRUE
+    )
+
+    FetchContent_MakeAvailable(TBB)
+  endif()
+elseif(NOT TBB_FOUND AND BLAKE3_FETCH_TBB)
+  # Report the skipped fetch instead of failing later on an unknown command.
+  message(WARNING "BLAKE3_FETCH_TBB requires CMake 3.14 or newer; oneTBB was not found and will not be fetched")
+endif()