diff options
| Mode | File | Lines changed |
|---|---|---|
| -rw-r--r-- | .github/workflows/ci.yml | 3 |
| -rw-r--r-- | c/.gitignore | 1 |
| -rw-r--r-- | c/README.md | 55 |
| -rw-r--r-- | c/cmake/BLAKE3/Examples.cmake | 7 |
| -rw-r--r-- | c/example.c | 3 |
| -rw-r--r-- | c/example_tbb.c | 52 |
6 files changed, 88 insertions, 33 deletions
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index c859909..0ed8325 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -271,7 +271,7 @@ jobs: cat c/build/Testing/Temporary/LastTest.log # Build the example with TBB disabled/enabled. - run: | - cmake --fresh -S c -B c/build -G Ninja -DBLAKE3_TESTING=ON -DBLAKE3_TESTING_CI=ON -DBLAKE3_EXAMPLES=ON + cmake --fresh -S c -B c/build -G Ninja -DBLAKE3_TESTING=ON -DBLAKE3_TESTING_CI=ON -DBLAKE3_EXAMPLES=ON "-DBLAKE3_USE_TBB=${{ matrix.use_tbb }}" cmake --build c/build --target blake3-example # Note that this jobs builds AArch64 binaries from an x86_64 host. @@ -376,6 +376,7 @@ jobs: ${{ matrix.os != 'windows-latest' || '& "C:/Program Files/Microsoft Visual Studio/2022/Enterprise/Common7/Tools/Launch-VsDevShell.ps1" -Arch amd64 -SkipAutomaticLocation' }} cmake -S c -B c/build -G Ninja -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=${{ github.workspace }}/target -DCMAKE_C_COMPILER=${{ matrix.toolchain.cc }} -DCMAKE_CXX_COMPILER=${{ matrix.toolchain.cxx }} -DBLAKE3_USE_TBB=${{ matrix.use_tbb }} -DBLAKE3_FETCH_TBB=${{ matrix.os == 'windows-latest' && 'YES' || 'NO' }} -DBLAKE3_EXAMPLES=ON cmake --build c/build --target install + cmake_3-9_build: name: CMake 3.9.6 ubuntu-latest runs-on: ubuntu-latest diff --git a/c/.gitignore b/c/.gitignore index ff52a80..6385bd3 100644 --- a/c/.gitignore +++ b/c/.gitignore @@ -1,5 +1,6 @@ blake3 example +example_tbb build/ *.o diff --git a/c/README.md b/c/README.md index 8a108c6..6bd6add 100644 --- a/c/README.md +++ b/c/README.md @@ -9,7 +9,6 @@ result: #include "blake3.h" #include <errno.h> #include <stdio.h> -#include <stdlib.h> #include <string.h> #include <unistd.h> @@ -28,7 +27,7 @@ int main(void) { break; // end of file } else { fprintf(stderr, "read failed: %s\n", strerror(errno)); - exit(1); + return 1; } } @@ -188,13 +187,12 @@ than `blake3_hasher_update` for inputs under 128 KiB. 
That threshold varies quite a lot across different processors, and it's important to benchmark your specific use case. -Hashing large files with this function typically requires +Hashing large files with this function usually requires [memory-mapping](https://en.wikipedia.org/wiki/Memory-mapped_file), since -copying a file into memory is a single-threaded operation that takes longer -than hashing the resulting buffer with this function. Note that hashing a -memory-mapped file leads to a lot of "random" disk reads, which perform well on -SSD but _very poorly_ on spinning disks. Again it's important to benchmark your -specific use case. +reading a file into memory in a single-threaded loop takes longer than hashing +the resulting buffer. Note that hashing a memory-mapped file with this function +produces a "random" pattern of disk reads, which can be slow on spinning disks. +Again it's important to benchmark your specific use case. This implementation doesn't require configuration of thread resources and will use as many cores as possible by default. More fine-grained control of @@ -375,34 +373,31 @@ in call to always_inline ‘vaddq_u32’: target specific option mismatch ...then you may need to add something like `-mfpu=neon-vfpv4 -mfloat-abi=hard`. -### oneTBB-based multi-threading +### Other Platforms -Optional multi-threading support with performance similar to [the Rust Rayon -implementation](https://docs.rs/blake3/latest/blake3/struct.Hasher.html#method.update_rayon) -is available when using the oneTBB library and compiling the optional C++ -support file: +The portable implementation should work on most other architectures. 
For +example: ```bash -g++ -c -O3 -fno-exceptions -fno-rtti -DBLAKE3_USE_TBB $(pkg-config --libs --cflags tbb) -o blake3_tbb.o blake3_tbb.cpp -gcc -O3 -o example -lstdc++ -DBLAKE3_USE_TBB $(pkg-config --libs --cflags tbb) blake3_tbb.o \ - example.c blake3.c blake3_dispatch.c blake3_portable.c \ - blake3_sse2_x86-64_unix.S blake3_sse41_x86-64_unix.S blake3_avx2_x86-64_unix.S blake3_avx512_x86-64_unix.S +gcc -shared -O3 -o libblake3.so blake3.c blake3_dispatch.c blake3_portable.c ``` -Note that while this _builds_ the multithreaded implementation, `example.c` -doesn't _use_ multithreading, because it doesn't call -`blake3_hasher_update_tbb`. - -NOTE: Compiling `blake3_tbb.cpp` with C++ exceptions _disabled_ is required in order to satisfy the -behavior that this implementation expects. The public API methods with external C linkage are marked -`noexcept`. Attempting to compile this file with exceptions _enabled_ will fail and emit a static -assertion message. Compiling with RTTI disabled is not mandatory but recommended for code size. +### Multithreading -### Other Platforms - -The portable implementation should work on most other architectures. For -example: +Multithreading is available using [oneTBB], by compiling the optional C++ +support file [`blake3_tbb.cpp`](./blake3_tbb.cpp). For an example of using +`mmap` (non-Windows) and `blake3_hasher_update_tbb` to get large-file +performance on par with [`b3sum`](../b3sum), see +[`example_tbb.c`](./example_tbb.c). 
You can build it like this: ```bash -gcc -shared -O3 -o libblake3.so blake3.c blake3_dispatch.c blake3_portable.c +g++ -c -O3 -fno-exceptions -fno-rtti -DBLAKE3_USE_TBB -o blake3_tbb.o blake3_tbb.cpp +gcc -O3 -o example_tbb -lstdc++ -ltbb -DBLAKE3_USE_TBB blake3_tbb.o example_tbb.c blake3.c \ + blake3_dispatch.c blake3_portable.c blake3_sse2_x86-64_unix.S blake3_sse41_x86-64_unix.S \ + blake3_avx2_x86-64_unix.S blake3_avx512_x86-64_unix.S ``` + +NOTE: `-fno-exceptions` or equivalent is required to compile `blake3_tbb.cpp`, +and public API methods with external C linkage are marked `noexcept`. Compiling +that file with exceptions enabled will fail. Compiling with RTTI disabled isn't +required but is recommended for code size. diff --git a/c/cmake/BLAKE3/Examples.cmake b/c/cmake/BLAKE3/Examples.cmake index 8911820..8f0edc5 100644 --- a/c/cmake/BLAKE3/Examples.cmake +++ b/c/cmake/BLAKE3/Examples.cmake @@ -3,4 +3,11 @@ if(NOT WIN32) example.c) target_link_libraries(blake3-example PRIVATE blake3) install(TARGETS blake3-example) + + if(BLAKE3_USE_TBB) + add_executable(blake3-example-tbb + example_tbb.c) + target_link_libraries(blake3-example-tbb PRIVATE blake3) + install(TARGETS blake3-example-tbb) + endif() endif() diff --git a/c/example.c b/c/example.c index ee8430b..41a707e 100644 --- a/c/example.c +++ b/c/example.c @@ -1,7 +1,6 @@ #include "blake3.h" #include <errno.h> #include <stdio.h> -#include <stdlib.h> #include <string.h> #include <unistd.h> @@ -20,7 +19,7 @@ int main(void) { break; // end of file } else { fprintf(stderr, "read failed: %s\n", strerror(errno)); - exit(1); + return 1; } } diff --git a/c/example_tbb.c b/c/example_tbb.c new file mode 100644 index 0000000..2b02fbe --- /dev/null +++ b/c/example_tbb.c @@ -0,0 +1,52 @@ +#include "blake3.h" +#include <errno.h> +#include <fcntl.h> +#include <stdio.h> +#include <string.h> +#include <sys/mman.h> +#include <sys/stat.h> + +int main(int argc, char **argv) { + // For each filepath argument, memory map it and 
hash it. + for (int i = 1; i < argc; i++) { + // Memory map the file. + int fd = open(argv[i], O_RDONLY); + if (fd == -1) { + fprintf(stderr, "open failed: %s\n", strerror(errno)); + return 1; + } + struct stat statbuf; + if (fstat(fd, &statbuf) == -1) { + fprintf(stderr, "stat failed: %s\n", strerror(errno)); + return 1; + } + void *mapped = mmap(NULL, statbuf.st_size, PROT_READ, MAP_PRIVATE, fd, 0); + if (mapped == MAP_FAILED) { + fprintf(stderr, "mmap failed: %s\n", strerror(errno)); + return 1; + } + + // Initialize the hasher. + blake3_hasher hasher; + blake3_hasher_init(&hasher); + + // Hash the mapped file using multiple threads. + blake3_hasher_update_tbb(&hasher, mapped, statbuf.st_size); + + // Unmap the file. + if (munmap(mapped, statbuf.st_size) == -1) { + fprintf(stderr, "munmap failed: %s\n", strerror(errno)); + return 1; + } + + // Finalize the hash. BLAKE3_OUT_LEN is the default output length, 32 bytes. + uint8_t output[BLAKE3_OUT_LEN]; + blake3_hasher_finalize(&hasher, output, BLAKE3_OUT_LEN); + + // Print the hash as hexadecimal. + for (size_t i = 0; i < BLAKE3_OUT_LEN; i++) { + printf("%02x", output[i]); + } + printf("\n"); + } +} |
