diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 6d204ec4..e75fc81c 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -32,7 +32,6 @@ jobs: version-file: "VERSION" update-version-in: | Cargo.toml:^version = "(\d+\.\d+\.\d+)" - Cargo.lock:name = "simsimd"\nversion = "(\d+\.\d+\.\d+)" package.json:"version": "(\d+\.\d+\.\d+)" CMakeLists.txt:VERSION (\d+\.\d+\.\d+) update-major-version-in: | diff --git a/.gitignore b/.gitignore index 744e6183..4745d5e2 100644 --- a/.gitignore +++ b/.gitignore @@ -69,3 +69,6 @@ target/ # perfplot charts *.png + +# Library crate: don't commit lockfile +Cargo.lock diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index e06ac137..b56902d8 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -49,7 +49,7 @@ sudo update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-12 100 sudo update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-12 100 ``` -To compile with the default Apple Clang on MacOS, use: +To compile with the default Apple Clang on macOS, use: ```sh brew install openblas @@ -63,8 +63,8 @@ cmake -D CMAKE_BUILD_TYPE=Release \ cmake --build build_release --config Release ``` -On MacOS it's recommended to use Homebrew and install Clang, as opposed to "Apple Clang". -Replacing the default compiler across the entire system is not recommended on MacOS, as it may break the system, but you can pass it as an environment variable: +On macOS it's recommended to use Homebrew and install Clang, as opposed to "Apple Clang". +Replacing the default compiler across the entire system is not recommended on macOS, as it may break the system, but you can pass it as an environment variable: ```sh brew install llvm openblas @@ -124,7 +124,7 @@ Here, `-s` will output the logs. The `-x` will stop on the first failure. The `-Wd` will silence overflows and runtime warnings. 
-When building on MacOS, same as with C/C++, use non-Apple Clang version: +When building on macOS, same as with C/C++, use non-Apple Clang version: ```sh brew install llvm @@ -174,7 +174,7 @@ $ python scripts/bench_vectors.py --help Before merging your changes you may want to test your changes against the entire matrix of Python versions USearch supports. -For that you need the `cibuildwheel`, which is tricky to use on MacOS and Windows, as it would target just the local environment. +For that you need the `cibuildwheel`, which is tricky to use on macOS and Windows, as it would target just the local environment. Still, if you have Docker running on any desktop OS, you can use it to build and test the Python bindings for all Python versions for Linux: ```sh @@ -184,7 +184,7 @@ cibuildwheel --platform linux # works on any OS and builds all cibuildwheel --platform linux --archs x86_64 # 64-bit x86, the most common on desktop and servers cibuildwheel --platform linux --archs aarch64 # 64-bit Arm for mobile devices, Apple M-series, and AWS Graviton cibuildwheel --platform linux --archs i686 # 32-bit Linux -cibuildwheel --platform macos # works only on MacOS +cibuildwheel --platform macos # works only on macOS cibuildwheel --platform windows # works only on Windows ``` @@ -194,7 +194,7 @@ You may need root privileges for multi-architecture builds: sudo $(which cibuildwheel) --platform linux ``` -On Windows and MacOS, to avoid frequent path resolution issues, you may want to use: +On Windows and macOS, to avoid frequent path resolution issues, you may want to use: ```sh python -m cibuildwheel --platform windows @@ -209,6 +209,13 @@ cargo bench open target/criterion/report/index.html ``` +To automatically detect the Minimum Supported Rust Version (MSRV): + +```sh +cargo +stable install cargo-msrv +cargo msrv find --ignore-lockfile +``` + ## JavaScript ### NodeJS @@ -217,7 +224,7 @@ If you don't have the environment configured, here are the [installation options ```sh wget 
-qO- https://raw.githubusercontent.com/nvm-sh/nvm/v0.40.1/install.sh | bash # Linux -curl -o- https://raw.githubusercontent.com/nvm-sh/nvm/v0.40.1/install.sh | bash # MacOS +curl -o- https://raw.githubusercontent.com/nvm-sh/nvm/v0.40.1/install.sh | bash # macOS ``` Install dependencies: @@ -242,7 +249,7 @@ If you don't have the environment configured, here are [installation options](ht ```sh wget -qO- https://deno.land/x/install/install.sh | sh # Linux -curl -fsSL https://deno.land/install.sh | sh # MacOS +curl -fsSL https://deno.land/install.sh | sh # macOS irm https://deno.land/install.ps1 | iex # Windows ``` diff --git a/Cargo.lock b/Cargo.lock deleted file mode 100644 index 972b8fb9..00000000 --- a/Cargo.lock +++ /dev/null @@ -1,713 +0,0 @@ -# This file is automatically @generated by Cargo. -# It is not intended for manual editing. -version = 4 - -[[package]] -name = "aho-corasick" -version = "1.1.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" -dependencies = [ - "memchr", -] - -[[package]] -name = "anes" -version = "0.1.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299" - -[[package]] -name = "anstyle" -version = "1.0.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "862ed96ca487e809f1c8e5a8447f6ee2cf102f846893800b20cebdf541fc6bbd" - -[[package]] -name = "autocfg" -version = "1.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" - -[[package]] -name = "bitflags" -version = "2.9.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1b8e56985ec62d17e9c1001dc89c88ecd7dc08e47eba5ec7c29c7b5eeecde967" - -[[package]] -name = "bumpalo" -version = "3.19.0" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "46c5e41b57b8bba42a04676d81cb89e9ee8e859a1a66f80a5a72e1cb76b34d43" - -[[package]] -name = "cast" -version = "0.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" - -[[package]] -name = "cc" -version = "1.2.29" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c1599538de2394445747c8cf7935946e3cc27e9625f889d979bfb2aaf569362" -dependencies = [ - "shlex", -] - -[[package]] -name = "cfg-if" -version = "1.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9555578bc9e57714c812a1f84e4fc5b4d21fcb063490c624de019f7464c91268" - -[[package]] -name = "ciborium" -version = "0.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "42e69ffd6f0917f5c029256a24d0161db17cea3997d185db0d35926308770f0e" -dependencies = [ - "ciborium-io", - "ciborium-ll", - "serde", -] - -[[package]] -name = "ciborium-io" -version = "0.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "05afea1e0a06c9be33d539b876f1ce3692f4afea2cb41f740e7743225ed1c757" - -[[package]] -name = "ciborium-ll" -version = "0.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "57663b653d948a338bfb3eeba9bb2fd5fcfaecb9e199e87e1eda4d9e8b240fd9" -dependencies = [ - "ciborium-io", - "half", -] - -[[package]] -name = "clap" -version = "4.5.40" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "40b6887a1d8685cebccf115538db5c0efe625ccac9696ad45c409d96566e910f" -dependencies = [ - "clap_builder", -] - -[[package]] -name = "clap_builder" -version = "4.5.40" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e0c66c08ce9f0c698cbce5c0279d0bb6ac936d8674174fe48f736533b964f59e" -dependencies = [ - "anstyle", - "clap_lex", -] - -[[package]] -name = "clap_lex" -version = 
"0.7.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b94f61472cee1439c0b966b47e3aca9ae07e45d070759512cd390ea2bebc6675" - -[[package]] -name = "criterion" -version = "0.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3bf7af66b0989381bd0be551bd7cc91912a655a58c6918420c9527b1fd8b4679" -dependencies = [ - "anes", - "cast", - "ciborium", - "clap", - "criterion-plot", - "itertools 0.13.0", - "num-traits", - "oorandom", - "plotters", - "rayon", - "regex", - "serde", - "serde_json", - "tinytemplate", - "walkdir", -] - -[[package]] -name = "criterion-plot" -version = "0.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6b50826342786a51a89e2da3a28f1c32b06e387201bc2d19791f622c673706b1" -dependencies = [ - "cast", - "itertools 0.10.5", -] - -[[package]] -name = "crossbeam-deque" -version = "0.8.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51" -dependencies = [ - "crossbeam-epoch", - "crossbeam-utils", -] - -[[package]] -name = "crossbeam-epoch" -version = "0.9.18" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" -dependencies = [ - "crossbeam-utils", -] - -[[package]] -name = "crossbeam-utils" -version = "0.8.21" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" - -[[package]] -name = "crunchy" -version = "0.2.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5" - -[[package]] -name = "either" -version = "1.15.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" - -[[package]] -name = "getrandom" -version 
= "0.3.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "26145e563e54f2cadc477553f1ec5ee650b00862f0a58bcd12cbdc5f0ea2d2f4" -dependencies = [ - "cfg-if", - "libc", - "r-efi", - "wasi", -] - -[[package]] -name = "half" -version = "2.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "459196ed295495a68f7d7fe1d84f6c4b7ff0e21fe3017b2f283c6fac3ad803c9" -dependencies = [ - "cfg-if", - "crunchy", -] - -[[package]] -name = "itertools" -version = "0.10.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473" -dependencies = [ - "either", -] - -[[package]] -name = "itertools" -version = "0.13.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "413ee7dfc52ee1a4949ceeb7dbc8a33f2d6c088194d9f922fb8318faf1f01186" -dependencies = [ - "either", -] - -[[package]] -name = "itoa" -version = "1.0.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c" - -[[package]] -name = "js-sys" -version = "0.3.77" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1cfaf33c695fc6e08064efbc1f72ec937429614f25eef83af942d0e227c3a28f" -dependencies = [ - "once_cell", - "wasm-bindgen", -] - -[[package]] -name = "libc" -version = "0.2.174" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1171693293099992e19cddea4e8b849964e9846f4acee11b3948bcc337be8776" - -[[package]] -name = "log" -version = "0.4.27" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "13dc2df351e3202783a1fe0d44375f7295ffb4049267b0f3018346dc122a1d94" - -[[package]] -name = "memchr" -version = "2.7.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "32a282da65faaf38286cf3be983213fcf1d2e2a58700e808f83f4ea9a4804bc0" - -[[package]] -name = "num-traits" 
-version = "0.2.19" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" -dependencies = [ - "autocfg", -] - -[[package]] -name = "once_cell" -version = "1.21.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" - -[[package]] -name = "oorandom" -version = "11.1.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d6790f58c7ff633d8771f42965289203411a5e5c68388703c06e14f24770b41e" - -[[package]] -name = "plotters" -version = "0.3.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5aeb6f403d7a4911efb1e33402027fc44f29b5bf6def3effcc22d7bb75f2b747" -dependencies = [ - "num-traits", - "plotters-backend", - "plotters-svg", - "wasm-bindgen", - "web-sys", -] - -[[package]] -name = "plotters-backend" -version = "0.3.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "df42e13c12958a16b3f7f4386b9ab1f3e7933914ecea48da7139435263a4172a" - -[[package]] -name = "plotters-svg" -version = "0.3.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "51bae2ac328883f7acdfea3d66a7c35751187f870bc81f94563733a154d7a670" -dependencies = [ - "plotters-backend", -] - -[[package]] -name = "ppv-lite86" -version = "0.2.21" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "85eae3c4ed2f50dcfe72643da4befc30deadb458a9b590d720cde2f2b1e97da9" -dependencies = [ - "zerocopy", -] - -[[package]] -name = "proc-macro2" -version = "1.0.95" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "02b3e5e68a3a1a02aad3ec490a98007cbc13c37cbe84a3cd7b8e406d76e7f778" -dependencies = [ - "unicode-ident", -] - -[[package]] -name = "quote" -version = "1.0.40" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"1885c039570dc00dcb4ff087a89e185fd56bae234ddc7f056a945bf36467248d" -dependencies = [ - "proc-macro2", -] - -[[package]] -name = "r-efi" -version = "5.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" - -[[package]] -name = "rand" -version = "0.9.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9fbfd9d094a40bf3ae768db9361049ace4c0e04a4fd6b359518bd7b73a73dd97" -dependencies = [ - "rand_chacha", - "rand_core", -] - -[[package]] -name = "rand_chacha" -version = "0.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb" -dependencies = [ - "ppv-lite86", - "rand_core", -] - -[[package]] -name = "rand_core" -version = "0.9.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "99d9a13982dcf210057a8a78572b2217b667c3beacbf3a0d8b454f6f82837d38" -dependencies = [ - "getrandom", -] - -[[package]] -name = "rayon" -version = "1.10.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b418a60154510ca1a002a752ca9714984e21e4241e804d32555251faf8b78ffa" -dependencies = [ - "either", - "rayon-core", -] - -[[package]] -name = "rayon-core" -version = "1.12.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1465873a3dfdaa8ae7cb14b4383657caab0b3e8a0aa9ae8e04b044854c8dfce2" -dependencies = [ - "crossbeam-deque", - "crossbeam-utils", -] - -[[package]] -name = "regex" -version = "1.11.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b544ef1b4eac5dc2db33ea63606ae9ffcfac26c1416a2806ae0bf5f56b201191" -dependencies = [ - "aho-corasick", - "memchr", - "regex-automata", - "regex-syntax", -] - -[[package]] -name = "regex-automata" -version = "0.4.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"809e8dc61f6de73b46c85f4c96486310fe304c434cfa43669d7b40f711150908" -dependencies = [ - "aho-corasick", - "memchr", - "regex-syntax", -] - -[[package]] -name = "regex-syntax" -version = "0.8.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" - -[[package]] -name = "rustversion" -version = "1.0.21" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a0d197bd2c9dc6e53b84da9556a69ba4cdfab8619eb41a8bd1cc2027a0f6b1d" - -[[package]] -name = "ryu" -version = "1.0.20" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f" - -[[package]] -name = "same-file" -version = "1.0.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" -dependencies = [ - "winapi-util", -] - -[[package]] -name = "serde" -version = "1.0.219" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f0e2c6ed6606019b4e29e69dbaba95b11854410e5347d525002456dbbb786b6" -dependencies = [ - "serde_derive", -] - -[[package]] -name = "serde_derive" -version = "1.0.219" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b0276cf7f2c73365f7157c8123c21cd9a50fbbd844757af28ca1f5925fc2a00" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "serde_json" -version = "1.0.140" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "20068b6e96dc6c9bd23e01df8827e6c7e1f2fddd43c21810382803c136b99373" -dependencies = [ - "itoa", - "memchr", - "ryu", - "serde", -] - -[[package]] -name = "shlex" -version = "1.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" - -[[package]] -name = "simsimd" -version = "6.5.1" -dependencies = [ 
- "cc", - "criterion", - "half", - "rand", -] - -[[package]] -name = "syn" -version = "2.0.104" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "17b6f705963418cdb9927482fa304bc562ece2fdd4f616084c50b7023b435a40" -dependencies = [ - "proc-macro2", - "quote", - "unicode-ident", -] - -[[package]] -name = "tinytemplate" -version = "1.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "be4d6b5f19ff7664e8c98d03e2139cb510db9b0a60b55f8e8709b689d939b6bc" -dependencies = [ - "serde", - "serde_json", -] - -[[package]] -name = "unicode-ident" -version = "1.0.18" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512" - -[[package]] -name = "walkdir" -version = "2.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b" -dependencies = [ - "same-file", - "winapi-util", -] - -[[package]] -name = "wasi" -version = "0.14.2+wasi-0.2.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9683f9a5a998d873c0d21fcbe3c083009670149a8fab228644b8bd36b2c48cb3" -dependencies = [ - "wit-bindgen-rt", -] - -[[package]] -name = "wasm-bindgen" -version = "0.2.100" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1edc8929d7499fc4e8f0be2262a241556cfc54a0bea223790e71446f2aab1ef5" -dependencies = [ - "cfg-if", - "once_cell", - "rustversion", - "wasm-bindgen-macro", -] - -[[package]] -name = "wasm-bindgen-backend" -version = "0.2.100" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2f0a0651a5c2bc21487bde11ee802ccaf4c51935d0d3d42a6101f98161700bc6" -dependencies = [ - "bumpalo", - "log", - "proc-macro2", - "quote", - "syn", - "wasm-bindgen-shared", -] - -[[package]] -name = "wasm-bindgen-macro" -version = "0.2.100" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "7fe63fc6d09ed3792bd0897b314f53de8e16568c2b3f7982f468c0bf9bd0b407" -dependencies = [ - "quote", - "wasm-bindgen-macro-support", -] - -[[package]] -name = "wasm-bindgen-macro-support" -version = "0.2.100" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8ae87ea40c9f689fc23f209965b6fb8a99ad69aeeb0231408be24920604395de" -dependencies = [ - "proc-macro2", - "quote", - "syn", - "wasm-bindgen-backend", - "wasm-bindgen-shared", -] - -[[package]] -name = "wasm-bindgen-shared" -version = "0.2.100" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1a05d73b933a847d6cccdda8f838a22ff101ad9bf93e33684f39c1f5f0eece3d" -dependencies = [ - "unicode-ident", -] - -[[package]] -name = "web-sys" -version = "0.3.77" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "33b6dd2ef9186f1f2072e409e99cd22a975331a6b3591b12c764e0e55c60d5d2" -dependencies = [ - "js-sys", - "wasm-bindgen", -] - -[[package]] -name = "winapi-util" -version = "0.1.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb" -dependencies = [ - "windows-sys", -] - -[[package]] -name = "windows-sys" -version = "0.59.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" -dependencies = [ - "windows-targets", -] - -[[package]] -name = "windows-targets" -version = "0.52.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" -dependencies = [ - "windows_aarch64_gnullvm", - "windows_aarch64_msvc", - "windows_i686_gnu", - "windows_i686_gnullvm", - "windows_i686_msvc", - "windows_x86_64_gnu", - "windows_x86_64_gnullvm", - "windows_x86_64_msvc", -] - -[[package]] -name = "windows_aarch64_gnullvm" -version = 
"0.52.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" - -[[package]] -name = "windows_aarch64_msvc" -version = "0.52.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" - -[[package]] -name = "windows_i686_gnu" -version = "0.52.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" - -[[package]] -name = "windows_i686_gnullvm" -version = "0.52.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" - -[[package]] -name = "windows_i686_msvc" -version = "0.52.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" - -[[package]] -name = "windows_x86_64_gnu" -version = "0.52.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" - -[[package]] -name = "windows_x86_64_gnullvm" -version = "0.52.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" - -[[package]] -name = "windows_x86_64_msvc" -version = "0.52.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" - -[[package]] -name = "wit-bindgen-rt" -version = "0.39.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6f42320e61fe2cfd34354ecb597f86f413484a798ba44a8ca1165c58d42da6c1" -dependencies = [ - "bitflags", -] - -[[package]] -name = "zerocopy" -version = "0.8.26" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"1039dd0d3c310cf05de012d8a39ff557cb0d23087fd44cad61df08fc31907a2f" -dependencies = [ - "zerocopy-derive", -] - -[[package]] -name = "zerocopy-derive" -version = "0.8.26" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9ecf5b4cc5364572d7f4c329661bcc82724222973f2cab6f050a4e5c22f75181" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] diff --git a/Cargo.toml b/Cargo.toml index 3a845b80..a73171b9 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -16,7 +16,8 @@ categories = [ "wasm", "external-ffi-bindings", ] -include = ["/rust/**", "/c/**", "/include/**", "/build.rs"] +rust-version = "1.64" # Introduced Core C FFI in stable Rust +include = ["rust/**", "c/**", "include/**", "build.rs"] [lib] @@ -24,7 +25,7 @@ name = "simsimd" path = "rust/lib.rs" [build-dependencies] -cc = "1.0.83" +cc = "1.2.36" [[bench]] @@ -48,6 +49,6 @@ default = [] std = [] [dev-dependencies] -criterion = { version = "0.6.0" } +criterion = { version = "0.7.0" } rand = { version = "0.9.1" } half = { version = "2.6.0" } diff --git a/README.md b/README.md index 00290825..d399d555 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ ![SimSIMD banner](https://github.com/ashvardanian/ashvardanian/blob/master/repositories/SimSIMD.jpg?raw=true) -Computing dot-products, similarity measures, and distances between low- and high-dimensional vectors is ubiquitous in Machine Learning, Scientific Computing, Geo-Spatial Analysis, and Information Retrieval. +Computing dot-products, similarity measures, and distances between low- and high-dimensional vectors is ubiquitous in Machine Learning, Scientific Computing, Geospatial Analysis, and Information Retrieval. These algorithms generally have linear complexity in time, constant or linear complexity in space, and are data-parallel. In other words, it is easily parallelizable and vectorizable and often available in packages like BLAS (level 1) and LAPACK, as well as higher-level `numpy` and `scipy` Python libraries. 
Ironically, even with decades of evolution in compilers and numerical computing, [most libraries can be 3-200x slower than hardware potential][benchmarks] even on the most popular hardware, like 64-bit x86 and Arm CPUs. @@ -32,7 +32,7 @@ SimSIMD provides an alternative. GitHub Actions Windows - GitHub Actions MacOS + GitHub Actions macOS GitHub Actions CentOS Linux @@ -43,7 +43,7 @@ SimSIMD provides an alternative. ## Features __SimSIMD__ (Arabic: "سيمسيم دي") is a mixed-precision math library of __over 350 SIMD-optimized kernels__ extensively used in AI, Search, and DBMS workloads. -Named after the iconic ["Open Sesame"](https://en.wikipedia.org/wiki/Open_sesame) command that opened doors to treasure in _Ali Baba and the Forty Thieves_, SimSimd can help you 10x the cost-efficiency of your computational pipelines. +Named after the iconic ["Open Sesame"](https://en.wikipedia.org/wiki/Open_sesame) command that opened doors to treasure in _Ali Baba and the Forty Thieves_, SimSIMD can help you 10x the cost-efficiency of your computational pipelines. Implemented distance functions include: - Euclidean (L2) and Cosine (Angular) spatial distances for Vector Search. _[docs][docs-spatial]_ @@ -85,7 +85,7 @@ You can learn more about the technical implementation details in the following b - [Uses Horner's method for polynomial approximations, beating GCC 12 by 119x](https://ashvardanian.com/posts/gcc-12-vs-avx512fp16/). - [Uses Arm SVE and x86 AVX-512's masked loads to eliminate tail `for`-loops](https://ashvardanian.com/posts/simsimd-faster-scipy/#tails-of-the-past-the-significance-of-masked-loads). -- [Substitutes LibC's `sqrt` with Newton Raphson iterations](https://github.com/ashvardanian/SimSIMD/releases/tag/v5.4.0). +- [Substitutes libc's `sqrt` with Newton Raphson iterations](https://github.com/ashvardanian/SimSIMD/releases/tag/v5.4.0). 
- [Uses Galloping and SVE2 histograms to intersect sparse vectors](https://ashvardanian.com/posts/simd-set-intersections-sve2-avx512/). - For Python: [avoids slow PyBind11, SWIG, & `PyArg_ParseTuple`](https://ashvardanian.com/posts/pybind11-cpython-tutorial/) [using faster calling convention](https://ashvardanian.com/posts/discount-on-keyword-arguments-in-python/). - For JavaScript: [uses typed arrays and NAPI for zero-copy calls](https://ashvardanian.com/posts/javascript-ai-vector-search/). @@ -625,13 +625,13 @@ fn main() { let vector_a: Vec = vec![1.0, 2.0, 3.0]; let vector_b: Vec = vec![4.0, 5.0, 6.0]; - // Compute the cosine similarity between vector_a and vector_b - let cosine_similarity = f32::cosine(&vector_a, &vector_b) + // Compute the cosine distance between vectors + let cosine_distance = f32::cosine(&vector_a, &vector_b) .expect("Vectors must be of the same length"); - println!("Cosine Similarity: {}", cosine_similarity); + println!("Cosine Distance: {}", cosine_distance); - // Compute the squared Euclidean distance between vector_a and vector_b + // Compute the squared Euclidean distance between vectors let sq_euclidean_distance = f32::sqeuclidean(&vector_a, &vector_b) .expect("Vectors must be of the same length"); @@ -648,16 +648,17 @@ use simsimd::SpatialSimilarity; use simsimd::ComplexProducts; fn main() { + // Complex vectors have interleaved real & imaginary components let vector_a: Vec = vec![1.0, 2.0, 3.0, 4.0]; let vector_b: Vec = vec![5.0, 6.0, 7.0, 8.0]; - // Compute the inner product between vector_a and vector_b + // Compute the inner product between vectors let inner_product = SpatialSimilarity::dot(&vector_a, &vector_b) .expect("Vectors must be of the same length"); println!("Inner Product: {}", inner_product); - // Compute the complex inner product between complex_vector_a and complex_vector_b + // Compute the complex inner product between vectors let complex_inner_product = ComplexProducts::dot(&vector_a, &vector_b) .expect("Vectors 
must be of the same length"); @@ -705,13 +706,13 @@ fn main() { let vector_a = &[0b11110000, 0b00001111, 0b10101010]; let vector_b = &[0b11110000, 0b00001111, 0b01010101]; - // Compute the Hamming distance between vector_a and vector_b + // Compute the Hamming distance between vectors let hamming_distance = u8::hamming(&vector_a, &vector_b) .expect("Vectors must be of the same length"); println!("Hamming Distance: {}", hamming_distance); - // Compute the Jaccard distance between vector_a and vector_b + // Compute the Jaccard distance between vectors let jaccard_distance = u8::jaccard(&vector_a, &vector_b) .expect("Vectors must be of the same length"); @@ -734,11 +735,11 @@ fn main() { let vector_a: Vec = vec![1.0, 2.0, 3.0].iter().map(|&x| f16::from_f32(x)).collect(); let vector_b: Vec = vec![4.0, 5.0, 6.0].iter().map(|&x| f16::from_f32(x)).collect(); - // Compute the cosine similarity - let cosine_similarity = f16::cosine(&vector_a, &vector_b) + // Compute the cosine distance + let cosine_distance = f16::cosine(&vector_a, &vector_b) .expect("Vectors must be of the same length"); - println!("Cosine Similarity: {}", cosine_similarity); + println!("Cosine Distance: {}", cosine_distance); // Direct bit manipulation let half = f16::from_f32(3.14159); @@ -764,10 +765,10 @@ fn main() { let buffer_a: &[SimF16] = unsafe { std::slice::from_raw_parts(vector_a.as_ptr() as *const SimF16, vector_a.len()) }; let buffer_b: &[SimF16] = unsafe { std::slice::from_raw_parts(vector_b.as_ptr() as *const SimF16, vector_b.len()) }; - let cosine_similarity = SimF16::cosine(buffer_a, buffer_b) + let cosine_distance = SimF16::cosine(buffer_a, buffer_b) .expect("Vectors must be of the same length"); - println!("Cosine Similarity: {}", cosine_similarity); + println!("Cosine Distance: {}", cosine_distance); } ``` @@ -787,10 +788,10 @@ fn main() { let vector_b: Vec = vec![4.0, 5.0, 6.0].iter().map(|&x| bf16::from_f32(x)).collect(); - // Compute the cosine similarity + // Compute the cosine distance - let cosine_similarity = 
bf16::cosine(&vector_a, &vector_b) + let cosine_distance = bf16::cosine(&vector_a, &vector_b) .expect("Vectors must be of the same length"); - println!("Cosine Similarity: {}", cosine_similarity); + println!("Cosine Distance: {}", cosine_distance); // Direct bit manipulation let brain_half = bf16::from_f32(3.14159); @@ -899,8 +900,8 @@ import SimSIMD let vectorA: [Int8] = [1, 2, 3] let vectorB: [Int8] = [4, 5, 6] -let cosineSimilarity = vectorA.cosine(vectorB) // Computes the cosine similarity let dotProduct = vectorA.dot(vectorB) // Computes the dot product +let cosineDistance = vectorA.cosine(vectorB) // Computes the cosine distance let sqEuclidean = vectorA.sqeuclidean(vectorB) // Computes the squared Euclidean distance ``` diff --git a/build.rs b/build.rs index 82715c15..d513690e 100644 --- a/build.rs +++ b/build.rs @@ -2,14 +2,15 @@ fn main() -> Result<(), cc::Error> { let mut build = cc::Build::new(); build + // Prefer portable flags to support MSVC and older toolchains + .std("c99") // Enforce C99 standard when supported .file("c/lib.c") .include("include") .define("SIMSIMD_NATIVE_F16", "0") .define("SIMSIMD_NATIVE_BF16", "0") .define("SIMSIMD_DYNAMIC_DISPATCH", "1") - .flag("-O3") - .flag("-std=c99") // Enforce C99 standard - .flag("-pedantic") // Ensure strict compliance with the C standard + .opt_level(3) + .flag_if_supported("-pedantic") // Strict compliance when supported .warnings(false); if let Err(e) = build.try_compile("simsimd") { diff --git a/c/lib.c b/c/lib.c index 2fdff43e..b5b89dee 100644 --- a/c/lib.c +++ b/c/lib.c @@ -18,7 +18,7 @@ * * - Linux: everything is available in GCC 12+ and Clang 16+. * - Windows - MSVC: everything except Sapphire Rapids and ARM SVE. - * - MacOS - Apple Clang: only Arm NEON and x86 AVX2 Haswell extensions are available. + * - macOS - Apple Clang: only Arm NEON and x86 AVX2 Haswell extensions are available. 
*/ #if !defined(SIMSIMD_TARGET_NEON) && (defined(__APPLE__) || defined(__linux__)) #define SIMSIMD_TARGET_NEON 1 diff --git a/golang/bench_test.go b/golang/bench_test.go index 7874e347..d3ac13fd 100644 --- a/golang/bench_test.go +++ b/golang/bench_test.go @@ -6,7 +6,7 @@ import ( "testing" ) -func cosineSimilarity(a, b []float32) float32 { +func cosineDistance(a, b []float32) float32 { var dotProduct float32 var normA, normB float32 for i := range a { @@ -14,7 +14,8 @@ func cosineSimilarity(a, b []float32) float32 { normA += a[i] * a[i] normB += b[i] * b[i] } - return dotProduct / (float32(math.Sqrt(float64(normA))) * float32(math.Sqrt(float64(normB)))) + sim := dotProduct / (float32(math.Sqrt(float64(normA))) * float32(math.Sqrt(float64(normB)))) + return 1 - sim } func generateRandomVector(dim int) []float32 { @@ -25,16 +26,16 @@ func generateRandomVector(dim int) []float32 { return vec } -func BenchmarkCosineSimilarityNative(b *testing.B) { - first, second := generateRandomVector(1536), generateRandomVector(1536) - for i := 0; i < b.N; i++ { - cosineSimilarity(first, second) - } +func BenchmarkCosineDistanceNative(b *testing.B) { + first, second := generateRandomVector(1536), generateRandomVector(1536) + for i := 0; i < b.N; i++ { + cosineDistance(first, second) + } } -func BenchmarkCosineSimilaritySIMD(b *testing.B) { - first, second := generateRandomVector(1536), generateRandomVector(1536) - for i := 0; i < b.N; i++ { - CosineF32(first, second) - } -} \ No newline at end of file +func BenchmarkCosineDistanceSIMD(b *testing.B) { + first, second := generateRandomVector(1536), generateRandomVector(1536) + for i := 0; i < b.N; i++ { + CosineF32(first, second) + } +} diff --git a/golang/simsimd.go b/golang/simsimd.go index 6b54ff97..1d3b5a5f 100644 --- a/golang/simsimd.go +++ b/golang/simsimd.go @@ -53,7 +53,7 @@ func InnerF32(a, b []float32) float32 { return float32(C.inner_f32((*C.simsimd_f32_t)(&a[0]), (*C.simsimd_f32_t)(&b[0]), C.simsimd_size_t(len(a)))) } -// 
SqEuclideanI8 computes the squared euclidean similarity between two i8 vectors using the most suitable SIMD instruction set available. +// SqEuclideanI8 computes the squared Euclidean distance between two i8 vectors using the most suitable SIMD instruction set available. func SqEuclideanI8(a, b []int8) float32 { if len(a) != len(b) { panic("both vectors must have the same length") @@ -62,7 +62,7 @@ func SqEuclideanI8(a, b []int8) float32 { return float32(C.sqeuclidean_i8((*C.simsimd_i8_t)(&a[0]), (*C.simsimd_i8_t)(&b[0]), C.simsimd_size_t(len(a)))) } -// SqEuclideanF32 computes the squared euclidean similarity between two f32 vectors using the most suitable SIMD instruction set available. +// SqEuclideanF32 computes the squared Euclidean distance between two f32 vectors using the most suitable SIMD instruction set available. func SqEuclideanF32(a, b []float32) float32 { if len(a) != len(b) { panic("both vectors must have the same length") diff --git a/golang/unit_test.go b/golang/unit_test.go index 57603217..b54fefa1 100644 --- a/golang/unit_test.go +++ b/golang/unit_test.go @@ -10,7 +10,7 @@ func TestCosineI8(t *testing.T) { b := []int8{0, 1} result := CosineI8(a, b) - expected := float32(1.0) // Cosine similarity of orthogonal vectors is 0 + expected := float32(1.0) // Cosine distance of orthogonal vectors is 1 if math.Abs(float64(result-expected)) > 1e-3 { t.Errorf("Expected %v, got %v", expected, result) } @@ -21,7 +21,7 @@ func TestCosineF32(t *testing.T) { b := []float32{0, 1} result := CosineF32(a, b) - expected := float32(1.0) // Cosine similarity of orthogonal vectors is 0 + expected := float32(1.0) // Cosine distance of orthogonal vectors is 1 if math.Abs(float64(result-expected)) > 1e-3 { t.Errorf("Expected %v, got %v", expected, result) } diff --git a/include/simsimd/binary.h b/include/simsimd/binary.h index 5c320cd1..0301a85b 100644 --- a/include/simsimd/binary.h +++ b/include/simsimd/binary.h @@ -6,7 +6,7 @@ * * Contains: * - Hamming distance - * - 
Jaccard similarity (Tanimoto coefficient) + * - Jaccard distance (Tanimoto coefficient) * * For hardware architectures: * - Arm: NEON, SVE @@ -19,7 +19,7 @@ * - Lookup tables, mostly using nibbles (4-bit lookups) * - Harley-Seal population counts: https://arxiv.org/pdf/1611.07612 * - * On binary vectors, when computing Jaccard similarity we can clearly see how the CPU struggles + * On binary vectors, when computing Jaccard distance we can clearly see how the CPU struggles * to compute that many population counts. There are several instructions we should keep in mind * for future optimizations: * diff --git a/include/simsimd/geospatial.h b/include/simsimd/geospatial.h index 91f2d6f9..88a47bef 100644 --- a/include/simsimd/geospatial.h +++ b/include/simsimd/geospatial.h @@ -1,6 +1,6 @@ /** * @file geospatial.h - * @brief SIMD-accelerated Geo-Spatial distance functions. + * @brief SIMD-accelerated Geospatial distance functions. * @author Ash Vardanian * @date July 1, 2023 * diff --git a/include/simsimd/mesh.h b/include/simsimd/mesh.h index 2d3c16c2..2ec098c6 100644 --- a/include/simsimd/mesh.h +++ b/include/simsimd/mesh.h @@ -15,7 +15,7 @@ * - 16-bit brain-floating point * * For hardware architectures: - * - Arm: Neon + * - Arm: NEON * - x86: Genoa, Sapphire * * x86 intrinsics: https://www.intel.com/content/www/us/en/docs/intrinsics-guide/ diff --git a/include/simsimd/simsimd.h b/include/simsimd/simsimd.h index 028a3702..a7933a39 100644 --- a/include/simsimd/simsimd.h +++ b/include/simsimd/simsimd.h @@ -26,7 +26,7 @@ * * Intel Palm Cove was an irrelevant intermediate release extending Skylake with IFMA and VBMI. * Intel Willow Cove was an irrelevant intermediate release extending Sunny Cove with VP2INTERSECT, - * that aren't supported by any other CPU built to date... and those are only available in Tiger Lake laptops. + * which are not supported by other CPUs to date and are only available in Tiger Lake laptops. 
* Intel Cooper Lake was the only intermediary platform, that supported BF16, but not FP16. * It's mostly used in 4-socket and 8-socket high-memory configurations. * @@ -80,7 +80,7 @@ * The N2 core is very similar to V2 and is used by Microsoft @b Cobalt. * https://developer.arm.com/Processors/Neoverse%20N2 * - * On Consumer side, Apple is the biggest player with mobile @b A chips and desktop @b M chips. + * On the consumer side, Apple is the biggest player with mobile @b A chips and desktop @b M chips. * The M1 implements Armv8.5-A, both M2 and M3 implement Armv8.6-A, and M4 is expected to have Armv9.1-A. */ diff --git a/include/simsimd/types.h b/include/simsimd/types.h index 061a4ee1..3dbab565 100644 --- a/include/simsimd/types.h +++ b/include/simsimd/types.h @@ -12,7 +12,7 @@ #ifndef SIMSIMD_TYPES_H #define SIMSIMD_TYPES_H -// Inferring target OS: Windows, MacOS, or Linux +// Inferring target OS: Windows, macOS, or Linux #if defined(WIN32) || defined(_WIN32) || defined(__WIN32__) || defined(__NT__) #define _SIMSIMD_DEFINED_WINDOWS 1 #elif defined(__APPLE__) && defined(__MACH__) @@ -28,7 +28,7 @@ // - `SIMSIMD_DYNAMIC` is used for functions that are part of the public API, but are dispatched at runtime. // // On GCC we mark the functions as `nonnull` informing that none of the arguments can be `NULL`. -// Marking with `pure` and `const` isn't possible as outputing to a pointer is a "side effect". +// Marking with `pure` and `const` isn't possible as outputting to a pointer is a "side effect". 
#if defined(_WIN32) || defined(__CYGWIN__) #define SIMSIMD_DYNAMIC __declspec(dllexport) #define SIMSIMD_PUBLIC inline static @@ -259,6 +259,7 @@ #define SIMSIMD_LOG(x) (log(x)) #endif +// Copy 16 bits (2 bytes) from source to destination #if defined(__GNUC__) || defined(__clang__) #define SIMSIMD_COPY16(destination_ptr, source_ptr) __builtin_memcpy((destination_ptr), (source_ptr), 2) #else @@ -395,7 +396,7 @@ typedef unsigned short simsimd_bf16_t; /* * Let's make sure the sizes of the types are as expected. - * In C the `_Static_assert` is only available with C 11 and later. + * In C the `_Static_assert` is only available with C11 and later. */ #define SIMSIMD_STATIC_ASSERT(cond, msg) typedef char static_assertion_##msg[(cond) ? 1 : -1] SIMSIMD_STATIC_ASSERT(sizeof(simsimd_b8_t) == 1, simsimd_b8_t_must_be_1_byte); diff --git a/javascript/fallback.ts b/javascript/fallback.ts index d15a21a1..e4b4f6a7 100644 --- a/javascript/fallback.ts +++ b/javascript/fallback.ts @@ -1,8 +1,8 @@ /** - * @brief Computes the inner distance of two vectors (same as dot product). + * @brief Computes the inner product of two vectors (dot product). * @param {Float64Array|Float32Array} a - The first vector. * @param {Float64Array|Float32Array} b - The second vector. - * @returns {number} The inner distance of vectors a and b. + * @returns {number} The inner product of vectors a and b. */ export function inner(a: Float64Array | Float32Array, b: Float64Array | Float32Array): number { if (a.length !== b.length) { @@ -17,10 +17,10 @@ export function inner(a: Float64Array | Float32Array, b: Float64Array | Float32A } /** - * @brief Computes the inner distance of two vectors (same as inner product). + * @brief Computes the dot product of two vectors (same as inner product). * @param {Float64Array|Float32Array} a - The first vector. * @param {Float64Array|Float32Array} b - The second vector. - * @returns {number} The inner distance of vectors a and b. 
+ * @returns {number} The dot product of vectors a and b. */ export function dot(a: Float64Array | Float32Array, b: Float64Array | Float32Array): number { return inner(a, b); @@ -51,7 +51,7 @@ export function sqeuclidean( * @brief Computes the L2 Euclidean distance between two vectors. * @param {Float64Array|Float32Array|Int8Array | Uint8Array} a - The first vector. * @param {Float64Array|Float32Array|Int8Array | Uint8Array} b - The second vector. - * @returns {number} The L2 euclidean distance between vectors a and b. + * @returns {number} The L2 Euclidean distance between vectors a and b. */ export function euclidean( a: Float64Array | Float32Array | Int8Array | Uint8Array, @@ -96,12 +96,8 @@ export function cosine( magnitudeA = Math.sqrt(magnitudeA); magnitudeB = Math.sqrt(magnitudeB); - if (magnitudeA === 0 || magnitudeB === 0) { - console.warn( - "Warning: One of the magnitudes is zero. Cosine similarity is undefined." - ); - return 0; - } + if (magnitudeA === 0 && magnitudeB === 0) return 0; // distance when both zero + if (magnitudeA === 0 || magnitudeB === 0) return 1; // distance when one is zero return 1 - dotProduct / (magnitudeA * magnitudeB); } @@ -133,10 +129,10 @@ export const hamming = (a: Uint8Array, b: Uint8Array): number => { }; /** - * @brief Computes the bitwise Jaccard similarity coefficient between two vectors. + * @brief Computes the bitwise Jaccard distance between two vectors. * @param {Uint8Array} a - The first vector. * @param {Uint8Array} b - The second vector. - * @returns {number} The Jaccard similarity coefficient between vectors a and b. + * @returns {number} The Jaccard distance between vectors a and b. */ export const jaccard = (a: Uint8Array, b: Uint8Array): number => { if (a.length !== b.length) { @@ -174,7 +170,7 @@ export const jaccard = (a: Uint8Array, b: Uint8Array): number => { * @brief Computes the Kullback-Leibler divergence between two probability distributions. * @param {Float64Array|Float32Array} a - The first vector. 
* @param {Float64Array|Float32Array} b - The second vector. - * @returns {number} The Jaccard similarity coefficient between vectors a and b. + * @returns {number} The Kullback-Leibler divergence between vectors a and b. */ export const kullbackleibler = (a: Float64Array | Float32Array, b: Float64Array | Float32Array): number => { if (a.length !== b.length) { diff --git a/javascript/lib.c b/javascript/lib.c index 9ef7777c..e7a4387a 100644 --- a/javascript/lib.c +++ b/javascript/lib.c @@ -10,7 +10,7 @@ #include // `napi_*` functions #include // `simsimd_*` functions -/// @brief Global variable that caches the CPU capabilities, and is computed just onc, when the module is loaded. +/// @brief Global variable that caches the CPU capabilities, and is computed just once, when the module is loaded. simsimd_capability_t static_capabilities = simsimd_cap_serial_k; napi_value dense(napi_env env, napi_callback_info info, simsimd_metric_kind_t metric_kind, diff --git a/javascript/simsimd.ts b/javascript/simsimd.ts index 4fbf774d..c04a5e24 100644 --- a/javascript/simsimd.ts +++ b/javascript/simsimd.ts @@ -12,7 +12,7 @@ try { } catch (e) { compiled = fallback; console.warn( - "It seems like your environment does't support the native simsimd module, so we are providing a JS fallback." + "It seems like your environment doesn't support the native simsimd module, so we are providing a JS fallback." ); } @@ -92,10 +92,10 @@ export const hamming = (a: Uint8Array, b: Uint8Array): number => { }; /** - * @brief Computes the bitwise Jaccard similarity coefficient between two vectors. + * @brief Computes the bitwise Jaccard distance between two vectors. * @param {Uint8Array} a - The first vector. * @param {Uint8Array} b - The second vector. - * @returns {number} The Jaccard similarity coefficient between vectors a and b. + * @returns {number} The Jaccard distance between vectors a and b. 
*/ export const jaccard = (a: Uint8Array, b: Uint8Array): number => { return compiled.jaccard(a, b); @@ -123,7 +123,7 @@ export const jensenshannon = (a: Float64Array | Float32Array, b: Float64Array | /** * Quantizes a floating-point vector into a binary vector (1 for positive values, 0 for non-positive values) and packs the result into a Uint8Array, where each element represents 8 binary values from the original vector. - * This function is useful for preparing data for bitwise distance or similarity computations, such as Hamming or Jaccard indices. + * This function is useful for preparing data for bitwise distance computations, such as Hamming or Jaccard indices. * * @param {Float32Array | Float64Array | Int8Array} vector The floating-point vector to be quantized and packed. * @returns {Uint8Array} A Uint8Array where each byte represents 8 binary quantized values from the input vector. diff --git a/pyproject.toml b/pyproject.toml index fdfce3ec..aef40a42 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -34,7 +34,7 @@ test-command = """ # We need to build for all platforms: # - on Linux: x86_64, aarch64, i686 -# - on MacOS: x86_64, arm64 +# - on macOS: x86_64, arm64 # - on Windows: AMD64, ARM64 # https://cibuildwheel.readthedocs.io/en/stable/options/#archs # diff --git a/python/lib.c b/python/lib.c index bd001ffd..8f2264e8 100644 --- a/python/lib.c +++ b/python/lib.c @@ -133,7 +133,7 @@ static PyTypeObject DistancesTensorType = { .tp_as_buffer = &DistancesTensor_as_buffer, }; -/// @brief Global variable that caches the CPU capabilities, and is computed just onc, when the module is loaded. +/// @brief Global variable that caches the CPU capabilities, and is computed just once, when the module is loaded. simsimd_capability_t static_capabilities = simsimd_cap_serial_k; /// @brief Helper method to check for string equality. diff --git a/rust/lib.rs b/rust/lib.rs index f95b1cfc..69e268e5 100644 --- a/rust/lib.rs +++ b/rust/lib.rs @@ -7,7 +7,7 @@ //! //! 
## Implemented distance functions include: //! -//! * Euclidean (L2), Inner Distance, and Cosine (Angular) spatial distances. +//! * Euclidean (L2), inner product, and cosine (angular) spatial distances. //! * Hamming (~ Manhattan) and Jaccard (~ Tanimoto) binary distances. //! * Kullback-Leibler and Jensen-Shannon divergences for probability distributions. //! @@ -19,8 +19,8 @@ //! let a = &[1, 2, 3]; //! let b = &[4, 5, 6]; //! -//! // Compute cosine similarity -//! let cos_sim = i8::cos(a, b); +//! // Compute cosine distance +//! let cos_dist = i8::cos(a, b); //! //! // Compute dot product distance //! let dot_product = i8::dot(a, b); @@ -40,12 +40,12 @@ //! // Work with half-precision floats //! let half_a: Vec = vec![1.0, 2.0, 3.0].iter().map(|&x| f16::from_f32(x)).collect(); //! let half_b: Vec = vec![4.0, 5.0, 6.0].iter().map(|&x| f16::from_f32(x)).collect(); -//! let half_cos = f16::cos(&half_a, &half_b); +//! let half_cos_dist = f16::cos(&half_a, &half_b); //! //! // Work with brain floats //! let brain_a: Vec = vec![1.0, 2.0, 3.0].iter().map(|&x| bf16::from_f32(x)).collect(); //! let brain_b: Vec = vec![4.0, 5.0, 6.0].iter().map(|&x| bf16::from_f32(x)).collect(); -//! let brain_cos = bf16::cos(&brain_a, &brain_b); +//! let brain_cos_dist = bf16::cos(&brain_a, &brain_b); //! //! // Direct bit manipulation //! let half = f16::from_f32(3.14); @@ -57,14 +57,14 @@ //! //! The `SpatialSimilarity` trait covers following methods: //! -//! - `cosine(a: &[Self], b: &[Self]) -> Option`: Computes cosine similarity between two slices. +//! - `cosine(a: &[Self], b: &[Self]) -> Option`: Computes cosine distance (1 - similarity) between two slices. //! - `dot(a: &[Self], b: &[Self]) -> Option`: Computes dot product distance between two slices. //! - `sqeuclidean(a: &[Self], b: &[Self]) -> Option`: Computes squared Euclidean distance between two slices. //! //! The `BinarySimilarity` trait covers following methods: //! //! 
- `hamming(a: &[Self], b: &[Self]) -> Option`: Computes Hamming distance between two slices. -//! - `jaccard(a: &[Self], b: &[Self]) -> Option`: Computes Jaccard index between two slices. +//! - `jaccard(a: &[Self], b: &[Self]) -> Option`: Computes Jaccard distance between two slices. //! //! The `ProbabilitySimilarity` trait covers following methods: //! @@ -77,6 +77,12 @@ pub type Distance = f64; pub type ComplexProduct = (f64, f64); +/// Compatibility function for pre 1.85 Rust versions lacking `f32::abs`. +#[inline(always)] +fn f32_abs_compat(x: f32) -> f32 { + f32::from_bits(x.to_bits() & 0x7FFF_FFFF) +} + #[link(name = "simsimd")] extern "C" { @@ -194,10 +200,10 @@ pub struct f16(pub u16); impl f16 { /// Positive zero. pub const ZERO: Self = f16(0); - + /// Positive one. pub const ONE: Self = f16(0x3C00); - + /// Negative one. pub const NEG_ONE: Self = f16(0xBC00); @@ -251,11 +257,11 @@ impl f16 { /// Returns the absolute value of self. #[inline(always)] pub fn abs(self) -> Self { - Self::from_f32(self.to_f32().abs()) + Self::from_f32(f32_abs_compat(self.to_f32())) } /// Returns the largest integer less than or equal to a number. - /// + /// /// This method is only available when the `std` feature is enabled. #[cfg(feature = "std")] #[inline(always)] @@ -264,7 +270,7 @@ impl f16 { } /// Returns the smallest integer greater than or equal to a number. - /// + /// /// This method is only available when the `std` feature is enabled. #[cfg(feature = "std")] #[inline(always)] @@ -273,7 +279,7 @@ impl f16 { } /// Returns the nearest integer to a number. Round half-way cases away from 0.0. - /// + /// /// This method is only available when the `std` feature is enabled. #[cfg(feature = "std")] #[inline(always)] @@ -368,10 +374,10 @@ pub struct bf16(pub u16); impl bf16 { /// Positive zero. pub const ZERO: Self = bf16(0); - + /// Positive one. pub const ONE: Self = bf16(0x3F80); - + /// Negative one. 
pub const NEG_ONE: Self = bf16(0xBF80); @@ -425,11 +431,11 @@ impl bf16 { /// Returns the absolute value of self. #[inline(always)] pub fn abs(self) -> Self { - Self::from_f32(self.to_f32().abs()) + Self::from_f32(f32_abs_compat(self.to_f32())) } /// Returns the largest integer less than or equal to a number. - /// + /// /// This method is only available when the `std` feature is enabled. #[cfg(feature = "std")] #[inline(always)] @@ -438,7 +444,7 @@ impl bf16 { } /// Returns the smallest integer greater than or equal to a number. - /// + /// /// This method is only available when the `std` feature is enabled. #[cfg(feature = "std")] #[inline(always)] @@ -447,7 +453,7 @@ impl bf16 { } /// Returns the nearest integer to a number. Round half-way cases away from 0.0. - /// + /// /// This method is only available when the `std` feature is enabled. #[cfg(feature = "std")] #[inline(always)] @@ -596,8 +602,8 @@ pub mod capabilities { /// /// # Returns /// - /// Returns `true` if dynamic dispatch is enabled, `false` otherwise. - /// Currently always returns `false` as dynamic dispatch is not implemented. + /// Returns `true` when the C backend is compiled with dynamic dispatch + /// (default for this crate via `build.rs`), otherwise `false`. pub fn uses_dynamic_dispatch() -> bool { unsafe { crate::simsimd_uses_dynamic_dispatch() != 0 } } @@ -605,19 +611,21 @@ pub mod capabilities { /// `SpatialSimilarity` provides a set of trait methods for computing similarity /// or distance between spatial data vectors in SIMD (Single Instruction, Multiple Data) context. -/// These methods can be used to calculate metrics like cosine similarity, dot product, +/// These methods can be used to calculate metrics like cosine distance, dot product, /// and squared Euclidean distance between two slices of data. /// /// Each method takes two slices of data (a and b) and returns an Option. 
/// The result is `None` if the slices are not of the same length, as these operations /// require one-to-one correspondence between the elements of the slices. -/// Otherwise, it returns the computed similarity or distance as `Some(f32)`. +/// Otherwise, it returns the computed similarity or distance as `Some(f64)`. +/// Convenience methods like `cosine`/`sqeuclidean` delegate to the core methods +/// `cos`/`l2sq` implemented by this trait. pub trait SpatialSimilarity where Self: Sized, { - /// Computes the cosine similarity between two slices. - /// The cosine similarity is a measure of similarity between two non-zero vectors + /// Computes the cosine distance between two slices. + /// The cosine distance is 1 minus the cosine similarity between two non-zero vectors /// of an dot product space that measures the cosine of the angle between them. fn cos(a: &[Self], b: &[Self]) -> Option; @@ -659,8 +667,8 @@ where SpatialSimilarity::dot(a, b) } - /// Computes the cosine similarity between two slices. - /// The cosine similarity is a measure of similarity between two non-zero vectors + /// Computes the cosine distance between two slices. + /// The cosine distance is 1 minus the cosine similarity between two non-zero vectors /// of an dot product space that measures the cosine of the angle between them. 
fn cosine(a: &[Self], b: &[Self]) -> Option { SpatialSimilarity::cos(a, b) @@ -1228,7 +1236,7 @@ mod tests { use half::f16 as HalfF16; #[test] - fn test_hardware_features_detection() { + fn hardware_features_detection() { let uses_arm = capabilities::uses_neon() || capabilities::uses_sve(); let uses_x86 = capabilities::uses_haswell() || capabilities::uses_skylake() @@ -1265,7 +1273,7 @@ mod tests { } #[test] - fn test_cos_i8() { + fn cos_i8() { let a = &[3, 97, 127]; let b = &[3, 97, 127]; @@ -1276,7 +1284,7 @@ mod tests { } #[test] - fn test_cos_f32() { + fn cos_f32() { let a = &[1.0, 2.0, 3.0]; let b = &[4.0, 5.0, 6.0]; @@ -1287,7 +1295,7 @@ mod tests { } #[test] - fn test_dot_i8() { + fn dot_i8() { let a = &[1, 2, 3]; let b = &[4, 5, 6]; @@ -1298,7 +1306,7 @@ mod tests { } #[test] - fn test_dot_f32() { + fn dot_f32() { let a = &[1.0, 2.0, 3.0]; let b = &[4.0, 5.0, 6.0]; @@ -1309,7 +1317,7 @@ mod tests { } #[test] - fn test_dot_f32_complex() { + fn dot_f32_complex() { // Let's consider these as complex numbers where every pair is (real, imaginary) let a: &[f32; 4] = &[1.0, 2.0, 3.0, 4.0]; // Represents two complex numbers: 1+2i, 3+4i let b: &[f32; 4] = &[5.0, 6.0, 7.0, 8.0]; // Represents two complex numbers: 5+6i, 7+8i @@ -1326,7 +1334,7 @@ mod tests { } #[test] - fn test_vdot_f32_complex() { + fn vdot_f32_complex() { // Here we're assuming a similar setup to the previous test, but for the Hermitian (conjugate) dot product let a: &[f32; 4] = &[1.0, 2.0, 3.0, 4.0]; // Represents two complex numbers: 1+2i, 3+4i let b: &[f32; 4] = &[5.0, 6.0, 7.0, 8.0]; // Represents two complex numbers: 5+6i, 7+8i @@ -1343,7 +1351,7 @@ mod tests { } #[test] - fn test_l2sq_i8() { + fn l2sq_i8() { let a = &[1, 2, 3]; let b = &[4, 5, 6]; @@ -1354,7 +1362,7 @@ mod tests { } #[test] - fn test_l2sq_f32() { + fn l2sq_f32() { let a = &[1.0, 2.0, 3.0]; let b = &[4.0, 5.0, 6.0]; @@ -1365,7 +1373,7 @@ mod tests { } #[test] - fn test_l2_f32() { + fn l2_f32() { let a: &[f32; 3] = &[1.0, 2.0, 
3.0]; let b: &[f32; 3] = &[4.0, 5.0, 6.0]; if let Some(result) = SpatialSimilarity::euclidean(a, b) { @@ -1375,7 +1383,7 @@ mod tests { } #[test] - fn test_l2_f64() { + fn l2_f64() { let a: &[f64; 3] = &[1.0, 2.0, 3.0]; let b: &[f64; 3] = &[4.0, 5.0, 6.0]; if let Some(result) = SpatialSimilarity::euclidean(a, b) { @@ -1385,7 +1393,7 @@ mod tests { } #[test] - fn test_l2_f16() { + fn l2_f16() { let a_half: Vec = vec![1.0, 2.0, 3.0] .iter() .map(|&x| HalfF16::from_f32(x)) @@ -1407,7 +1415,7 @@ mod tests { } #[test] - fn test_l2_i8() { + fn l2_i8() { let a = &[1, 2, 3]; let b = &[4, 5, 6]; @@ -1418,7 +1426,7 @@ mod tests { } // Adding new tests for bit-level distances #[test] - fn test_hamming_u8() { + fn hamming_u8() { let a = &[0b01010101, 0b11110000, 0b10101010]; // Binary representations for clarity let b = &[0b01010101, 0b11110000, 0b10101010]; @@ -1429,7 +1437,7 @@ mod tests { } #[test] - fn test_jaccard_u8() { + fn jaccard_u8() { // For binary data, treat each byte as a set of bits let a = &[0b11110000, 0b00001111, 0b10101010]; let b = &[0b11110000, 0b00001111, 0b01010101]; @@ -1442,7 +1450,7 @@ mod tests { // Adding new tests for probability similarities #[test] - fn test_js_f32() { + fn js_f32() { let a: &[f32; 3] = &[0.1, 0.9, 0.0]; let b: &[f32; 3] = &[0.2, 0.8, 0.0]; @@ -1453,7 +1461,7 @@ mod tests { } #[test] - fn test_kl_f32() { + fn kl_f32() { let a: &[f32; 3] = &[0.1, 0.9, 0.0]; let b: &[f32; 3] = &[0.2, 0.8, 0.0]; @@ -1464,7 +1472,7 @@ mod tests { } #[test] - fn test_cos_f16_same() { + fn cos_f16_same() { // Assuming these u16 values represent f16 bit patterns, and they are identical let a_u16: &[u16] = &[15360, 16384, 17408]; // Corresponding to some f16 values let b_u16: &[u16] = &[15360, 16384, 17408]; // Same as above for simplicity @@ -1484,7 +1492,7 @@ mod tests { } #[test] - fn test_cos_bf16_same() { + fn cos_bf16_same() { // Assuming these u16 values represent bf16 bit patterns, and they are identical let a_u16: &[u16] = &[15360, 16384, 
17408]; // Corresponding to some bf16 values let b_u16: &[u16] = &[15360, 16384, 17408]; // Same as above for simplicity @@ -1504,7 +1512,7 @@ mod tests { } #[test] - fn test_cos_f16_interop() { + fn cos_f16_interop() { let a_half: Vec = vec![1.0, 2.0, 3.0] .iter() .map(|&x| HalfF16::from_f32(x)) @@ -1524,14 +1532,14 @@ mod tests { // Use the reinterpret-casted slices with your SpatialSimilarity implementation if let Some(result) = SpatialSimilarity::cosine(a_simsimd, b_simsimd) { // Expected value might need adjustment depending on actual cosine functionality - // Assuming identical vectors yield cosine similarity of 1.0 + // Identical vectors would yield a cosine distance of 0.0; these inputs are expected to give ~0.025 println!("The result of cos_f16 (interop) is {:.8}", result); assert_almost_equal(0.025, result, 0.01); } } #[test] - fn test_cos_bf16_interop() { + fn cos_bf16_interop() { let a_half: Vec = vec![1.0, 2.0, 3.0] .iter() .map(|&x| HalfBF16::from_f32(x)) @@ -1551,14 +1559,14 @@ mod tests { // Use the reinterpret-casted slices with your SpatialSimilarity implementation if let Some(result) = SpatialSimilarity::cosine(a_simsimd, b_simsimd) { // Expected value might need adjustment depending on actual cosine functionality - // Assuming identical vectors yield cosine similarity of 1.0 + // Identical vectors would yield a cosine distance of 0.0; these inputs are expected to give ~0.025 println!("The result of cos_bf16 (interop) is {:.8}", result); assert_almost_equal(0.025, result, 0.01); } } #[test] - fn test_intersect_u16() { + fn intersect_u16() { { let a_u16: &[u16] = &[153, 16384, 17408]; let b_u16: &[u16] = &[15360, 16384, 7408]; @@ -1581,7 +1589,7 @@ mod tests { } #[test] - fn test_intersect_u32() { + fn intersect_u32() { { let a_u32: &[u32] = &[11, 153]; let b_u32: &[u32] = &[11, 153, 7408, 16384]; @@ -1604,27 +1612,27 @@ mod tests { } #[test] - fn test_f16_arithmetic() { + fn f16_arithmetic() { let a = f16::from_f32(3.5); let b = f16::from_f32(2.0); - + // Test basic arithmetic assert!((a + b).to_f32() - 5.5 < 0.01); assert!((a
- b).to_f32() - 1.5 < 0.01); assert!((a * b).to_f32() - 7.0 < 0.01); assert!((a / b).to_f32() - 1.75 < 0.01); assert!((-a).to_f32() + 3.5 < 0.01); - + // Test constants assert!(f16::ZERO.to_f32() == 0.0); assert!((f16::ONE.to_f32() - 1.0).abs() < 0.01); assert!((f16::NEG_ONE.to_f32() + 1.0).abs() < 0.01); - + // Test comparisons assert!(a > b); assert!(!(a < b)); assert!(a == a); - + // Test utility methods assert!((-a).abs().to_f32() - 3.5 < 0.01); assert!(a.is_finite()); @@ -1633,27 +1641,27 @@ mod tests { } #[test] - fn test_bf16_arithmetic() { + fn bf16_arithmetic() { let a = bf16::from_f32(3.5); let b = bf16::from_f32(2.0); - + // Test basic arithmetic assert!((a + b).to_f32() - 5.5 < 0.1); assert!((a - b).to_f32() - 1.5 < 0.1); assert!((a * b).to_f32() - 7.0 < 0.1); assert!((a / b).to_f32() - 1.75 < 0.1); assert!((-a).to_f32() + 3.5 < 0.1); - + // Test constants assert!(bf16::ZERO.to_f32() == 0.0); assert!((bf16::ONE.to_f32() - 1.0).abs() < 0.01); assert!((bf16::NEG_ONE.to_f32() + 1.0).abs() < 0.01); - + // Test comparisons assert!(a > b); assert!(!(a < b)); assert!(a == a); - + // Test utility methods assert!((-a).abs().to_f32() - 3.5 < 0.1); assert!(a.is_finite());