Initial commit

25d2752f · yongshk · 25d2752f · 25d2752f · 25d2752f · 25d2752f
Commit 25d2752f authored May 29, 2025 by yongshk
20 changed files
--- a/.cargo/config.toml
+++ b/.cargo/config.toml
+# Default rustc flags: tune generated code for the CPU of the build machine.
+[build]
+rustflags = ["-C", "target-cpu=native"]
+# WebAssembly builds: enable the simd128 target feature.
+# NOTE(review): per the Cargo configuration reference, a matching
+# target.<triple>.rustflags table is used instead of build.rustflags rather
+# than being merged with it - confirm that dropping target-cpu=native here is
+# intended.
+[target.wasm32-unknown-unknown]
+rustflags = ["-C", "target-feature=+simd128"]
+# Intel macOS builds: turn the AVX and AVX2 target features off.
+[target.x86_64-apple-darwin]
+rustflags = ["-C", "target-feature=-avx,-avx2"]
\ No newline at end of file
--- a/.github/dependabot.yml
+++ b/.github/dependabot.yml
+# Dependabot: check the Cargo manifests at the repository root once a week
+# and keep at most five update pull requests open at a time.
+version: 2
+updates:
+  - package-ecosystem: 'cargo'
+    directory: '/'
+    schedule:
+      interval: 'weekly'
+    open-pull-requests-limit: 5
--- a/.github/workflows/book-cd.yml
+++ b/.github/workflows/book-cd.yml
+name: Deploy Rust book
+on:
+  push:
+    branches:
+      - main
+jobs:
+  # Build candle-book with mdbook and force-push the rendered site to the
+  # gh-pages branch.
+  deploy:
+    runs-on: ubuntu-latest
+    permissions:
+      contents: write  # To push the gh-pages branch
+    steps:
+    - uses: actions/checkout@v4
+      with:
+        fetch-depth: 0
+    - name: Install latest mdbook
+      run: |
+        # Abort on the first failing command, including failures inside a pipe.
+        set -euo pipefail
+        tag=$(curl -fsSL 'https://api.github.com/repos/rust-lang/mdbook/releases/latest' | jq -r '.tag_name')
+        url="https://github.com/rust-lang/mdbook/releases/download/${tag}/mdbook-${tag}-x86_64-unknown-linux-gnu.tar.gz"
+        mkdir mdbook
+        curl -fsSL "$url" | tar -xz --directory=./mdbook
+        # Make the freshly unpacked mdbook binary available to later steps.
+        echo "$(pwd)/mdbook" >> "$GITHUB_PATH"
+    - name: Deploy GitHub Pages
+      run: |
+        # The book sources live in candle-book/; the rendered output is picked
+        # up from candle-book/book further down.
+        cd candle-book
+        mdbook build
+        git worktree add gh-pages
+        git config user.name "Deploy from CI"
+        git config user.email ""
+        cd gh-pages
+        # Delete the ref to avoid keeping history.
+        git update-ref -d refs/heads/gh-pages
+        rm -rf *
+        mv ../book/* .
+        git add .
+        git commit -m "Deploy $GITHUB_SHA to gh-pages"
+        git push --force --set-upstream origin gh-pages
--- a/.github/workflows/book.yml
+++ b/.github/workflows/book.yml
+name: CI
+on:
+  pull_request:
+jobs:
+  # Compile the workspace and run the code samples embedded in candle-book.
+  test:
+    name: Test candle-book
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read  # This job only checks out and builds; it never pushes
+    steps:
+    # Pin to a released major version instead of a moving branch.
+    - uses: actions/checkout@v4
+    - name: Install Rust
+      run: |
+        rustup set profile minimal
+        rustup toolchain install stable
+        rustup default stable
+    - name: Install latest mdbook
+      run: |
+        # Abort on the first failing command, including failures inside a pipe.
+        set -euo pipefail
+        tag=$(curl -fsSL 'https://api.github.com/repos/rust-lang/mdbook/releases/latest' | jq -r '.tag_name')
+        url="https://github.com/rust-lang/mdbook/releases/download/${tag}/mdbook-${tag}-x86_64-unknown-linux-gnu.tar.gz"
+        mkdir bin
+        curl -fsSL "$url" | tar -xz --directory=bin
+        # Make the freshly unpacked mdbook binary available to later steps.
+        echo "$(pwd)/bin" >> "$GITHUB_PATH"
+    - name: Run tests
+      # -L points mdbook at the dependency artifacts produced by cargo build.
+      run: cd candle-book && cargo build && mdbook test -L ../target/debug/deps/
--- a/.github/workflows/ci_cuda.yaml
+++ b/.github/workflows/ci_cuda.yaml
+name: CI / cuda
+on:
+  workflow_dispatch:
+  pull_request:
+jobs:
+  # Run the test suite with the cuda feature inside an NVIDIA CUDA container
+  # on a self-hosted GPU runner.
+  test-cuda:
+    concurrency:
+      # One run per workflow/job/branch; a newer push cancels the older run.
+      group: ${{ github.workflow }}-${{ github.job }}-${{ github.head_ref || github.run_id }}
+      cancel-in-progress: true
+    runs-on: [single-gpu, nvidia-gpu, t4, ci]
+    container:
+      image: nvidia/cuda:12.3.1-devel-ubuntu22.04
+      options: --gpus 0
+    # Only run when the pull request head and base repositories are the same.
+    if: ${{ github.event.pull_request.head.repo.full_name == github.event.pull_request.base.repo.full_name }}
+    permissions:
+      contents: read  # The job only checks out and tests; nothing is published
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+      - name: Install dependencies
+        # apt-get is the script-stable front end; plain apt is meant for
+        # interactive use.
+        run: apt-get update && apt-get install -y curl build-essential libssl-dev protobuf-compiler pkg-config
+      - name: Install Rust Stable
+        uses: actions-rust-lang/setup-rust-toolchain@v1
+      - uses: Swatinem/rust-cache@v2
+      - name: Test (cuda)
+        run: cargo test --features cuda
--- a/.github/workflows/maturin.yml
+++ b/.github/workflows/maturin.yml
 Bname: PyO3-Wheels
--- a/.github/workflows/python.yml
+++ b/.github/workflows/python.yml
+name: PyO3-CI
+on:
+  workflow_dispatch:
+  push:
+    branches:
+      - main
+    paths:
+      - candle-pyo3/**
+  pull_request:
+    paths:
+      - candle-pyo3/**
+jobs:
+  # Build the candle-pyo3 bindings into a virtualenv, then run the style
+  # checks and the pytest suite against them.
+  build_and_test:
+    name: Check everything builds & tests
+    runs-on: ${{ matrix.os }}
+    strategy:
+      matrix:
+        os: [ubuntu-latest] # For now, only test on Linux
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+      - name: Install Rust
+        uses: actions-rs/toolchain@v1
+        with:
+          toolchain: stable
+      - name: Install Python
+        uses: actions/setup-python@v5
+        with:
+          # Quoted so the version is read as a string, not a float.
+          python-version: "3.11"
+          architecture: "x64"
+      - name: Cache Cargo Registry
+        # actions/cache v1 and v2 have been retired; v4 is required.
+        uses: actions/cache@v4
+        with:
+          path: ~/.cargo/registry
+          key: ${{ runner.os }}-cargo-registry-${{ hashFiles('**/Cargo.lock') }}
+      - name: Install Protoc
+        uses: arduino/setup-protoc@v2
+        with:
+          version: "25.0"
+          repo-token: ${{ secrets.GITHUB_TOKEN }}
+      - name: Install
+        working-directory: ./candle-pyo3
+        run: |
+          python -m venv .env
+          source .env/bin/activate
+          pip install -U pip
+          pip install pytest maturin black
+          python -m maturin develop -r --features onnx
+      - name: Check style
+        working-directory: ./candle-pyo3
+        run: |
+          source .env/bin/activate
+          python stub.py --check
+          black --check .
+      - name: Run tests
+        working-directory: ./candle-pyo3
+        run: |
+          source .env/bin/activate
+          python -m pytest -s -v tests
\ No newline at end of file
--- a/.github/workflows/rust-ci.yml
+++ b/.github/workflows/rust-ci.yml
+name: Continuous integration
+on:
+  push:
+    branches:
+      - main
+  pull_request:
+jobs:
+  # cargo check across the whole workspace on every supported OS.
+  check:
+    name: Check
+    runs-on: ${{ matrix.os }}
+    strategy:
+      matrix:
+        os: [ubuntu-latest, windows-latest, macOS-latest]
+        rust: [stable]
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions-rs/toolchain@v1
+        with:
+          profile: minimal
+          toolchain: ${{ matrix.rust }}
+          override: true
+      - uses: actions-rs/cargo@v1
+        with:
+          command: check
+          args: --workspace
+  # cargo test across the whole workspace on every supported OS.
+  test:
+    name: Test Suite
+    runs-on: ${{ matrix.os }}
+    strategy:
+      matrix:
+        os: [ubuntu-latest, windows-latest, macOS-latest]
+        rust: [stable]
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions-rs/toolchain@v1
+        with:
+          profile: minimal
+          toolchain: ${{ matrix.rust }}
+          override: true
+      - uses: actions-rs/cargo@v1
+        with:
+          command: test
+          args: --workspace
+  # Formatting gate: fails if rustfmt would change any file.
+  fmt:
+    name: Rustfmt
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions-rs/toolchain@v1
+        with:
+          profile: minimal
+          toolchain: stable
+          override: true
+      # The minimal profile does not ship rustfmt, so add it explicitly.
+      - run: rustup component add rustfmt
+      - uses: actions-rs/cargo@v1
+        with:
+          command: fmt
+          args: --all -- --check
+  # Lint gate: every clippy warning is treated as an error.
+  clippy:
+    name: Clippy
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions-rs/toolchain@v1
+        with:
+          profile: minimal
+          toolchain: stable
+          override: true
+      # The minimal profile does not ship clippy, so add it explicitly.
+      - run: rustup component add clippy
+      - uses: actions-rs/cargo@v1
+        with:
+          command: clippy
+          args: --workspace --tests --examples -- -D warnings
--- a/.gitignore
+++ b/.gitignore
+# Generated by Cargo
+# will have compiled files and executables
+debug/
+data/
+dist/
+target/
+# Remove Cargo.lock from gitignore if creating an executable, leave it for libraries
+# More information here https://doc.rust-lang.org/cargo/guide/cargo-toml-vs-cargo-lock.html
+Cargo.lock
+# These are backup files generated by rustfmt
+**/*.rs.bk
+# MSVC Windows builds of rustc generate these, which store debugging information
+*.pdb
+*tokenizer*.json
+*.npz
+perf.data
+flamegraph.svg
+*.dylib
+*.so
+*.swp
+*.swo
+trace-*.json
+candle-wasm-examples/*/build
+candle-wasm-examples/*/*.bin
+candle-wasm-examples/*/*.jpeg
+candle-wasm-examples/*/audios/*.wav
+candle-wasm-examples/**/*.safetensors
+candle-wasm-examples/**/*.gguf
+candle-wasm-examples/*/package-lock.json
+candle-wasm-examples/**/config*.json
+.DS_Store
+.idea/*
--- a/.gitmodules
+++ b/.gitmodules
+[submodule "candle-flash-attn/cutlass"]
+	path = candle-flash-attn/cutlass
+	url = ssh://git@10.0.54.20:10022/dcutoolkit/mathlibs/cutlass_3.2.1.git
+	branch = f-candle
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
+# pre-commit hooks: run rustfmt and clippy (warnings denied) before a commit.
+repos:
+  - repo: https://github.com/Narsil/pre-commit-rust
+    rev: 2eed6366172ef2a5186e8785ec0e67243d7d73d0
+    hooks:
+      - id: fmt
+        name: 'Rust (fmt)'
+      - id: clippy
+        name: 'Rust (clippy)'
+        args:
+          - '--tests'
+          - '--examples'
+          - '--'
+          - '-Dwarnings'
--- a/.vscode/settings.json
+++ b/.vscode/settings.json
+{
+    "[python]": {
+        "editor.defaultFormatter": "ms-python.black-formatter"
+    },
+    "python.formatting.provider": "none",
+    "python.testing.pytestArgs": [
+        "candle-pyo3"
+    ],
+    "python.testing.unittestEnabled": false,
+    "python.testing.pytestEnabled": true
+}
\ No newline at end of file
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
+# Changelog
+This documents the main changes to the `candle` crate.
+## v0.3.1 - Unreleased
+### Added
+### Modified
+## v0.3.0 - 2023-10-01
+### Added
+- Added the Mistral 7b v0.1 model
+  [983](https://github.com/huggingface/candle/pull/983).
+- Quantized version of the Mistral model
+  [1009](https://github.com/huggingface/candle/pull/1009).
+- Add the gelu-erf op and activation function
+  [969](https://github.com/huggingface/candle/pull/969).
+- Add the mixformer/phi-v1.5 model
+  [930](https://github.com/huggingface/candle/pull/930).
+- Add the slice-scatter op
+  [927](https://github.com/huggingface/candle/pull/927).
+- Add the Wuerstchen diffusion model
+  [911](https://github.com/huggingface/candle/pull/911).
+### Modified
+- Support for simd128 intrinsics in some quantized vecdots
+  [982](https://github.com/huggingface/candle/pull/982).
+- Optimize the index-select cuda kernel
+  [976](https://github.com/huggingface/candle/pull/976).
+- Self-contained safetensor wrappers
+  [946](https://github.com/huggingface/candle/pull/946).
+## v0.2.2 - 2023-09-18
+### Added
+- Support for `top_p` sampling
+  [819](https://github.com/huggingface/candle/pull/819).
+- T5 model including decoding
+  [864](https://github.com/huggingface/candle/pull/864).
+- 1-d upsampling
+  [839](https://github.com/huggingface/candle/pull/839).
+### Modified
+- Bugfix for conv2d
+  [820](https://github.com/huggingface/candle/pull/820).
+- Support tensor based indexing using `.i`
+  [842](https://github.com/huggingface/candle/pull/842).
+## v0.2.1 - 2023-09-11
+### Added
+- Add some RNNs (GRU and LSTM) in `candle-nn`
+  [674](https://github.com/huggingface/candle/pull/674),
+  [688](https://github.com/huggingface/candle/pull/688).
+- gguf v2 support
+  [725](https://github.com/huggingface/candle/pull/725).
+- Quantized llama example in Python using the pyo3 api
+  [716](https://github.com/huggingface/candle/pull/716).
+- `candle-nn` layer for conv2d-transposed
+  [760](https://github.com/huggingface/candle/pull/760).
+- Add the Segment-Anything Model (SAM) as an example
+  [773](https://github.com/huggingface/candle/pull/773).
+- TinyViT backbone for the segment anything example
+  [787](https://github.com/huggingface/candle/pull/787).
+- Shape with holes support
+  [770](https://github.com/huggingface/candle/pull/770).
+### Modified
+- Dilations are now supported in conv-transpose2d.
+  [671](https://github.com/huggingface/candle/pull/671).
+- Interactive mode for the quantized model
+  [690](https://github.com/huggingface/candle/pull/690).
+- Faster softmax operation
+  [747](https://github.com/huggingface/candle/pull/747).
+- Faster convolution operations on CPU and CUDA via im2col
+  [802](https://github.com/huggingface/candle/pull/802).
+- Moving some models to a more central location
+  [796](https://github.com/huggingface/candle/pull/796).
+## v0.2.0 - 2023-08-30
+### Added
+- Add the powf op
+  [664](https://github.com/huggingface/candle/pull/664).
+- Stable Diffusion XL support
+  [647](https://github.com/huggingface/candle/pull/647).
+- Add the conv-transpose2d op
+  [635](https://github.com/huggingface/candle/pull/635).
+- Refactor the VarBuilder api
+  [627](https://github.com/huggingface/candle/pull/627).
+- Add some quantization command
+  [625](https://github.com/huggingface/candle/pull/625).
+- Support more quantized types, e.g. Q2K, Q4K, Q5K...
+  [586](https://github.com/huggingface/candle/pull/586).
+- Add pose estimation to the yolo example
+  [589](https://github.com/huggingface/candle/pull/589).
+- Api to write GGUF files
+  [585](https://github.com/huggingface/candle/pull/585).
+- Support more quantization types
+  [580](https://github.com/huggingface/candle/pull/580).
+- Add EfficientNet as an example Computer Vision model
+  [572](https://github.com/huggingface/candle/pull/572).
+- Add a group parameter to convolutions
+  [566](https://github.com/huggingface/candle/pull/566).
+- New dtype: int64
+  [563](https://github.com/huggingface/candle/pull/563).
+- Handling of the GGUF file format.
+  [559](https://github.com/huggingface/candle/pull/559).
+## v0.1.2 - 2023-08-21
--- a/Cargo.toml
+++ b/Cargo.toml
+[workspace]
+members = [
+    "candle-core",
+    "candle-datasets",
+    "candle-examples",
+    "candle-book",
+    "candle-nn",
+    "candle-pyo3",
+    "candle-transformers",
+    "candle-wasm-examples/*",
+    "candle-wasm-tests",
+    "tensor-tools",
+]
+exclude = [
+   "candle-flash-attn",
+   "candle-kernels",
+   "candle-metal-kernels",
+   "candle-onnx",
+]
+resolver = "2"
+[workspace.package]
+version = "0.5.0"
+edition = "2021"
+description = "Minimalist ML framework."
+repository = "https://github.com/huggingface/candle"
+keywords = ["blas", "tensor", "machine-learning"]
+categories = ["science"]
+license = "MIT OR Apache-2.0"
+[workspace.dependencies]
+ab_glyph = "0.2.23"
+accelerate-src = { version = "0.3.2" }
+anyhow = { version = "1", features = ["backtrace"] }
+byteorder = "1.4.3"
+candle = { path = "./candle-core", package = "candle-core", version = "0.5.0" }
+candle-datasets = { path = "./candle-datasets", version = "0.5.0" }
+candle-flash-attn = { path = "./candle-flash-attn", version = "0.5.0" }
+candle-kernels = { path = "./candle-kernels", version = "0.5.0" }
+candle-metal-kernels = { path = "./candle-metal-kernels", version = "0.5.0" }
+candle-nn = { path = "./candle-nn", version = "0.5.0" }
+candle-onnx = { path = "./candle-onnx", version = "0.5.0" }
+candle-transformers = { path = "./candle-transformers", version = "0.5.0" }
+clap = { version = "4.2.4", features = ["derive"] }
+criterion = { version = "0.5.1", default-features=false }
+cudarc = { version = "0.10.0", features = ["f16"] }
+fancy-regex = "0.13.0"
+gemm = { version = "0.17.0", features = ["wasm-simd128-enable"] }
+hf-hub = "0.4.1"
+half = { version = "2.5.0", features = ["num-traits", "use-intrinsics", "rand_distr"] }
+image = { version = "0.25.0", default-features = false, features = ["jpeg", "png"] }
+imageproc = { version = "0.24.0", default-features = false }
+intel-mkl-src = { version = "0.8.1" }
+libc = { version = "0.2.147" }
+log = "0.4"
+memmap2 = { version = "0.9.3", features = ["stable_deref_trait"] }
+num_cpus = "1.15.0"
+num-traits = "0.2.15"
+parquet = { version = "51.0.0" }
+rand = "0.9.0"
+rand_distr = "0.5.1"
+rayon = "1.7.0"
+safetensors = "0.4.1"
+serde = { version = "1.0.171", features = ["derive"] }
+serde_plain = "1.0.2"
+serde_json = "1.0.99"
+thiserror = "1"
+tokenizers = { version = "0.15.0", default-features = false }
+tracing = "0.1.37"
+tracing-chrome = "0.7.1"
+tracing-subscriber = "0.3.7"
+wav = "1.0.0"
+yoke = { version = "0.7.2", features = ["derive"] }
+zip = { version = "0.6.6", default-features = false }
+metal = { version = "0.27.0", features = ["mps"]}
+[profile.release-with-debug]
+inherits = "release"
+debug = true
--- a/LICENSE-APACHE
+++ b/LICENSE-APACHE
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+   1. Definitions.
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+   END OF TERMS AND CONDITIONS
+   APPENDIX: How to apply the Apache License to your work.
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+   Copyright [yyyy] [name of copyright owner]
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+       http://www.apache.org/licenses/LICENSE-2.0
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
--- a/LICENSE-MIT
+++ b/LICENSE-MIT
+Permission is hereby granted, free of charge, to any
+person obtaining a copy of this software and associated
+documentation files (the "Software"), to deal in the
+Software without restriction, including without
+limitation the rights to use, copy, modify, merge,
+publish, distribute, sublicense, and/or sell copies of
+the Software, and to permit persons to whom the Software
+is furnished to do so, subject to the following
+conditions:
+The above copyright notice and this permission notice
+shall be included in all copies or substantial portions
+of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF
+ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
+TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
+PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
+SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
+IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+DEALINGS IN THE SOFTWARE.
--- a/Makefile
+++ b/Makefile
+# None of these targets produce a file of the same name, so all are phony.
+.PHONY: clean-ptx clean test all
+# Delete generated PTX files, blank out candle-kernels/src/lib.rs and touch
+# the build scripts so the next build runs them again.
+# NOTE(review): this is the first target, so a bare `make` runs it - confirm
+# that is intended.
+clean-ptx:
+	find target -name "*.ptx" -type f -delete
+	echo "" > candle-kernels/src/lib.rs
+	touch candle-kernels/build.rs
+	touch candle-examples/build.rs
+	touch candle-flash-attn/build.rs
+# Remove all cargo build artifacts.
+clean:
+	cargo clean
+# Run the test suite.
+test:
+	cargo test
+# Alias for test.
+all: test
--- a/README.md
+++ b/README.md
+# candle
+[![discord server](https://dcbadge.vercel.app/api/server/hugging-face-879548962464493619)](https://discord.gg/hugging-face-879548962464493619)
+[![Latest version](https://img.shields.io/crates/v/candle-core.svg)](https://crates.io/crates/candle-core)
+[![Documentation](https://docs.rs/candle-core/badge.svg)](https://docs.rs/candle-core)
+![License](https://img.shields.io/crates/l/candle-core.svg)
+Candle is a minimalist ML framework for Rust with a focus on performance (including GPU support) 
+and ease of use. Try our online demos: 
+[whisper](https://huggingface.co/spaces/lmz/candle-whisper),
+[LLaMA2](https://huggingface.co/spaces/lmz/candle-llama2),
+[T5](https://huggingface.co/spaces/radames/Candle-T5-Generation-Wasm),
+[yolo](https://huggingface.co/spaces/lmz/candle-yolo),
+[Segment
+Anything](https://huggingface.co/spaces/radames/candle-segment-anything-wasm).
+## Get started
+Make sure that you have [`candle-core`](https://github.com/huggingface/candle/tree/main/candle-core) correctly installed as described in [**Installation**](https://huggingface.github.io/candle/guide/installation.html).
+Let's see how to run a simple matrix multiplication.
+Write the following to your `myapp/src/main.rs` file:
+```rust
+use candle_core::{Device, Tensor};
+fn main() -> Result<(), Box<dyn std::error::Error>> {
+    let device = Device::Cpu;
+    let a = Tensor::randn(0f32, 1., (2, 3), &device)?;
+    let b = Tensor::randn(0f32, 1., (3, 4), &device)?;
+    let c = a.matmul(&b)?;
+    println!("{c}");
+    Ok(())
+}
+```
+`cargo run` should display a tensor of shape `Tensor[[2, 4], f32]`.
+Having installed `candle` with Cuda support, simply define the `device` to be on GPU:
+```diff
+- let device = Device::Cpu;
++ let device = Device::new_cuda(0)?;
+```
+For more advanced examples, please have a look at the following section.
+## Check out our examples
+These online demos run entirely in your browser:
+- [yolo](https://huggingface.co/spaces/lmz/candle-yolo): pose estimation and
+  object recognition.
+- [whisper](https://huggingface.co/spaces/lmz/candle-whisper): speech recognition.
+- [LLaMA2](https://huggingface.co/spaces/lmz/candle-llama2): text generation.
+- [T5](https://huggingface.co/spaces/radames/Candle-T5-Generation-Wasm): text generation.
+- [Phi-1.5, and Phi-2](https://huggingface.co/spaces/radames/Candle-Phi-1.5-Wasm): text generation.
+- [Segment Anything Model](https://huggingface.co/spaces/radames/candle-segment-anything-wasm): Image segmentation.
+- [BLIP](https://huggingface.co/spaces/radames/Candle-BLIP-Image-Captioning): image captioning.
+We also provide some command-line based examples using state-of-the-art models:
+- [LLaMA and LLaMA-v2](./candle-examples/examples/llama/): general LLM, includes
+  the SOLAR-10.7B variant.
+- [Falcon](./candle-examples/examples/falcon/): general LLM.
+- [Gemma](./candle-examples/examples/gemma/): 2b and 7b general LLMs from Google
+  Deepmind.
+- [Phi-1, Phi-1.5, and Phi-2](./candle-examples/examples/phi/): 1.3b and 2.7b general LLMs with performance on par with LLaMA-v2 7b.
+- [StableLM-3B-4E1T](./candle-examples/examples/stable-lm/): a 3b general LLM
+  pre-trained on 1T tokens of English and code datasets. Also supports
+  StableLM-2, a 1.6b LLM trained on 2T tokens, as well as the code variants.
+- [Mamba](./candle-examples/examples/mamba/): an inference only
+  implementation of the Mamba state space model.
+- [Mistral7b-v0.1](./candle-examples/examples/mistral/): a 7b general LLM with
+  better performance than all publicly available 13b models as of 2023-09-28.
+- [Mixtral8x7b-v0.1](./candle-examples/examples/mixtral/): a sparse mixture of
+  experts 8x7b general LLM with better performance than a Llama 2 70B model with
+  much faster inference.
+- [StarCoder](./candle-examples/examples/bigcode/) and
+  [StarCoder2](./candle-examples/examples/starcoder2/): LLM specialized to code generation.
+- [Qwen1.5](./candle-examples/examples/qwen/): Bilingual (English/Chinese) LLMs.
+- [RWKV v5 and v6](./candle-examples/examples/rwkv/): An RNN with transformer level LLM
+  performance.
+- [Replit-code-v1.5](./candle-examples/examples/replit-code/): a 3.3b LLM specialized for code completion.
+- [Yi-6B / Yi-34B](./candle-examples/examples/yi/): two bilingual
+  (English/Chinese) general LLMs with 6b and 34b parameters.
+- [Quantized LLaMA](./candle-examples/examples/quantized/): quantized version of
+  the LLaMA model using the same quantization techniques as
+  [llama.cpp](https://github.com/ggerganov/llama.cpp).
+<img src="https://github.com/huggingface/candle/raw/main/candle-examples/examples/quantized/assets/aoc.gif" width="600">
+- [Stable Diffusion](./candle-examples/examples/stable-diffusion/): text to
+  image generative model, support for the 1.5, 2.1, SDXL 1.0 and Turbo versions.
+<img src="https://github.com/huggingface/candle/raw/main/candle-examples/examples/stable-diffusion/assets/stable-diffusion-xl.jpg" width="200">
+- [Wuerstchen](./candle-examples/examples/wuerstchen/): another text to
+  image generative model.
+<img src="https://github.com/huggingface/candle/raw/main/candle-examples/examples/wuerstchen/assets/cat.jpg" width="200">
+- [yolo-v3](./candle-examples/examples/yolo-v3/) and
+  [yolo-v8](./candle-examples/examples/yolo-v8/): object detection and pose
+  estimation models.
+<img src="https://github.com/huggingface/candle/raw/main/candle-examples/examples/yolo-v8/assets/bike.od.jpg" width="200"><img src="https://github.com/huggingface/candle/raw/main/candle-examples/examples/yolo-v8/assets/bike.pose.jpg" width="200">
+- [segment-anything](./candle-examples/examples/segment-anything/): image
+  segmentation model with prompt.
+<img src="https://github.com/huggingface/candle/raw/main/candle-examples/examples/segment-anything/assets/sam_merged.jpg" width="200">
+- [SegFormer](./candle-examples/examples/segformer/): transformer based semantic segmentation model.
+- [Whisper](./candle-examples/examples/whisper/): speech recognition model.
+- [EnCodec](./candle-examples/examples/encodec/): high-quality audio compression
+  model using residual vector quantization.
+- [MetaVoice](./candle-examples/examples/metavoice/): foundational model for
+  text-to-speech.
+- [T5](./candle-examples/examples/t5), [Bert](./candle-examples/examples/bert/),
+  [JinaBert](./candle-examples/examples/jina-bert/) : useful for sentence embeddings.
+- [DINOv2](./candle-examples/examples/dinov2/): computer vision model trained
+  using self-supervision (can be used for imagenet classification, depth
+  evaluation, segmentation).
+- [VGG](./candle-examples/examples/vgg/),
+  [RepVGG](./candle-examples/examples/repvgg): computer vision models.
+- [BLIP](./candle-examples/examples/blip/): image to text model, can be used to
+  generate captions for an image.
+- [CLIP](./candle-examples/examples/clip/): multi-modal vision and language
+  model.
+- [TrOCR](./candle-examples/examples/trocr/): a transformer OCR model, with
+  dedicated submodels for hand-writing and printed recognition.
+- [Marian-MT](./candle-examples/examples/marian-mt/): neural machine translation
+  model, generates the translated text from the input text.
+- [Moondream](./candle-examples/examples/moondream/): tiny computer-vision model 
+  that can answer real-world questions about images.
+Run them using commands like:
+```
+cargo run --example quantized --release
+```
+In order to use **CUDA** add `--features cuda` to the example command line. If
+you have cuDNN installed, use `--features cudnn` for even more speedups.
+There are also some wasm examples for whisper and
+[llama2.c](https://github.com/karpathy/llama2.c). You can either build them with
+`trunk` or try them online:
+[whisper](https://huggingface.co/spaces/lmz/candle-whisper),
+[llama2](https://huggingface.co/spaces/lmz/candle-llama2),
+[T5](https://huggingface.co/spaces/radames/Candle-T5-Generation-Wasm),
+[Phi-1.5, and Phi-2](https://huggingface.co/spaces/radames/Candle-Phi-1.5-Wasm),
+[Segment Anything Model](https://huggingface.co/spaces/radames/candle-segment-anything-wasm).
+For LLaMA2, run the following command to retrieve the weight files and start a
+test server:
+```bash
+cd candle-wasm-examples/llama2-c
+wget https://huggingface.co/spaces/lmz/candle-llama2/resolve/main/model.bin
+wget https://huggingface.co/spaces/lmz/candle-llama2/resolve/main/tokenizer.json
+trunk serve --release --port 8081
+```
+And then head over to
+[http://localhost:8081/](http://localhost:8081/).
+<!--- ANCHOR: useful_libraries --->
+## Useful External Resources
+- [`candle-tutorial`](https://github.com/ToluClassics/candle-tutorial): A
+  very detailed tutorial showing how to convert a PyTorch model to Candle.
+- [`candle-lora`](https://github.com/EricLBuehler/candle-lora): Efficient and
+  ergonomic LoRA implementation for Candle. `candle-lora` has
+  out-of-the-box LoRA support for many models from Candle, which can be found
+  [here](https://github.com/EricLBuehler/candle-lora/tree/master/candle-lora-transformers/examples).
+- [`optimisers`](https://github.com/KGrewal1/optimisers): A collection of optimisers
+  including SGD with momentum, AdaGrad, AdaDelta, AdaMax, NAdam, RAdam, and RMSprop.
+- [`candle-vllm`](https://github.com/EricLBuehler/candle-vllm): Efficient platform for inference and
+  serving local LLMs including an OpenAI compatible API server.
+- [`candle-ext`](https://github.com/mokeyish/candle-ext): An extension library to Candle that provides PyTorch functions not currently available in Candle.
+- [`candle-coursera-ml`](https://github.com/vishpat/candle-coursera-ml): Implementation of ML algorithms from Coursera's [Machine Learning Specialization](https://www.coursera.org/specializations/machine-learning-introduction) course.
+- [`kalosm`](https://github.com/floneum/floneum/tree/master/interfaces/kalosm): A multi-modal meta-framework in Rust for interfacing with local pre-trained models with support for controlled generation, custom samplers, in-memory vector databases, audio transcription, and more.
+- [`candle-sampling`](https://github.com/EricLBuehler/candle-sampling): Sampling techniques for Candle.
+- [`gpt-from-scratch-rs`](https://github.com/jeroenvlek/gpt-from-scratch-rs): A port of Andrej Karpathy's _Let's build GPT_ tutorial on YouTube showcasing the Candle API on a toy problem.
+- [`candle-einops`](https://github.com/tomsanbear/candle-einops): A pure Rust implementation of the Python [einops](https://github.com/arogozhnikov/einops) library.
+If you have an addition to this list, please submit a pull request.
+<!--- ANCHOR_END: useful_libraries --->
+<!--- ANCHOR: features --->
+## Features
+- Simple syntax, looks and feels like PyTorch.
+    - Model training.
+    - Embed user-defined ops/kernels, such as [flash-attention v2](https://github.com/huggingface/candle/blob/89ba005962495f2bfbda286e185e9c3c7f5300a3/candle-flash-attn/src/lib.rs#L152).
+- Backends.
+    - Optimized CPU backend with optional MKL support for x86 and Accelerate for Macs.
+    - CUDA backend for efficiently running on GPUs, multiple GPU distribution via NCCL.
+    - WASM support, run your models in a browser.
+- Included models.
+    - Language Models.
+        - LLaMA v1 and v2 with variants such as SOLAR-10.7B.
+        - Falcon.
+        - StarCoder, StarCoder2.
+        - Phi 1, 1.5, and 2.
+        - Mamba, Minimal Mamba.
+        - Gemma 2b and 7b.
+        - Mistral 7b v0.1.
+        - Mixtral 8x7b v0.1.
+        - StableLM-3B-4E1T, StableLM-2-1.6B, Stable-Code-3B.
+        - Replit-code-v1.5-3B.
+        - Bert.
+        - Yi-6B and Yi-34B.
+        - Qwen1.5, Qwen1.5 MoE.
+        - RWKV v5 and v6.
+    - Quantized LLMs.
+        - Llama 7b, 13b, 70b, as well as the chat and code variants.
+        - Mistral 7b, and 7b instruct.
+        - Mixtral 8x7b.
+        - Zephyr 7b a and b (Mistral-7b based).
+        - OpenChat 3.5 (Mistral-7b based).
+    - Text to text.
+        - T5 and its variants: FlanT5, UL2, MADLAD400 (translation), CoEdit (Grammar correction).
+        - Marian MT (Machine Translation).
+    - Text to image.
+        - Stable Diffusion v1.5, v2.1, XL v1.0.
+        - Wuerstchen v2.
+    - Image to text.
+        - BLIP.
+        - TrOCR.
+    - Audio.
+        - Whisper, multi-lingual speech-to-text.
+        - EnCodec, audio compression model.
+        - MetaVoice-1B, text-to-speech model.
+    - Computer Vision Models.
+        - DINOv2, ConvMixer, EfficientNet, ResNet, ViT, VGG, RepVGG, ConvNeXT,
+          ConvNeXTv2, MobileOne, EfficientVit (MSRA).
+        - yolo-v3, yolo-v8.
+        - Segment-Anything Model (SAM).
+        - SegFormer.
+- File formats: load models from safetensors, npz, ggml, or PyTorch files.
+- Serverless (on CPU), small and fast deployments.
+- Quantization support using the llama.cpp quantized types.
+<!--- ANCHOR_END: features --->
+## How to use
+<!--- ANCHOR: cheatsheet --->
+Cheatsheet:
+|            | Using PyTorch                            | Using Candle                                                     |
+|------------|------------------------------------------|------------------------------------------------------------------|
+| Creation   | `torch.Tensor([[1, 2], [3, 4]])`         | `Tensor::new(&[[1f32, 2.], [3., 4.]], &Device::Cpu)?`           |
+| Creation   | `torch.zeros((2, 2))`                    | `Tensor::zeros((2, 2), DType::F32, &Device::Cpu)?`               |
+| Indexing   | `tensor[:, :4]`                          | `tensor.i((.., ..4))?`                                           |
+| Operations | `tensor.view((2, 2))`                    | `tensor.reshape((2, 2))?`                                        |
+| Operations | `a.matmul(b)`                            | `a.matmul(&b)?`                                                  |
+| Arithmetic | `a + b`                                  | `&a + &b`                                                        |
+| Device     | `tensor.to(device="cuda")`               | `tensor.to_device(&Device::new_cuda(0)?)?`                            |
+| Dtype      | `tensor.to(dtype=torch.float16)`         | `tensor.to_dtype(DType::F16)?`                                   |
+| Saving     | `torch.save({"A": A}, "model.bin")`      | `candle::safetensors::save(&HashMap::from([("A", A)]), "model.safetensors")?` |
+| Loading    | `weights = torch.load("model.bin")`      | `candle::safetensors::load("model.safetensors", &device)`        |
+<!--- ANCHOR_END: cheatsheet --->
+## Structure
+- [candle-core](./candle-core): Core ops, devices, and `Tensor` struct definition
+- [candle-nn](./candle-nn/): Tools to build real models
+- [candle-examples](./candle-examples/): Examples of using the library in realistic settings
+- [candle-kernels](./candle-kernels/): CUDA custom kernels
+- [candle-datasets](./candle-datasets/): Datasets and data loaders.
+- [candle-transformers](./candle-transformers): transformers-related utilities.
+- [candle-flash-attn](./candle-flash-attn): Flash attention v2 layer.
+- [candle-onnx](./candle-onnx/): ONNX model evaluation.
+## FAQ
+### Why should I use Candle?
+Candle's core goal is to *make serverless inference possible*. Full machine learning frameworks like PyTorch
+are very large, which makes creating instances on a cluster slow. Candle allows deployment of lightweight
+binaries.
+Secondly, Candle lets you *remove Python* from production workloads. Python overhead can seriously hurt performance,
+and the [GIL](https://www.backblaze.com/blog/the-python-gil-past-present-and-future/) is a notorious source of headaches.
+Finally, Rust is cool! A lot of the HF ecosystem already has Rust crates, like [safetensors](https://github.com/huggingface/safetensors) and [tokenizers](https://github.com/huggingface/tokenizers).
+### Other ML frameworks
+- [dfdx](https://github.com/coreylowman/dfdx) is a formidable crate, with shapes being included
+  in types. This prevents a lot of headaches by getting the compiler to complain about shape mismatches right off the bat.
+  However, we found that some features still require nightly, and writing code can be a bit daunting for non-Rust experts.
+  We're leveraging and contributing to other core crates for the runtime so hopefully both crates can benefit from each
+  other.
+- [burn](https://github.com/burn-rs/burn) is a general crate that can leverage multiple backends so you can choose the best
+  engine for your workload.
+- [tch-rs](https://github.com/LaurentMazare/tch-rs.git) provides bindings to the torch library in Rust. They are extremely
+  versatile, but they bring the entire torch library into the runtime. The main contributor of `tch-rs` is also involved in the development
+  of `candle`.
+### Common Errors
+#### Missing symbols when compiling with the mkl feature.
+If you get some missing symbols when compiling binaries/tests using the mkl
+or accelerate features, e.g. for mkl you get:
+```
+  = note: /usr/bin/ld: (....o): in function `blas::sgemm':
+          .../blas-0.22.0/src/lib.rs:1944: undefined reference to `sgemm_' collect2: error: ld returned 1 exit status
+  = note: some `extern` functions couldn't be found; some native libraries may need to be installed or have their path specified
+  = note: use the `-l` flag to specify native libraries to link
+  = note: use the `cargo:rustc-link-lib` directive to specify the native libraries to link with Cargo
+```
+or for accelerate:
+```
+Undefined symbols for architecture arm64:
+            "_dgemm_", referenced from:
+                candle_core::accelerate::dgemm::h1b71a038552bcabe in libcandle_core...
+            "_sgemm_", referenced from:
+                candle_core::accelerate::sgemm::h2cf21c592cba3c47 in libcandle_core...
+          ld: symbol(s) not found for architecture arm64
+```
+This is likely due to a missing linker flag that was needed to enable the mkl library. You
+can try adding the following for mkl at the top of your binary:
+```rust
+extern crate intel_mkl_src;
+```
+or for accelerate:
+```rust
+extern crate accelerate_src;
+```
+#### Cannot run the LLaMA examples: access to source requires login credentials
+```
+Error: request error: https://huggingface.co/meta-llama/Llama-2-7b-hf/resolve/main/tokenizer.json: status code 401
+```
+This is likely because you do not have permission to access the LLaMA-v2 model. To fix
+this, you have to register on the huggingface-hub, accept the [LLaMA-v2 model
+conditions](https://huggingface.co/meta-llama/Llama-2-7b-hf), and set up your
+authentication token. See issue
+[#350](https://github.com/huggingface/candle/issues/350) for more details.
+#### Missing cute/cutlass headers when compiling flash-attn
+```
+  In file included from kernels/flash_fwd_launch_template.h:11:0,
+                   from kernels/flash_fwd_hdim224_fp16_sm80.cu:5:
+  kernels/flash_fwd_kernel.h:8:10: fatal error: cute/algorithm/copy.hpp: No such file or directory
+   #include <cute/algorithm/copy.hpp>
+            ^~~~~~~~~~~~~~~~~~~~~~~~~
+  compilation terminated.
+  Error: nvcc error while compiling:
+```
+[cutlass](https://github.com/NVIDIA/cutlass) is provided as a git submodule so you may want to run the following command to check it out properly.
+```bash
+git submodule update --init
+```
+#### Compiling with flash-attention fails
+```
+/usr/include/c++/11/bits/std_function.h:530:146: error: parameter packs not expanded with ‘...’:
+```
+This is a bug in gcc-11 triggered by the CUDA compiler. To fix this, install a different, supported gcc version (for example gcc-10) and specify the path to the compiler in the `CANDLE_NVCC_CCBIN` environment variable.
+```
+env CANDLE_NVCC_CCBIN=/usr/lib/gcc/x86_64-linux-gnu/10 cargo ...
+```
+#### Linking error on windows when running rustdoc or mdbook tests
+```
+Couldn't compile the test.
+---- .\candle-book\src\inference\hub.md - Using_the_hub::Using_in_a_real_model_ (line 50) stdout ----
+error: linking with `link.exe` failed: exit code: 1181
+//very long chain of linking
+ = note: LINK : fatal error LNK1181: cannot open input file 'windows.0.48.5.lib'
+```
+Make sure you link all native libraries that might be located outside a project target, e.g., to run mdbook tests, you should run:
+```
+mdbook test candle-book -L .\target\debug\deps\ `
+-L native=$env:USERPROFILE\.cargo\registry\src\index.crates.io-6f17d22bba15001f\windows_x86_64_msvc-0.42.2\lib `
+-L native=$env:USERPROFILE\.cargo\registry\src\index.crates.io-6f17d22bba15001f\windows_x86_64_msvc-0.48.5\lib
+```
+#### Extremely slow model load time with WSL
+This may be caused by the models being loaded from `/mnt/c`, more details on
+[stackoverflow](https://stackoverflow.com/questions/68972448/why-is-wsl-extremely-slow-when-compared-with-native-windows-npm-yarn-processing).
+#### Tracking down errors
+You can set `RUST_BACKTRACE=1` to be provided with backtraces when a candle
+error is generated.
--- a/candle-book/.gitignore
+++ b/candle-book/.gitignore
+book
--- a/candle-book/Cargo.toml
+++ b/candle-book/Cargo.toml
+[package]
+name = "candle-book"
+version.workspace = true
+edition.workspace = true
+description.workspace = true
+repository.workspace = true
+keywords.workspace = true
+categories.workspace = true
+license.workspace = true
+readme = "README.md"
+[dependencies]
+accelerate-src = { workspace = true, optional = true }
+candle = { workspace = true }
+candle-datasets = { workspace = true }
+candle-nn = { workspace = true }
+candle-transformers = { workspace = true }
+candle-flash-attn = { workspace = true, optional = true }
+safetensors = { workspace = true }
+serde = { workspace = true }
+serde_json = { workspace = true }
+num-traits = { workspace = true }
+intel-mkl-src = { workspace = true, optional = true }
+cudarc = { workspace = true, optional = true }
+half = { workspace = true, optional = true }
+image = { workspace = true, optional = true }
+anyhow = { workspace = true }
+tokio = "1.29.1"
+[dev-dependencies]
+byteorder = { workspace = true }
+hf-hub = { workspace = true, features=["tokio"]}
+clap = { workspace = true }
+memmap2 = { workspace = true }
+rand = { workspace = true }
+tokenizers = { workspace = true, features = ["onig"] }
+tracing = { workspace = true }
+tracing-chrome = { workspace = true }
+tracing-subscriber = { workspace = true }
+wav = { workspace = true }
+# Note: `tokio` is pinned to 1.29.1 under [dependencies] to disambiguate with tokio in wasm examples which are 1.28.1
+parquet = { workspace = true }
+image = { workspace = true }
+[build-dependencies]
+anyhow = { workspace = true }
+[features]
+default = []