Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
yongshk
candle
Commits
25d2752f
Commit
25d2752f
authored
May 29, 2025
by
yongshk
Browse files
Initial commit
parents
Changes
238
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
1608 additions
and
0 deletions
+1608
-0
candle-examples/examples/custom-ops/cuda_kernels.rs
candle-examples/examples/custom-ops/cuda_kernels.rs
+1
-0
candle-examples/examples/custom-ops/kernels/layernorm_kernels.cu
...examples/examples/custom-ops/kernels/layernorm_kernels.cu
+35
-0
candle-examples/examples/custom-ops/kernels/reduction_utils.cuh
...-examples/examples/custom-ops/kernels/reduction_utils.cuh
+46
-0
candle-examples/examples/custom-ops/main.rs
candle-examples/examples/custom-ops/main.rs
+94
-0
candle-examples/examples/dinov2/README.md
candle-examples/examples/dinov2/README.md
+19
-0
candle-examples/examples/dinov2/main.rs
candle-examples/examples/dinov2/main.rs
+62
-0
candle-examples/examples/distilbert/README.md
candle-examples/examples/distilbert/README.md
+22
-0
candle-examples/examples/distilbert/main.rs
candle-examples/examples/distilbert/main.rs
+135
-0
candle-examples/examples/efficientnet/main.rs
candle-examples/examples/efficientnet/main.rs
+98
-0
candle-examples/examples/efficientvit/README.md
candle-examples/examples/efficientvit/README.md
+20
-0
candle-examples/examples/efficientvit/main.rs
candle-examples/examples/efficientvit/main.rs
+99
-0
candle-examples/examples/encodec/README.md
candle-examples/examples/encodec/README.md
+25
-0
candle-examples/examples/encodec/audio_io.rs
candle-examples/examples/encodec/audio_io.rs
+275
-0
candle-examples/examples/encodec/jfk-codes.safetensors
candle-examples/examples/encodec/jfk-codes.safetensors
+0
-0
candle-examples/examples/encodec/main.rs
candle-examples/examples/encodec/main.rs
+131
-0
candle-examples/examples/falcon/README.md
candle-examples/examples/falcon/README.md
+3
-0
candle-examples/examples/falcon/main.rs
candle-examples/examples/falcon/main.rs
+194
-0
candle-examples/examples/gemma/README.md
candle-examples/examples/gemma/README.md
+27
-0
candle-examples/examples/gemma/main.rs
candle-examples/examples/gemma/main.rs
+277
-0
candle-examples/examples/jina-bert/README.md
candle-examples/examples/jina-bert/README.md
+45
-0
No files found.
Too many changes to show.
To preserve performance only
238 of 238+
files are displayed.
Plain diff
Email patch
candle-examples/examples/custom-ops/cuda_kernels.rs
0 → 100644
View file @
25d2752f
/// PTX source for the layer-norm CUDA kernels.
///
/// The `.cu` file is compiled to PTX by the build script; `OUT_DIR` is set by
/// cargo at build time, so this string is embedded into the binary at compile
/// time and later handed to `get_or_load_func` to JIT-load the module.
pub const LAYERNORM_KERNELS: &str = include_str!(concat!(env!("OUT_DIR"), "/layernorm_kernels.ptx"));
candle-examples/examples/custom-ops/kernels/layernorm_kernels.cu
0 → 100644
View file @
25d2752f
#include <stdint.h>
#include "reduction_utils.cuh"
// RMS normalization over the last dimension.
//
// Launch layout (as used by the host wrapper in main.rs): one block per token
// (blockIdx.x indexes the row) and blockDim.x threads cooperating over the
// hidden_size elements of that row via a block-stride loop.
//
// Pass 1 accumulates sum(x^2) for the row, reduces it across the block, and
// thread 0 publishes rsqrt(mean + eps) through shared memory; pass 2 scales
// every element by that factor.
template <typename scalar_t>
__device__ void rms_norm_kernel(
    scalar_t *__restrict__ out,          // [num_tokens, hidden_size]
    const scalar_t *__restrict__ input,  // [num_tokens, hidden_size]
    const float epsilon,
    const uint32_t num_tokens,           // unused here; kept for the kernel ABI
    const uint32_t hidden_size) {
  // Broadcast slot for the per-row scale computed by thread 0.
  __shared__ float s_variance;
  float variance = 0.0f;

  // Block-stride accumulation of sum(x^2); math is done in float even when
  // scalar_t is narrower.
  for (int idx = threadIdx.x; idx < hidden_size; idx += blockDim.x) {
    const float x = (float)input[blockIdx.x * hidden_size + idx];
    variance += x * x;
  }
  variance = blockReduceSum<float>(variance);
  // blockReduceSum returns the full sum on the first warp; thread 0 turns it
  // into the reciprocal RMS and publishes it.
  if (threadIdx.x == 0) {
    s_variance = rsqrtf(variance / hidden_size + epsilon);
  }
  // All threads must see s_variance before the scaling pass.
  __syncthreads();

  // Pass 2: scale each element of the row and cast back to scalar_t.
  for (int idx = threadIdx.x; idx < hidden_size; idx += blockDim.x) {
    float x = (float)input[blockIdx.x * hidden_size + idx];
    out[blockIdx.x * hidden_size + idx] = ((scalar_t)(x * s_variance));
  }
}
// C-linkage entry point so the PTX symbol is exactly "rms_f32" (the name the
// Rust side looks up via get_or_load_func).
//
// Expected launch layout: grid.x = num_tokens (one block per row),
// blockDim.x threads cooperating over hidden_size elements. No dynamic
// shared memory is required.
extern "C" __global__ void rms_f32(float *__restrict__ out,          // [num_tokens, hidden_size]
                                   const float *__restrict__ input,  // [num_tokens, hidden_size]
                                   const float epsilon,
                                   const uint32_t num_tokens,
                                   const uint32_t hidden_size) {
  // Delegate to the shared template implementation, explicitly instantiated
  // for f32 (identical to the instantiation deduced from the argument types).
  rms_norm_kernel<float>(out, input, epsilon, num_tokens, hidden_size);
}
candle-examples/examples/custom-ops/kernels/reduction_utils.cuh
0 → 100644
View file @
25d2752f
/*
* Adapted from
* https://github.com/NVIDIA/FasterTransformer/blob/release/v5.3_tag/src/fastertransformer/kernels/reduce_kernel_utils.cuh
* Copyright (c) 2023, The vLLM team.
* Copyright (c) 2020-2023, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
// Sum-reduce `val` across the 32 lanes of the calling warp.
//
// Uses a butterfly (XOR) shuffle with the full-warp mask, so after the loop
// EVERY lane holds the warp-wide sum. All 32 lanes must be active when this
// is called (the 0xffffffff participant mask assumes a full warp).
template <typename T>
__inline__ __device__ T warpReduceSum(T val) {
#pragma unroll
  for (int mask = 16; mask > 0; mask >>= 1)
    val += __shfl_xor_sync(0xffffffff, val, mask, 32);
  return val;
}
/* Calculate the sum of all elements in a block */
// Two-level reduction: each warp reduces internally, lane 0 of each warp
// stores its partial into static shared memory, then the first warp reduces
// the partials.
//
// NOTE(review): after the second warpReduceSum only threads of the FIRST
// warp hold the block-wide sum; threads in other warps fail the
// `threadIdx.x < blockDim.x / 32.f` guard, reduce zeros, and return 0. The
// caller in layernorm_kernels.cu only consumes the value on threadIdx.x == 0,
// which is consistent with that.
//
// NOTE(review): `shared` is `static __shared__`, so back-to-back calls from
// the same block would need an extra __syncthreads() between them to avoid a
// write-after-read hazard — not an issue for the single-call usage here, but
// worth confirming before reusing this helper.
template <typename T>
__inline__ __device__ T blockReduceSum(T val) {
  static __shared__ T shared[32];  // one partial per warp (max 32 warps/block)
  int lane = threadIdx.x & 0x1f;   // lane index within the warp
  int wid = threadIdx.x >> 5;      // warp index within the block

  // Level 1: every warp reduces its own 32 values.
  val = warpReduceSum<T>(val);

  if (lane == 0)
    shared[wid] = val;

  // All partials must be visible before the first warp reads them.
  __syncthreads();

  // Modify from blockDim.x << 5 to blockDim.x / 32. to prevent
  // blockDim.x is not divided by 32
  // (float division rounds up the effective warp count, so a trailing
  // partial warp's slot is still read).
  val = (threadIdx.x < (blockDim.x / 32.f)) ? shared[lane] : (T)(0.0f);
  // Level 2: the first warp reduces the per-warp partials.
  val = warpReduceSum<T>(val);
  return val;
}
candle-examples/examples/custom-ops/main.rs
0 → 100644
View file @
25d2752f
// This example illustrates how to implement custom operations. These operations can provide their
// own forward pass (CPU and GPU versions) as well as their backward pass.
//
// In this example we add the RMS normalization operation and implement it for f32.
#[cfg(any(feature
=
"mkl"
,
feature
=
"mkl-dynamic"
))]
extern
crate
intel_mkl_src
;
#[rustfmt::skip]
#[cfg(feature
=
"cuda"
)]
mod
cuda_kernels
;
use
clap
::
Parser
;
use
candle
::{
CpuStorage
,
CustomOp1
,
Layout
,
Result
,
Shape
,
Tensor
};
/// Command-line arguments for the custom-ops example.
#[derive(Parser, Debug)]
#[command(author, version, about, long_about = None)]
struct Args {
    /// Run on CPU rather than on GPU.
    #[arg(long)]
    cpu: bool,
}
struct
LayerNorm
{
eps
:
f32
,
}
impl CustomOp1 for LayerNorm {
    /// Name reported by candle for error messages and tracing.
    fn name(&self) -> &'static str {
        "layer-norm"
    }

    /// CPU forward pass: RMS-normalize each row of a contiguous 2D f32 input.
    ///
    /// For every row, computes `x * 1/sqrt(mean(x^2) + eps)` — mirroring the
    /// CUDA kernel in kernels/layernorm_kernels.cu. Returns a new storage with
    /// the same shape. Errors if the input is not contiguous or not 2D f32.
    fn cpu_fwd(&self, storage: &CpuStorage, layout: &Layout) -> Result<(CpuStorage, Shape)> {
        let (dim1, dim2) = layout.shape().dims2()?;
        let slice = storage.as_slice::<f32>()?;
        let src = match layout.contiguous_offsets() {
            None => candle::bail!("input has to be contiguous"),
            Some((o1, o2)) => &slice[o1..o2],
        };
        let mut dst = Vec::with_capacity(dim1 * dim2);
        for idx1 in 0..dim1 {
            // One row of `dim2` elements.
            let src = &src[idx1 * dim2..(idx1 + 1) * dim2];
            // sum(x^2), then reciprocal RMS with the stabilizing epsilon.
            let variance = src.iter().map(|x| x * x).sum::<f32>();
            let s_variance = 1f32 / (variance / dim2 as f32 + self.eps).sqrt();
            dst.extend(src.iter().map(|x| x * s_variance))
        }
        let storage = candle::WithDType::to_cpu_storage_owned(dst);
        Ok((storage, layout.shape().clone()))
    }

    /// CUDA forward pass: launches the `rms_f32` kernel from the embedded PTX.
    ///
    /// Launch layout: one block per row (grid.x = d1) with d2 threads each.
    /// NOTE(review): block_dim.x = d2 assumes the hidden size does not exceed
    /// the device's max threads per block (typically 1024) — confirm for
    /// large hidden sizes.
    #[cfg(feature = "cuda")]
    fn cuda_fwd(
        &self,
        storage: &candle::CudaStorage,
        layout: &Layout,
    ) -> Result<(candle::CudaStorage, Shape)> {
        use candle::backend::BackendStorage;
        use candle::cuda_backend::cudarc::driver::{LaunchAsync, LaunchConfig};
        use candle::cuda_backend::WrapErr;
        let (d1, d2) = layout.shape().dims2()?;
        let d1 = d1 as u32;
        let d2 = d2 as u32;
        let dev = storage.device().clone();
        let slice = storage.as_cuda_slice::<f32>()?;
        // Like the CPU path, only contiguous inputs are supported.
        let slice = match layout.contiguous_offsets() {
            None => candle::bail!("input has to be contiguous"),
            Some((o1, o2)) => slice.slice(o1..o2),
        };
        let elem_count = layout.shape().elem_count();
        // Uninitialized output buffer; every element is written by the kernel.
        let dst = unsafe { dev.alloc::<f32>(elem_count) }.w()?;
        // JIT-load (or fetch from cache) the PTX embedded at build time.
        let func = dev.get_or_load_func("rms_f32", cuda_kernels::LAYERNORM_KERNELS)?;
        // Matches the rms_f32 signature: (out, input, epsilon, num_tokens, hidden_size).
        let params = (&dst, &slice, self.eps, d1, d2);
        let cfg = LaunchConfig {
            grid_dim: (d1, 1, 1),
            block_dim: (d2, 1, 1),
            shared_mem_bytes: 0,
        };
        unsafe { func.launch(cfg, params) }.w()?;
        let dst = candle::CudaStorage::wrap_cuda_slice(dst, dev);
        Ok((dst, layout.shape().clone()))
    }
}
/// Demonstrates the custom op: builds a small 2x7 tensor, applies the custom
/// RMS-norm, and prints the tensor before and after.
fn main() -> anyhow::Result<()> {
    let args = Args::parse();
    // CPU or GPU depending on --cpu and build features.
    let device = candle_examples::device(args.cpu)?;
    // 0..14 reshaped to two rows of seven values.
    let t = Tensor::arange(0f32, 14f32, &device)?.reshape((2, 7))?;
    println!("{t}");
    // Dispatches to cpu_fwd or cuda_fwd depending on the device.
    let t = t.apply_op1(LayerNorm { eps: 1e-5 })?;
    println!("{t}");
    Ok(())
}
candle-examples/examples/dinov2/README.md
0 → 100644
View file @
25d2752f
# candle-dinov2
[
DINOv2
](
https://github.com/facebookresearch/dinov2
)
is a computer vision model.
In this example, it is used as an ImageNet classifier: the model returns the
probability for the image to belong to each of the 1000 ImageNet categories.
## Running an example
```
bash
cargo run
--example
dinov2
--release
--
--image
candle-examples/examples/yolo-v8/assets/bike.jpg
>
mountain bike, all-terrain bike, off-roader: 43.67%
>
bicycle-built-for-two, tandem bicycle, tandem: 33.20%
>
crash helmet : 13.23%
>
unicycle, monocycle : 2.44%
>
maillot : 2.42%
```

candle-examples/examples/dinov2/main.rs
0 → 100644
View file @
25d2752f
//! DINOv2: Learning Robust Visual Features without Supervision
//! https://github.com/facebookresearch/dinov2
#[cfg(any(feature
=
"mkl"
,
feature
=
"mkl-dynamic"
))]
extern
crate
intel_mkl_src
;
#[cfg(feature
=
"accelerate"
)]
extern
crate
accelerate_src
;
use
clap
::
Parser
;
use
candle
::{
DType
,
IndexOp
,
D
};
use
candle_nn
::{
Module
,
VarBuilder
};
use
candle_transformers
::
models
::
dinov2
;
#[derive(Parser)]
struct
Args
{
#[arg(long)]
model
:
Option
<
String
>
,
#[arg(long)]
image
:
String
,
/// Run on CPU rather than on GPU.
#[arg(long)]
cpu
:
bool
,
}
pub
fn
main
()
->
anyhow
::
Result
<
()
>
{
let
args
=
Args
::
parse
();
let
device
=
candle_examples
::
device
(
args
.cpu
)
?
;
let
image
=
candle_examples
::
imagenet
::
load_image224
(
args
.image
)
?
.to_device
(
&
device
)
?
;
println!
(
"loaded image {image:?}"
);
let
model_file
=
match
args
.model
{
None
=>
{
let
api
=
hf_hub
::
api
::
sync
::
Api
::
new
()
?
;
let
api
=
api
.model
(
"lmz/candle-dino-v2"
.into
());
api
.get
(
"dinov2_vits14.safetensors"
)
?
}
Some
(
model
)
=>
model
.into
(),
};
let
vb
=
unsafe
{
VarBuilder
::
from_mmaped_safetensors
(
&
[
model_file
],
DType
::
F32
,
&
device
)
?
};
let
model
=
dinov2
::
vit_small
(
vb
)
?
;
println!
(
"model built"
);
let
logits
=
model
.forward
(
&
image
.unsqueeze
(
0
)
?
)
?
;
let
prs
=
candle_nn
::
ops
::
softmax
(
&
logits
,
D
::
Minus1
)
?
.i
(
0
)
?
.to_vec1
::
<
f32
>
()
?
;
let
mut
prs
=
prs
.iter
()
.enumerate
()
.collect
::
<
Vec
<
_
>>
();
prs
.sort_by
(|(
_
,
p1
),
(
_
,
p2
)|
p2
.total_cmp
(
p1
));
for
&
(
category_idx
,
pr
)
in
prs
.iter
()
.take
(
5
)
{
println!
(
"{:24}: {:.2}%"
,
candle_examples
::
imagenet
::
CLASSES
[
category_idx
],
100
.
*
pr
);
}
Ok
(())
}
candle-examples/examples/distilbert/README.md
0 → 100644
View file @
25d2752f
# candle-distilbert
DistilBert is a distilled version of the Bert model.
## Sentence embeddings
DistilBert is used to compute the sentence embeddings for a prompt. The model weights
are downloaded from the hub on the first run.
```
bash
cargo run
--example
distilbert
--release
--
--prompt
"Here is a test sentence"
>
[[[
0.5109, 0.1280,
-0
.2635, ..., 0.3462,
-1
.0434, 0.1441],
>
[
0.1735, 0.0818,
-0
.5549, ..., 0.3472,
-0
.8264,
-0
.0244],
>
[
0.0702,
-0
.1311,
-0
.4914, ..., 0.3483,
-0
.6194, 0.1829],
>
...
>
[
0.2993,
-0
.0106,
-0
.4640, ..., 0.2844,
-0
.6732, 0.0042],
>
[
0.1066,
-0
.0081,
-0
.4299, ..., 0.3435,
-0
.7729, 0.0190],
>
[
0.8903, 0.2055,
-0
.2541, ..., 0.3208,
-0
.6585, 0.0586]]]
>
Tensor[[1, 7, 768], f32]
```
candle-examples/examples/distilbert/main.rs
0 → 100644
View file @
25d2752f
#[cfg(feature
=
"mkl"
)]
extern
crate
intel_mkl_src
;
#[cfg(feature
=
"accelerate"
)]
extern
crate
accelerate_src
;
use
candle_transformers
::
models
::
distilbert
::{
Config
,
DistilBertModel
,
DTYPE
};
use
anyhow
::{
Error
as
E
,
Result
};
use
candle
::{
Device
,
Tensor
};
use
candle_nn
::
VarBuilder
;
use
clap
::
Parser
;
use
hf_hub
::{
api
::
sync
::
Api
,
Repo
,
RepoType
};
use
tokenizers
::
Tokenizer
;
#[derive(Parser,
Debug)]
#[command(author,
version,
about,
long_about
=
None)]
struct
Args
{
/// Run on CPU rather than on GPU.
#[arg(long)]
cpu
:
bool
,
/// Enable tracing (generates a trace-timestamp.json file).
#[arg(long)]
tracing
:
bool
,
/// The model to use, check out available models: https://huggingface.co/models?library=sentence-transformers&sort=trending
#[arg(long)]
model_id
:
Option
<
String
>
,
#[arg(long)]
revision
:
Option
<
String
>
,
/// When set, compute embeddings for this prompt.
#[arg(long)]
prompt
:
String
,
/// Use the pytorch weights rather than the safetensors ones
#[arg(long)]
use_pth
:
bool
,
/// The number of times to run the prompt.
#[arg(long,
default_value
=
"1"
)]
n
:
usize
,
/// L2 normalization for embeddings.
#[arg(long,
default_value
=
"true"
)]
normalize_embeddings
:
bool
,
}
impl
Args
{
fn
build_model_and_tokenizer
(
&
self
)
->
Result
<
(
DistilBertModel
,
Tokenizer
)
>
{
let
device
=
candle_examples
::
device
(
self
.cpu
)
?
;
let
default_model
=
"distilbert-base-uncased"
.to_string
();
let
default_revision
=
"main"
.to_string
();
let
(
model_id
,
revision
)
=
match
(
self
.model_id
.to_owned
(),
self
.revision
.to_owned
())
{
(
Some
(
model_id
),
Some
(
revision
))
=>
(
model_id
,
revision
),
(
Some
(
model_id
),
None
)
=>
(
model_id
,
"main"
.to_string
()),
(
None
,
Some
(
revision
))
=>
(
default_model
,
revision
),
(
None
,
None
)
=>
(
default_model
,
default_revision
),
};
let
repo
=
Repo
::
with_revision
(
model_id
,
RepoType
::
Model
,
revision
);
let
(
config_filename
,
tokenizer_filename
,
weights_filename
)
=
{
let
api
=
Api
::
new
()
?
;
let
api
=
api
.repo
(
repo
);
let
config
=
api
.get
(
"config.json"
)
?
;
let
tokenizer
=
api
.get
(
"tokenizer.json"
)
?
;
let
weights
=
if
self
.use_pth
{
api
.get
(
"pytorch_model.bin"
)
?
}
else
{
api
.get
(
"model.safetensors"
)
?
};
(
config
,
tokenizer
,
weights
)
};
let
config
=
std
::
fs
::
read_to_string
(
config_filename
)
?
;
let
config
:
Config
=
serde_json
::
from_str
(
&
config
)
?
;
let
tokenizer
=
Tokenizer
::
from_file
(
tokenizer_filename
)
.map_err
(
E
::
msg
)
?
;
let
vb
=
if
self
.use_pth
{
VarBuilder
::
from_pth
(
&
weights_filename
,
DTYPE
,
&
device
)
?
}
else
{
unsafe
{
VarBuilder
::
from_mmaped_safetensors
(
&
[
weights_filename
],
DTYPE
,
&
device
)
?
}
};
let
model
=
DistilBertModel
::
load
(
vb
,
&
config
)
?
;
Ok
((
model
,
tokenizer
))
}
}
/// Builds a `size x size` u8 mask tensor where entry (i, j) is 1 when j > i
/// and 0 otherwise (strictly upper-triangular), on the given device.
///
/// NOTE(review): panics (`unwrap`) if tensor creation fails rather than
/// propagating the error — acceptable for an example binary.
fn get_mask(size: usize, device: &Device) -> Tensor {
    // Row-major flattening: for each row i, emit u8::from(j > i) for every j.
    let mask: Vec<_> = (0..size)
        .flat_map(|i| (0..size).map(move |j| u8::from(j > i)))
        .collect();
    Tensor::from_slice(&mask, (size, size), device).unwrap()
}
fn
main
()
->
Result
<
()
>
{
use
tracing_chrome
::
ChromeLayerBuilder
;
use
tracing_subscriber
::
prelude
::
*
;
let
args
=
Args
::
parse
();
let
_
guard
=
if
args
.tracing
{
println!
(
"tracing..."
);
let
(
chrome_layer
,
guard
)
=
ChromeLayerBuilder
::
new
()
.build
();
tracing_subscriber
::
registry
()
.with
(
chrome_layer
)
.init
();
Some
(
guard
)
}
else
{
None
};
let
(
model
,
mut
tokenizer
)
=
args
.build_model_and_tokenizer
()
?
;
let
device
=
&
model
.device
;
let
tokenizer
=
tokenizer
.with_padding
(
None
)
.with_truncation
(
None
)
.map_err
(
E
::
msg
)
?
;
let
tokens
=
tokenizer
.encode
(
args
.prompt
,
true
)
.map_err
(
E
::
msg
)
?
.get_ids
()
.to_vec
();
let
token_ids
=
Tensor
::
new
(
&
tokens
[
..
],
device
)
?
.unsqueeze
(
0
)
?
;
let
mask
=
get_mask
(
tokens
.len
(),
device
);
println!
(
"token_ids: {:?}"
,
token_ids
.to_vec2
::
<
u32
>
());
println!
(
"mask: {:?}"
,
mask
.to_vec2
::
<
u8
>
());
let
ys
=
model
.forward
(
&
token_ids
,
&
mask
)
?
;
println!
(
"{ys}"
);
Ok
(())
}
/// L2-normalizes each row of `v`: divides by sqrt(sum of squares) along
/// dimension 1 (broadcast over that dimension).
///
/// NOTE(review): rows whose squared sum is exactly zero would divide by
/// zero — callers should ensure non-degenerate embeddings.
pub fn normalize_l2(v: &Tensor) -> Result<Tensor> {
    Ok(v.broadcast_div(&v.sqr()?.sum_keepdim(1)?.sqrt()?)?)
}
candle-examples/examples/efficientnet/main.rs
0 → 100644
View file @
25d2752f
//! EfficientNet implementation.
//!
//! https://arxiv.org/abs/1905.11946
#[cfg(any(feature
=
"mkl"
,
feature
=
"mkl-dynamic"
))]
extern
crate
intel_mkl_src
;
#[cfg(feature
=
"accelerate"
)]
extern
crate
accelerate_src
;
use
candle
::{
DType
,
IndexOp
,
D
};
use
candle_nn
::{
Module
,
VarBuilder
};
use
candle_transformers
::
models
::
efficientnet
::{
EfficientNet
,
MBConvConfig
};
use
clap
::{
Parser
,
ValueEnum
};
#[derive(Clone,
Copy,
Debug,
ValueEnum)]
enum
Which
{
B0
,
B1
,
B2
,
B3
,
B4
,
B5
,
B6
,
B7
,
}
#[derive(Parser)]
struct
Args
{
#[arg(long)]
model
:
Option
<
String
>
,
#[arg(long)]
image
:
String
,
/// Run on CPU rather than on GPU.
#[arg(long)]
cpu
:
bool
,
/// Variant of the model to use.
#[arg(value_enum,
long,
default_value_t
=
Which::B2)]
which
:
Which
,
}
pub
fn
main
()
->
anyhow
::
Result
<
()
>
{
let
args
=
Args
::
parse
();
let
device
=
candle_examples
::
device
(
args
.cpu
)
?
;
let
image
=
candle_examples
::
imagenet
::
load_image224
(
args
.image
)
?
.to_device
(
&
device
)
?
;
println!
(
"loaded image {image:?}"
);
let
model_file
=
match
args
.model
{
None
=>
{
let
api
=
hf_hub
::
api
::
sync
::
Api
::
new
()
?
;
let
api
=
api
.model
(
"lmz/candle-efficientnet"
.into
());
let
filename
=
match
args
.which
{
Which
::
B0
=>
"efficientnet-b0.safetensors"
,
Which
::
B1
=>
"efficientnet-b1.safetensors"
,
Which
::
B2
=>
"efficientnet-b2.safetensors"
,
Which
::
B3
=>
"efficientnet-b3.safetensors"
,
Which
::
B4
=>
"efficientnet-b4.safetensors"
,
Which
::
B5
=>
"efficientnet-b5.safetensors"
,
Which
::
B6
=>
"efficientnet-b6.safetensors"
,
Which
::
B7
=>
"efficientnet-b7.safetensors"
,
};
api
.get
(
filename
)
?
}
Some
(
model
)
=>
model
.into
(),
};
let
vb
=
unsafe
{
VarBuilder
::
from_mmaped_safetensors
(
&
[
model_file
],
DType
::
F32
,
&
device
)
?
};
let
cfg
=
match
args
.which
{
Which
::
B0
=>
MBConvConfig
::
b0
(),
Which
::
B1
=>
MBConvConfig
::
b1
(),
Which
::
B2
=>
MBConvConfig
::
b2
(),
Which
::
B3
=>
MBConvConfig
::
b3
(),
Which
::
B4
=>
MBConvConfig
::
b4
(),
Which
::
B5
=>
MBConvConfig
::
b5
(),
Which
::
B6
=>
MBConvConfig
::
b6
(),
Which
::
B7
=>
MBConvConfig
::
b7
(),
};
let
model
=
EfficientNet
::
new
(
vb
,
cfg
,
candle_examples
::
imagenet
::
CLASS_COUNT
as
usize
)
?
;
println!
(
"model built"
);
let
logits
=
model
.forward
(
&
image
.unsqueeze
(
0
)
?
)
?
;
let
prs
=
candle_nn
::
ops
::
softmax
(
&
logits
,
D
::
Minus1
)
?
.i
(
0
)
?
.to_vec1
::
<
f32
>
()
?
;
let
mut
prs
=
prs
.iter
()
.enumerate
()
.collect
::
<
Vec
<
_
>>
();
prs
.sort_by
(|(
_
,
p1
),
(
_
,
p2
)|
p2
.total_cmp
(
p1
));
for
&
(
category_idx
,
pr
)
in
prs
.iter
()
.take
(
5
)
{
println!
(
"{:24}: {:.2}%"
,
candle_examples
::
imagenet
::
CLASSES
[
category_idx
],
100
.
*
pr
);
}
Ok
(())
}
candle-examples/examples/efficientvit/README.md
0 → 100644
View file @
25d2752f
# candle-efficientvit
[
EfficientViT: Memory Efficient Vision Transformer with Cascaded Group Attention
](
https://arxiv.org/abs/2305.07027
)
.
This candle implementation uses a pre-trained EfficientViT (from Microsoft Research Asia) network for inference.
The classification head has been trained on the ImageNet dataset and returns the probabilities for the top-5 classes.
## Running an example
```
$
cargo
run
--
example
efficientvit
--
release
--
--
image
candle
-
examples
/
examples
/
yolo
-
v8
/
assets
/
bike
.
jpg
--
which
m1
loaded
image
Tensor
[
dims
3
,
224
,
224
;
f32
]
model
built
mountain
bike
,
all
-
terrain
bike
,
off
-
roader
:
69.80
%
unicycle
,
monocycle
:
13.03
%
bicycle
-
built
-
for
-
two
,
tandem
bicycle
,
tandem
:
9.28
%
crash
helmet
:
2.25
%
alp
:
0.46
%
```
candle-examples/examples/efficientvit/main.rs
0 → 100644
View file @
25d2752f
#[cfg(feature
=
"mkl"
)]
extern
crate
intel_mkl_src
;
#[cfg(feature
=
"accelerate"
)]
extern
crate
accelerate_src
;
use
clap
::{
Parser
,
ValueEnum
};
use
candle
::{
DType
,
IndexOp
,
D
};
use
candle_nn
::{
Module
,
VarBuilder
};
use
candle_transformers
::
models
::
efficientvit
;
#[derive(Clone,
Copy,
Debug,
ValueEnum)]
enum
Which
{
M0
,
M1
,
M2
,
M3
,
M4
,
M5
,
}
impl
Which
{
fn
model_filename
(
&
self
)
->
String
{
let
name
=
match
self
{
Self
::
M0
=>
"m0"
,
Self
::
M1
=>
"m1"
,
Self
::
M2
=>
"m2"
,
Self
::
M3
=>
"m3"
,
Self
::
M4
=>
"m4"
,
Self
::
M5
=>
"m5"
,
};
format!
(
"timm/efficientvit_{}.r224_in1k"
,
name
)
}
fn
config
(
&
self
)
->
efficientvit
::
Config
{
match
self
{
Self
::
M0
=>
efficientvit
::
Config
::
m0
(),
Self
::
M1
=>
efficientvit
::
Config
::
m1
(),
Self
::
M2
=>
efficientvit
::
Config
::
m2
(),
Self
::
M3
=>
efficientvit
::
Config
::
m3
(),
Self
::
M4
=>
efficientvit
::
Config
::
m4
(),
Self
::
M5
=>
efficientvit
::
Config
::
m5
(),
}
}
}
#[derive(Parser)]
struct
Args
{
#[arg(long)]
model
:
Option
<
String
>
,
#[arg(long)]
image
:
String
,
/// Run on CPU rather than on GPU.
#[arg(long)]
cpu
:
bool
,
#[arg(value_enum,
long,
default_value_t=Which::M0)]
which
:
Which
,
}
pub
fn
main
()
->
anyhow
::
Result
<
()
>
{
let
args
=
Args
::
parse
();
let
device
=
candle_examples
::
device
(
args
.cpu
)
?
;
let
image
=
candle_examples
::
imagenet
::
load_image224
(
args
.image
)
?
.to_device
(
&
device
)
?
;
println!
(
"loaded image {image:?}"
);
let
model_file
=
match
args
.model
{
None
=>
{
let
model_name
=
args
.which
.model_filename
();
let
api
=
hf_hub
::
api
::
sync
::
Api
::
new
()
?
;
let
api
=
api
.model
(
model_name
);
api
.get
(
"model.safetensors"
)
?
}
Some
(
model
)
=>
model
.into
(),
};
let
vb
=
unsafe
{
VarBuilder
::
from_mmaped_safetensors
(
&
[
model_file
],
DType
::
F32
,
&
device
)
?
};
let
model
=
efficientvit
::
efficientvit
(
&
args
.which
.config
(),
1000
,
vb
)
?
;
println!
(
"model built"
);
let
logits
=
model
.forward
(
&
image
.unsqueeze
(
0
)
?
)
?
;
let
prs
=
candle_nn
::
ops
::
softmax
(
&
logits
,
D
::
Minus1
)
?
.i
(
0
)
?
.to_vec1
::
<
f32
>
()
?
;
let
mut
prs
=
prs
.iter
()
.enumerate
()
.collect
::
<
Vec
<
_
>>
();
prs
.sort_by
(|(
_
,
p1
),
(
_
,
p2
)|
p2
.total_cmp
(
p1
));
for
&
(
category_idx
,
pr
)
in
prs
.iter
()
.take
(
5
)
{
println!
(
"{:24}: {:.2}%"
,
candle_examples
::
imagenet
::
CLASSES
[
category_idx
],
100
.
*
pr
);
}
Ok
(())
}
candle-examples/examples/encodec/README.md
0 → 100644
View file @
25d2752f
# candle-encodec
[
EnCodec
](
https://huggingface.co/facebook/encodec_24khz
)
is a high-quality audio
compression model using an encoder/decoder architecture with residual vector
quantization.
## Running one example
```
bash
cargo run
--example
encodec
--features
symphonia
--release
--
code-to-audio
\
candle-examples/examples/encodec/jfk-codes.safetensors
\
jfk.wav
```
This decodes the EnCodec tokens stored in
`jfk-codes.safetensors`
and generates
an output wav file containing the audio data.
Instead of
`code-to-audio`
one can use:
-
`audio-to-audio in.mp3 out.wav`
: encodes the input audio file then decodes it to a wav file.
-
`audio-to-code in.mp3 out.safetensors`
: generates a safetensors file
containing EnCodec tokens for the input audio file.
If the audio output file name is set to
`-`
, the audio content directly gets
played on default audio output device. If the audio input file is set to
`-`
, the audio
gets recorded from the default audio input.
candle-examples/examples/encodec/audio_io.rs
0 → 100644
View file @
25d2752f
#![allow(unused)]
use
anyhow
::{
Context
,
Result
};
use
std
::
sync
::{
Arc
,
Mutex
};
pub
const
SAMPLE_RATE
:
usize
=
24_000
;
pub
(
crate
)
struct
AudioOutputData_
{
resampled_data
:
std
::
collections
::
VecDeque
<
f32
>
,
resampler
:
rubato
::
FastFixedIn
<
f32
>
,
output_buffer
:
Vec
<
f32
>
,
input_buffer
:
Vec
<
f32
>
,
input_len
:
usize
,
}
impl
AudioOutputData_
{
pub
(
crate
)
fn
new
(
input_sample_rate
:
usize
,
output_sample_rate
:
usize
)
->
Result
<
Self
>
{
use
rubato
::
Resampler
;
let
resampled_data
=
std
::
collections
::
VecDeque
::
with_capacity
(
output_sample_rate
*
10
);
let
resample_ratio
=
output_sample_rate
as
f64
/
input_sample_rate
as
f64
;
let
resampler
=
rubato
::
FastFixedIn
::
new
(
resample_ratio
,
f64
::
max
(
resample_ratio
,
1.0
),
rubato
::
PolynomialDegree
::
Septic
,
1024
,
1
,
)
?
;
let
input_buffer
=
resampler
.input_buffer_allocate
(
true
)
.remove
(
0
);
let
output_buffer
=
resampler
.output_buffer_allocate
(
true
)
.remove
(
0
);
Ok
(
Self
{
resampled_data
,
resampler
,
input_buffer
,
output_buffer
,
input_len
:
0
,
})
}
pub
fn
reset
(
&
mut
self
)
{
use
rubato
::
Resampler
;
self
.output_buffer
.fill
(
0
.
);
self
.input_buffer
.fill
(
0
.
);
self
.resampler
.reset
();
self
.resampled_data
.clear
();
}
pub
(
crate
)
fn
take_all
(
&
mut
self
)
->
Vec
<
f32
>
{
let
mut
data
=
Vec
::
with_capacity
(
self
.resampled_data
.len
());
while
let
Some
(
elem
)
=
self
.resampled_data
.pop_back
()
{
data
.push
(
elem
);
}
data
}
pub
(
crate
)
fn
is_empty
(
&
self
)
->
bool
{
self
.resampled_data
.is_empty
()
}
// Assumes that the input buffer is large enough.
fn
push_input_buffer
(
&
mut
self
,
samples
:
&
[
f32
])
{
self
.input_buffer
[
self
.input_len
..
self
.input_len
+
samples
.len
()]
.copy_from_slice
(
samples
);
self
.input_len
+=
samples
.len
()
}
pub
(
crate
)
fn
push_samples
(
&
mut
self
,
samples
:
&
[
f32
])
->
Result
<
()
>
{
use
rubato
::
Resampler
;
let
mut
pos_in
=
0
;
loop
{
let
rem
=
self
.input_buffer
.len
()
-
self
.input_len
;
let
pos_end
=
usize
::
min
(
pos_in
+
rem
,
samples
.len
());
self
.push_input_buffer
(
&
samples
[
pos_in
..
pos_end
]);
pos_in
=
pos_end
;
if
self
.input_len
<
self
.input_buffer
.len
()
{
break
;
}
let
(
_
,
out_len
)
=
self
.resampler
.process_into_buffer
(
&
[
&
self
.input_buffer
],
&
mut
[
&
mut
self
.output_buffer
],
None
,
)
?
;
for
&
elem
in
self
.output_buffer
[
..
out_len
]
.iter
()
{
self
.resampled_data
.push_front
(
elem
)
}
self
.input_len
=
0
;
}
Ok
(())
}
}
type
AudioOutputData
=
Arc
<
Mutex
<
AudioOutputData_
>>
;
pub
(
crate
)
fn
setup_output_stream
()
->
Result
<
(
cpal
::
Stream
,
AudioOutputData
)
>
{
use
cpal
::
traits
::{
DeviceTrait
,
HostTrait
,
StreamTrait
};
println!
(
"Setup audio output stream!"
);
let
host
=
cpal
::
default_host
();
let
device
=
host
.default_output_device
()
.context
(
"no output device available"
)
?
;
let
mut
supported_configs_range
=
device
.supported_output_configs
()
?
;
let
config_range
=
match
supported_configs_range
.find
(|
c
|
c
.channels
()
==
1
)
{
// On macOS, it's commonly the case that there are only stereo outputs.
None
=>
device
.supported_output_configs
()
?
.next
()
.context
(
"no audio output available"
)
?
,
Some
(
config_range
)
=>
config_range
,
};
let
sample_rate
=
cpal
::
SampleRate
(
SAMPLE_RATE
as
u32
)
.clamp
(
config_range
.min_sample_rate
(),
config_range
.max_sample_rate
(),
);
let
config
:
cpal
::
StreamConfig
=
config_range
.with_sample_rate
(
sample_rate
)
.into
();
let
channels
=
config
.channels
as
usize
;
println!
(
"cpal device: {} {} {config:?}"
,
device
.name
()
.unwrap_or_else
(|
_
|
"unk"
.to_string
()),
config
.sample_rate
.0
);
let
audio_data
=
Arc
::
new
(
Mutex
::
new
(
AudioOutputData_
::
new
(
SAMPLE_RATE
,
config
.sample_rate
.0
as
usize
,
)
?
));
let
ad
=
audio_data
.clone
();
let
stream
=
device
.build_output_stream
(
&
config
,
move
|
data
:
&
mut
[
f32
],
_
:
&
cpal
::
OutputCallbackInfo
|
{
data
.fill
(
0
.
);
let
mut
ad
=
ad
.lock
()
.unwrap
();
let
mut
last_elem
=
0f32
;
for
(
idx
,
elem
)
in
data
.iter_mut
()
.enumerate
()
{
if
idx
%
channels
==
0
{
match
ad
.resampled_data
.pop_back
()
{
None
=>
break
,
Some
(
v
)
=>
{
last_elem
=
v
;
*
elem
=
v
}
}
}
else
{
*
elem
=
last_elem
}
}
},
move
|
err
|
eprintln!
(
"cpal error: {err}"
),
None
,
// None=blocking, Some(Duration)=timeout
)
?
;
stream
.play
()
?
;
Ok
((
stream
,
audio_data
))
}
pub
(
crate
)
fn
setup_input_stream
()
->
Result
<
(
cpal
::
Stream
,
AudioOutputData
)
>
{
use
cpal
::
traits
::{
DeviceTrait
,
HostTrait
,
StreamTrait
};
println!
(
"Setup audio input stream!"
);
let
host
=
cpal
::
default_host
();
let
device
=
host
.default_input_device
()
.context
(
"no input device available"
)
?
;
let
mut
supported_configs_range
=
device
.supported_input_configs
()
?
;
let
config_range
=
supported_configs_range
.find
(|
c
|
c
.channels
()
==
1
)
.context
(
"no audio input available"
)
?
;
let
sample_rate
=
cpal
::
SampleRate
(
SAMPLE_RATE
as
u32
)
.clamp
(
config_range
.min_sample_rate
(),
config_range
.max_sample_rate
(),
);
let
config
:
cpal
::
StreamConfig
=
config_range
.with_sample_rate
(
sample_rate
)
.into
();
println!
(
"cpal device: {} {} {config:?}"
,
device
.name
()
.unwrap_or_else
(|
_
|
"unk"
.to_string
()),
config
.sample_rate
.0
);
let
audio_data
=
Arc
::
new
(
Mutex
::
new
(
AudioOutputData_
::
new
(
config
.sample_rate
.0
as
usize
,
SAMPLE_RATE
,
)
?
));
let
ad
=
audio_data
.clone
();
let
stream
=
device
.build_input_stream
(
&
config
,
move
|
data
:
&
[
f32
],
_
:
&
cpal
::
InputCallbackInfo
|
{
let
mut
ad
=
ad
.lock
()
.unwrap
();
if
let
Err
(
err
)
=
ad
.push_samples
(
data
)
{
eprintln!
(
"error processing audio input {err:?}"
)
}
},
move
|
err
|
eprintln!
(
"cpal error: {err}"
),
None
,
// None=blocking, Some(Duration)=timeout
)
?
;
stream
.play
()
?
;
Ok
((
stream
,
audio_data
))
}
fn
conv
<
T
>
(
samples
:
&
mut
Vec
<
f32
>
,
data
:
std
::
borrow
::
Cow
<
symphonia
::
core
::
audio
::
AudioBuffer
<
T
>>
)
where
T
:
symphonia
::
core
::
sample
::
Sample
,
f32
:
symphonia
::
core
::
conv
::
FromSample
<
T
>
,
{
use
symphonia
::
core
::
audio
::
Signal
;
use
symphonia
::
core
::
conv
::
FromSample
;
samples
.extend
(
data
.chan
(
0
)
.iter
()
.map
(|
v
|
f32
::
from_sample
(
*
v
)))
}
pub
(
crate
)
fn
pcm_decode
<
P
:
AsRef
<
std
::
path
::
Path
>>
(
path
:
P
)
->
Result
<
(
Vec
<
f32
>
,
u32
)
>
{
use
symphonia
::
core
::
audio
::{
AudioBufferRef
,
Signal
};
let
src
=
std
::
fs
::
File
::
open
(
path
)
?
;
let
mss
=
symphonia
::
core
::
io
::
MediaSourceStream
::
new
(
Box
::
new
(
src
),
Default
::
default
());
let
hint
=
symphonia
::
core
::
probe
::
Hint
::
new
();
let
meta_opts
:
symphonia
::
core
::
meta
::
MetadataOptions
=
Default
::
default
();
let
fmt_opts
:
symphonia
::
core
::
formats
::
FormatOptions
=
Default
::
default
();
let
probed
=
symphonia
::
default
::
get_probe
()
.format
(
&
hint
,
mss
,
&
fmt_opts
,
&
meta_opts
)
?
;
let
mut
format
=
probed
.format
;
let
track
=
format
.tracks
()
.iter
()
.find
(|
t
|
t
.codec_params.codec
!=
symphonia
::
core
::
codecs
::
CODEC_TYPE_NULL
)
.expect
(
"no supported audio tracks"
);
let
mut
decoder
=
symphonia
::
default
::
get_codecs
()
.make
(
&
track
.codec_params
,
&
Default
::
default
())
.expect
(
"unsupported codec"
);
let
track_id
=
track
.id
;
let
sample_rate
=
track
.codec_params.sample_rate
.unwrap_or
(
0
);
let
mut
pcm_data
=
Vec
::
new
();
while
let
Ok
(
packet
)
=
format
.next_packet
()
{
while
!
format
.metadata
()
.is_latest
()
{
format
.metadata
()
.pop
();
}
if
packet
.track_id
()
!=
track_id
{
continue
;
}
match
decoder
.decode
(
&
packet
)
?
{
AudioBufferRef
::
F32
(
buf
)
=>
pcm_data
.extend
(
buf
.chan
(
0
)),
AudioBufferRef
::
U8
(
data
)
=>
conv
(
&
mut
pcm_data
,
data
),
AudioBufferRef
::
U16
(
data
)
=>
conv
(
&
mut
pcm_data
,
data
),
AudioBufferRef
::
U24
(
data
)
=>
conv
(
&
mut
pcm_data
,
data
),
AudioBufferRef
::
U32
(
data
)
=>
conv
(
&
mut
pcm_data
,
data
),
AudioBufferRef
::
S8
(
data
)
=>
conv
(
&
mut
pcm_data
,
data
),
AudioBufferRef
::
S16
(
data
)
=>
conv
(
&
mut
pcm_data
,
data
),
AudioBufferRef
::
S24
(
data
)
=>
conv
(
&
mut
pcm_data
,
data
),
AudioBufferRef
::
S32
(
data
)
=>
conv
(
&
mut
pcm_data
,
data
),
AudioBufferRef
::
F64
(
data
)
=>
conv
(
&
mut
pcm_data
,
data
),
}
}
Ok
((
pcm_data
,
sample_rate
))
}
pub
(
crate
)
fn
resample
(
pcm_in
:
&
[
f32
],
sr_in
:
usize
,
sr_out
:
usize
)
->
Result
<
Vec
<
f32
>>
{
use
rubato
::
Resampler
;
let
mut
pcm_out
=
Vec
::
with_capacity
((
pcm_in
.len
()
as
f64
*
sr_out
as
f64
/
sr_in
as
f64
)
as
usize
+
1024
);
let
mut
resampler
=
rubato
::
FftFixedInOut
::
<
f32
>
::
new
(
sr_in
,
sr_out
,
1024
,
1
)
?
;
let
mut
output_buffer
=
resampler
.output_buffer_allocate
(
true
);
let
mut
pos_in
=
0
;
while
pos_in
+
resampler
.input_frames_next
()
<
pcm_in
.len
()
{
let
(
in_len
,
out_len
)
=
resampler
.process_into_buffer
(
&
[
&
pcm_in
[
pos_in
..
]],
&
mut
output_buffer
,
None
)
?
;
pos_in
+=
in_len
;
pcm_out
.extend_from_slice
(
&
output_buffer
[
0
][
..
out_len
]);
}
if
pos_in
<
pcm_in
.len
()
{
let
(
_
in_len
,
out_len
)
=
resampler
.process_partial_into_buffer
(
Some
(
&
[
&
pcm_in
[
pos_in
..
]]),
&
mut
output_buffer
,
None
,
)
?
;
pcm_out
.extend_from_slice
(
&
output_buffer
[
0
][
..
out_len
]);
}
Ok
(
pcm_out
)
}
candle-examples/examples/encodec/jfk-codes.safetensors
0 → 100644
View file @
25d2752f
File added
candle-examples/examples/encodec/main.rs
0 → 100644
View file @
25d2752f
#[cfg(feature
=
"mkl"
)]
extern
crate
intel_mkl_src
;
#[cfg(feature
=
"accelerate"
)]
extern
crate
accelerate_src
;
use
anyhow
::
Result
;
use
candle
::{
DType
,
IndexOp
,
Tensor
};
use
candle_nn
::
VarBuilder
;
use
candle_transformers
::
models
::
encodec
::{
Config
,
Model
};
use
clap
::{
Parser
,
ValueEnum
};
use
hf_hub
::
api
::
sync
::
Api
;
mod
audio_io
;
#[derive(Clone,
Debug,
Copy,
PartialEq,
Eq,
ValueEnum)]
enum
Action
{
AudioToAudio
,
AudioToCode
,
CodeToAudio
,
}
// Command-line arguments for the encodec example.
// NOTE: clap derives the runtime `--help` text from the `///` field doc
// comments below, so they are left byte-identical; reviewer notes use `//`.
#[derive(Parser, Debug)]
#[command(author, version, about, long_about = None)]
struct Args {
    /// The action to be performed, specifies the format for the input and output data.
    action: Action,

    /// The input file, either an audio file or some encodec tokens stored as safetensors.
    // "-" records from the default audio input device (see main).
    in_file: String,

    /// The output file, either a wave audio file or some encodec tokens stored as safetensors.
    // "-" plays through the default audio output device (see main).
    out_file: String,

    /// Run on CPU rather than on GPU.
    #[arg(long)]
    cpu: bool,

    /// The model weight file, in safetensor format.
    // When absent, "facebook/encodec_24khz" is downloaded from the HF hub.
    #[arg(long)]
    model: Option<String>,
}
/// Entry point for the encodec example: loads the model, obtains encodec codes
/// (either from a safetensors file or by encoding audio), then writes codes or
/// decoded audio depending on `--action`.
fn main() -> Result<()> {
    let args = Args::parse();
    let device = candle_examples::device(args.cpu)?;
    // Resolve the weights: an explicit local path, or a hub download.
    let model = match args.model {
        Some(model) => std::path::PathBuf::from(model),
        None => Api::new()?
            .model("facebook/encodec_24khz".to_string())
            .get("model.safetensors")?,
    };
    // Safety: mmap-ing the safetensors file is sound as long as the file is not
    // mutated while mapped (standard candle example pattern).
    let vb = unsafe { VarBuilder::from_mmaped_safetensors(&[model], DType::F32, &device)? };
    let config = Config::default();
    let model = Model::new(&config, vb)?;
    // First stage: obtain the encodec codes tensor.
    let codes = match args.action {
        Action::CodeToAudio => {
            // Codes come straight from the input safetensors file.
            let codes = candle::safetensors::load(args.in_file, &device)?;
            codes.get("codes").expect("no codes in input file").clone()
        }
        Action::AudioToCode | Action::AudioToAudio => {
            let pcm = if args.in_file == "-" {
                // Live capture: record until the user presses enter on stdin.
                println!(">>>> RECORDING AUDIO, PRESS ENTER ONCE DONE <<<<");
                let (stream, input_audio) = audio_io::setup_input_stream()?;
                let mut pcms = vec![];
                // A background thread blocks on stdin; the main thread polls the
                // capture buffer until that thread finishes (enter was pressed).
                let stdin = std::thread::spawn(|| {
                    let mut s = String::new();
                    std::io::stdin().read_line(&mut s)
                });
                while !stdin.is_finished() {
                    let input = input_audio.lock().unwrap().take_all();
                    if input.is_empty() {
                        // Nothing captured yet; avoid busy-waiting.
                        std::thread::sleep(std::time::Duration::from_millis(100));
                        continue;
                    }
                    pcms.push(input)
                }
                // Dropping the stream stops the capture callback.
                drop(stream);
                pcms.concat()
            } else {
                // File input: decode, then resample to 24khz if needed since
                // that is the rate this encodec model was trained for.
                let (pcm, sample_rate) = audio_io::pcm_decode(args.in_file)?;
                if sample_rate != 24_000 {
                    println!(
                        "WARNING: encodec uses a 24khz sample rate, input uses {sample_rate}, resampling..."
                    );
                    audio_io::resample(&pcm, sample_rate as usize, 24_000)?
                } else {
                    pcm
                }
            };
            let pcm_len = pcm.len();
            // Shape (batch=1, channels=1, samples) as expected by the encoder.
            let pcm = Tensor::from_vec(pcm, (1, 1, pcm_len), &device)?;
            println!("input pcm shape: {:?}", pcm.shape());
            model.encode(&pcm)?
        }
    };
    println!("codes shape: {:?}", codes.shape());
    // Second stage: emit either the codes or the decoded audio.
    match args.action {
        Action::AudioToCode => {
            codes.save_safetensors("codes", &args.out_file)?;
        }
        Action::AudioToAudio | Action::CodeToAudio => {
            let pcm = model.decode(&codes)?;
            println!("output pcm shape: {:?}", pcm.shape());
            // Drop the batch and channel dimensions to get a 1D sample vector.
            let pcm = pcm.i(0)?.i(0)?;
            let pcm = candle_examples::audio::normalize_loudness(&pcm, 24_000, true)?;
            let pcm = pcm.to_vec1::<f32>()?;
            if args.out_file == "-" {
                // Live playback: queue all samples, then spin until the output
                // buffer drains.
                let (stream, ad) = audio_io::setup_output_stream()?;
                {
                    let mut ad = ad.lock().unwrap();
                    ad.push_samples(&pcm)?;
                }
                loop {
                    let ad = ad.lock().unwrap();
                    if ad.is_empty() {
                        break;
                    }
                    // That's very weird, calling thread::sleep here triggers the stream to stop
                    // playing (the callback doesn't seem to be called anymore).
                    // std::thread::sleep(std::time::Duration::from_millis(100));
                }
                drop(stream)
            } else {
                let mut output = std::fs::File::create(&args.out_file)?;
                candle_examples::wav::write_pcm_as_wav(&mut output, &pcm, 24_000)?;
            }
        }
    }
    Ok(())
}
candle-examples/examples/falcon/README.md
0 → 100644
View file @
25d2752f
# candle-falcon
Falcon is a general large language model.
candle-examples/examples/falcon/main.rs
0 → 100644
View file @
25d2752f
// TODO: Add an offline mode.
#[cfg(feature
=
"accelerate"
)]
extern
crate
accelerate_src
;
#[cfg(any(feature
=
"mkl"
,
feature
=
"mkl-dynamic"
))]
extern
crate
intel_mkl_src
;
use
anyhow
::{
Error
as
E
,
Result
};
use
candle
::{
DType
,
Device
,
Tensor
};
use
candle_nn
::
VarBuilder
;
use
candle_transformers
::
generation
::
LogitsProcessor
;
use
clap
::
Parser
;
use
hf_hub
::{
api
::
sync
::
Api
,
Repo
,
RepoType
};
use
tokenizers
::
Tokenizer
;
use
candle_transformers
::
models
::
falcon
::{
Config
,
Falcon
};
/// State for the falcon text-generation loop: the model, the device inputs are
/// created on, the tokenizer, the sampler, and the repeat-penalty settings.
struct TextGeneration {
    model: Falcon,
    // Device on which input tensors are created (see `run`).
    device: Device,
    tokenizer: Tokenizer,
    // Samples the next token from the logits (seeded, temperature/top-p).
    logits_processor: LogitsProcessor,
    // 1.0 disables the penalty (checked in `run`).
    repeat_penalty: f32,
    // How many trailing tokens the repeat penalty looks back over.
    repeat_last_n: usize,
}
struct
GenerationOptions
{
temp
:
Option
<
f64
>
,
top_p
:
Option
<
f64
>
,
repeat_penalty
:
f32
,
repeat_last_n
:
usize
,
}
impl TextGeneration {
    /// Builds the generation pipeline from a loaded model, a tokenizer, the
    /// sampling options, an RNG seed, and the device to run on.
    fn new(
        model: Falcon,
        tokenizer: Tokenizer,
        generation_options: GenerationOptions,
        seed: u64,
        device: &Device,
    ) -> Self {
        let logits_processor =
            LogitsProcessor::new(seed, generation_options.temp, generation_options.top_p);
        let repeat_penalty = generation_options.repeat_penalty;
        let repeat_last_n = generation_options.repeat_last_n;
        Self {
            model,
            tokenizer,
            logits_processor,
            device: device.clone(),
            repeat_penalty,
            repeat_last_n,
        }
    }

    /// Generates `sample_len` tokens from `prompt`, printing each token (and
    /// per-token latency) as it is produced, then a final throughput summary.
    fn run(&mut self, prompt: &str, sample_len: usize) -> Result<()> {
        println!("starting the inference loop");
        let mut tokens = self
            .tokenizer
            .encode(prompt, true)
            .map_err(E::msg)?
            .get_ids()
            .to_vec();
        let mut new_tokens = vec![];
        let start_gen = std::time::Instant::now();
        for index in 0..sample_len {
            // Shadowed timer: measures this single decoding step.
            let start_gen = std::time::Instant::now();
            // With the KV cache enabled, only the latest token needs to be fed
            // after the first step; otherwise the whole sequence is re-run.
            let context_size = if self.model.config().use_cache && index > 0 {
                1
            } else {
                tokens.len()
            };
            let ctxt = &tokens[tokens.len().saturating_sub(context_size)..];
            let input = Tensor::new(ctxt, &self.device)?.unsqueeze(0)?;
            let logits = self.model.forward(&input)?;
            let logits = logits.squeeze(0)?.to_dtype(DType::F32)?;
            // Apply the repeat penalty over the last `repeat_last_n` tokens,
            // unless it is the no-op value 1.0.
            let logits = if self.repeat_penalty == 1. {
                logits
            } else {
                let start_at = tokens.len().saturating_sub(self.repeat_last_n);
                candle_transformers::utils::apply_repeat_penalty(
                    &logits,
                    self.repeat_penalty,
                    &tokens[start_at..],
                )?
            };
            let next_token = self.logits_processor.sample(&logits)?;
            tokens.push(next_token);
            new_tokens.push(next_token);
            println!("> {:?}", start_gen.elapsed());
            println!(
                "{} token: {} '{}'",
                index + 1,
                next_token,
                self.tokenizer.decode(&[next_token], true).map_err(E::msg)?
            );
        }
        let dt = start_gen.elapsed();
        println!(
            "{sample_len} tokens generated ({} token/s)\n----\n{}\n----",
            sample_len as f64 / dt.as_secs_f64(),
            self.tokenizer.decode(&new_tokens, true).map_err(E::msg)?
        );
        Ok(())
    }
}
// Command-line arguments for the falcon example.
// NOTE: clap turns `///` field doc comments into `--help` text, so existing
// ones are kept byte-identical and reviewer notes use `//`.
#[derive(Parser, Debug)]
#[command(author, version, about, long_about = None)]
struct Args {
    /// Run on CPU rather than on GPU.
    #[arg(long)]
    cpu: bool,

    // The prompt to start generation from (required).
    #[arg(long)]
    prompt: String,

    /// Use f32 computations rather than bf16.
    #[arg(long)]
    use_f32: bool,

    /// The temperature used to generate samples.
    #[arg(long)]
    temperature: Option<f64>,

    /// Nucleus sampling probability cutoff.
    #[arg(long)]
    top_p: Option<f64>,

    /// The seed to use when generating random samples.
    #[arg(long, default_value_t = 299792458)]
    seed: u64,

    /// The length of the sample to generate (in tokens).
    #[arg(long, default_value_t = 100)]
    sample_len: usize,

    // HF hub model repository to download from.
    #[arg(long, default_value = "tiiuae/falcon-7b")]
    model_id: String,

    // Hub revision; this PR branch carries the safetensors weights.
    #[arg(long, default_value = "refs/pr/43")]
    revision: String,

    /// Penalty to be applied for repeating tokens, 1. means no penalty.
    #[arg(long, default_value_t = 1.0)]
    repeat_penalty: f32,

    /// The context size to consider for the repeat penalty.
    #[arg(long, default_value_t = 64)]
    repeat_last_n: usize,
}
/// Entry point for the falcon example: downloads tokenizer and sharded
/// safetensors weights from the hub, loads the falcon-7b model, and runs the
/// text-generation pipeline on the supplied prompt.
fn main() -> Result<()> {
    let args = Args::parse();
    let device = candle_examples::device(args.cpu)?;
    let start = std::time::Instant::now();
    let api = Api::new()?;
    let repo = api.repo(Repo::with_revision(
        args.model_id,
        RepoType::Model,
        args.revision,
    ));
    let tokenizer_filename = repo.get("tokenizer.json")?;
    // Resolves every weight shard listed in the index file.
    let filenames =
        candle_examples::hub_load_safetensors(&repo, "model.safetensors.index.json")?;
    println!("retrieved the files in {:?}", start.elapsed());
    let tokenizer = Tokenizer::from_file(tokenizer_filename).map_err(E::msg)?;

    // Reused timer: now measures model loading.
    let start = std::time::Instant::now();
    // bf16 by default for speed/memory; --use-f32 opts into full precision.
    let dtype = if args.use_f32 {
        DType::F32
    } else {
        DType::BF16
    };
    // Safety: mmap-ing the safetensors files is sound as long as they are not
    // mutated while mapped (standard candle example pattern).
    let vb = unsafe { VarBuilder::from_mmaped_safetensors(&filenames, dtype, &device)? };
    let config = Config::falcon7b();
    config.validate()?;
    let model = Falcon::load(vb, config)?;
    println!("loaded the model in {:?}", start.elapsed());

    let generation_options = GenerationOptions {
        temp: args.temperature,
        top_p: args.top_p,
        repeat_penalty: args.repeat_penalty,
        repeat_last_n: args.repeat_last_n,
    };
    let mut pipeline = TextGeneration::new(
        model,
        tokenizer,
        generation_options,
        args.seed,
        &device,
    );
    pipeline.run(&args.prompt, args.sample_len)?;
    Ok(())
}
candle-examples/examples/gemma/README.md
0 → 100644
View file @
25d2752f
# candle-gemma: 2b and 7b LLMs from Google DeepMind
[
Gemma
](
https://ai.google.dev/gemma/docs
)
is a collection of lightweight open
models published by Google DeepMind with a 2b and a 7b variant.
In order to use the example below, you have to accept the license on the
[
HuggingFace Hub Gemma repo
](
https://huggingface.co/google/gemma-7b
)
and set up
your access token via the
[
HuggingFace cli login
command
](
https://huggingface.co/docs/huggingface_hub/guides/cli#huggingface-cli-login
)
.
## Running the example
```
bash
$
cargo run
--example
gemma
--release
--
--prompt
"fn count_primes(max_n: usize)"
fn count_primes
(
max_n: usize
)
-> usize
{
let
mut primes
=
vec![true
;
max_n]
;
for
i
in
2..
=
max_n
{
if
primes[i]
{
for
j
in
i
*
i..max_n
{
primes[j]
=
false
;
}
}
}
primes.len
()
}
```
candle-examples/examples/gemma/main.rs
0 → 100644
View file @
25d2752f
#[cfg(feature
=
"mkl"
)]
extern
crate
intel_mkl_src
;
#[cfg(feature
=
"accelerate"
)]
extern
crate
accelerate_src
;
use
anyhow
::{
Error
as
E
,
Result
};
use
clap
::
Parser
;
use
candle_transformers
::
models
::
gemma
::{
Config
,
Model
};
use
candle
::{
DType
,
Device
,
Tensor
};
use
candle_examples
::
token_output_stream
::
TokenOutputStream
;
use
candle_nn
::
VarBuilder
;
use
candle_transformers
::
generation
::
LogitsProcessor
;
use
hf_hub
::{
api
::
sync
::
Api
,
Repo
,
RepoType
};
use
tokenizers
::
Tokenizer
;
// Selects which gemma checkpoint to run; the `#[value(name = ...)]` strings are
// the CLI spellings (e.g. `--which 2b-it`) and map to HF hub repos in `main`.
// NOTE: `///` variant docs would become clap help text, so `//` is used here.
#[derive(Clone, Debug, Copy, PartialEq, Eq, clap::ValueEnum)]
enum Which {
    // google/gemma-2b (base model).
    #[value(name = "2b")]
    Base2B,
    // google/gemma-7b (base model).
    #[value(name = "7b")]
    Base7B,
    // google/gemma-2b-it (instruction tuned).
    #[value(name = "2b-it")]
    Instruct2B,
    // google/gemma-7b-it (instruction tuned).
    #[value(name = "7b-it")]
    Instruct7B,
    // google/gemma-1.1-2b-it (v1.1 instruction tuned).
    #[value(name = "1.1-2b-it")]
    InstructV1_1_2B,
    // google/gemma-1.1-7b-it (v1.1 instruction tuned).
    #[value(name = "1.1-7b-it")]
    InstructV1_1_7B,
}
/// State for the gemma text-generation loop: the model, the device inputs are
/// created on, a streaming tokenizer wrapper, the sampler, and the
/// repeat-penalty settings.
struct TextGeneration {
    model: Model,
    // Device on which input tensors are created (see `run`).
    device: Device,
    // Wraps the tokenizer so tokens can be decoded and printed incrementally.
    tokenizer: TokenOutputStream,
    // Samples the next token from the logits (seeded, temperature/top-p).
    logits_processor: LogitsProcessor,
    // 1.0 disables the penalty (checked in `run`).
    repeat_penalty: f32,
    // How many trailing tokens the repeat penalty looks back over.
    repeat_last_n: usize,
}
impl TextGeneration {
    /// Builds the generation pipeline; the raw tokenizer is wrapped in a
    /// `TokenOutputStream` so tokens can be printed as they are sampled.
    #[allow(clippy::too_many_arguments)]
    fn new(
        model: Model,
        tokenizer: Tokenizer,
        seed: u64,
        temp: Option<f64>,
        top_p: Option<f64>,
        repeat_penalty: f32,
        repeat_last_n: usize,
        device: &Device,
    ) -> Self {
        let logits_processor = LogitsProcessor::new(seed, temp, top_p);
        Self {
            model,
            tokenizer: TokenOutputStream::new(tokenizer),
            logits_processor,
            repeat_penalty,
            repeat_last_n,
            device: device.clone(),
        }
    }

    /// Generates up to `sample_len` tokens from `prompt`, streaming the decoded
    /// text to stdout; stops early on `<eos>` and prints a throughput summary.
    fn run(&mut self, prompt: &str, sample_len: usize) -> Result<()> {
        use std::io::Write;
        // Reset any state left over from a previous run.
        self.tokenizer.clear();
        let mut tokens = self
            .tokenizer
            .tokenizer()
            .encode(prompt, true)
            .map_err(E::msg)?
            .get_ids()
            .to_vec();
        // Echo the prompt itself through the streaming decoder.
        for &t in tokens.iter() {
            if let Some(t) = self.tokenizer.next_token(t)? {
                print!("{t}")
            }
        }
        std::io::stdout().flush()?;

        let mut generated_tokens = 0usize;
        let eos_token = match self.tokenizer.get_token("<eos>") {
            Some(token) => token,
            None => anyhow::bail!("cannot find the <eos> token"),
        };
        let start_gen = std::time::Instant::now();
        for index in 0..sample_len {
            // After the first step only the latest token is fed; `start_pos`
            // tells the model where the KV cache left off.
            let context_size = if index > 0 { 1 } else { tokens.len() };
            let start_pos = tokens.len().saturating_sub(context_size);
            let ctxt = &tokens[start_pos..];
            let input = Tensor::new(ctxt, &self.device)?.unsqueeze(0)?;
            let logits = self.model.forward(&input, start_pos)?;
            let logits = logits.squeeze(0)?.squeeze(0)?.to_dtype(DType::F32)?;
            // Apply the repeat penalty over the last `repeat_last_n` tokens,
            // unless it is the no-op value 1.0.
            let logits = if self.repeat_penalty == 1. {
                logits
            } else {
                let start_at = tokens.len().saturating_sub(self.repeat_last_n);
                candle_transformers::utils::apply_repeat_penalty(
                    &logits,
                    self.repeat_penalty,
                    &tokens[start_at..],
                )?
            };

            let next_token = self.logits_processor.sample(&logits)?;
            tokens.push(next_token);
            generated_tokens += 1;
            if next_token == eos_token {
                break;
            }
            if let Some(t) = self.tokenizer.next_token(next_token)? {
                print!("{t}");
                std::io::stdout().flush()?;
            }
        }
        let dt = start_gen.elapsed();
        // Flush any characters the streaming decoder is still holding back.
        if let Some(rest) = self.tokenizer.decode_rest().map_err(E::msg)? {
            print!("{rest}");
        }
        std::io::stdout().flush()?;
        println!(
            "\n{generated_tokens} tokens generated ({:.2} token/s)",
            generated_tokens as f64 / dt.as_secs_f64(),
        );
        Ok(())
    }
}
// Command-line arguments for the gemma example.
// NOTE: clap turns `///` field doc comments into `--help` text, so existing
// ones are kept byte-identical and reviewer notes use `//`.
#[derive(Parser, Debug)]
#[command(author, version, about, long_about = None)]
struct Args {
    /// Run on CPU rather than on GPU.
    #[arg(long)]
    cpu: bool,

    /// Enable tracing (generates a trace-timestamp.json file).
    #[arg(long)]
    tracing: bool,

    // The prompt to start generation from (required).
    #[arg(long)]
    prompt: String,

    /// The temperature used to generate samples.
    #[arg(long)]
    temperature: Option<f64>,

    /// Nucleus sampling probability cutoff.
    #[arg(long)]
    top_p: Option<f64>,

    /// The seed to use when generating random samples.
    #[arg(long, default_value_t = 299792458)]
    seed: u64,

    /// The length of the sample to generate (in tokens).
    #[arg(long, short = 'n', default_value_t = 10000)]
    sample_len: usize,

    // Overrides the hub repo chosen via --which.
    #[arg(long)]
    model_id: Option<String>,

    // Hub revision (branch/tag/commit) to download from.
    #[arg(long, default_value = "main")]
    revision: String,

    // Local tokenizer.json; downloaded from the hub when absent.
    #[arg(long)]
    tokenizer_file: Option<String>,

    // Local config.json; downloaded from the hub when absent.
    #[arg(long)]
    config_file: Option<String>,

    // Comma-separated local safetensors files; downloaded when absent.
    #[arg(long)]
    weight_files: Option<String>,

    /// Penalty to be applied for repeating tokens, 1. means no penalty.
    #[arg(long, default_value_t = 1.1)]
    repeat_penalty: f32,

    /// The context size to consider for the repeat penalty.
    #[arg(long, default_value_t = 64)]
    repeat_last_n: usize,

    /// The model to use.
    #[arg(long, default_value = "2b")]
    which: Which,
}
/// Entry point for the gemma example: optionally sets up chrome tracing,
/// resolves tokenizer/config/weights (local files or hub downloads), loads the
/// model, and runs the text-generation pipeline on the supplied prompt.
fn main() -> Result<()> {
    use tracing_chrome::ChromeLayerBuilder;
    use tracing_subscriber::prelude::*;

    let args = Args::parse();
    // The guard must stay alive for the whole run so the trace file is flushed
    // on drop.
    let _guard = if args.tracing {
        let (chrome_layer, guard) = ChromeLayerBuilder::new().build();
        tracing_subscriber::registry().with(chrome_layer).init();
        Some(guard)
    } else {
        None
    };
    // Report which SIMD features this candle build was compiled with.
    println!(
        "avx: {}, neon: {}, simd128: {}, f16c: {}",
        candle::utils::with_avx(),
        candle::utils::with_neon(),
        candle::utils::with_simd128(),
        candle::utils::with_f16c()
    );
    println!(
        "temp: {:.2} repeat-penalty: {:.2} repeat-last-n: {}",
        args.temperature.unwrap_or(0.),
        args.repeat_penalty,
        args.repeat_last_n
    );

    let start = std::time::Instant::now();
    let api = Api::new()?;
    // --model-id wins; otherwise map the --which variant to its hub repo.
    let model_id = match &args.model_id {
        Some(model_id) => model_id.to_string(),
        None => match args.which {
            Which::InstructV1_1_2B => "google/gemma-1.1-2b-it".to_string(),
            Which::InstructV1_1_7B => "google/gemma-1.1-7b-it".to_string(),
            Which::Base2B => "google/gemma-2b".to_string(),
            Which::Base7B => "google/gemma-7b".to_string(),
            Which::Instruct2B => "google/gemma-2b-it".to_string(),
            Which::Instruct7B => "google/gemma-7b-it".to_string(),
        },
    };
    let repo = api.repo(Repo::with_revision(
        model_id,
        RepoType::Model,
        args.revision,
    ));
    // Each artifact can be overridden by a local path on the CLI.
    let tokenizer_filename = match args.tokenizer_file {
        Some(file) => std::path::PathBuf::from(file),
        None => repo.get("tokenizer.json")?,
    };
    let config_filename = match args.config_file {
        Some(file) => std::path::PathBuf::from(file),
        None => repo.get("config.json")?,
    };
    let filenames = match args.weight_files {
        Some(files) => files
            .split(',')
            .map(std::path::PathBuf::from)
            .collect::<Vec<_>>(),
        None => candle_examples::hub_load_safetensors(&repo, "model.safetensors.index.json")?,
    };
    println!("retrieved the files in {:?}", start.elapsed());
    let tokenizer = Tokenizer::from_file(tokenizer_filename).map_err(E::msg)?;
    let config: Config = serde_json::from_reader(std::fs::File::open(config_filename)?)?;

    // Reused timer: now measures model loading.
    let start = std::time::Instant::now();
    let device = candle_examples::device(args.cpu)?;
    // bf16 on CUDA for speed/memory, f32 elsewhere.
    let dtype = if device.is_cuda() {
        DType::BF16
    } else {
        DType::F32
    };
    // Safety: mmap-ing the safetensors files is sound as long as they are not
    // mutated while mapped (standard candle example pattern).
    let vb = unsafe { VarBuilder::from_mmaped_safetensors(&filenames, dtype, &device)? };
    let model = Model::new(&config, vb)?;
    println!("loaded the model in {:?}", start.elapsed());

    let mut pipeline = TextGeneration::new(
        model,
        tokenizer,
        args.seed,
        args.temperature,
        args.top_p,
        args.repeat_penalty,
        args.repeat_last_n,
        &device,
    );
    pipeline.run(&args.prompt, args.sample_len)?;
    Ok(())
}
candle-examples/examples/jina-bert/README.md
0 → 100644
View file @
25d2752f
# candle-jina-bert
Jina-Bert is a BERT-based text embedding model with a context size of 8192,
[
model
card
](
https://huggingface.co/jinaai/jina-embeddings-v2-base-en
)
. In this example
it can be used for two different tasks:
-
Compute sentence embeddings for a prompt.
-
Compute similarities between a set of sentences.
## Sentence embeddings
Jina-Bert is used to compute the sentence embeddings for a prompt. The model weights
are downloaded from the hub on the first run.
```
bash
cargo run
--example
jina-bert
--release
--
--prompt
"Here is a test sentence"
>
[[[
0.1595,
-0
.9885, 0.6494, ..., 0.3003,
-0
.6901,
-1
.2355],
>
[
0.0374,
-0
.1798, 1.3359, ..., 0.6731, 0.2133,
-1
.6807],
>
[
0.1700,
-0
.8534, 0.8924, ...,
-0
.1785,
-0
.0727,
-1
.5087],
>
...
>
[
-0
.3113,
-1
.3665, 0.2027, ...,
-0
.2519, 0.1711,
-1
.5811],
>
[
0.0907,
-1
.0492, 0.5382, ..., 0.0242,
-0
.7077,
-1
.0830],
>
[
0.0369,
-0
.6343, 0.6105, ..., 0.0671, 0.3778,
-1
.1505]]]
>
Tensor[[1, 7, 768], f32]
```
## Similarities
In this example, Jina-Bert is used to compute the sentence embeddings for a set of
sentences (hardcoded in the examples). Then cosine similarities are computed for
each sentence pair and they are reported by decreasing values, hence the first
reported pair contains the two sentences that have the highest similarity score.
The sentence embeddings are computed using average pooling through all the
sentence tokens, including some potential padding.
```
bash
cargo run
--example
jina-bert
--release
>
score: 0.94
'The new movie is awesome'
'The new movie is so great'
>
score: 0.81
'The cat sits outside'
'The cat plays in the garden'
>
score: 0.78
'I love pasta'
'Do you like pizza?'
>
score: 0.68
'I love pasta'
'The new movie is awesome'
>
score: 0.67
'A man is playing guitar'
'A woman watches TV'
```
Prev
1
…
5
6
7
8
9
10
11
12
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment