Commit 9867304a authored by chenzk's avatar chenzk
Browse files

v1.0

parents
Pipeline #1408 canceled with stages
# This code references https://huggingface.co/JosephusCheung/ASimilarityCalculatior/blob/main/qwerty.py
# Fill in the path of the model to be queried and the root directory of the reference models, and this script will return the similarity between the model to be queried and all reference models.
import os
import logging
logger = logging.getLogger(__name__)
import torch
import torch.nn as nn
import torch.nn.functional as F
def cal_cross_attn(to_q, to_k, to_v, rand_input):
    """Push ``rand_input`` through an attention-like pass built from raw weights.

    Each of ``to_q``, ``to_k`` and ``to_v`` is loaded into a bias-free
    ``nn.Linear(hidden_dim, embed_dim)`` where ``hidden_dim, embed_dim`` is
    taken from ``to_q.shape``.  The softmax of the query/key score matrix is
    then combined with the value projection through the einsum
    ``"ik, jk -> ik"`` (the value rows are summed over ``j``).

    Returns the resulting 2-D tensor.
    """
    hidden_dim, embed_dim = to_q.shape

    def _project(weight):
        # Wrap the raw matrix in a bias-free linear layer and apply it.
        layer = nn.Linear(hidden_dim, embed_dim, bias=False)
        layer.load_state_dict({"weight": weight})
        return layer(rand_input)

    query = _project(to_q)
    key = _project(to_k)
    value = _project(to_v)

    scores = torch.einsum("ij, kj -> ik", query, key)
    probabilities = F.softmax(scores, dim=-1)
    return torch.einsum("ik, jk -> ik", probabilities, value)
def model_hash(filename):
    """Return a short fingerprint of the file at ``filename``.

    The fingerprint is the first 8 hex digits of the SHA-256 of the 0x10000
    bytes that start at offset 0x100000.  A missing file yields the string
    ``"NOFILE"``.
    """
    import hashlib

    try:
        with open(filename, "rb") as handle:
            handle.seek(0x100000)
            window = handle.read(0x10000)
    except FileNotFoundError:
        return "NOFILE"
    return hashlib.sha256(window).hexdigest()[:8]
def eval(model, n, input):
    """Compute the attention probe for encoder attention layer ``n``.

    Looks up the ``conv_q`` / ``conv_k`` / ``conv_v`` weights of
    ``enc_p.encoder.attn_layers.{n}`` in ``model``, takes index 0 of their
    last axis, and feeds the three matrices plus ``input`` to
    ``cal_cross_attn``.

    NOTE: the function and parameter names shadow the ``eval`` and ``input``
    builtins; they are kept unchanged for existing callers.
    """
    layer_prefix = f"enc_p.encoder.attn_layers.{n}"
    to_q, to_k, to_v = (
        model[f"{layer_prefix}.conv_{role}.weight"][:, :, 0]
        for role in ("q", "k", "v")
    )
    return cal_cross_attn(to_q, to_k, to_v, input)
def main(path, root):
    """Log how similar the model at ``path`` is to every model found in ``root``.

    For each of the 6 encoder attention layers a random probe is drawn (with a
    fixed seed, so runs are repeatable) and pushed through the query model.
    Every reference model is then probed with the same inputs, and the mean
    cosine similarity over the 6 layers is logged as a percentage.

    Args:
        path: checkpoint of the query model; must contain a ``"weight"`` entry.
        root: directory whose entries are loaded as reference checkpoints.
            Sub-directories are skipped because they cannot be loaded.
    """
    torch.manual_seed(114514)
    model_a = torch.load(path, map_location="cpu")["weight"]

    logger.info("Query:\t\t%s\t%s", path, model_hash(path))

    map_attn_a = {}
    map_rand_input = {}
    for n in range(6):
        hidden_dim, embed_dim, _ = model_a[
            f"enc_p.encoder.attn_layers.{n}.conv_v.weight"
        ].shape
        rand_input = torch.randn([embed_dim, hidden_dim])

        map_attn_a[n] = eval(model_a, n, rand_input)
        map_rand_input[n] = rand_input

    # The query weights are no longer needed once the probes are cached.
    del model_a

    for name in sorted(os.listdir(root)):
        ref_path = "%s/%s" % (root, name)
        if os.path.isdir(ref_path):
            # os.listdir also returns sub-directories; torch.load cannot open them.
            continue
        model_b = torch.load(ref_path, map_location="cpu")["weight"]

        sims = [
            torch.mean(
                torch.cosine_similarity(
                    map_attn_a[n], eval(model_b, n, map_rand_input[n])
                )
            )
            for n in range(6)
        ]

        logger.info(
            "Reference:\t%s\t%s\t%s",
            ref_path,
            model_hash(ref_path),
            f"{torch.mean(torch.stack(sims)) * 1e2:.2f}%",
        )
if __name__ == "__main__":
    # Compare the query checkpoint against everything in the weights folder.
    main(r"assets\weights\mi v3.pth", r"assets\weights")
@echo off && chcp 65001
rem Console code page 65001 is UTF-8.
echo working dir is %cd%
echo downloading requirement aria2 check.
echo=
rem List the sub-directories of the current directory, keep the names that
rem contain aria2, and store them in flag.txt.
dir /a:d/b | findstr "aria2" > flag.txt
rem findstr leaves errorlevel 0 only when flag.txt holds at least one match.
findstr "aria2" flag.txt >nul
if %errorlevel% ==0 (
echo aria2 checked.
echo=
) else (
echo failed. please downloading aria2 from webpage!
echo unzip it and put in this directory!
timeout /T 5
start https://github.com/aria2/aria2/releases/tag/release-1.36.0
echo=
goto end
)
echo envfiles checking start.
echo=
rem Remember a matching directory name in the aria2 variable; it is used
rem later as the folder that contains the aria2c executable.
for /f %%x in ('findstr /i /c:"aria2" "flag.txt"') do (set aria2=%%x)&goto endSch
:endSch
rem File names and download URLs of every model handled by this script.
rem All URLs share the same Hugging Face repository root.
set "hf_repo=https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main"

rem Pretrained f0 discriminators and generators.
set "d32=f0D32k.pth"
set "d40=f0D40k.pth"
set "d48=f0D48k.pth"
set "g32=f0G32k.pth"
set "g40=f0G40k.pth"
set "g48=f0G48k.pth"
set "d40v2=f0D40k.pth"
set "g40v2=f0G40k.pth"

set "dld32=%hf_repo%/pretrained/%d32%"
set "dld40=%hf_repo%/pretrained/%d40%"
set "dld48=%hf_repo%/pretrained/%d48%"
set "dlg32=%hf_repo%/pretrained/%g32%"
set "dlg40=%hf_repo%/pretrained/%g40%"
set "dlg48=%hf_repo%/pretrained/%g48%"
set "dld40v2=%hf_repo%/pretrained_v2/%d40v2%"
set "dlg40v2=%hf_repo%/pretrained_v2/%g40v2%"

rem UVR5 weights.
set "hp2_all=HP2_all_vocals.pth"
set "hp3_all=HP3_all_vocals.pth"
set "hp5_only=HP5_only_main_vocal.pth"
set "VR_DeEchoAggressive=VR-DeEchoAggressive.pth"
set "VR_DeEchoDeReverb=VR-DeEchoDeReverb.pth"
set "VR_DeEchoNormal=VR-DeEchoNormal.pth"
set "onnx_dereverb=vocals.onnx"

set "dlhp2_all=%hf_repo%/uvr5_weights/%hp2_all%"
set "dlhp3_all=%hf_repo%/uvr5_weights/%hp3_all%"
set "dlhp5_only=%hf_repo%/uvr5_weights/%hp5_only%"
set "dlVR_DeEchoAggressive=%hf_repo%/uvr5_weights/%VR_DeEchoAggressive%"
set "dlVR_DeEchoDeReverb=%hf_repo%/uvr5_weights/%VR_DeEchoDeReverb%"
set "dlVR_DeEchoNormal=%hf_repo%/uvr5_weights/%VR_DeEchoNormal%"
set "dlonnx_dereverb=%hf_repo%/uvr5_weights/onnx_dereverb_By_FoxJoy/%onnx_dereverb%"

rem HuBERT base model and RMVPE model.
set "hb=hubert_base.pt"
set "dlhb=%hf_repo%/%hb%"
set "rmvpe=rmvpe.pt"
set "dlrmvpe=%hf_repo%/%rmvpe%"
echo dir check start.
echo=
rem Make sure every model directory exists under the assets folder that sits
rem next to this script. The directory is created at the same location that
rem was tested, not relative to the current working directory.
for %%d in (pretrained pretrained_v2 uvr5_weights uvr5_weights\onnx_dereverb_By_FoxJoy) do (
    if exist "%~dp0assets\%%d" (
        echo dir .\assets\%%d checked.
    ) else (
        echo failed. generating dir .\assets\%%d.
        mkdir "%~dp0assets\%%d"
    )
)
echo=
echo dir check finished.
echo=
echo required files check start.
rem Every required file is handled by the check_file subroutine defined at the
rem bottom of this script. Arguments: assets sub-directory, file name, URL.

rem Pretrained models without f0.
call :check_file pretrained D32k.pth https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/D32k.pth
call :check_file pretrained D40k.pth https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/D40k.pth
call :check_file pretrained_v2 D40k.pth https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained_v2/D40k.pth
call :check_file pretrained D48k.pth https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/D48k.pth
call :check_file pretrained G32k.pth https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/G32k.pth
call :check_file pretrained G40k.pth https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/G40k.pth
call :check_file pretrained_v2 G40k.pth https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained_v2/G40k.pth
call :check_file pretrained G48k.pth https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/G48k.pth

rem Pretrained f0 models; names and URLs come from the variables set earlier.
call :check_file pretrained %d32% %dld32%
call :check_file pretrained %d40% %dld40%
call :check_file pretrained_v2 %d40v2% %dld40v2%
call :check_file pretrained %d48% %dld48%
call :check_file pretrained %g32% %dlg32%
call :check_file pretrained %g40% %dlg40%
call :check_file pretrained_v2 %g40v2% %dlg40v2%
call :check_file pretrained %g48% %dlg48%

rem UVR5 weights.
call :check_file uvr5_weights %hp2_all% %dlhp2_all%
call :check_file uvr5_weights %hp3_all% %dlhp3_all%
call :check_file uvr5_weights %hp5_only% %dlhp5_only%
call :check_file uvr5_weights %VR_DeEchoAggressive% %dlVR_DeEchoAggressive%
call :check_file uvr5_weights %VR_DeEchoDeReverb% %dlVR_DeEchoDeReverb%
call :check_file uvr5_weights %VR_DeEchoNormal% %dlVR_DeEchoNormal%
call :check_file uvr5_weights\onnx_dereverb_By_FoxJoy %onnx_dereverb% %dlonnx_dereverb%

rem HuBERT base model and RMVPE model.
call :check_file hubert %hb% %dlhb%
call :check_file rmvpe %rmvpe% %dlrmvpe%

echo required files check finished.
echo envfiles check complete.
pause
:end
del flag.txt
goto :eof

rem ---------------------------------------------------------------------------
rem check_file: report whether a file exists under the assets folder next to
rem this script and download it with aria2c when it is missing.
rem   argument 1: sub-directory of assets
rem   argument 2: file name
rem   argument 3: download URL
rem Paths are quoted so that an install location containing spaces still works.
rem ---------------------------------------------------------------------------
:check_file
echo checking %~2
if exist "%~dp0assets\%~1\%~2" (
    echo %~2 in .\assets\%~1 checked.
    echo=
) else (
    echo failed. starting download from huggingface.
    "%~dp0%aria2%\aria2c" --console-log-level=error -c -x 16 -s 16 -k 1M %~3 -d "%~dp0assets\%~1" -o %~2
    if exist "%~dp0assets\%~1\%~2" (echo download successful.) else (echo please try again!
    echo=)
)
goto :eof
#!/bin/sh
# Verify that aria2c is installed before any download is attempted.

printf "working dir is %s\n" "$PWD"
echo "downloading requirement aria2 check."

if ! command -v aria2c > /dev/null 2>&1; then
    echo "failed. please install aria2"
    exit 1
fi
echo "aria2 command found"

echo "dir check start."
# check_dir DIR
# Report whether DIR exists and create it (including parents) when it does not.
# Written as if/else: the form `a && b || c && d` groups as `((a && b) || c) && d`,
# which would run mkdir even for a directory that already exists.
check_dir() {
    if [ -d "$1" ]; then
        printf "dir %s checked\n" "$1"
    else
        printf "failed. generating dir %s\n" "$1"
        mkdir -p "$1"
    fi
}
# Directories that must exist before the downloads start.
for required_dir in \
    "./assets/pretrained" \
    "./assets/pretrained_v2" \
    "./assets/uvr5_weights" \
    "./assets/uvr5_weights/onnx_dereverb_By_FoxJoy"
do
    check_dir "$required_dir"
done

echo "dir check finished."

echo "required files check start."
# check_file_pretrained SUBDIR FILE
# Ensure ./assets/SUBDIR/FILE exists, downloading it from the SUBDIR folder of
# the Hugging Face repository when it is missing.  The script exits with
# status 1 only when aria2c is unavailable or the file is still missing after
# the download attempt.
check_file_pretrained() {
    printf "checking %s\n" "$2"
    if [ -f "./assets/$1/$2" ]; then
        printf "%s in ./assets/%s checked.\n" "$2" "$1"
        return 0
    fi

    echo "failed. starting download from huggingface."
    if ! command -v aria2c > /dev/null 2>&1; then
        echo "aria2c command not found. Please install aria2c and try again."
        exit 1
    fi

    aria2c --console-log-level=error -c -x 16 -s 16 -k 1M "https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/$1/$2" -d "./assets/$1" -o "$2"

    # Explicit if/else: chaining `&& ... || ... && exit 1` would also exit
    # after a successful download because of left-to-right grouping.
    if [ -f "./assets/$1/$2" ]; then
        echo "download successful."
    else
        echo "please try again!"
        exit 1
    fi
}
# check_file_special SUBDIR FILE
# Ensure ./assets/SUBDIR/FILE exists, downloading FILE from the root of the
# Hugging Face repository when it is missing.  The script exits with status 1
# only when aria2c is unavailable or the file is still missing after the
# download attempt.
check_file_special() {
    printf "checking %s\n" "$2"
    if [ -f "./assets/$1/$2" ]; then
        printf "%s in ./assets/%s checked.\n" "$2" "$1"
        return 0
    fi

    echo "failed. starting download from huggingface."
    if ! command -v aria2c > /dev/null 2>&1; then
        echo "aria2c command not found. Please install aria2c and try again."
        exit 1
    fi

    aria2c --console-log-level=error -c -x 16 -s 16 -k 1M "https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/$2" -d "./assets/$1" -o "$2"

    # Explicit if/else: chaining `&& ... || ... && exit 1` would also exit
    # after a successful download because of left-to-right grouping.
    if [ -f "./assets/$1/$2" ]; then
        echo "download successful."
    else
        echo "please try again!"
        exit 1
    fi
}
# Files fetched from the repository folder that matches their assets sub-directory.
for model in D32k.pth D40k.pth D48k.pth G32k.pth G40k.pth G48k.pth; do
    check_file_pretrained pretrained "$model"
done

for model in f0D40k.pth f0G40k.pth D40k.pth G40k.pth; do
    check_file_pretrained pretrained_v2 "$model"
done

for model in \
    HP2_all_vocals.pth \
    HP3_all_vocals.pth \
    HP5_only_main_vocal.pth \
    VR-DeEchoAggressive.pth \
    VR-DeEchoDeReverb.pth \
    VR-DeEchoNormal.pth \
    "onnx_dereverb_By_FoxJoy/vocals.onnx"
do
    check_file_pretrained uvr5_weights "$model"
done

# Files that live at the root of the repository.
check_file_special rmvpe rmvpe.pt
check_file_special hubert hubert_base.pt

echo "required files check finished."
import os
from pathlib import Path
import requests
# Root URL that every model path is appended to.
RVC_DOWNLOAD_LINK = "https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/"

# Directory two levels above this file; the assets folders are resolved against it.
BASE_DIR = Path(__file__).resolve().parent.parent


def dl_model(link, model_name, dir_name):
    """Download ``link + model_name`` into ``dir_name / model_name``.

    Args:
        link: URL prefix; ``model_name`` is appended to it verbatim.
        model_name: file name (may contain sub-paths) to fetch and to write.
        dir_name: destination directory as a ``pathlib.Path``; missing parent
            directories of the target file are created.

    Raises:
        requests.HTTPError: if the server answers with an error status.
    """
    target = dir_name / model_name
    # stream=True: without it requests reads the whole body into memory before
    # iter_content yields anything, which is wasteful for large model files.
    with requests.get(f"{link}{model_name}", stream=True) as r:
        r.raise_for_status()
        os.makedirs(os.path.dirname(target), exist_ok=True)
        with open(target, "wb") as f:
            for chunk in r.iter_content(chunk_size=8192):
                f.write(chunk)
if __name__ == "__main__":
    # Stand-alone files: (file name, URL folder, destination under BASE_DIR).
    single_files = (
        ("hubert_base.pt", "", "assets/hubert"),
        ("rmvpe.pt", "", "assets/rmvpe"),
        (
            "vocals.onnx",
            "uvr5_weights/onnx_dereverb_By_FoxJoy/",
            "assets/uvr5_weights/onnx_dereverb_By_FoxJoy",
        ),
    )
    for file_name, url_folder, destination in single_files:
        print(f"Downloading {file_name}...")
        dl_model(RVC_DOWNLOAD_LINK + url_folder, file_name, BASE_DIR / destination)

    # D/G checkpoints at 32k, 40k and 48k, first without and then with the f0 prefix.
    pretrained_names = [
        f"{prefix}{part}{rate}k.pth"
        for prefix in ("", "f0")
        for part in ("D", "G")
        for rate in (32, 40, 48)
    ]
    uvr5_names = [
        "HP2-%E4%BA%BA%E5%A3%B0vocals%2B%E9%9D%9E%E4%BA%BA%E5%A3%B0instrumentals.pth",
        "HP2_all_vocals.pth",
        "HP3_all_vocals.pth",
        "HP5-%E4%B8%BB%E6%97%8B%E5%BE%8B%E4%BA%BA%E5%A3%B0vocals%2B%E5%85%B6%E4%BB%96instrumentals.pth",
        "HP5_only_main_vocal.pth",
        "VR-DeEchoAggressive.pth",
        "VR-DeEchoDeReverb.pth",
        "VR-DeEchoNormal.pth",
    ]

    # Model groups: (banner, URL folder, destination under BASE_DIR, file names).
    model_groups = (
        ("Downloading pretrained models:", "pretrained/", "assets/pretrained", pretrained_names),
        ("Downloading pretrained models v2:", "pretrained_v2/", "assets/pretrained_v2", pretrained_names),
        ("Downloading uvr5_weights:", "uvr5_weights/", "assets/uvr5_weights", uvr5_names),
    )
    for banner, url_folder, destination, names in model_groups:
        print(banner)
        for model in names:
            print(f"Downloading {model}...")
            dl_model(RVC_DOWNLOAD_LINK + url_folder, model, BASE_DIR / destination)

    print("All models downloaded!")
import torch
from infer.lib.infer_pack.models_onnx import SynthesizerTrnMsNSFsidM
if __name__ == "__main__":
    MoeVS = True  # whether the model is meant for MoeVoiceStudio (formerly MoeSS)
    model_path = "Shiroha/shiroha.pth"  # checkpoint to export
    exported_path = "model.onnx"  # output file
    hidden_channels = 256  # hidden_channels (in preparation for 768-dim vectors)
    n_frames = 200  # length of the dummy sequence used for tracing

    cpt = torch.load(model_path, map_location="cpu")
    # n_spk: overwrite the config entry with the row count of the speaker embedding.
    cpt["config"][-3] = cpt["weight"]["emb_g.weight"].shape[0]
    print(*cpt["config"])

    # Dummy inputs, keyed by their ONNX input name.  The creation order is
    # kept as-is because it determines which random values each tensor gets.
    dummy_inputs = {
        "phone": torch.rand(1, n_frames, hidden_channels),  # hidden units
        "phone_lengths": torch.tensor([n_frames]).long(),  # hidden-unit length (seems to have little effect)
        "pitch": torch.randint(size=(1, n_frames), low=5, high=255),  # fundamental frequency (Hz)
        "pitchf": torch.rand(1, n_frames),  # NSF fundamental frequency
        "ds": torch.LongTensor([0]),  # speaker ID
        "rnd": torch.rand(1, 192, n_frames),  # noise (adds a random factor)
    }

    device = "cpu"  # device used while exporting (does not affect how the model is used)

    # Export in fp32: fp16 support in C++ would require rearranging memory by
    # hand, so fp16 is not used for now.
    net_g = SynthesizerTrnMsNSFsidM(*cpt["config"], is_half=False)
    net_g.load_state_dict(cpt["weight"], strict=False)

    # net_g.construct_spkmixmap(n_speaker)  # multi-speaker mixed-track export
    torch.onnx.export(
        net_g,
        tuple(tensor.to(device) for tensor in dummy_inputs.values()),
        exported_path,
        dynamic_axes={
            "phone": [1],
            "pitch": [1],
            "pitchf": [1],
            "rnd": [2],
        },
        do_constant_folding=False,
        opset_version=16,
        verbose=False,
        input_names=list(dummy_inputs),
        output_names=["audio"],
    )
"""
Run retrieval (nearest-neighbour search) over the source features.
"""
import os
import logging
logger = logging.getLogger(__name__)
import parselmouth
import torch
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
# import torchcrepe
from time import time as ttime
# import pyworld
import librosa
import numpy as np
import soundfile as sf
import torch.nn.functional as F
from fairseq import checkpoint_utils
# from models import SynthesizerTrn256#hifigan_nonsf
# from lib.infer_pack.models import SynthesizerTrn256NSF as SynthesizerTrn256#hifigan_nsf
from infer.lib.infer_pack.models import (
SynthesizerTrnMs256NSFsid as SynthesizerTrn256,
) # hifigan_nsf
from scipy.io import wavfile
# from lib.infer_pack.models import SynthesizerTrnMs256NSFsid_sim as SynthesizerTrn256#hifigan_nsf
# from models import SynthesizerTrn256NSFsim as SynthesizerTrn256#hifigan_nsf
# from models import SynthesizerTrn256NSFsimFlow as SynthesizerTrn256#hifigan_nsf
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# NOTE: absolute path from the original author's machine.
model_path = r"E:\codes\py39\vits_vc_gpu_train\assets\hubert\hubert_base.pt"
logger.info("Load model(s) from {}".format(model_path))
models, saved_cfg, task = checkpoint_utils.load_model_ensemble_and_task(
    [model_path],
    suffix="",
)
# Feature extractor: first model of the ensemble, in half precision, eval mode.
model = models[0].to(device).half()
model.eval()

# Positional constructor arguments of the synthesizer (hifigan, 512, 256,
# no dropout).  Earlier experiments recorded by the author used other
# variants, e.g. 0.1 instead of 0 for the ninth argument, 109 instead of 183,
# or shorter upsampling lists.
synthesizer_args = (
    1025,
    32,
    192,
    192,
    768,
    2,
    6,
    3,
    0,
    "1",
    [3, 7, 11],
    [[1, 3, 5], [1, 3, 5], [1, 3, 5]],
    [10, 10, 2, 2],
    512,
    [16, 16, 4, 4],
    183,
    256,
)
net_g = SynthesizerTrn256(*synthesizer_args, is_half=True)

# Other checkpoints tried previously: infer/ft-mi_1k-noD.pt,
# infer/ft-mi-freeze-vocoder-flow-enc_q_1k.pt,
# infer/ft-mi-freeze-vocoder_true_1k.pt, infer/ft-mi-sim1k.pt
weights = torch.load("infer/ft-mi-no_opt-no_dropout.pt")
load_result = net_g.load_state_dict(weights, strict=True)
logger.debug(load_result)
net_g.eval().to(device)
net_g.half()
def get_f0(x, p_len, f0_up_key=0):
    """Extract a pitch track from ``x`` and quantise it to integers in 1..255.

    The pitch is estimated with Praat's autocorrelation method at a 10 ms step
    (``x`` is treated as 16 kHz audio), padded with zeros on both sides when it
    is shorter than ``p_len``, and transposed by ``f0_up_key`` semitones.

    Args:
        x: audio samples handed to ``parselmouth.Sound``.
        p_len: number of frames the track is padded up to.
        f0_up_key: transposition in semitones (12 per octave).

    Returns:
        ``(f0_coarse, f0bak)``: the mel-scaled pitch rounded to ``int32`` in the
        range 1..255, and the transposed pitch in Hz before quantisation.
    """
    hop_ms = 160 / 16000 * 1000
    f0_min = 50
    f0_max = 1100
    mel_floor = 1127 * np.log(1 + f0_min / 700)
    mel_ceiling = 1127 * np.log(1 + f0_max / 700)

    pitch_track = parselmouth.Sound(x, 16000).to_pitch_ac(
        time_step=hop_ms / 1000,
        voicing_threshold=0.6,
        pitch_floor=f0_min,
        pitch_ceiling=f0_max,
    )
    f0 = pitch_track.selected_array["frequency"]

    # Split the missing frames between the front and the back of the track.
    missing = p_len - len(f0)
    pad_front = (missing + 1) // 2
    pad_back = missing - pad_front
    if pad_front > 0 or pad_back > 0:
        f0 = np.pad(f0, [[pad_front, pad_back]], mode="constant")

    f0 *= pow(2, f0_up_key / 12)
    f0bak = f0.copy()

    # Map the positive mel values linearly so that f0_min -> 1 and f0_max -> 255.
    f0_mel = 1127 * np.log(1 + f0 / 700)
    positive = f0_mel > 0
    f0_mel[positive] = (f0_mel[positive] - mel_floor) * 254 / (
        mel_ceiling - mel_floor
    ) + 1
    f0_mel[f0_mel <= 1] = 1
    f0_mel[f0_mel > 255] = 255
    f0_coarse = np.rint(f0_mel).astype(np.int32)
    return f0_coarse, f0bak
import faiss

index = faiss.read_index("infer/added_IVF512_Flat_mi_baseline_src_feat.index")
big_npy = np.load("infer/big_src_feature_mi.npy")


def _synced_time():
    """Return the current time, synchronising CUDA first when a GPU is available."""
    if torch.cuda.is_available():
        torch.cuda.synchronize()
    return ttime()


# Accumulated seconds for: feature extraction + index lookup, pitch
# extraction, and synthesis.
ta0 = ta1 = ta2 = 0
for idx, name in enumerate(["冬之花clip1.wav"]):
    wav_path = "todo-songs/%s" % name
    f0_up_key = -2

    # Load the clip as mono audio at 16 kHz.
    audio, sampling_rate = sf.read(wav_path)
    if len(audio.shape) > 1:
        audio = librosa.to_mono(audio.transpose(1, 0))
    if sampling_rate != 16000:
        audio = librosa.resample(audio, orig_sr=sampling_rate, target_sr=16000)

    feats = torch.from_numpy(audio).float()
    if feats.dim() == 2:  # two channels
        feats = feats.mean(-1)
    assert feats.dim() == 1, feats.dim()
    feats = feats.view(1, -1)
    padding_mask = torch.BoolTensor(feats.shape).fill_(False)
    inputs = {
        "source": feats.half().to(device),
        "padding_mask": padding_mask.to(device),
        "output_layer": 9,  # layer 9
    }

    t0 = _synced_time()
    with torch.no_grad():
        logits = model.extract_features(**inputs)
        feats = model.final_proj(logits[0])

    # Index optimisation: replace every frame with its nearest stored feature.
    query = feats[0].cpu().numpy().astype("float32")
    _, nearest = index.search(query, 1)
    retrieved = big_npy[nearest.squeeze()].astype("float16")
    feats = torch.from_numpy(retrieved).unsqueeze(0).to(device)

    feats = F.interpolate(feats.permute(0, 2, 1), scale_factor=2).permute(0, 2, 1)
    t1 = _synced_time()

    # Cap the frame count at 10000; larger values run out of GPU memory.
    p_len = min(feats.shape[1], 10000)
    pitch, pitchf = get_f0(audio, p_len, f0_up_key)
    p_len = min(feats.shape[1], 10000, pitch.shape[0])
    t2 = _synced_time()

    feats = feats[:, :p_len, :]
    pitch = pitch[:p_len]
    pitchf = pitchf[:p_len]
    p_len = torch.LongTensor([p_len]).to(device)
    pitch = torch.LongTensor(pitch).unsqueeze(0).to(device)
    sid = torch.LongTensor([0]).to(device)
    pitchf = torch.FloatTensor(pitchf).unsqueeze(0).to(device)
    with torch.no_grad():
        synthesized = net_g.infer(feats, p_len, pitch, pitchf, sid)[0][0, 0]  # nsf
        audio = synthesized.data.cpu().float().numpy()
    t3 = _synced_time()

    ta0 += t1 - t0
    ta1 += t2 - t1
    ta2 += t3 - t2
    # Output names used in earlier experiments: ft-mi_1k-index256-noD-%s.wav,
    # ft-mi-freeze-vocoder-flow-enc_q_1k-%s.wav, ft-mi-sim1k-%s.wav
    wavfile.write("ft-mi-no_opt-no_dropout-%s.wav" % name, 40000, audio)

logger.debug("%.2fs %.2fs %.2fs", ta0, ta1, ta2)
"""
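Format: the cid is simply the built-in index position; the aid no longer fits, so it is looked up through a dictionary — there are only about 50k of them anyway.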
"""
import os
import traceback
import logging
logger = logging.getLogger(__name__)
from multiprocessing import cpu_count
import faiss
import numpy as np
from sklearn.cluster import MiniBatchKMeans
# If the inputs are still raw features, write the save step first.
n_cpu = 0  # 0 means "use every available core"
if n_cpu == 0:
    n_cpu = cpu_count()

# Load every feature file of the experiment, in sorted name order, and stack them.
inp_root = r"./logs/anz/3_feature768"
npys = [
    np.load("%s/%s" % (inp_root, name)) for name in sorted(os.listdir(inp_root))
]
big_npy = np.concatenate(npys, 0)

# Shuffle the rows before clustering / indexing.
big_npy_idx = np.arange(big_npy.shape[0])
np.random.shuffle(big_npy_idx)
big_npy = big_npy[big_npy_idx]
logger.debug(big_npy.shape)  # author's example: (6196072, 192), fp32, 4.43G

if big_npy.shape[0] > 2e5:
    # Too many vectors: replace them with 10k k-means centres.
    info = "Trying doing kmeans %s shape to 10k centers." % big_npy.shape[0]
    logger.info(info)
    try:
        big_npy = (
            MiniBatchKMeans(
                n_clusters=10000,
                verbose=True,
                batch_size=256 * n_cpu,
                compute_labels=False,
                init="random",
            )
            .fit(big_npy)
            .cluster_centers_
        )
    except Exception:
        # Best effort: keep the unclustered features if k-means fails.
        # "except Exception" (not a bare except) lets Ctrl-C still abort the fit.
        info = traceback.format_exc()
        logger.warning(info)

np.save("tools/infer/big_src_feature_mi.npy", big_npy)
# ---- train + add ----
# big_npy=np.load("/bili-coeus/jupyter/jupyterhub-liujing04/vits_ch/inference_f0/big_src_feature_mi.npy")
n_vectors = big_npy.shape[0]
n_ivf = min(int(16 * np.sqrt(n_vectors)), n_vectors // 39)
index = faiss.index_factory(768, "IVF%s,Flat" % n_ivf)  # mi

logger.info("Training...")
index_ivf = faiss.extract_index_ivf(index)
index_ivf.nprobe = 1
index.train(big_npy)
trained_path = "tools/infer/trained_IVF%s_Flat_baseline_src_feat_v2.index" % (n_ivf)
faiss.write_index(index, trained_path)

logger.info("Adding...")
# Add the vectors in fixed-size batches.
batch_size_add = 8192
for i in range(0, n_vectors, batch_size_add):
    index.add(big_npy[i : i + batch_size_add])
added_path = "tools/infer/added_IVF%s_Flat_mi_baseline_src_feat.index" % (n_ivf)
faiss.write_index(index, added_path)
"""
Sizes (all FP32)
big_src_feature 2.95G
(3098036, 256)
big_emb 4.43G
(6196072, 192)
big_emb is twice as long because computing the features requires a repeat before the pitch is added
"""
"""
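Format: the cid is simply the built-in index position; the aid no longer fits, so it is looked up through a dictionary — there are only about 50k of them anyway.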
"""
import os
import logging
logger = logging.getLogger(__name__)
import faiss
import numpy as np
# Build an IVF512-Flat faiss index from every feature file under `inp_root`.

# ########### If starting from raw features, the merged array is saved first
inp_root = r"E:\codes\py39\dataset\mi\2-co256"
# Load the feature files in sorted order and stack them row-wise.
npys = [np.load("%s/%s" % (inp_root, name)) for name in sorted(os.listdir(inp_root))]
big_npy = np.concatenate(npys, 0)
logger.debug(big_npy.shape)  # e.g. (6196072, 192), fp32, 4.43G
np.save("infer/big_src_feature_mi.npy", big_npy)

################## train + add
# big_npy=np.load("/bili-coeus/jupyter/jupyterhub-liujing04/vits_ch/inference_f0/big_src_feature_mi.npy")
logger.debug(big_npy.shape)
index = faiss.index_factory(256, "IVF512,Flat")  # mi
logger.info("Training...")
index_ivf = faiss.extract_index_ivf(index)  #
index_ivf.nprobe = 9
index.train(big_npy)
# Persist the trained-but-empty index before adding any vectors.
faiss.write_index(index, "infer/trained_IVF512_Flat_mi_baseline_src_feat.index")
logger.info("Adding...")
index.add(big_npy)
faiss.write_index(index, "infer/added_IVF512_Flat_mi_baseline_src_feat.index")
"""
Sizes (all FP32)
big_src_feature 2.95G
(3098036, 256)
big_emb 4.43G
(6196072, 192)
big_emb is twice as large because the features are repeated before the pitch is added
"""
import pdb
import torch
# Convert the "model" entry of a training checkpoint to half precision and save it
# as a standalone file. The commented-out lines are alternative source checkpoints.
# a=torch.load(r"E:\codes\py39\vits_vc_gpu_train\logs\ft-mi-suc\G_1000.pth")["model"]#sim_nsf#
# a=torch.load(r"E:\codes\py39\vits_vc_gpu_train\logs\ft-mi-freeze-vocoder-flow-enc_q\G_1000.pth")["model"]#sim_nsf#
# a=torch.load(r"E:\codes\py39\vits_vc_gpu_train\logs\ft-mi-freeze-vocoder\G_1000.pth")["model"]#sim_nsf#
# a=torch.load(r"E:\codes\py39\vits_vc_gpu_train\logs\ft-mi-test\G_1000.pth")["model"]#sim_nsf#
a = torch.load(
    r"E:\codes\py39\vits_vc_gpu_train\logs\ft-mi-no_opt-no_dropout\G_1000.pth"
)["model"]  # sim_nsf#
# Replace each entry with its fp16 copy; iterate over a snapshot of the keys.
for key in list(a):
    a[key] = a[key].half()
# torch.save(a,"ft-mi-freeze-vocoder_true_1k.pt")#
# torch.save(a,"ft-mi-sim1k.pt")#
torch.save(a, "ft-mi-no_opt-no_dropout.pt")  #
import argparse
import os
import sys
print("Command-line arguments:", sys.argv)
now_dir = os.getcwd()
sys.path.append(now_dir)
import sys
import tqdm as tq
from dotenv import load_dotenv
from scipy.io import wavfile
from configs.config import Config
from infer.modules.vc.modules import VC
def arg_parse() -> argparse.Namespace:
    """Parse the command-line options of the batch inference script.

    Returns:
        The parsed ``argparse.Namespace``. Options without a default (``input_path``,
        ``index_path``, ``opt_path``, ``model_name``, ``device``, ``is_half``) are ``None``
        when not given.

    Side effects:
        ``sys.argv`` is truncated to the program name after parsing.
        NOTE(review): presumably so that later argument parsing (e.g. inside ``Config``)
        does not see these flags -- confirm.
    """

    def str2bool(value: str) -> bool:
        # argparse's ``type=bool`` calls bool() on the raw string, so "False" would be
        # True. Map the usual false spellings to False; anything else stays True, which
        # matches the previous behaviour for all other inputs.
        return value.strip().lower() not in ("", "0", "false", "f", "no", "n", "off")

    parser = argparse.ArgumentParser()
    parser.add_argument("--f0up_key", type=int, default=0)
    parser.add_argument("--input_path", type=str, help="input path")
    parser.add_argument("--index_path", type=str, help="index path")
    parser.add_argument("--f0method", type=str, default="harvest", help="harvest or pm")
    parser.add_argument("--opt_path", type=str, help="opt path")
    parser.add_argument("--model_name", type=str, help="store in assets/weight_root")
    parser.add_argument("--index_rate", type=float, default=0.66, help="index rate")
    parser.add_argument("--device", type=str, help="device")
    parser.add_argument("--is_half", type=str2bool, help="use half -> True")
    parser.add_argument("--filter_radius", type=int, default=3, help="filter radius")
    parser.add_argument("--resample_sr", type=int, default=0, help="resample sr")
    parser.add_argument("--rms_mix_rate", type=float, default=1, help="rms mix rate")
    parser.add_argument("--protect", type=float, default=0.33, help="protect")
    args = parser.parse_args()
    sys.argv = sys.argv[:1]
    return args
def main():
    """Convert every ``.wav`` file in ``--input_path`` and write the results to ``--opt_path``.

    Loads environment variables, parses the CLI options, builds a ``VC`` instance for
    ``--model_name`` and calls ``vc.vc_single`` once per input file. Each output is written
    under the same file name inside ``--opt_path``.
    """
    load_dotenv()
    args = arg_parse()
    config = Config()
    config.device = args.device if args.device else config.device
    # Only override when the flag was given, so an explicit false value is honoured
    # instead of silently falling back to the config default.
    if args.is_half is not None:
        config.is_half = args.is_half
    vc = VC(config)
    vc.get_vc(args.model_name)
    # Make sure the output directory exists before the first write.
    if args.opt_path:
        os.makedirs(args.opt_path, exist_ok=True)
    audios = os.listdir(args.input_path)
    for file in tq.tqdm(audios):
        if not file.endswith(".wav"):
            continue
        file_path = os.path.join(args.input_path, file)
        # Positional arguments follow vc_single's signature, which is defined elsewhere.
        # NOTE(review): the leading 0 and the two None values look like speaker id and
        # optional file arguments -- confirm against VC.vc_single.
        _, wav_opt = vc.vc_single(
            0,
            file_path,
            args.f0up_key,
            None,
            args.f0method,
            args.index_path,
            None,
            args.index_rate,
            args.filter_radius,
            args.resample_sr,
            args.rms_mix_rate,
            args.protect,
        )
        out_path = os.path.join(args.opt_path, file)
        # wav_opt is passed as (rate, data) to scipy's wavfile.write.
        wavfile.write(out_path, wav_opt[0], wav_opt[1])


if __name__ == "__main__":
    main()
import argparse
import os
import sys
now_dir = os.getcwd()
sys.path.append(now_dir)
from dotenv import load_dotenv
from scipy.io import wavfile
from configs.config import Config
from infer.modules.vc.modules import VC
####
# USAGE
#
# In your Terminal or CMD or whatever
def arg_parse() -> argparse.Namespace:
    """Parse the command-line options of the single-file inference script.

    Returns:
        The parsed ``argparse.Namespace``. Options without a default (``input_path``,
        ``index_path``, ``opt_path``, ``model_name``, ``device``, ``is_half``) are ``None``
        when not given.

    Side effects:
        ``sys.argv`` is truncated to the program name after parsing.
        NOTE(review): presumably so that later argument parsing (e.g. inside ``Config``)
        does not see these flags -- confirm.
    """

    def str2bool(value: str) -> bool:
        # argparse's ``type=bool`` calls bool() on the raw string, so "False" would be
        # True. Map the usual false spellings to False; anything else stays True, which
        # matches the previous behaviour for all other inputs.
        return value.strip().lower() not in ("", "0", "false", "f", "no", "n", "off")

    parser = argparse.ArgumentParser()
    parser.add_argument("--f0up_key", type=int, default=0)
    parser.add_argument("--input_path", type=str, help="input path")
    parser.add_argument("--index_path", type=str, help="index path")
    parser.add_argument("--f0method", type=str, default="harvest", help="harvest or pm")
    parser.add_argument("--opt_path", type=str, help="opt path")
    parser.add_argument("--model_name", type=str, help="store in assets/weight_root")
    parser.add_argument("--index_rate", type=float, default=0.66, help="index rate")
    parser.add_argument("--device", type=str, help="device")
    parser.add_argument("--is_half", type=str2bool, help="use half -> True")
    parser.add_argument("--filter_radius", type=int, default=3, help="filter radius")
    parser.add_argument("--resample_sr", type=int, default=0, help="resample sr")
    parser.add_argument("--rms_mix_rate", type=float, default=1, help="rms mix rate")
    parser.add_argument("--protect", type=float, default=0.33, help="protect")
    args = parser.parse_args()
    sys.argv = sys.argv[:1]
    return args
def main():
    """Convert the single audio file ``--input_path`` and write the result to ``--opt_path``.

    Loads environment variables, parses the CLI options, builds a ``VC`` instance for
    ``--model_name`` and calls ``vc.vc_single`` once.
    """
    load_dotenv()
    args = arg_parse()
    config = Config()
    config.device = args.device if args.device else config.device
    # Only override when the flag was given, so an explicit false value is honoured
    # instead of silently falling back to the config default.
    if args.is_half is not None:
        config.is_half = args.is_half
    vc = VC(config)
    vc.get_vc(args.model_name)
    # Positional arguments follow vc_single's signature, which is defined elsewhere.
    # NOTE(review): the leading 0 and the two None values look like speaker id and
    # optional file arguments -- confirm against VC.vc_single.
    _, wav_opt = vc.vc_single(
        0,
        args.input_path,
        args.f0up_key,
        None,
        args.f0method,
        args.index_path,
        None,
        args.index_rate,
        args.filter_radius,
        args.resample_sr,
        args.rms_mix_rate,
        args.protect,
    )
    # wav_opt is passed as (rate, data) to scipy's wavfile.write.
    wavfile.write(args.opt_path, wav_opt[0], wav_opt[1])


if __name__ == "__main__":
    main()
import soundfile
from ..infer.lib.infer_pack.onnx_inference import OnnxRVC
# Minimal ONNX inference example: configure, load the model, convert one file, save it.

# --- audio / pitch settings ---
hop_size = 512
sampling_rate = 40000  # sample rate
f0_up_key = 0  # pitch shift (up or down)
sid = 0  # speaker ID
f0_method = "dio"  # F0 extraction algorithm

# --- model files ---
model_path = "ShirohaRVC.onnx"  # full path of the model
# Expanded internally to f"pretrained/{vec_name}.onnx"; an ONNX vec model is required.
vec_name = "vec-256-layer-9"

# --- input / output ---
wav_path = "123.wav"  # input path or ByteIO instance
out_path = "out.wav"  # output path or ByteIO instance

model = OnnxRVC(
    model_path,
    vec_path=vec_name,
    sr=sampling_rate,
    hop_size=hop_size,
    device="cuda",
)
audio = model.inference(wav_path, sid, f0_method=f0_method, f0_up_key=f0_up_key)
soundfile.write(out_path, audio, sampling_rate)
from io import BytesIO
import os
import pickle
import sys
import traceback
from infer.lib import jit
from infer.lib.jit.get_synthesizer import get_synthesizer
from time import time as ttime
import fairseq
import faiss
import numpy as np
import parselmouth
import pyworld
import scipy.signal as signal
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchcrepe
from infer.lib.infer_pack.models import (
SynthesizerTrnMs256NSFsid,
SynthesizerTrnMs256NSFsid_nono,
SynthesizerTrnMs768NSFsid,
SynthesizerTrnMs768NSFsid_nono,
)
now_dir = os.getcwd()
sys.path.append(now_dir)
from multiprocessing import Manager as M
from configs.config import Config
# config = Config()
mm = M()
def printt(strr, *args):
    """Print ``strr``; when extra arguments are given, apply them with ``%`` formatting first.

    With no extra arguments the string is printed verbatim, so a literal ``%`` is safe.
    """
    print(strr % args if args else strr)
# config.device=torch.device("cpu")  # force CPU for testing
# config.is_half=False  # force CPU for testing
class RVC:
    """Block-wise voice-conversion helper.

    Holds a HuBERT feature extractor (``self.model``), an optional faiss index used to
    blend retrieved features into the extracted ones, several F0 extractors and the
    synthesizer network (``self.net_g``). :meth:`infer` converts one block of audio.
    """

    def __init__(
        self,
        key,
        pth_path,
        index_path,
        index_rate,
        n_cpu,
        inp_q,
        opt_q,
        config: "Config",
        last_rvc=None,
    ) -> None:
        """Initialise the converter.

        Args:
            key: Pitch shift in semitones (stored as ``f0_up_key``).
            pth_path: Path of the synthesizer checkpoint.
            index_path: Path of the faiss index; only read when ``index_rate`` is non-zero.
            index_rate: Blend weight of the retrieved features; 0 disables index search.
            n_cpu: Number of workers used for multi-process harvest F0 extraction.
            inp_q: Queue on which harvest work items are put.
            opt_q: Queue from which harvest completion timestamps are read.
            config: Runtime configuration; ``dml``, ``device``, ``use_jit`` and ``is_half``
                are read here.
            last_rvc: Previous instance whose loaded models are reused where possible.

        Any exception raised during set-up is printed and swallowed, so the instance may
        be left partially initialised.
        """
        try:
            if config.dml:

                def forward_dml(ctx, x, scale):
                    ctx.scale = scale
                    res = x.clone().detach()
                    return res

                # Replace fairseq's GradMultiply forward when running with dml enabled.
                fairseq.modules.grad_multiply.GradMultiply.forward = forward_dml
            # global config
            self.config = config
            self.inp_q = inp_q
            self.opt_q = opt_q
            # device="cpu"  # force CPU for testing
            self.device = config.device
            self.f0_up_key = key
            self.f0_min = 50
            self.f0_max = 1100
            # Mel-scale bounds used by get_f0_post to quantise F0 into 1..255.
            self.f0_mel_min = 1127 * np.log(1 + self.f0_min / 700)
            self.f0_mel_max = 1127 * np.log(1 + self.f0_max / 700)
            self.n_cpu = n_cpu
            self.use_jit = self.config.use_jit
            self.is_half = config.is_half

            if index_rate != 0:
                self.index = faiss.read_index(index_path)
                self.big_npy = self.index.reconstruct_n(0, self.index.ntotal)
                printt("Index search enabled")
            self.pth_path: str = pth_path
            self.index_path = index_path
            self.index_rate = index_rate
            # Rolling caches of coarse / fine pitch, shifted by infer() on every block.
            self.cache_pitch: torch.Tensor = torch.zeros(
                1024, device=self.device, dtype=torch.long
            )
            self.cache_pitchf = torch.zeros(
                1024, device=self.device, dtype=torch.float32
            )

            if last_rvc is None:
                models, _, _ = fairseq.checkpoint_utils.load_model_ensemble_and_task(
                    ["assets/hubert/hubert_base.pt"],
                    suffix="",
                )
                hubert_model = models[0]
                hubert_model = hubert_model.to(self.device)
                if self.is_half:
                    hubert_model = hubert_model.half()
                else:
                    hubert_model = hubert_model.float()
                hubert_model.eval()
                self.model = hubert_model
            else:
                # Reuse the already-loaded feature extractor.
                self.model = last_rvc.model

            self.net_g: nn.Module = None

            def set_default_model():
                self.net_g, cpt = get_synthesizer(self.pth_path, self.device)
                self.tgt_sr = cpt["config"][-1]
                cpt["config"][-3] = cpt["weight"]["emb_g.weight"].shape[0]
                self.if_f0 = cpt.get("f0", 1)
                self.version = cpt.get("version", "v1")
                if self.is_half:
                    self.net_g = self.net_g.half()
                else:
                    self.net_g = self.net_g.float()

            def set_jit_model():
                jit_pth_path = self._jit_cache_path(self.pth_path, self.is_half)
                reload = False
                if str(self.device) == "cuda":
                    self.device = torch.device("cuda:0")
                if os.path.exists(jit_pth_path):
                    cpt = jit.load(jit_pth_path)
                    model_device = cpt["device"]
                    # A cached export made for another device must be regenerated.
                    if model_device != str(self.device):
                        reload = True
                else:
                    reload = True

                if reload:
                    # NOTE(review): if synthesizer_jit_export derives its own save path,
                    # confirm it uses the same rule as _jit_cache_path.
                    cpt = jit.synthesizer_jit_export(
                        self.pth_path,
                        "script",
                        None,
                        device=self.device,
                        is_half=self.is_half,
                    )

                self.tgt_sr = cpt["config"][-1]
                self.if_f0 = cpt.get("f0", 1)
                self.version = cpt.get("version", "v1")
                self.net_g = torch.jit.load(
                    BytesIO(cpt["model"]), map_location=self.device
                )
                self.net_g.infer = self.net_g.forward
                self.net_g.eval().to(self.device)

            def set_synthesizer():
                if self.use_jit and not config.dml:
                    if self.is_half and "cpu" in str(self.device):
                        printt(
                            "Use default Synthesizer model. "
                            "Jit is not supported on the CPU for half floating point"
                        )
                        set_default_model()
                    else:
                        set_jit_model()
                else:
                    set_default_model()

            if last_rvc is None or last_rvc.pth_path != self.pth_path:
                set_synthesizer()
            else:
                # Same checkpoint as before: reuse its metadata and, unless the jit
                # setting changed, the loaded network itself.
                self.tgt_sr = last_rvc.tgt_sr
                self.if_f0 = last_rvc.if_f0
                self.version = last_rvc.version
                self.is_half = last_rvc.is_half
                if last_rvc.use_jit != self.use_jit:
                    set_synthesizer()
                else:
                    self.net_g = last_rvc.net_g

            if last_rvc is not None and hasattr(last_rvc, "model_rmvpe"):
                self.model_rmvpe = last_rvc.model_rmvpe
            if last_rvc is not None and hasattr(last_rvc, "model_fcpe"):
                self.device_fcpe = last_rvc.device_fcpe
                self.model_fcpe = last_rvc.model_fcpe
        except Exception:
            # Best effort: report the failure but do not propagate it. Catching Exception
            # (not a bare except) keeps KeyboardInterrupt / SystemExit working.
            printt(traceback.format_exc())

    @staticmethod
    def _jit_cache_path(pth_path: str, is_half: bool) -> str:
        """Return the jit cache file path that belongs to checkpoint ``pth_path``.

        Only a literal trailing ``.pth`` is removed. ``str.rstrip(".pth")`` would strip any
        run of the characters ``.``, ``p``, ``t``, ``h`` and mangle names such as
        ``depth.pth``.
        """
        suffix = ".pth"
        stem = pth_path[: -len(suffix)] if pth_path.endswith(suffix) else pth_path
        return stem + (".half.jit" if is_half else ".jit")

    def change_key(self, new_key):
        """Set the pitch shift (in semitones) used for subsequent blocks."""
        self.f0_up_key = new_key

    def change_index_rate(self, new_index_rate):
        """Set the index blend rate, loading the faiss index on the first switch from 0."""
        if new_index_rate != 0 and self.index_rate == 0:
            self.index = faiss.read_index(self.index_path)
            self.big_npy = self.index.reconstruct_n(0, self.index.ntotal)
            printt("Index search enabled")
        self.index_rate = new_index_rate

    def get_f0_post(self, f0):
        """Quantise an F0 contour.

        Args:
            f0: F0 values as a numpy array or torch tensor.

        Returns:
            ``(f0_coarse, f0)``: the mel-scaled contour rounded to integers in 1..255
            (``torch.long``), and the float32 contour on ``self.device`` with size-1
            dimensions squeezed out.
        """
        if not torch.is_tensor(f0):
            f0 = torch.from_numpy(f0)
        f0 = f0.float().to(self.device).squeeze()
        f0_mel = 1127 * torch.log(1 + f0 / 700)
        # Map positive mel values linearly so that [f0_mel_min, f0_mel_max] -> [1, 255].
        f0_mel[f0_mel > 0] = (f0_mel[f0_mel > 0] - self.f0_mel_min) * 254 / (
            self.f0_mel_max - self.f0_mel_min
        ) + 1
        f0_mel[f0_mel <= 1] = 1
        f0_mel[f0_mel > 255] = 255
        f0_coarse = torch.round(f0_mel).long()
        return f0_coarse, f0

    def get_f0(self, x, f0_up_key, n_cpu, method="harvest"):
        """Extract F0 from ``x`` with the chosen method and return ``get_f0_post``'s result.

        Args:
            x: Audio tensor; the parselmouth / pyworld calls below use a 16000 Hz rate.
            f0_up_key: Pitch shift in semitones applied to the extracted F0.
            n_cpu: Worker count for harvest; 1 runs harvest in-process.
            method: ``"crepe"``, ``"rmvpe"``, ``"fcpe"``, ``"pm"``; any other value uses
                harvest.
        """
        n_cpu = int(n_cpu)
        if method == "crepe":
            return self.get_f0_crepe(x, f0_up_key)
        if method == "rmvpe":
            return self.get_f0_rmvpe(x, f0_up_key)
        if method == "fcpe":
            return self.get_f0_fcpe(x, f0_up_key)
        x = x.cpu().numpy()
        if method == "pm":
            p_len = x.shape[0] // 160 + 1
            f0_min = 65
            l_pad = int(np.ceil(1.5 / f0_min * 16000))
            r_pad = l_pad + 1
            s = parselmouth.Sound(np.pad(x, (l_pad, r_pad)), 16000).to_pitch_ac(
                time_step=0.01,
                voicing_threshold=0.6,
                pitch_floor=f0_min,
                pitch_ceiling=1100,
            )
            assert np.abs(s.t1 - 1.5 / f0_min) < 0.001
            f0 = s.selected_array["frequency"]
            # Pad or trim to exactly p_len frames.
            if len(f0) < p_len:
                f0 = np.pad(f0, (0, p_len - len(f0)))
            f0 = f0[:p_len]
            f0 *= pow(2, f0_up_key / 12)
            return self.get_f0_post(f0)
        if n_cpu == 1:
            f0, t = pyworld.harvest(
                x.astype(np.double),
                fs=16000,
                f0_ceil=1100,
                f0_floor=50,
                frame_period=10,
            )
            f0 = signal.medfilt(f0, 3)
            f0 *= pow(2, f0_up_key / 12)
            return self.get_f0_post(f0)
        # Multi-worker harvest: split the signal into chunks that overlap by 320 samples,
        # hand them to the workers via inp_q and stitch the partial results together.
        f0bak = np.zeros(x.shape[0] // 160 + 1, dtype=np.float64)
        length = len(x)
        part_length = 160 * ((length // 160 - 1) // n_cpu + 1)
        n_cpu = (length // 160 - 1) // (part_length // 160) + 1
        ts = ttime()
        res_f0 = mm.dict()
        for idx in range(n_cpu):
            tail = part_length * (idx + 1) + 320
            if idx == 0:
                self.inp_q.put((idx, x[:tail], res_f0, n_cpu, ts))
            else:
                self.inp_q.put(
                    (idx, x[part_length * idx - 320 : tail], res_f0, n_cpu, ts)
                )
        # Wait until the completion message for this request's timestamp arrives.
        while True:
            res_ts = self.opt_q.get()
            if res_ts == ts:
                break
        f0s = [i[1] for i in sorted(res_f0.items(), key=lambda item: item[0])]
        for idx, f0 in enumerate(f0s):
            # Drop the frames that come from the overlapping margins.
            if idx == 0:
                f0 = f0[:-3]
            elif idx != n_cpu - 1:
                f0 = f0[2:-3]
            else:
                f0 = f0[2:]
            f0bak[part_length * idx // 160 : part_length * idx // 160 + f0.shape[0]] = (
                f0
            )
        f0bak = signal.medfilt(f0bak, 3)
        f0bak *= pow(2, f0_up_key / 12)
        return self.get_f0_post(f0bak)

    def get_f0_crepe(self, x, f0_up_key):
        """Extract F0 with torchcrepe; falls back to fcpe on ``privateuseone`` devices."""
        if "privateuseone" in str(self.device):
            # dml is not supported and the CPU is too slow to be usable, so use fcpe.
            return self.get_f0(x, f0_up_key, 1, "fcpe")
        # printt("using crepe,device:%s"%self.device)
        f0, pd = torchcrepe.predict(
            x.unsqueeze(0).float(),
            16000,
            160,
            self.f0_min,
            self.f0_max,
            "full",
            batch_size=512,
            # device=self.device if self.device.type!="privateuseone" else "cpu",
            # (crepe always runs in full precision, so half precision is not a concern;
            # CPU latency is too high to be usable)
            device=self.device,
            return_periodicity=True,
        )
        pd = torchcrepe.filter.median(pd, 3)
        f0 = torchcrepe.filter.mean(f0, 3)
        # Zero out frames whose periodicity is below the threshold.
        f0[pd < 0.1] = 0
        f0 *= pow(2, f0_up_key / 12)
        return self.get_f0_post(f0)

    def get_f0_rmvpe(self, x, f0_up_key):
        """Extract F0 with RMVPE, loading the model on first use."""
        if not hasattr(self, "model_rmvpe"):
            from infer.lib.rmvpe import RMVPE

            printt("Loading rmvpe model")
            self.model_rmvpe = RMVPE(
                "assets/rmvpe/rmvpe.pt",
                is_half=self.is_half,
                device=self.device,
                use_jit=self.config.use_jit,
            )
        f0 = self.model_rmvpe.infer_from_audio(x, thred=0.03)
        f0 *= pow(2, f0_up_key / 12)
        return self.get_f0_post(f0)

    def get_f0_fcpe(self, x, f0_up_key):
        """Extract F0 with FCPE, loading the model on first use.

        On ``privateuseone`` devices the FCPE model runs on the CPU.
        """
        if not hasattr(self, "model_fcpe"):
            from torchfcpe import spawn_bundled_infer_model

            printt("Loading fcpe model")
            if "privateuseone" in str(self.device):
                self.device_fcpe = "cpu"
            else:
                self.device_fcpe = self.device
            self.model_fcpe = spawn_bundled_infer_model(self.device_fcpe)
        f0 = self.model_fcpe.infer(
            x.to(self.device_fcpe).unsqueeze(0).float(),
            sr=16000,
            decoder_mode="local_argmax",
            threshold=0.006,
        )
        f0 *= pow(2, f0_up_key / 12)
        return self.get_f0_post(f0)

    def infer(
        self,
        input_wav: torch.Tensor,
        block_frame_16k,
        skip_head,
        return_length,
        f0method,
    ) -> torch.Tensor:
        """Convert one block of audio.

        Args:
            input_wav: 1-D audio tensor; its length divided by 160 gives the frame count.
            block_frame_16k: Number of new samples in this block; determines how far the
                pitch caches are shifted and how much audio the F0 extractor sees.
            skip_head: Frames to skip; index blending starts at ``skip_head // 2`` and the
                value is forwarded to the synthesizer.
            return_length: Forwarded to the synthesizer.
            f0method: F0 extraction method name, see :meth:`get_f0`.

        Returns:
            The synthesizer output, squeezed and cast to float32 (a torch tensor).
        """
        t1 = ttime()
        with torch.no_grad():
            if self.config.is_half:
                feats = input_wav.half().view(1, -1)
            else:
                feats = input_wav.float().view(1, -1)
            padding_mask = torch.BoolTensor(feats.shape).to(self.device).fill_(False)
            inputs = {
                "source": feats,
                "padding_mask": padding_mask,
                "output_layer": 9 if self.version == "v1" else 12,
            }
            logits = self.model.extract_features(**inputs)
            feats = (
                self.model.final_proj(logits[0]) if self.version == "v1" else logits[0]
            )
            # Repeat the last frame once.
            feats = torch.cat((feats, feats[:, -1:, :]), 1)
        t2 = ttime()
        try:
            if hasattr(self, "index") and self.index_rate != 0:
                npy = feats[0][skip_head // 2 :].cpu().numpy().astype("float32")
                score, ix = self.index.search(npy, k=8)
                if (ix >= 0).all():
                    # Weight the 8 neighbours by the inverse square of their score.
                    # NOTE(review): a score of exactly 0 would divide by zero here.
                    weight = np.square(1 / score)
                    weight /= weight.sum(axis=1, keepdims=True)
                    npy = np.sum(
                        self.big_npy[ix] * np.expand_dims(weight, axis=2), axis=1
                    )
                    if self.config.is_half:
                        npy = npy.astype("float16")
                    # Blend retrieved and extracted features by index_rate.
                    feats[0][skip_head // 2 :] = (
                        torch.from_numpy(npy).unsqueeze(0).to(self.device)
                        * self.index_rate
                        + (1 - self.index_rate) * feats[0][skip_head // 2 :]
                    )
                else:
                    printt(
                        "Invalid index. You MUST use added_xxxx.index but not trained_xxxx.index!"
                    )
            else:
                printt("Index search FAILED or disabled")
        except Exception:
            # Index blending is optional: report and continue with the raw features.
            traceback.print_exc()
            printt("Index search FAILED")
        t3 = ttime()
        p_len = input_wav.shape[0] // 160
        if self.if_f0 == 1:
            f0_extractor_frame = block_frame_16k + 800
            if f0method == "rmvpe":
                # Round up to a multiple of 5120 samples, minus 160.
                f0_extractor_frame = 5120 * ((f0_extractor_frame - 1) // 5120 + 1) - 160
            pitch, pitchf = self.get_f0(
                input_wav[-f0_extractor_frame:], self.f0_up_key, self.n_cpu, f0method
            )
            # Shift the caches left by one block and write the new pitch at the end.
            shift = block_frame_16k // 160
            self.cache_pitch[:-shift] = self.cache_pitch[shift:].clone()
            self.cache_pitchf[:-shift] = self.cache_pitchf[shift:].clone()
            self.cache_pitch[4 - pitch.shape[0] :] = pitch[3:-1]
            self.cache_pitchf[4 - pitch.shape[0] :] = pitchf[3:-1]
            cache_pitch = self.cache_pitch[None, -p_len:]
            cache_pitchf = self.cache_pitchf[None, -p_len:]
        t4 = ttime()
        # Double the number of feature frames, then trim to p_len.
        feats = F.interpolate(feats.permute(0, 2, 1), scale_factor=2).permute(0, 2, 1)
        feats = feats[:, :p_len, :]
        p_len = torch.LongTensor([p_len]).to(self.device)
        sid = torch.LongTensor([0]).to(self.device)
        skip_head = torch.LongTensor([skip_head])
        return_length = torch.LongTensor([return_length])
        with torch.no_grad():
            if self.if_f0 == 1:
                infered_audio, _, _ = self.net_g.infer(
                    feats,
                    p_len,
                    cache_pitch,
                    cache_pitchf,
                    sid,
                    skip_head,
                    return_length,
                )
            else:
                infered_audio, _, _ = self.net_g.infer(
                    feats, p_len, sid, skip_head, return_length
                )
        t5 = ttime()
        printt(
            "Spent time: fea = %.3fs, index = %.3fs, f0 = %.3fs, model = %.3fs",
            t2 - t1,
            t3 - t2,
            t4 - t3,
            t5 - t4,
        )
        return infered_audio.squeeze().float()
"""
TorchGating is a PyTorch-based implementation of Spectral Gating
================================================
Author: Asaf Zorea
Contents
--------
torchgate imports all the functions from PyTorch, and in addition provides:
TorchGating --- A PyTorch module that applies a spectral gate to an input signal
"""
from .torchgate import TorchGate
import torch
from infer.lib.rmvpe import STFT
from torch.nn.functional import conv1d, conv2d
from typing import Union, Optional
from .utils import linspace, temperature_sigmoid, amp_to_db
class TorchGate(torch.nn.Module):
    """
    A PyTorch module that applies a spectral gate to an input signal.

    Arguments:
        sr {int} -- Sample rate of the input signal.
        nonstationary {bool} -- Whether to use non-stationary or stationary masking (default: {False}).
        n_std_thresh_stationary {float} -- Number of standard deviations above mean to threshold noise for
                                           stationary masking (default: {1.5}).
        n_thresh_nonstationary {float} -- Number of multiplies above smoothed magnitude spectrogram. for
                                        non-stationary masking (default: {1.3}).
        temp_coeff_nonstationary {float} -- Temperature coefficient for non-stationary masking (default: {0.1}).
        n_movemean_nonstationary {int} -- Number of samples for moving average smoothing in non-stationary masking
                                          (default: {20}).
        prop_decrease {float} -- Proportion to decrease signal by where the mask is zero (default: {1.0}).
        n_fft {int} -- Size of FFT for STFT (default: {1024}).
        win_length {[int]} -- Window length for STFT. If None, defaults to `n_fft` (default: {None}).
        hop_length {[int]} -- Hop length for STFT. If None, defaults to `win_length` // 4 (default: {None}).
        freq_mask_smooth_hz {float} -- Frequency smoothing width for mask (in Hz). If None, no smoothing is applied
                                     (default: {500}).
        time_mask_smooth_ms {float} -- Time smoothing width for mask (in ms). If None, no smoothing is applied
                                     (default: {50}).
    """

    @torch.no_grad()
    def __init__(
        self,
        sr: int,
        nonstationary: bool = False,
        n_std_thresh_stationary: float = 1.5,
        n_thresh_nonstationary: float = 1.3,
        temp_coeff_nonstationary: float = 0.1,
        n_movemean_nonstationary: int = 20,
        prop_decrease: float = 1.0,
        n_fft: int = 1024,
        win_length: Optional[int] = None,
        hop_length: Optional[int] = None,
        freq_mask_smooth_hz: Optional[float] = 500,
        time_mask_smooth_ms: Optional[float] = 50,
    ):
        super().__init__()

        # General Params
        self.sr = sr
        self.nonstationary = nonstationary
        assert 0.0 <= prop_decrease <= 1.0
        self.prop_decrease = prop_decrease

        # STFT Params
        self.n_fft = n_fft
        self.win_length = self.n_fft if win_length is None else win_length
        self.hop_length = self.win_length // 4 if hop_length is None else hop_length

        # Stationary Params
        self.n_std_thresh_stationary = n_std_thresh_stationary

        # Non-Stationary Params
        self.temp_coeff_nonstationary = temp_coeff_nonstationary
        self.n_movemean_nonstationary = n_movemean_nonstationary
        self.n_thresh_nonstationary = n_thresh_nonstationary

        # Smooth Mask Params
        self.freq_mask_smooth_hz = freq_mask_smooth_hz
        self.time_mask_smooth_ms = time_mask_smooth_ms
        # Registered as a buffer so it follows the module across devices; may be None.
        self.register_buffer("smoothing_filter", self._generate_mask_smoothing_filter())

    @torch.no_grad()
    def _generate_mask_smoothing_filter(self) -> Union[torch.Tensor, None]:
        """
        Build the 2-D kernel used to smooth the signal mask over frequency and time.

        Returns:
            smoothing_filter (torch.Tensor): the normalised (sums to 1) outer product of a
                triangular frequency ramp and a triangular time ramp, with two leading
                singleton dimensions added for use as a ``conv2d`` weight.
                Returns None when both self.freq_mask_smooth_hz and self.time_mask_smooth_ms
                are None, or when both widths resolve to a single bin/frame.

        Raises:
            ValueError: if a requested smoothing width is smaller than one frequency bin
                or one time frame.
        """
        if self.freq_mask_smooth_hz is None and self.time_mask_smooth_ms is None:
            return None

        # Smoothing width expressed in frequency bins.
        n_grad_freq = (
            1
            if self.freq_mask_smooth_hz is None
            else int(self.freq_mask_smooth_hz / (self.sr / (self.n_fft / 2)))
        )
        if n_grad_freq < 1:
            raise ValueError(
                f"freq_mask_smooth_hz needs to be at least {int((self.sr / (self.n_fft / 2)))} Hz"
            )

        # Smoothing width expressed in STFT frames.
        n_grad_time = (
            1
            if self.time_mask_smooth_ms is None
            else int(self.time_mask_smooth_ms / ((self.hop_length / self.sr) * 1000))
        )
        if n_grad_time < 1:
            raise ValueError(
                f"time_mask_smooth_ms needs to be at least {int((self.hop_length / self.sr) * 1000)} ms"
            )

        if n_grad_time == 1 and n_grad_freq == 1:
            return None

        # Rising ramp followed by falling ramp, with the zero end points trimmed.
        v_f = torch.cat(
            [
                linspace(0, 1, n_grad_freq + 1, endpoint=False),
                linspace(1, 0, n_grad_freq + 2),
            ]
        )[1:-1]
        v_t = torch.cat(
            [
                linspace(0, 1, n_grad_time + 1, endpoint=False),
                linspace(1, 0, n_grad_time + 2),
            ]
        )[1:-1]
        smoothing_filter = torch.outer(v_f, v_t).unsqueeze(0).unsqueeze(0)

        return smoothing_filter / smoothing_filter.sum()

    @torch.no_grad()
    def _stationary_mask(
        self, X_db: torch.Tensor, xn: Optional[torch.Tensor] = None
    ) -> torch.Tensor:
        """
        Computes a stationary binary mask to filter out noise in a log-magnitude spectrogram.

        Arguments:
            X_db (torch.Tensor): log-magnitude spectrogram of the signal. Statistics are
                reduced over the last axis and re-expanded at axis 2, so a 3-D layout such
                as the batched output of ``torch.stft`` is presumably expected (TODO confirm).
            xn (torch.Tensor): optional noise audio signal; when given, the threshold is
                computed from its spectrogram instead of from X_db.

        Returns:
            sig_mask (torch.Tensor): Boolean mask of the same shape as X_db, True where the
                value exceeds the noise threshold.
        """
        if xn is not None:
            if "privateuseone" in str(xn.device):
                # Use the project's STFT module on this device type instead of torch.stft.
                if not hasattr(self, "stft"):
                    self.stft = STFT(
                        filter_length=self.n_fft,
                        hop_length=self.hop_length,
                        win_length=self.win_length,
                        window="hann",
                    ).to(xn.device)
                XN = self.stft.transform(xn)
            else:
                XN = torch.stft(
                    xn,
                    n_fft=self.n_fft,
                    hop_length=self.hop_length,
                    win_length=self.win_length,
                    return_complex=True,
                    pad_mode="constant",
                    center=True,
                    window=torch.hann_window(self.win_length).to(xn.device),
                )
            XN_db = amp_to_db(XN).to(dtype=X_db.dtype)
        else:
            XN_db = X_db

        # calculate mean and standard deviation along the last axis
        std_freq_noise, mean_freq_noise = torch.std_mean(XN_db, dim=-1)

        # compute noise threshold
        noise_thresh = mean_freq_noise + std_freq_noise * self.n_std_thresh_stationary

        # create binary mask by thresholding the spectrogram
        sig_mask = X_db > noise_thresh.unsqueeze(2)
        return sig_mask

    @torch.no_grad()
    def _nonstationary_mask(self, X_abs: torch.Tensor) -> torch.Tensor:
        """
        Computes a non-stationary soft mask to filter out noise in a magnitude spectrogram.

        Arguments:
            X_abs (torch.Tensor): magnitude spectrogram; the moving average runs along its
                last axis.

        Returns:
            sig_mask (torch.Tensor): tensor of the same shape as X_abs holding the output
                of a temperature-scaled sigmoid.
        """
        # Moving average over the last axis with a box kernel of n_movemean_nonstationary taps.
        X_smoothed = (
            conv1d(
                X_abs.reshape(-1, 1, X_abs.shape[-1]),
                torch.ones(
                    self.n_movemean_nonstationary,
                    dtype=X_abs.dtype,
                    device=X_abs.device,
                ).view(1, 1, -1),
                padding="same",
            ).view(X_abs.shape)
            / self.n_movemean_nonstationary
        )

        # Compute slowness ratio and apply temperature sigmoid
        slowness_ratio = (X_abs - X_smoothed) / (X_smoothed + 1e-6)
        sig_mask = temperature_sigmoid(
            slowness_ratio, self.n_thresh_nonstationary, self.temp_coeff_nonstationary
        )

        return sig_mask

    def forward(
        self, x: torch.Tensor, xn: Optional[torch.Tensor] = None
    ) -> torch.Tensor:
        """
        Apply the proposed algorithm to the input signal.

        Arguments:
            x (torch.Tensor): The input audio signal, with shape (batch_size, signal_length).
            xn (Optional[torch.Tensor]): The noise signal used for stationary noise reduction. If `None`, the input
                                         signal is used as the noise signal. Default: `None`.

        Returns:
            torch.Tensor: The denoised audio signal, cast to the dtype of the input signal.
        """
        # Compute short-time Fourier transform (STFT)
        if "privateuseone" in str(x.device):
            # Use the project's STFT module on this device type; it returns magnitude and phase.
            if not hasattr(self, "stft"):
                self.stft = STFT(
                    filter_length=self.n_fft,
                    hop_length=self.hop_length,
                    win_length=self.win_length,
                    window="hann",
                ).to(x.device)
            X, phase = self.stft.transform(x, return_phase=True)
        else:
            X = torch.stft(
                x,
                n_fft=self.n_fft,
                hop_length=self.hop_length,
                win_length=self.win_length,
                return_complex=True,
                pad_mode="constant",
                center=True,
                window=torch.hann_window(self.win_length).to(x.device),
            )

        # Compute signal mask based on stationary or nonstationary assumptions
        if self.nonstationary:
            sig_mask = self._nonstationary_mask(X.abs())
        else:
            sig_mask = self._stationary_mask(amp_to_db(X), xn)

        # Propagate decrease in signal power
        sig_mask = self.prop_decrease * (sig_mask.float() - 1.0) + 1.0

        # Smooth signal mask with 2D convolution
        if self.smoothing_filter is not None:
            sig_mask = conv2d(
                sig_mask.unsqueeze(1),
                self.smoothing_filter.to(sig_mask.dtype),
                padding="same",
            )

        # Apply signal mask to STFT magnitude and phase components
        Y = X * sig_mask.squeeze(1)

        # Inverse STFT to obtain time-domain signal
        if "privateuseone" in str(Y.device):
            y = self.stft.inverse(Y, phase)
        else:
            y = torch.istft(
                Y,
                n_fft=self.n_fft,
                hop_length=self.hop_length,
                win_length=self.win_length,
                center=True,
                window=torch.hann_window(self.win_length).to(Y.device),
            )

        return y.to(dtype=x.dtype)
import torch
from torch.types import Number
@torch.no_grad()
def amp_to_db(
    x: torch.Tensor, eps=torch.finfo(torch.float64).eps, top_db=40
) -> torch.Tensor:
    """
    Convert amplitudes to decibels, clamped to ``top_db`` below the per-row peak.

    Arguments:
        x {[torch.Tensor]} -- [Input tensor; its absolute value is used.]

    Keyword Arguments:
        eps {[float]} -- [Small value added before the logarithm to avoid log(0).]
            (default: {torch.finfo(torch.float64).eps})
        top_db {[float]} -- [Values more than ``top_db`` below the maximum along the last
            axis are raised to that floor.] (default: {40})

    Returns:
        [torch.Tensor] -- [Output tensor in decibel scale, same shape as ``x``.]
    """
    magnitude = x.abs() + eps
    x_db = 20 * torch.log10(magnitude)
    # Per-row floor: peak along the last axis minus top_db, kept broadcastable.
    floor = x_db.max(dim=-1, keepdim=True).values - top_db
    return torch.max(x_db, floor)
@torch.no_grad()
def temperature_sigmoid(x: torch.Tensor, x0: float, temp_coeff: float) -> torch.Tensor:
    """
    Evaluate a sigmoid centred at ``x0`` whose steepness is set by ``temp_coeff``.

    Arguments:
        x {[torch.Tensor]} -- [Input tensor.]
        x0 {[float]} -- [Input value at which the output is 0.5.]
        temp_coeff {[float]} -- [Divisor of the shifted input; smaller values give a steeper sigmoid.]

    Returns:
        [torch.Tensor] -- [``sigmoid((x - x0) / temp_coeff)``, same shape as ``x``.]
    """
    scaled = (x - x0) / temp_coeff
    return torch.sigmoid(scaled)
@torch.no_grad()
def linspace(
    start: Number, stop: Number, num: int = 50, endpoint: bool = True, **kwargs
) -> torch.Tensor:
    """
    Generate a linearly spaced 1-D tensor, optionally excluding the end point.

    Arguments:
        start {[Number]} -- [The starting value of the sequence.]
        stop {[Number]} -- [The end value of the sequence. When `endpoint` is False it is
                            excluded: ``num + 1`` evenly spaced samples are generated and
                            the last one is dropped, which also changes the step size.]

    Keyword Arguments:
        num {[int]} -- [Number of samples to return. Default is 50.]
        endpoint {[bool]} -- [If True, `stop` is the last sample. Otherwise, it is not included.
                              Default is True.]
        **kwargs -- [Additional arguments forwarded to `torch.linspace`.]

    Returns:
        [torch.Tensor] -- [1-D tensor of `num` equally spaced samples.]
    """
    if not endpoint:
        # Sample one extra point and drop it so that `stop` itself is left out.
        return torch.linspace(start, stop, num + 1, **kwargs)[:-1]
    return torch.linspace(start, stop, num, **kwargs)
#!/bin/sh
python3.8 -m venv .venv
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment