Commit 4998f0d5 authored by hungchiayu1's avatar hungchiayu1
Browse files

upload files

parent d16bd9c0
...@@ -4,93 +4,32 @@ ...@@ -4,93 +4,32 @@
"cell_type": "code", "cell_type": "code",
"execution_count": 1, "execution_count": 1,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [],
{ "source": [
"name": "stderr", "data = [{\"captions\": \"Rhythmic wooden table tapping overlaid with steady water pouring sound\", \"location\": \"test.wav\", \"duration\": 10.0} for _ in range(10)]"
"output_type": "stream",
"text": [
"2024-12-29 16:36:41.493367: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n",
"2024-12-29 16:36:41.504316: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n",
"2024-12-29 16:36:41.507600: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n",
"2024-12-29 16:36:41.516354: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n",
"To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n",
"2024-12-29 16:36:42.242252: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n",
"/home/chiayu/miniconda3/envs/flux/lib/python3.11/site-packages/torch/nn/utils/weight_norm.py:134: FutureWarning: `torch.nn.utils.weight_norm` is deprecated in favor of `torch.nn.utils.parametrizations.weight_norm`.\n",
" WeightNorm.apply(module, name, dim)\n"
] ]
}, },
{ {
"data": { "cell_type": "code",
"application/vnd.jupyter.widget-view+json": { "execution_count": 4,
"model_id": "1577fbe2ed01403aa9bd2a71ee2c82ae",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Fetching 3 files: 0%| | 0/3 [00:00<?, ?it/s]"
]
},
"metadata": {}, "metadata": {},
"output_type": "display_data" "outputs": [],
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/chiayu/miniconda3/envs/flux/lib/python3.11/site-packages/transformers/tokenization_utils_base.py:1617: FutureWarning: `clean_up_tokenization_spaces` was not set. It will be set to `True` by default. This behavior will be deprecated in transformers v4.45, and will be then set to `False` by default. For more details check this issue: https://github.com/huggingface/transformers/issues/31884\n",
" warnings.warn(\n"
]
}
],
"source": [ "source": [
"import torchaudio\n", "import json\n",
"from TangoFlux import TangoFluxInference\n", "with open('data/val.json','w') as f:\n",
"from IPython.display import Audio\n", " json.dump(data,f)"
"model = TangoFluxInference(name='declare-lab/TangoFlux')\n"
] ]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 10, "execution_count": null,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [],
{
"ename": "OSError",
"evalue": "declare-lab/TangoFlux does not appear to have a file named config.json.",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mHTTPError\u001b[0m Traceback (most recent call last)",
"File \u001b[0;32m~/miniconda3/envs/flux/lib/python3.11/site-packages/huggingface_hub/utils/_http.py:406\u001b[0m, in \u001b[0;36mhf_raise_for_status\u001b[0;34m(response, endpoint_name)\u001b[0m\n\u001b[1;32m 405\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 406\u001b[0m \u001b[43mresponse\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mraise_for_status\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 407\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m HTTPError \u001b[38;5;28;01mas\u001b[39;00m e:\n",
"File \u001b[0;32m~/miniconda3/envs/flux/lib/python3.11/site-packages/requests/models.py:1024\u001b[0m, in \u001b[0;36mResponse.raise_for_status\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 1023\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m http_error_msg:\n\u001b[0;32m-> 1024\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m HTTPError(http_error_msg, response\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m)\n",
"\u001b[0;31mHTTPError\u001b[0m: 404 Client Error: Not Found for url: https://huggingface.co/declare-lab/TangoFlux/resolve/main/vae/config.json",
"\nThe above exception was the direct cause of the following exception:\n",
"\u001b[0;31mEntryNotFoundError\u001b[0m Traceback (most recent call last)",
"File \u001b[0;32m~/miniconda3/envs/flux/lib/python3.11/site-packages/diffusers/configuration_utils.py:379\u001b[0m, in \u001b[0;36mConfigMixin.load_config\u001b[0;34m(cls, pretrained_model_name_or_path, return_unused_kwargs, return_commit_hash, **kwargs)\u001b[0m\n\u001b[1;32m 377\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 378\u001b[0m \u001b[38;5;66;03m# Load from URL or cache if already cached\u001b[39;00m\n\u001b[0;32m--> 379\u001b[0m config_file \u001b[38;5;241m=\u001b[39m \u001b[43mhf_hub_download\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 380\u001b[0m \u001b[43m \u001b[49m\u001b[43mpretrained_model_name_or_path\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 381\u001b[0m \u001b[43m \u001b[49m\u001b[43mfilename\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mcls\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mconfig_name\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 382\u001b[0m \u001b[43m \u001b[49m\u001b[43mcache_dir\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcache_dir\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 383\u001b[0m \u001b[43m \u001b[49m\u001b[43mforce_download\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mforce_download\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 384\u001b[0m \u001b[43m \u001b[49m\u001b[43mproxies\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mproxies\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 385\u001b[0m \u001b[43m \u001b[49m\u001b[43mlocal_files_only\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mlocal_files_only\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 386\u001b[0m \u001b[43m \u001b[49m\u001b[43mtoken\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtoken\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 387\u001b[0m \u001b[43m \u001b[49m\u001b[43muser_agent\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43muser_agent\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 388\u001b[0m \u001b[43m \u001b[49m\u001b[43msubfolder\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43msubfolder\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 389\u001b[0m \u001b[43m \u001b[49m\u001b[43mrevision\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrevision\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 390\u001b[0m \u001b[43m \u001b[49m\u001b[43mlocal_dir\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mlocal_dir\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 391\u001b[0m \u001b[43m \u001b[49m\u001b[43mlocal_dir_use_symlinks\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mlocal_dir_use_symlinks\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 392\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 393\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m RepositoryNotFoundError:\n",
"File \u001b[0;32m~/miniconda3/envs/flux/lib/python3.11/site-packages/huggingface_hub/utils/_validators.py:114\u001b[0m, in \u001b[0;36mvalidate_hf_hub_args.<locals>._inner_fn\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 112\u001b[0m kwargs \u001b[38;5;241m=\u001b[39m smoothly_deprecate_use_auth_token(fn_name\u001b[38;5;241m=\u001b[39mfn\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m, has_token\u001b[38;5;241m=\u001b[39mhas_token, kwargs\u001b[38;5;241m=\u001b[39mkwargs)\n\u001b[0;32m--> 114\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfn\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
"File \u001b[0;32m~/miniconda3/envs/flux/lib/python3.11/site-packages/huggingface_hub/file_download.py:860\u001b[0m, in \u001b[0;36mhf_hub_download\u001b[0;34m(repo_id, filename, subfolder, repo_type, revision, library_name, library_version, cache_dir, local_dir, user_agent, force_download, proxies, etag_timeout, token, local_files_only, headers, endpoint, resume_download, force_filename, local_dir_use_symlinks)\u001b[0m\n\u001b[1;32m 859\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m--> 860\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_hf_hub_download_to_cache_dir\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 861\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# Destination\u001b[39;49;00m\n\u001b[1;32m 862\u001b[0m \u001b[43m \u001b[49m\u001b[43mcache_dir\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcache_dir\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 863\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# File info\u001b[39;49;00m\n\u001b[1;32m 864\u001b[0m \u001b[43m \u001b[49m\u001b[43mrepo_id\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrepo_id\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 865\u001b[0m \u001b[43m \u001b[49m\u001b[43mfilename\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfilename\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 866\u001b[0m \u001b[43m \u001b[49m\u001b[43mrepo_type\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrepo_type\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 867\u001b[0m \u001b[43m \u001b[49m\u001b[43mrevision\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrevision\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 868\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# HTTP info\u001b[39;49;00m\n\u001b[1;32m 869\u001b[0m \u001b[43m \u001b[49m\u001b[43mendpoint\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mendpoint\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 870\u001b[0m \u001b[43m \u001b[49m\u001b[43metag_timeout\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43metag_timeout\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 871\u001b[0m \u001b[43m \u001b[49m\u001b[43mheaders\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mhf_headers\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 872\u001b[0m \u001b[43m \u001b[49m\u001b[43mproxies\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mproxies\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 873\u001b[0m \u001b[43m \u001b[49m\u001b[43mtoken\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtoken\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 874\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# Additional options\u001b[39;49;00m\n\u001b[1;32m 875\u001b[0m \u001b[43m \u001b[49m\u001b[43mlocal_files_only\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mlocal_files_only\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 876\u001b[0m \u001b[43m \u001b[49m\u001b[43mforce_download\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mforce_download\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 877\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n",
"File \u001b[0;32m~/miniconda3/envs/flux/lib/python3.11/site-packages/huggingface_hub/file_download.py:923\u001b[0m, in \u001b[0;36m_hf_hub_download_to_cache_dir\u001b[0;34m(cache_dir, repo_id, filename, repo_type, revision, endpoint, etag_timeout, headers, proxies, token, local_files_only, force_download)\u001b[0m\n\u001b[1;32m 921\u001b[0m \u001b[38;5;66;03m# Try to get metadata (etag, commit_hash, url, size) from the server.\u001b[39;00m\n\u001b[1;32m 922\u001b[0m \u001b[38;5;66;03m# If we can't, a HEAD request error is returned.\u001b[39;00m\n\u001b[0;32m--> 923\u001b[0m (url_to_download, etag, commit_hash, expected_size, head_call_error) \u001b[38;5;241m=\u001b[39m \u001b[43m_get_metadata_or_catch_error\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 924\u001b[0m \u001b[43m \u001b[49m\u001b[43mrepo_id\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrepo_id\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 925\u001b[0m \u001b[43m \u001b[49m\u001b[43mfilename\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfilename\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 926\u001b[0m \u001b[43m \u001b[49m\u001b[43mrepo_type\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrepo_type\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 927\u001b[0m \u001b[43m \u001b[49m\u001b[43mrevision\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrevision\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 928\u001b[0m \u001b[43m \u001b[49m\u001b[43mendpoint\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mendpoint\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 929\u001b[0m \u001b[43m \u001b[49m\u001b[43mproxies\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mproxies\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 930\u001b[0m \u001b[43m \u001b[49m\u001b[43metag_timeout\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43metag_timeout\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 931\u001b[0m \u001b[43m \u001b[49m\u001b[43mheaders\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mheaders\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 932\u001b[0m \u001b[43m \u001b[49m\u001b[43mtoken\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtoken\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 933\u001b[0m \u001b[43m \u001b[49m\u001b[43mlocal_files_only\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mlocal_files_only\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 934\u001b[0m \u001b[43m \u001b[49m\u001b[43mstorage_folder\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstorage_folder\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 935\u001b[0m \u001b[43m \u001b[49m\u001b[43mrelative_filename\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrelative_filename\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 936\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 938\u001b[0m \u001b[38;5;66;03m# etag can be None for several reasons:\u001b[39;00m\n\u001b[1;32m 939\u001b[0m \u001b[38;5;66;03m# 1. we passed local_files_only.\u001b[39;00m\n\u001b[1;32m 940\u001b[0m \u001b[38;5;66;03m# 2. we don't have a connection\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 946\u001b[0m \u001b[38;5;66;03m# If the specified revision is a commit hash, look inside \"snapshots\".\u001b[39;00m\n\u001b[1;32m 947\u001b[0m \u001b[38;5;66;03m# If the specified revision is a branch or tag, look inside \"refs\".\u001b[39;00m\n",
"File \u001b[0;32m~/miniconda3/envs/flux/lib/python3.11/site-packages/huggingface_hub/file_download.py:1374\u001b[0m, in \u001b[0;36m_get_metadata_or_catch_error\u001b[0;34m(repo_id, filename, repo_type, revision, endpoint, proxies, etag_timeout, headers, token, local_files_only, relative_filename, storage_folder)\u001b[0m\n\u001b[1;32m 1373\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m-> 1374\u001b[0m metadata \u001b[38;5;241m=\u001b[39m \u001b[43mget_hf_file_metadata\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1375\u001b[0m \u001b[43m \u001b[49m\u001b[43murl\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43murl\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mproxies\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mproxies\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43metag_timeout\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mheaders\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mheaders\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtoken\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtoken\u001b[49m\n\u001b[1;32m 1376\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1377\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m EntryNotFoundError \u001b[38;5;28;01mas\u001b[39;00m http_error:\n",
"File \u001b[0;32m~/miniconda3/envs/flux/lib/python3.11/site-packages/huggingface_hub/utils/_validators.py:114\u001b[0m, in \u001b[0;36mvalidate_hf_hub_args.<locals>._inner_fn\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 112\u001b[0m kwargs \u001b[38;5;241m=\u001b[39m smoothly_deprecate_use_auth_token(fn_name\u001b[38;5;241m=\u001b[39mfn\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m, has_token\u001b[38;5;241m=\u001b[39mhas_token, kwargs\u001b[38;5;241m=\u001b[39mkwargs)\n\u001b[0;32m--> 114\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfn\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
"File \u001b[0;32m~/miniconda3/envs/flux/lib/python3.11/site-packages/huggingface_hub/file_download.py:1294\u001b[0m, in \u001b[0;36mget_hf_file_metadata\u001b[0;34m(url, token, proxies, timeout, library_name, library_version, user_agent, headers)\u001b[0m\n\u001b[1;32m 1293\u001b[0m \u001b[38;5;66;03m# Retrieve metadata\u001b[39;00m\n\u001b[0;32m-> 1294\u001b[0m r \u001b[38;5;241m=\u001b[39m \u001b[43m_request_wrapper\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1295\u001b[0m \u001b[43m \u001b[49m\u001b[43mmethod\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mHEAD\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1296\u001b[0m \u001b[43m \u001b[49m\u001b[43murl\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43murl\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1297\u001b[0m \u001b[43m \u001b[49m\u001b[43mheaders\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mhf_headers\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1298\u001b[0m \u001b[43m \u001b[49m\u001b[43mallow_redirects\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 1299\u001b[0m \u001b[43m \u001b[49m\u001b[43mfollow_relative_redirects\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 1300\u001b[0m \u001b[43m \u001b[49m\u001b[43mproxies\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mproxies\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1301\u001b[0m \u001b[43m \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtimeout\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1302\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1303\u001b[0m hf_raise_for_status(r)\n",
"File \u001b[0;32m~/miniconda3/envs/flux/lib/python3.11/site-packages/huggingface_hub/file_download.py:278\u001b[0m, in \u001b[0;36m_request_wrapper\u001b[0;34m(method, url, follow_relative_redirects, **params)\u001b[0m\n\u001b[1;32m 277\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m follow_relative_redirects:\n\u001b[0;32m--> 278\u001b[0m response \u001b[38;5;241m=\u001b[39m \u001b[43m_request_wrapper\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 279\u001b[0m \u001b[43m \u001b[49m\u001b[43mmethod\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmethod\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 280\u001b[0m \u001b[43m \u001b[49m\u001b[43murl\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43murl\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 281\u001b[0m \u001b[43m \u001b[49m\u001b[43mfollow_relative_redirects\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 282\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mparams\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 283\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 285\u001b[0m \u001b[38;5;66;03m# If redirection, we redirect only relative paths.\u001b[39;00m\n\u001b[1;32m 286\u001b[0m \u001b[38;5;66;03m# This is useful in case of a renamed repository.\u001b[39;00m\n",
"File \u001b[0;32m~/miniconda3/envs/flux/lib/python3.11/site-packages/huggingface_hub/file_download.py:302\u001b[0m, in \u001b[0;36m_request_wrapper\u001b[0;34m(method, url, follow_relative_redirects, **params)\u001b[0m\n\u001b[1;32m 301\u001b[0m response \u001b[38;5;241m=\u001b[39m get_session()\u001b[38;5;241m.\u001b[39mrequest(method\u001b[38;5;241m=\u001b[39mmethod, url\u001b[38;5;241m=\u001b[39murl, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mparams)\n\u001b[0;32m--> 302\u001b[0m \u001b[43mhf_raise_for_status\u001b[49m\u001b[43m(\u001b[49m\u001b[43mresponse\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 303\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m response\n",
"File \u001b[0;32m~/miniconda3/envs/flux/lib/python3.11/site-packages/huggingface_hub/utils/_http.py:417\u001b[0m, in \u001b[0;36mhf_raise_for_status\u001b[0;34m(response, endpoint_name)\u001b[0m\n\u001b[1;32m 416\u001b[0m message \u001b[38;5;241m=\u001b[39m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mresponse\u001b[38;5;241m.\u001b[39mstatus_code\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m Client Error.\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;241m+\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;241m+\u001b[39m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mEntry Not Found for url: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mresponse\u001b[38;5;241m.\u001b[39murl\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m--> 417\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m _format(EntryNotFoundError, message, response) \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01me\u001b[39;00m\n\u001b[1;32m 419\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m error_code \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mGatedRepo\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n",
"\u001b[0;31mEntryNotFoundError\u001b[0m: 404 Client Error. (Request ID: Root=1-67710c82-7a5511c86e25a81a6a7dc6ac;8d6f84f4-7b74-40f8-94b2-2f1aa2772b76)\n\nEntry Not Found for url: https://huggingface.co/declare-lab/TangoFlux/resolve/main/vae/config.json.",
"\nDuring handling of the above exception, another exception occurred:\n",
"\u001b[0;31mOSError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn[10], line 2\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mdiffusers\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m AutoencoderOobleck\n\u001b[0;32m----> 2\u001b[0m vae \u001b[38;5;241m=\u001b[39m \u001b[43mAutoencoderOobleck\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfrom_pretrained\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mdeclare-lab/TangoFlux\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43msubfolder\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mvae\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43mtoken\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mhf_OxOrUcsFQmqciAYeXjAKsRvPZisETqrfYb\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m)\u001b[49m\n",
"File \u001b[0;32m~/miniconda3/envs/flux/lib/python3.11/site-packages/huggingface_hub/utils/_validators.py:114\u001b[0m, in \u001b[0;36mvalidate_hf_hub_args.<locals>._inner_fn\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 111\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m check_use_auth_token:\n\u001b[1;32m 112\u001b[0m kwargs \u001b[38;5;241m=\u001b[39m smoothly_deprecate_use_auth_token(fn_name\u001b[38;5;241m=\u001b[39mfn\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m, has_token\u001b[38;5;241m=\u001b[39mhas_token, kwargs\u001b[38;5;241m=\u001b[39mkwargs)\n\u001b[0;32m--> 114\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfn\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
"File \u001b[0;32m~/miniconda3/envs/flux/lib/python3.11/site-packages/diffusers/models/modeling_utils.py:612\u001b[0m, in \u001b[0;36mModelMixin.from_pretrained\u001b[0;34m(cls, pretrained_model_name_or_path, **kwargs)\u001b[0m\n\u001b[1;32m 605\u001b[0m user_agent \u001b[38;5;241m=\u001b[39m {\n\u001b[1;32m 606\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mdiffusers\u001b[39m\u001b[38;5;124m\"\u001b[39m: __version__,\n\u001b[1;32m 607\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mfile_type\u001b[39m\u001b[38;5;124m\"\u001b[39m: \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmodel\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m 608\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mframework\u001b[39m\u001b[38;5;124m\"\u001b[39m: \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mpytorch\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m 609\u001b[0m }\n\u001b[1;32m 611\u001b[0m \u001b[38;5;66;03m# load config\u001b[39;00m\n\u001b[0;32m--> 612\u001b[0m config, unused_kwargs, commit_hash \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mcls\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mload_config\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 613\u001b[0m \u001b[43m \u001b[49m\u001b[43mconfig_path\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 614\u001b[0m \u001b[43m \u001b[49m\u001b[43mcache_dir\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcache_dir\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 615\u001b[0m \u001b[43m \u001b[49m\u001b[43mreturn_unused_kwargs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 616\u001b[0m \u001b[43m \u001b[49m\u001b[43mreturn_commit_hash\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 617\u001b[0m \u001b[43m \u001b[49m\u001b[43mforce_download\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mforce_download\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 618\u001b[0m \u001b[43m \u001b[49m\u001b[43mproxies\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mproxies\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 619\u001b[0m \u001b[43m \u001b[49m\u001b[43mlocal_files_only\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mlocal_files_only\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 620\u001b[0m \u001b[43m \u001b[49m\u001b[43mtoken\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtoken\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 621\u001b[0m \u001b[43m \u001b[49m\u001b[43mrevision\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrevision\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 622\u001b[0m \u001b[43m \u001b[49m\u001b[43msubfolder\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43msubfolder\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 623\u001b[0m \u001b[43m \u001b[49m\u001b[43muser_agent\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43muser_agent\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 624\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 625\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 627\u001b[0m \u001b[38;5;66;03m# Determine if we're loading from a directory of sharded checkpoints.\u001b[39;00m\n\u001b[1;32m 628\u001b[0m is_sharded \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mFalse\u001b[39;00m\n",
"File \u001b[0;32m~/miniconda3/envs/flux/lib/python3.11/site-packages/huggingface_hub/utils/_validators.py:114\u001b[0m, in \u001b[0;36mvalidate_hf_hub_args.<locals>._inner_fn\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 111\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m check_use_auth_token:\n\u001b[1;32m 112\u001b[0m kwargs \u001b[38;5;241m=\u001b[39m smoothly_deprecate_use_auth_token(fn_name\u001b[38;5;241m=\u001b[39mfn\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m, has_token\u001b[38;5;241m=\u001b[39mhas_token, kwargs\u001b[38;5;241m=\u001b[39mkwargs)\n\u001b[0;32m--> 114\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfn\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
"File \u001b[0;32m~/miniconda3/envs/flux/lib/python3.11/site-packages/diffusers/configuration_utils.py:406\u001b[0m, in \u001b[0;36mConfigMixin.load_config\u001b[0;34m(cls, pretrained_model_name_or_path, return_unused_kwargs, return_commit_hash, **kwargs)\u001b[0m\n\u001b[1;32m 400\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mEnvironmentError\u001b[39;00m(\n\u001b[1;32m 401\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mrevision\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m is not a valid git identifier (branch name, tag name or commit id) that exists for\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 402\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m this model name. Check the model page at\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 403\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mhttps://huggingface.co/\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mpretrained_model_name_or_path\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m for available revisions.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 404\u001b[0m )\n\u001b[1;32m 405\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m EntryNotFoundError:\n\u001b[0;32m--> 406\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mEnvironmentError\u001b[39;00m(\n\u001b[1;32m 407\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mpretrained_model_name_or_path\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m does not appear to have a file named \u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mcls\u001b[39m\u001b[38;5;241m.\u001b[39mconfig_name\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 408\u001b[0m )\n\u001b[1;32m 409\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m HTTPError \u001b[38;5;28;01mas\u001b[39;00m err:\n\u001b[1;32m 410\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mEnvironmentError\u001b[39;00m(\n\u001b[1;32m 411\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mThere was a specific connection error when trying to load\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 412\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mpretrained_model_name_or_path\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m:\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;132;01m{\u001b[39;00merr\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 413\u001b[0m )\n",
"\u001b[0;31mOSError\u001b[0m: declare-lab/TangoFlux does not appear to have a file named config.json."
]
}
],
"source": [ "source": [
"from diffusers import AutoencoderOobleck\n", "import torchaudio\n",
"vae = AutoencoderOobleck.from_pretrained(\"declare-lab/TangoFlux\",subfolder='vae',token='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')" "from TangoFlux import TangoFluxInference\n",
"from IPython.display import Audio\n",
"model = TangoFluxInference(name='declare-lab/TangoFlux')\n"
] ]
}, },
{ {
...@@ -145,37 +84,6 @@ ...@@ -145,37 +84,6 @@
"source": [ "source": [
"torchaudio.save('temp.wav', audio, sample_rate=44100)" "torchaudio.save('temp.wav', audio, sample_rate=44100)"
] ]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(tensor([[-7.5195e-02, 1.5591e-01, -5.4962e-02, ..., 3.0518e-05,\n",
" -3.0518e-05, 0.0000e+00],\n",
" [-7.5165e-02, 1.5591e-01, -5.4993e-02, ..., 3.0518e-05,\n",
" 0.0000e+00, 0.0000e+00]]),\n",
" 44100)"
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"torchaudio.load('temp.wav')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
} }
], ],
"metadata": { "metadata": {
File added
[{"captions": "Rhythmic wooden table tapping overlaid with steady water pouring sound", "location": "test.wav", "duration": 10.0}, {"captions": "Rhythmic wooden table tapping overlaid with steady water pouring sound", "location": "test.wav", "duration": 10.0}, {"captions": "Rhythmic wooden table tapping overlaid with steady water pouring sound", "location": "test.wav", "duration": 10.0}, {"captions": "Rhythmic wooden table tapping overlaid with steady water pouring sound", "location": "test.wav", "duration": 10.0}, {"captions": "Rhythmic wooden table tapping overlaid with steady water pouring sound", "location": "test.wav", "duration": 10.0}, {"captions": "Rhythmic wooden table tapping overlaid with steady water pouring sound", "location": "test.wav", "duration": 10.0}, {"captions": "Rhythmic wooden table tapping overlaid with steady water pouring sound", "location": "test.wav", "duration": 10.0}, {"captions": "Rhythmic wooden table tapping overlaid with steady water pouring sound", "location": "test.wav", "duration": 10.0}, {"captions": "Rhythmic wooden table tapping overlaid with steady water pouring sound", "location": "test.wav", "duration": 10.0}, {"captions": "Rhythmic wooden table tapping overlaid with steady water pouring sound", "location": "test.wav", "duration": 10.0}]
\ No newline at end of file
[{"captions": "Rhythmic wooden table tapping overlaid with steady water pouring sound", "location": "test.wav", "duration": 10.0}, {"captions": "Rhythmic wooden table tapping overlaid with steady water pouring sound", "location": "test.wav", "duration": 10.0}, {"captions": "Rhythmic wooden table tapping overlaid with steady water pouring sound", "location": "test.wav", "duration": 10.0}, {"captions": "Rhythmic wooden table tapping overlaid with steady water pouring sound", "location": "test.wav", "duration": 10.0}, {"captions": "Rhythmic wooden table tapping overlaid with steady water pouring sound", "location": "test.wav", "duration": 10.0}, {"captions": "Rhythmic wooden table tapping overlaid with steady water pouring sound", "location": "test.wav", "duration": 10.0}, {"captions": "Rhythmic wooden table tapping overlaid with steady water pouring sound", "location": "test.wav", "duration": 10.0}, {"captions": "Rhythmic wooden table tapping overlaid with steady water pouring sound", "location": "test.wav", "duration": 10.0}, {"captions": "Rhythmic wooden table tapping overlaid with steady water pouring sound", "location": "test.wav", "duration": 10.0}, {"captions": "Rhythmic wooden table tapping overlaid with steady water pouring sound", "location": "test.wav", "duration": 10.0}]
\ No newline at end of file
...@@ -430,7 +430,7 @@ class TangoFlux(nn.Module): ...@@ -430,7 +430,7 @@ class TangoFlux(nn.Module):
1, 1,
) )
loss = loss.mean() loss = loss.mean()
raw_model_loss, raw_ref_loss,implicit_acc,epsilon_diff = 0,0,0,0 ## default this to 0 if doing sft raw_model_loss, raw_ref_loss,implicit_acc = 0,0,0 ## default this to 0 if doing sft
else: else:
encoder_hidden_states = encoder_hidden_states.repeat(2, 1, 1) encoder_hidden_states = encoder_hidden_states.repeat(2, 1, 1)
...@@ -494,8 +494,7 @@ class TangoFlux(nn.Module): ...@@ -494,8 +494,7 @@ class TangoFlux(nn.Module):
epsilon_diff = torch.max(torch.zeros_like(model_losses_w),
ref_losses_w-model_losses_w).mean()
...@@ -504,8 +503,8 @@ class TangoFlux(nn.Module): ...@@ -504,8 +503,8 @@ class TangoFlux(nn.Module):
implicit_acc = (scale_term * (model_diff - ref_diff) > 0).sum().float() / inside_term.size(0) implicit_acc = (scale_term * (model_diff - ref_diff) > 0).sum().float() / inside_term.size(0)
loss = -1 * F.logsigmoid(inside_term).mean() + model_losses_w.mean() loss = -1 * F.logsigmoid(inside_term).mean() + model_losses_w.mean()
## raw_model_loss, raw_ref_loss, implicit_acc is used to help to analyze dpo behaviour.
return loss, raw_model_loss, raw_ref_loss, implicit_acc,epsilon_diff return loss, raw_model_loss, raw_ref_loss, implicit_acc
\ No newline at end of file
...@@ -367,7 +367,7 @@ def main(): ...@@ -367,7 +367,7 @@ def main():
audio_latent = unwrapped_vae.encode(audio_input).latent_dist.sample() audio_latent = unwrapped_vae.encode(audio_input).latent_dist.sample()
audio_latent = audio_latent.transpose(1,2) ## Tranpose to (bsz, seq_len, channel) audio_latent = audio_latent.transpose(1,2) ## Tranpose to (bsz, seq_len, channel)
loss, _, _,_, _ = model(audio_latent, text ,duration=duration) loss, _, _,_ = model(audio_latent, text ,duration=duration)
total_loss += loss.detach().float() total_loss += loss.detach().float()
accelerator.backward(loss) accelerator.backward(loss)
...@@ -441,7 +441,7 @@ def main(): ...@@ -441,7 +441,7 @@ def main():
audio_latent = audio_latent.transpose(1,2) ## Tranpose to (bsz, seq_len, channel) audio_latent = audio_latent.transpose(1,2) ## Tranpose to (bsz, seq_len, channel)
val_loss,_, _,_, _ = model(audio_latent, text , duration=duration) val_loss,_, _,_ = model(audio_latent, text , duration=duration)
total_val_loss += val_loss.detach().float() total_val_loss += val_loss.detach().float()
eval_progress_bar.update(1) eval_progress_bar.update(1)
......
import torch
from torch.utils.data import Dataset, DataLoader
import numpy as np
import pandas as pd
import torchaudio
import random
import itertools
import numpy as np
import numpy as np
def normalize_wav(waveform):
waveform = waveform - torch.mean(waveform)
waveform = waveform / (torch.max(torch.abs(waveform)) + 1e-8)
return waveform * 0.5
def pad_wav(waveform, segment_length):
waveform_length = len(waveform)
if segment_length is None or waveform_length == segment_length:
return waveform
elif waveform_length > segment_length:
return waveform[:segment_length]
else:
pad_wav = torch.zeros(segment_length - waveform_length).to(waveform.device)
waveform = torch.cat([waveform, pad_wav])
return waveform
def read_wav_file(filename, duration_sec,stereo=False):
info = torchaudio.info(filename)
sample_rate = info.sample_rate
# Calculate the number of frames corresponding to the desired duration
num_frames = int(sample_rate * duration_sec)
waveform, sr = torchaudio.load(filename,num_frames=num_frames) # Faster!!!
if stereo : ## Stereo audio
resampler = torchaudio.transforms.Resample(orig_freq=sr, new_freq=44100)
resampled_waveform = resampler(waveform)
padded_left = pad_wav(resampled_waveform[0], int(44100*duration_sec)) ## We pad left and right seperately
padded_right = pad_wav(resampled_waveform[1], int(44100*duration_sec))
return torch.stack([padded_left,padded_right])
else:
waveform = torchaudio.functional.resample(waveform, orig_freq=sr, new_freq=44100)[0]
waveform = pad_wav(waveform, int(44100*duration_sec)).unsqueeze(0)
return waveform
class DPOText2AudioDataset(Dataset):
def __init__(self, dataset, prefix, text_column, audio_w_column, audio_l_column, duration, num_examples=-1):
inputs = list(dataset[text_column])
self.inputs = [prefix + inp for inp in inputs]
self.audios_w = list(dataset[audio_w_column])
self.audios_l = list(dataset[audio_l_column])
self.durations = list(dataset[duration])
self.indices = list(range(len(self.inputs)))
self.mapper = {}
for index, audio_w, audio_l, duration, text in zip(self.indices, self.audios_w,self.audios_l,self.durations,inputs):
self.mapper[index] = [audio_w, audio_l, duration, text]
if num_examples != -1:
self.inputs, self.audios_w, self.audios_l, self.durations = self.inputs[:num_examples], self.audios_w[:num_examples], self.audios_l[:num_examples], self.durations[:num_examples]
self.indices = self.indices[:num_examples]
def __len__(self):
return len(self.inputs)
def get_num_instances(self):
return len(self.inputs)
def __getitem__(self, index):
s1, s2, s3, s4, s5 = self.inputs[index], self.audios_w[index], self.audios_l[index], self.durations[index], self.indices[index]
return s1, s2, s3, s4, s5
def collate_fn(self, data):
dat = pd.DataFrame(data)
return [dat[i].tolist() for i in dat]
class Text2AudioDataset(Dataset):
def __init__(self, dataset, prefix, text_column, audio_column, duration, num_examples=-1):
inputs = list(dataset[text_column])
self.inputs = [prefix + inp for inp in inputs]
self.audios = list(dataset[audio_column])
self.durations = list(dataset[duration])
self.indices = list(range(len(self.inputs)))
self.mapper = {}
for index, audio, duration,text in zip(self.indices, self.audios, self.durations,inputs):
self.mapper[index] = [audio, text,duration]
if num_examples != -1:
self.inputs, self.audios, self.durations = self.inputs[:num_examples], self.audios[:num_examples], self.durations[:num_examples]
self.indices = self.indices[:num_examples]
def __len__(self):
return len(self.inputs)
def get_num_instances(self):
return len(self.inputs)
def __getitem__(self, index):
s1, s2, s3, s4 = self.inputs[index], self.audios[index], self.durations[index], self.indices[index]
return s1, s2, s3, s4
def collate_fn(self, data):
dat = pd.DataFrame(data)
return [dat[i].tolist() for i in dat]
CUDA_VISISBLE_DEVICES=0,1,2,3,4,5 accelerate launch --config_file='configs/accelerator_config.yaml' src.train.py --report_to='wandb' --checkpointing_steps="best" --save_every=5 --config='tangoflux_config.yaml'
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment