fix tokenizer

3f2d46a1 · patil-suraj · 7b55d334 · 3f2d46a1
Commit 3f2d46a1 authored Jun 16, 2022 by patil-suraj
Show whitespace changes
Inline Side-by-side

Showing 1 changed file with 15 additions and 3 deletions

src/diffusers/pipelines/grad_tts_utils.py src/diffusers/pipelines/grad_tts_utils.py +15 -3

No files found.
--- a/src/diffusers/pipelines/grad_tts_utils.py
+++ b/src/diffusers/pipelines/grad_tts_utils.py
 # tokenizer

 import re
+import os
+from shutil import copyfile

 import torch
 from transformers import PreTrainedTokenizer
@@ -325,7 +327,7 @@ def _should_keep_symbol(s):


 VOCAB_FILES_NAMES = {
-    "dict_file": "merges.txt",
+    "dict_file": "dict_file.txt",
 }

 class GradTTSTokenizer(PreTrainedTokenizer):
@@ -334,8 +336,18 @@ class GradTTSTokenizer(PreTrainedTokenizer):
    def __init__(self, dict_file, **kwargs):
        super().__init__(**kwargs)
        self.cmu = CMUDict(dict_file)
+        self.dict_file = dict_file
    
    def __call__(self, text):
        x = torch.LongTensor(intersperse(text_to_sequence(text, dictionary=self.cmu), len(symbols)))[None]
        x_lengths = torch.LongTensor([x.shape[-1]])
-        return x.shape, x_lengths
+        return x, x_lengths
+    
+    def save_vocabulary(self, save_directory: str, filename_prefix = None):
+        dict_file = os.path.join(
+            save_directory, (filename_prefix + "-" if filename_prefix else "") + VOCAB_FILES_NAMES["dict_file"]
+        )
+
+        copyfile(self.dict_file, dict_file)
+        
+        return (dict_file, )