Commit 165f8493 authored by haileyschoelkopf
Browse files

add fast tokenizer flag

parent 98c85d73
...@@ -70,6 +70,7 @@ class HFLM(LM): ...@@ -70,6 +70,7 @@ class HFLM(LM):
batch_size: Optional[int] = 1, batch_size: Optional[int] = 1,
low_cpu_mem_usage: Optional[bool] = True, low_cpu_mem_usage: Optional[bool] = True,
trust_remote_code: Optional[bool] = False, trust_remote_code: Optional[bool] = False,
use_fast_tokenizer: Optional[bool] = True,
# arguments used for splitting a model across GPUs naively. # arguments used for splitting a model across GPUs naively.
# only used if `parallelize=True`. # only used if `parallelize=True`.
parallelize: Optional[bool] = False, parallelize: Optional[bool] = False,
...@@ -216,6 +217,7 @@ class HFLM(LM): ...@@ -216,6 +217,7 @@ class HFLM(LM):
pretrained if tokenizer is None else tokenizer, pretrained if tokenizer is None else tokenizer,
revision=revision, revision=revision,
trust_remote_code=trust_remote_code, trust_remote_code=trust_remote_code,
use_fast=use_fast_tokenizer,
) )
self.vocab_size = self.tokenizer.vocab_size self.vocab_size = self.tokenizer.vocab_size
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment