Unverified commit a14b055b authored by Albert Villanova del Moral, committed by GitHub

Pass datasets trust_remote_code (#31406)

* Pass datasets trust_remote_code

* Pass trust_remote_code in more tests

* Add trust_remote_dataset_code arg to some tests

* Revert "Temporarily pin datasets upper version to fix CI"

This reverts commit b7672826.

* Pass trust_remote_code in librispeech_asr_dummy docstrings

* Revert "Pin datasets<2.20.0 for examples"

This reverts commit 833fc17a.

* Pass trust_remote_code to all examples

* Revert "Add trust_remote_dataset_code arg to some tests" to research_projects

* Pass trust_remote_code to tests

* Pass trust_remote_code to docstrings

* Fix flax examples tests requirements

* Pass trust_remote_dataset_code arg to tests

* Replace trust_remote_dataset_code with trust_remote_code in one example

* Fix duplicate trust_remote_code

* Replace args.trust_remote_dataset_code with args.trust_remote_code

* Replace trust_remote_dataset_code with trust_remote_code in parser

* Replace trust_remote_dataset_code with trust_remote_code in dataclasses

* Replace trust_remote_dataset_code with trust_remote_code arg
parent 485fd814
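
For reviewers skimming the diff below: datasets 2.20.0 stopped executing a repository's dataset loading script unless the caller opts in explicitly, which is what the reverted CI pins above worked around. A minimal sketch of the pattern this PR applies throughout, using one of the test fixtures that appears below (a sketch, not copied from the diff):

```python
from datasets import load_dataset

# datasets >= 2.20.0 no longer runs a repo's loading script by default;
# omitting the flag raises an error for script-based datasets such as
# this dummy LibriSpeech fixture.
ds = load_dataset(
    "hf-internal-testing/librispeech_asr_dummy",
    "clean",
    split="validation",
    trust_remote_code=True,  # only for repos you trust and whose code you have read
)
print(ds[0]["audio"]["array"].shape)
```
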
......@@ -1590,7 +1590,7 @@ class WhisperModel(WhisperPreTrainedModel):
>>> model = WhisperModel.from_pretrained("openai/whisper-base")
>>> feature_extractor = AutoFeatureExtractor.from_pretrained("openai/whisper-base")
>>> ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
>>> ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True)
>>> inputs = feature_extractor(ds[0]["audio"]["array"], return_tensors="pt")
>>> input_features = inputs.input_features
>>> decoder_input_ids = torch.tensor([[1, 1]]) * model.config.decoder_start_token_id
......@@ -1731,7 +1731,7 @@ class WhisperForConditionalGeneration(WhisperGenerationMixin, WhisperPreTrainedM
>>> processor = AutoProcessor.from_pretrained("openai/whisper-tiny.en")
>>> model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-tiny.en")
>>> ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
>>> ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True)
>>> inputs = processor(ds[0]["audio"]["array"], return_tensors="pt")
>>> input_features = inputs.input_features
......@@ -1983,7 +1983,7 @@ class WhisperForCausalLM(WhisperPreTrainedModel):
>>> assistant_model = WhisperForCausalLM.from_pretrained("distil-whisper/distil-large-v2")
>>> ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
>>> ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True)
>>> sample = ds[0]["audio"]
>>> input_features = processor(
... sample["array"], sampling_rate=sample["sampling_rate"], return_tensors="pt"
......
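
The three Whisper docstring hunks above are truncated by the diff view; for reference, here is how the first example plausibly reads in full after this change (reconstructed around the lines shown, so treat it as a sketch):

```python
import torch
from datasets import load_dataset
from transformers import AutoFeatureExtractor, WhisperModel

model = WhisperModel.from_pretrained("openai/whisper-base")
feature_extractor = AutoFeatureExtractor.from_pretrained("openai/whisper-base")
ds = load_dataset(
    "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
)
inputs = feature_extractor(ds[0]["audio"]["array"], return_tensors="pt")
input_features = inputs.input_features
decoder_input_ids = torch.tensor([[1, 1]]) * model.config.decoder_start_token_id
# Forward pass through the encoder-decoder; the decoder is primed with the start token.
last_hidden_state = model(input_features, decoder_input_ids=decoder_input_ids).last_hidden_state
print(list(last_hidden_state.shape))
```
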
......@@ -385,7 +385,7 @@ PT_SPEECH_BASE_MODEL_SAMPLE = r"""
>>> import torch
>>> from datasets import load_dataset
>>> dataset = load_dataset("hf-internal-testing/librispeech_asr_demo", "clean", split="validation")
>>> dataset = load_dataset("hf-internal-testing/librispeech_asr_demo", "clean", split="validation", trust_remote_code=True)
>>> dataset = dataset.sort("id")
>>> sampling_rate = dataset.features["audio"].sampling_rate
......@@ -411,7 +411,7 @@ PT_SPEECH_CTC_SAMPLE = r"""
>>> from datasets import load_dataset
>>> import torch
>>> dataset = load_dataset("hf-internal-testing/librispeech_asr_demo", "clean", split="validation")
>>> dataset = load_dataset("hf-internal-testing/librispeech_asr_demo", "clean", split="validation", trust_remote_code=True)
>>> dataset = dataset.sort("id")
>>> sampling_rate = dataset.features["audio"].sampling_rate
......@@ -446,7 +446,7 @@ PT_SPEECH_SEQ_CLASS_SAMPLE = r"""
>>> from datasets import load_dataset
>>> import torch
>>> dataset = load_dataset("hf-internal-testing/librispeech_asr_demo", "clean", split="validation")
>>> dataset = load_dataset("hf-internal-testing/librispeech_asr_demo", "clean", split="validation", trust_remote_code=True)
>>> dataset = dataset.sort("id")
>>> sampling_rate = dataset.features["audio"].sampling_rate
......@@ -482,7 +482,7 @@ PT_SPEECH_FRAME_CLASS_SAMPLE = r"""
>>> from datasets import load_dataset
>>> import torch
>>> dataset = load_dataset("hf-internal-testing/librispeech_asr_demo", "clean", split="validation")
>>> dataset = load_dataset("hf-internal-testing/librispeech_asr_demo", "clean", split="validation", trust_remote_code=True)
>>> dataset = dataset.sort("id")
>>> sampling_rate = dataset.features["audio"].sampling_rate
......@@ -511,7 +511,7 @@ PT_SPEECH_XVECTOR_SAMPLE = r"""
>>> from datasets import load_dataset
>>> import torch
>>> dataset = load_dataset("hf-internal-testing/librispeech_asr_demo", "clean", split="validation")
>>> dataset = load_dataset("hf-internal-testing/librispeech_asr_demo", "clean", split="validation", trust_remote_code=True)
>>> dataset = dataset.sort("id")
>>> sampling_rate = dataset.features["audio"].sampling_rate
......@@ -546,7 +546,7 @@ PT_VISION_BASE_MODEL_SAMPLE = r"""
>>> import torch
>>> from datasets import load_dataset
>>> dataset = load_dataset("huggingface/cats-image")
>>> dataset = load_dataset("huggingface/cats-image", trust_remote_code=True)
>>> image = dataset["test"]["image"][0]
>>> image_processor = AutoImageProcessor.from_pretrained("{checkpoint}")
......@@ -571,7 +571,7 @@ PT_VISION_SEQ_CLASS_SAMPLE = r"""
>>> import torch
>>> from datasets import load_dataset
>>> dataset = load_dataset("huggingface/cats-image")
>>> dataset = load_dataset("huggingface/cats-image", trust_remote_code=True)
>>> image = dataset["test"]["image"][0]
>>> image_processor = AutoImageProcessor.from_pretrained("{checkpoint}")
......@@ -803,7 +803,7 @@ TF_SPEECH_BASE_MODEL_SAMPLE = r"""
>>> from transformers import AutoProcessor, {model_class}
>>> from datasets import load_dataset
>>> dataset = load_dataset("hf-internal-testing/librispeech_asr_demo", "clean", split="validation")
>>> dataset = load_dataset("hf-internal-testing/librispeech_asr_demo", "clean", split="validation", trust_remote_code=True)
>>> dataset = dataset.sort("id")
>>> sampling_rate = dataset.features["audio"].sampling_rate
......@@ -828,7 +828,7 @@ TF_SPEECH_CTC_SAMPLE = r"""
>>> from datasets import load_dataset
>>> import tensorflow as tf
>>> dataset = load_dataset("hf-internal-testing/librispeech_asr_demo", "clean", split="validation")
>>> dataset = load_dataset("hf-internal-testing/librispeech_asr_demo", "clean", split="validation", trust_remote_code=True)
>>> dataset = dataset.sort("id")
>>> sampling_rate = dataset.features["audio"].sampling_rate
......@@ -863,7 +863,7 @@ TF_VISION_BASE_MODEL_SAMPLE = r"""
>>> from transformers import AutoImageProcessor, {model_class}
>>> from datasets import load_dataset
>>> dataset = load_dataset("huggingface/cats-image")
>>> dataset = load_dataset("huggingface/cats-image", trust_remote_code=True)
>>> image = dataset["test"]["image"][0]
>>> image_processor = AutoImageProcessor.from_pretrained("{checkpoint}")
......@@ -886,7 +886,7 @@ TF_VISION_SEQ_CLASS_SAMPLE = r"""
>>> import tensorflow as tf
>>> from datasets import load_dataset
>>> dataset = load_dataset("huggingface/cats-image")
>>> dataset = load_dataset("huggingface/cats-image", trust_remote_code=True)
>>> image = dataset["test"]["image"][0]
>>> image_processor = AutoImageProcessor.from_pretrained("{checkpoint}")
......
......@@ -128,9 +128,9 @@ class ModelArguments:
default=False,
metadata={
"help": (
"Whether or not to allow for custom models defined on the Hub in their own modeling files. This option "
"should only be set to `True` for repositories you trust and in which you have read the code, as it will "
"execute code present on the Hub on your local machine."
"Whether to trust the execution of code from datasets/models defined on the Hub."
" This option should only be set to `True` for repositories you trust and in which you have read the"
" code, as it will execute code present on the Hub on your local machine."
)
},
)
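
The hunk above only shows the field's metadata; pieced together from the surrounding context and the `model_args.trust_remote_code` usage in the next hunk, the full dataclass field plausibly reads as follows (a reconstruction, not copied from the diff):

```python
from dataclasses import dataclass, field

@dataclass
class ModelArguments:
    # Sketch reconstructed from the hunk above; names follow the examples' convention.
    trust_remote_code: bool = field(
        default=False,
        metadata={
            "help": (
                "Whether to trust the execution of code from datasets/models defined on the Hub."
                " This option should only be set to `True` for repositories you trust and in which you have"
                " read the code, as it will execute code present on the Hub on your local machine."
            )
        },
    )
```
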
......@@ -274,7 +274,11 @@ def main():
# download the dataset.
if data_args.dataset_name is not None:
# Downloading and loading a dataset from the hub.
- raw_datasets = load_dataset(data_args.dataset_name, data_args.dataset_config_name)
+ raw_datasets = load_dataset(
+     data_args.dataset_name,
+     data_args.dataset_config_name,
+     trust_remote_code=model_args.trust_remote_code,
+ )
else:
data_files = {}
if data_args.train_file is not None:
......@@ -568,6 +572,15 @@ def parse_args():
default=None,
help= "The configuration name of the dataset to use (via the datasets library).",
)
+ parser.add_argument(
+     "--trust_remote_code",
+     action="store_true",
+     help=(
+         "Whether to trust the execution of code from datasets/models defined on the Hub."
+         " This option should only be set to `True` for repositories you trust and in which you have read the"
+         " code, as it will execute code present on the Hub on your local machine."
+     ),
+ )
parser.add_argument(
"--train_file", type=str, default=None, help="A csv or a json file containing the training data."
)
......@@ -725,7 +738,9 @@ def main():
# download the dataset.
if args.dataset_name is not None:
# Downloading and loading a dataset from the hub.
- raw_datasets = load_dataset(args.dataset_name, args.dataset_config_name)
+ raw_datasets = load_dataset(
+     args.dataset_name, args.dataset_config_name, trust_remote_code=args.trust_remote_code
+ )
else:
data_files = {}
if args.train_file is not None:
......
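
Taken together, the two hunks above thread the new CLI flag straight through to `load_dataset`. A self-contained sketch of that flow, with illustrative argument values:

```python
import argparse

from datasets import load_dataset

parser = argparse.ArgumentParser()
parser.add_argument("--dataset_name", type=str, default=None)
parser.add_argument("--dataset_config_name", type=str, default=None)
# store_true means the flag defaults to False, matching datasets >= 2.20.0 behavior.
parser.add_argument("--trust_remote_code", action="store_true")
args = parser.parse_args(
    ["--dataset_name", "hf-internal-testing/librispeech_asr_dummy",
     "--dataset_config_name", "clean", "--trust_remote_code"]
)

# The flag is forwarded verbatim; script-based datasets fail to load without it.
raw_datasets = load_dataset(
    args.dataset_name, args.dataset_config_name, trust_remote_code=args.trust_remote_code
)
```
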
......@@ -269,6 +269,7 @@ def make_task_cmds():
"img_clas": f"""
{scripts_dir}/image-classification/run_image_classification.py
--dataset_name hf-internal-testing/cats_vs_dogs_sample
+ --trust_remote_code
--remove_unused_columns False
--max_steps 10
--image_processor_name {DS_TESTS_DIRECTORY}/vit_feature_extractor.json
......
......@@ -153,7 +153,9 @@ class ASTFeatureExtractionTest(SequenceFeatureExtractionTestMixin, unittest.Test
def _load_datasamples(self, num_samples):
from datasets import load_dataset
ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
ds = load_dataset(
"hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
)
# automatic decoding with librispeech
speech_samples = ds.sort("id").select(range(num_samples))[:num_samples]["audio"]
......
......@@ -96,7 +96,7 @@ class BeitImageProcessingTester(unittest.TestCase):
def prepare_semantic_single_inputs():
dataset = load_dataset("hf-internal-testing/fixtures_ade20k", split="test")
dataset = load_dataset("hf-internal-testing/fixtures_ade20k", split="test", trust_remote_code=True)
image = Image.open(dataset[0]["file"])
map = Image.open(dataset[1]["file"])
......@@ -105,7 +105,7 @@ def prepare_semantic_single_inputs():
def prepare_semantic_batch_inputs():
ds = load_dataset("hf-internal-testing/fixtures_ade20k", split="test")
ds = load_dataset("hf-internal-testing/fixtures_ade20k", split="test", trust_remote_code=True)
image1 = Image.open(ds[0]["file"])
map1 = Image.open(ds[1]["file"])
......
......@@ -484,7 +484,7 @@ class BeitModelIntegrationTest(unittest.TestCase):
image_processor = BeitImageProcessor(do_resize=True, size=640, do_center_crop=False)
ds = load_dataset("hf-internal-testing/fixtures_ade20k", split="test")
ds = load_dataset("hf-internal-testing/fixtures_ade20k", split="test", trust_remote_code=True)
image = Image.open(ds[0]["file"])
inputs = image_processor(images=image, return_tensors="pt").to(torch_device)
......@@ -527,7 +527,7 @@ class BeitModelIntegrationTest(unittest.TestCase):
image_processor = BeitImageProcessor(do_resize=True, size=640, do_center_crop=False)
ds = load_dataset("hf-internal-testing/fixtures_ade20k", split="test")
ds = load_dataset("hf-internal-testing/fixtures_ade20k", split="test", trust_remote_code=True)
image = Image.open(ds[0]["file"])
inputs = image_processor(images=image, return_tensors="pt").to(torch_device)
......
......@@ -123,7 +123,7 @@ class BloomTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
- https://huggingface.co/bigscience/tokenizer/
"""
tokenizer = self.get_rust_tokenizer()
ds = load_dataset("xnli", "all_languages", split="test", streaming=True)
ds = load_dataset("facebook/xnli", "all_languages", split="test", streaming=True)
sample_data = next(iter(ds))["premise"] # pick up one data
input_text = list(sample_data.values())
......
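
The tokenizer tests above take the other available route: rather than opting in to remote code, they switch to the canonical `facebook/xnli` repository, which (like `google/code_x_glue_ct_code_to_text` further down) appears to serve its data directly and so needs no loading script. A sketch of that variant:

```python
from datasets import load_dataset

# Canonical repos host the data itself, so no trust_remote_code opt-in is needed.
ds = load_dataset("facebook/xnli", "all_languages", split="test", streaming=True)
sample = next(iter(ds))["premise"]  # one example, as in the Bloom tokenizer test
print(list(sample.values())[:2])
```
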
......@@ -164,7 +164,9 @@ class ClapFeatureExtractionTest(SequenceFeatureExtractionTestMixin, unittest.Tes
# Copied from tests.models.whisper.test_feature_extraction_whisper.WhisperFeatureExtractionTest._load_datasamples
def _load_datasamples(self, num_samples):
ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
ds = load_dataset(
"hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
)
# automatic decoding with librispeech
speech_samples = ds.sort("id").select(range(num_samples))[:num_samples]["audio"]
......
......@@ -665,7 +665,9 @@ class ClapModelIntegrationTest(unittest.TestCase):
"repeat": 0.0023,
}
- librispeech_dummy = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
+ librispeech_dummy = load_dataset(
+     "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
+ )
audio_sample = librispeech_dummy[-1]
model_id = "laion/clap-htsat-unfused"
......@@ -692,7 +694,9 @@ class ClapModelIntegrationTest(unittest.TestCase):
"pad": -0.000379,
}
- librispeech_dummy = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
+ librispeech_dummy = load_dataset(
+     "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
+ )
audio_sample = librispeech_dummy[-1]
model_id = "laion/clap-htsat-fused"
......@@ -719,7 +723,9 @@ class ClapModelIntegrationTest(unittest.TestCase):
"pad": 0.0006,
}
- librispeech_dummy = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
+ librispeech_dummy = load_dataset(
+     "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
+ )
audio_samples = [sample["array"] for sample in librispeech_dummy[0:4]["audio"]]
model_id = "laion/clap-htsat-fused"
......@@ -746,7 +752,9 @@ class ClapModelIntegrationTest(unittest.TestCase):
"pad": 0.0019,
}
- librispeech_dummy = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
+ librispeech_dummy = load_dataset(
+     "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
+ )
audio_samples = [sample["array"] for sample in librispeech_dummy[0:4]["audio"]]
model_id = "laion/clap-htsat-unfused"
......
......@@ -209,7 +209,9 @@ class ClvpFeatureExtractionTest(SequenceFeatureExtractionTestMixin, unittest.Tes
self.assertTrue(pt_processed.input_features.dtype == torch.float32)
def _load_datasamples(self, num_samples):
ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
ds = load_dataset(
"hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
)
ds = ds.cast_column("audio", Audio(sampling_rate=22050))
# automatic decoding with librispeech
speech_samples = ds.sort("id").select(range(num_samples))[:num_samples]["audio"]
......
......@@ -371,7 +371,9 @@ class ClvpModelForConditionalGenerationTester:
def prepare_config_and_inputs(self):
_, input_ids, attention_mask = self.clvp_encoder_tester.prepare_config_and_inputs()
- ds = datasets.load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
+ ds = datasets.load_dataset(
+     "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
+ )
ds = ds.cast_column("audio", datasets.Audio(sampling_rate=22050))
_, audio, sr = ds.sort("id").select(range(1))[:1]["audio"][0].values()
......@@ -553,7 +555,9 @@ class ClvpModelForConditionalGenerationTest(ModelTesterMixin, unittest.TestCase)
class ClvpIntegrationTest(unittest.TestCase):
def setUp(self):
self.text = "This is an example text."
- ds = datasets.load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
+ ds = datasets.load_dataset(
+     "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
+ )
ds = ds.cast_column("audio", datasets.Audio(sampling_rate=22050))
_, self.speech_samples, self.sr = ds.sort("id").select(range(1))[:1]["audio"][0].values()
......
......@@ -493,7 +493,7 @@ class LlamaIntegrationTest(unittest.TestCase):
pyth_tokenizer = self.tokenizer
rust_tokenizer = self.rust_tokenizer
dataset = load_dataset("code_x_glue_ct_code_to_text", "go")
dataset = load_dataset("google/code_x_glue_ct_code_to_text", "go")
for item in tqdm.tqdm(dataset["validation"]):
string = item["code"]
encoded1 = pyth_tokenizer.encode(string)
......@@ -506,7 +506,7 @@ class LlamaIntegrationTest(unittest.TestCase):
self.assertEqual(decoded1, decoded2)
dataset = load_dataset("xnli", "all_languages")
dataset = load_dataset("facebook/xnli", "all_languages")
for item in tqdm.tqdm(dataset["train"]):
for string in item["premise"].values():
......
......@@ -697,7 +697,9 @@ class Data2VecAudioUtilsTest(unittest.TestCase):
@slow
class Data2VecAudioModelIntegrationTest(unittest.TestCase):
def _load_datasamples(self, num_samples):
ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
ds = load_dataset(
"hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
)
# automatic decoding with librispeech
speech_samples = ds.sort("id").filter(
lambda x: x["id"] in [f"1272-141231-000{i}" for i in range(num_samples)]
......@@ -706,7 +708,7 @@ class Data2VecAudioModelIntegrationTest(unittest.TestCase):
return [x["array"] for x in speech_samples]
def _load_superb(self, task, num_samples):
ds = load_dataset("anton-l/superb_dummy", task, split="test")
ds = load_dataset("anton-l/superb_dummy", task, split="test", trust_remote_code=True)
return ds[:num_samples]
......
......@@ -138,7 +138,9 @@ class EnCodecFeatureExtractionTest(SequenceFeatureExtractionTestMixin, unittest.
def _load_datasamples(self, num_samples):
from datasets import load_dataset
ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
ds = load_dataset(
"hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
)
# automatic decoding with librispeech
audio_samples = ds.sort("id").select(range(num_samples))[:num_samples]["audio"]
......
......@@ -462,7 +462,9 @@ class EncodecIntegrationTest(unittest.TestCase):
"1.5": [371955],
"24.0": [6659962],
}
- librispeech_dummy = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
+ librispeech_dummy = load_dataset(
+     "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
+ )
model_id = "facebook/encodec_24khz"
model = EncodecModel.from_pretrained(model_id).to(torch_device)
......@@ -516,7 +518,9 @@ class EncodecIntegrationTest(unittest.TestCase):
"3.0": [144259, 146765, 156435, 176871, 161971],
"24.0": [1568553, 1294948, 1306190, 1464747, 1663150],
}
- librispeech_dummy = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
+ librispeech_dummy = load_dataset(
+     "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
+ )
model_id = "facebook/encodec_48khz"
model = EncodecModel.from_pretrained(model_id).to(torch_device)
......@@ -578,7 +582,9 @@ class EncodecIntegrationTest(unittest.TestCase):
[85561, 81870, 76953, 48967, 79315, 85442, 81479, 107241],
],
}
- librispeech_dummy = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
+ librispeech_dummy = load_dataset(
+     "hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
+ )
model_id = "facebook/encodec_48khz"
model = EncodecModel.from_pretrained(model_id).to(torch_device)
......
......@@ -314,7 +314,7 @@ class GemmaIntegrationTest(unittest.TestCase):
pyth_tokenizer = self.tokenizer
rust_tokenizer = self.rust_tokenizer
dataset = load_dataset("code_x_glue_ct_code_to_text", "go")
dataset = load_dataset("google/code_x_glue_ct_code_to_text", "go")
for item in tqdm.tqdm(dataset["validation"]):
string = item["code"]
encoded1 = pyth_tokenizer.encode(string)
......@@ -333,7 +333,7 @@ class GemmaIntegrationTest(unittest.TestCase):
self.assertEqual(decoded1, decoded2)
dataset = load_dataset("xnli", "all_languages")
dataset = load_dataset("facebook/xnli", "all_languages")
for item in tqdm.tqdm(dataset["train"]):
for string in item["premise"].values():
......
......@@ -757,7 +757,9 @@ class HubertModelIntegrationTest(unittest.TestCase):
def _load_datasamples(self, num_samples):
from datasets import load_dataset
ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
ds = load_dataset(
"hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
)
# automatic decoding with librispeech
speech_samples = ds.sort("id").filter(
lambda x: x["id"] in [f"1272-141231-000{i}" for i in range(num_samples)]
......@@ -768,7 +770,7 @@ class HubertModelIntegrationTest(unittest.TestCase):
def _load_superb(self, task, num_samples):
from datasets import load_dataset
ds = load_dataset("anton-l/superb_dummy", task, split="test")
ds = load_dataset("anton-l/superb_dummy", task, split="test", trust_remote_code=True)
return ds[:num_samples]
......
......@@ -609,7 +609,9 @@ class TFHubertModelIntegrationTest(unittest.TestCase):
def _load_datasamples(self, num_samples):
from datasets import load_dataset
ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
ds = load_dataset(
"hf-internal-testing/librispeech_asr_dummy", "clean", split="validation", trust_remote_code=True
)
# automatic decoding with librispeech
speech_samples = ds.sort("id").filter(
lambda x: x["id"] in [f"1272-141231-000{i}" for i in range(num_samples)]
......
......@@ -103,7 +103,7 @@ class LayoutLMv2ImageProcessingTest(ImageProcessingTestMixin, unittest.TestCase)
from datasets import load_dataset
ds = load_dataset("hf-internal-testing/fixtures_docvqa", split="test")
ds = load_dataset("hf-internal-testing/fixtures_docvqa", split="test", trust_remote_code=True)
image = Image.open(ds[0]["file"]).convert("RGB")
......