"...git@developer.sourcefind.cn:chenpangpang/open-webui.git" did not exist on "9bd48ffd9361f386b49820c0b1b4b292d93caa7b"
Commit 6b8d2270 authored by VictorSanh's avatar VictorSanh
Browse files

some cleaning

parent 122d5c52
...@@ -105,13 +105,10 @@ def bertModel(*args, **kwargs): ...@@ -105,13 +105,10 @@ def bertModel(*args, **kwargs):
# Prepare tokenized input # Prepare tokenized input
>>> text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]" >>> text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]"
>>> tokenized_text = tokenizer.tokenize(text) >>> tokenized_text = tokenizer.tokenize(text)
['[CLS]', 'Who', 'was', 'Jim', 'He', '##nson', '?', '[SEP]', 'Jim', 'He', '##nson', 'was', 'a', 'puppet', '##eer', '[SEP]']
>>> indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text) >>> indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
>>> segments_ids = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] >>> segments_ids = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1]
>>> tokens_tensor = torch.tensor([indexed_tokens]) >>> tokens_tensor = torch.tensor([indexed_tokens])
tensor([[101, 2627, 1108, 3104, 1124, 15703, 136, 102, 3104, 1124, 15703, 1108, 170, 16797, 8284, 102]])
>>> segments_tensors = torch.tensor([segments_ids]) >>> segments_tensors = torch.tensor([segments_ids])
tensor([[0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1]])
# Load bertModel # Load bertModel
>>> model = torch.hub.load('huggingface/pytorch-pretrained-BERT', 'bertModel', 'bert-base-cased') >>> model = torch.hub.load('huggingface/pytorch-pretrained-BERT', 'bertModel', 'bert-base-cased')
>>> model.eval() >>> model.eval()
...@@ -190,7 +187,6 @@ def bertForMaskedLM(*args, **kwargs): ...@@ -190,7 +187,6 @@ def bertForMaskedLM(*args, **kwargs):
>>> tokenized_text = tokenizer.tokenize(text) >>> tokenized_text = tokenizer.tokenize(text)
>>> masked_index = 8 >>> masked_index = 8
>>> tokenized_text[masked_index] = '[MASK]' >>> tokenized_text[masked_index] = '[MASK]'
['[CLS]', 'who', 'was', 'jim', 'henson', '?', '[SEP]', 'jim', '[MASK]', 'was', 'a', 'puppet', '##eer', '[SEP]']
>>> indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text) >>> indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
>>> segments_ids = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] >>> segments_ids = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1]
>>> tokens_tensor = torch.tensor([indexed_tokens]) >>> tokens_tensor = torch.tensor([indexed_tokens])
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment