chenpangpang / transformers

Commit 991b8e65, authored Apr 11, 2019 by thomwolf

    Merge branch 'master' of https://github.com/huggingface/pytorch-pretrained-BERT

Parents: e99b2014, 94980b52
Showing 5 changed files with 27 additions and 7 deletions (+27, -7):

    examples/lm_finetuning/pregenerate_training_data.py   (+1, -1)
    pytorch_pretrained_bert/modeling_gpt2.py              (+11, -2)
    pytorch_pretrained_bert/modeling_openai.py            (+11, -2)
    pytorch_pretrained_bert/optimization.py               (+2, -1)
    pytorch_pretrained_bert/optimization_openai.py        (+2, -1)
examples/lm_finetuning/pregenerate_training_data.py

@@ -49,7 +49,7 @@ class DocumentDatabase:
                 self._precalculate_doc_weights()
             rand_start = self.doc_cumsum[current_idx]
             rand_end = rand_start + self.cumsum_max - self.doc_lengths[current_idx]
-            sentence_index = randint(rand_start, rand_end) % self.cumsum_max
+            sentence_index = randint(rand_start, rand_end - 1) % self.cumsum_max
             sampled_doc_index = np.searchsorted(self.doc_cumsum, sentence_index, side='right')
         else:
             # If we don't use sentence weighting, then every doc has an equal chance to be chosen
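The one-character fix above addresses an off-by-one in document sampling: Python's `random.randint(a, b)` is inclusive on both ends, so drawing up to `rand_end` made it possible to land back on the document being excluded. A minimal sketch with toy numbers (hypothetical, not the repository's data) showing how the old bound can re-select `current_idx` while the new bound cannot:

```python
from random import randint
import numpy as np

# A toy three-document database: doc 0 has 4 sentences, doc 1 has 3, doc 2 has 3.
doc_cumsum = np.array([4, 7, 10])
doc_lengths = np.array([4, 3, 3])
cumsum_max = 10

current_idx = 1  # sample_doc must pick any document EXCEPT doc 1
rand_start = doc_cumsum[current_idx]                           # 7
rand_end = rand_start + cumsum_max - doc_lengths[current_idx]  # 14

# random.randint is inclusive on BOTH ends, so the old call could draw
# rand_end itself: 14 % 10 == 4, and searchsorted maps index 4 back to
# doc 1, the very document we were trying to exclude.
bad = np.searchsorted(doc_cumsum, 14 % cumsum_max, side='right')  # == 1 (bug)

# With the fixed upper bound, every reachable index maps to doc 0 or doc 2.
for _ in range(1000):
    sentence_index = randint(rand_start, rand_end - 1) % cumsum_max
    sampled = np.searchsorted(doc_cumsum, sentence_index, side='right')
    assert sampled != current_idx
```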
pytorch_pretrained_bert/modeling_gpt2.py

@@ -617,8 +617,14 @@ class GPT2LMHeadModel(GPT2PreTrainedModel):
         hidden_states, presents = self.transformer(input_ids, position_ids, token_type_ids, past)
         lm_logits = self.lm_head(hidden_states)
         if lm_labels is not None:
+            # Shift so that tokens < n predict n
+            shift_logits = lm_logits[:, :-1].contiguous()
+            shift_labels = lm_labels[:, 1:].contiguous()
+            # Flatten the tokens
             loss_fct = CrossEntropyLoss(ignore_index=-1)
-            loss = loss_fct(lm_logits.view(-1, lm_logits.size(-1)), lm_labels.view(-1))
+            loss = loss_fct(shift_logits.view(-1, shift_logits.size(-1)),
+                            shift_labels.view(-1))
             return loss
         return lm_logits, presents
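The substantive change here: GPT-2 produces a distribution at every position, and the logit at position i scores the token at position i+1. The old loss compared logits and labels at the same position, training the model to reproduce the current token instead of predicting the next one. A self-contained sketch of the corrected alignment, with toy shapes in place of the real model:

```python
import torch
from torch.nn import CrossEntropyLoss

batch, seq_len, vocab = 2, 5, 11  # toy sizes
lm_logits = torch.randn(batch, seq_len, vocab)
lm_labels = torch.randint(vocab, (batch, seq_len))

# Position i of the logits predicts token i+1, so drop the last logit
# and the first label before flattening for cross-entropy.
shift_logits = lm_logits[:, :-1].contiguous()  # (batch, seq_len-1, vocab)
shift_labels = lm_labels[:, 1:].contiguous()   # (batch, seq_len-1)

loss_fct = CrossEntropyLoss(ignore_index=-1)
loss = loss_fct(shift_logits.view(-1, shift_logits.size(-1)),
                shift_labels.view(-1))
print(loss)  # scalar next-token loss
```

The `.contiguous()` calls are required because slicing returns non-contiguous views and `Tensor.view` only works on contiguous memory.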
The double-heads model gets the same shift for its language-modeling loss:

@@ -690,8 +696,11 @@ class GPT2DoubleHeadsModel(GPT2PreTrainedModel):
         mc_logits = self.multiple_choice_head(hidden_states, mc_token_ids)
         losses = []
         if lm_labels is not None:
+            shift_logits = lm_logits[:, :-1].contiguous()
+            shift_labels = lm_labels[:, 1:].contiguous()
             loss_fct = CrossEntropyLoss(ignore_index=-1)
-            losses.append(loss_fct(lm_logits.view(-1, lm_logits.size(-1)), lm_labels.view(-1)))
+            losses.append(loss_fct(shift_logits.view(-1,
+                          shift_logits.size(-1)), shift_labels.view(-1)))
         if mc_labels is not None:
             loss_fct = CrossEntropyLoss()
             losses.append(loss_fct(mc_logits.view(-1, mc_logits.size(-1)), mc_labels.view(-1)))
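Note that the multiple-choice loss stays unshifted: picking the right candidate is ordinary classification over the per-choice scores, not next-token prediction, so no `ignore_index` or shifting applies. A toy sketch of that second head's loss as built above:

```python
import torch
from torch.nn import CrossEntropyLoss

batch, num_choices = 2, 4  # toy sizes
mc_logits = torch.randn(batch, num_choices)       # one score per candidate
mc_labels = torch.randint(num_choices, (batch,))  # index of the correct choice

loss_fct = CrossEntropyLoss()
mc_loss = loss_fct(mc_logits.view(-1, mc_logits.size(-1)), mc_labels.view(-1))
```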
pytorch_pretrained_bert/modeling_openai.py

The same two fixes, mirrored for the original GPT:

@@ -714,8 +714,14 @@ class OpenAIGPTLMHeadModel(OpenAIGPTPreTrainedModel):
         hidden_states = self.transformer(input_ids, position_ids, token_type_ids)
         lm_logits = self.lm_head(hidden_states)
         if lm_labels is not None:
+            # Shift so that tokens < n predict n
+            shift_logits = lm_logits[:, :-1].contiguous()
+            shift_labels = lm_labels[:, 1:].contiguous()
+            # Flatten the tokens
             loss_fct = CrossEntropyLoss(ignore_index=-1)
-            loss = loss_fct(lm_logits.view(-1, lm_logits.size(-1)), lm_labels.view(-1))
+            loss = loss_fct(shift_logits.view(-1, shift_logits.size(-1)),
+                            shift_labels.view(-1))
             return loss
         return lm_logits

@@ -801,8 +807,11 @@ class OpenAIGPTDoubleHeadsModel(OpenAIGPTPreTrainedModel):
         mc_logits = self.multiple_choice_head(hidden_states, mc_token_ids)
         losses = []
         if lm_labels is not None:
+            shift_logits = lm_logits[:, :-1].contiguous()
+            shift_labels = lm_labels[:, 1:].contiguous()
             loss_fct = CrossEntropyLoss(ignore_index=-1)
-            losses.append(loss_fct(lm_logits.view(-1, lm_logits.size(-1)), lm_labels.view(-1)))
+            losses.append(loss_fct(shift_logits.view(-1,
+                          shift_logits.size(-1)), shift_labels.view(-1)))
         if mc_labels is not None:
             loss_fct = CrossEntropyLoss()
             losses.append(loss_fct(mc_logits.view(-1, mc_logits.size(-1)), mc_labels.view(-1)))
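In all four LM heads the loss is built with `CrossEntropyLoss(ignore_index=-1)`, so any label set to -1 (padding, masked positions) drops out of the mean entirely. A small check of that behavior, using toy tensors:

```python
import torch
from torch.nn import CrossEntropyLoss

vocab = 7
logits = torch.randn(4, vocab)
labels = torch.tensor([3, -1, 5, -1])  # -1 marks positions to ignore

loss_fct = CrossEntropyLoss(ignore_index=-1)
masked = loss_fct(logits, labels)

# Equivalent to averaging the per-token loss over only the kept positions.
keep = labels != -1
manual = CrossEntropyLoss()(logits[keep], labels[keep])
assert torch.allclose(masked, manual)
```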
pytorch_pretrained_bert/optimization.py

@@ -26,7 +26,8 @@ logger = logging.getLogger(__name__)
 def warmup_cosine(x, warmup=0.002):
     if x < warmup:
         return x/warmup
-    return 0.5 * (1.0 + torch.cos(math.pi * x))
+    x_ = (x - warmup) / (1 - warmup)  # progress after warmup
+    return 0.5 * (1. + math.cos(math.pi * x_))

 def warmup_constant(x, warmup=0.002):
     """ Linearly increases learning rate over `warmup`*`t_total` (as provided to BertAdam) training steps.
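The old body had two problems: `torch.cos` was called on a plain Python float (it expects a tensor), and the cosine ran over total training progress `x` rather than post-warmup progress, which skews the decay whenever `warmup` is not tiny. The fix renormalizes progress so the multiplier is 1.0 right at the end of warmup and reaches 0.0 at the end of training. Spot-checking the fixed schedule (the function copied from the diff above):

```python
import math

def warmup_cosine(x, warmup=0.002):
    if x < warmup:
        return x / warmup
    x_ = (x - warmup) / (1 - warmup)  # progress after warmup
    return 0.5 * (1. + math.cos(math.pi * x_))

# x is the fraction of total training steps completed.
print(warmup_cosine(0.001))  # 0.5  -> halfway through warmup
print(warmup_cosine(0.002))  # 1.0  -> warmup just finished, full LR
print(warmup_cosine(0.501))  # ~0.5 -> midway through the cosine decay
print(warmup_cosine(1.0))    # 0.0  -> end of training
```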
pytorch_pretrained_bert/optimization_openai.py

The identical fix in the OpenAI GPT optimizer module:

@@ -26,7 +26,8 @@ logger = logging.getLogger(__name__)
 def warmup_cosine(x, warmup=0.002):
     if x < warmup:
         return x/warmup
-    return 0.5 * (1.0 + torch.cos(math.pi * x))
+    x_ = (x - warmup) / (1 - warmup)  # progress after warmup
+    return 0.5 * (1. + math.cos(math.pi * x_))

 def warmup_constant(x, warmup=0.002):
     """ Linearly increases learning rate over `warmup`*`t_total` (as provided to OpenAIAdam) training steps.
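For context, these multiplier functions are selected by name through the optimizer's schedule argument. A hypothetical usage sketch, assuming this version's keyword names (`schedule`, `warmup`, `t_total`) and the package-level `OpenAIAdam` export; the hyperparameter values are placeholders, not values from this commit:

```python
import torch
from pytorch_pretrained_bert import OpenAIAdam

model = torch.nn.Linear(10, 10)  # stand-in for a real model

# 'warmup_cosine' selects the warmup_cosine function patched above.
optimizer = OpenAIAdam(model.parameters(),
                       lr=6.25e-5,
                       schedule='warmup_cosine',
                       warmup=0.002,
                       t_total=10000)
```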