Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
chenpangpang
transformers
Commits
49e812d1
Unverified
Commit
49e812d1
authored
Jun 30, 2023
by
Stas Bekman
Committed by
GitHub
Jun 30, 2023
Browse files
[several models] improve readability (#24585)
* [modeling_clip.py] improve readability
* apply to other models
* fix
parent
134caef3
Changes
14
Show whitespace changes
Inline
Side-by-side
Showing
14 changed files
with
15 additions
and
15 deletions
+15
-15
src/transformers/models/align/modeling_align.py
src/transformers/models/align/modeling_align.py
+1
-1
src/transformers/models/altclip/modeling_altclip.py
src/transformers/models/altclip/modeling_altclip.py
+1
-1
src/transformers/models/blip/modeling_blip.py
src/transformers/models/blip/modeling_blip.py
+1
-1
src/transformers/models/bridgetower/modeling_bridgetower.py
src/transformers/models/bridgetower/modeling_bridgetower.py
+1
-1
src/transformers/models/chinese_clip/modeling_chinese_clip.py
src/transformers/models/chinese_clip/modeling_chinese_clip.py
+1
-1
src/transformers/models/clap/modeling_clap.py
src/transformers/models/clap/modeling_clap.py
+2
-2
src/transformers/models/clip/modeling_clip.py
src/transformers/models/clip/modeling_clip.py
+1
-1
src/transformers/models/clipseg/modeling_clipseg.py
src/transformers/models/clipseg/modeling_clipseg.py
+1
-1
src/transformers/models/flava/modeling_flava.py
src/transformers/models/flava/modeling_flava.py
+1
-1
src/transformers/models/groupvit/modeling_groupvit.py
src/transformers/models/groupvit/modeling_groupvit.py
+1
-1
src/transformers/models/oneformer/modeling_oneformer.py
src/transformers/models/oneformer/modeling_oneformer.py
+1
-1
src/transformers/models/owlvit/modeling_owlvit.py
src/transformers/models/owlvit/modeling_owlvit.py
+1
-1
src/transformers/models/vision_text_dual_encoder/modeling_vision_text_dual_encoder.py
src/transformers/models/vision_text_dual_encoder/modeling_vision_text_dual_encoder.py
+1
-1
src/transformers/models/x_clip/modeling_x_clip.py
src/transformers/models/x_clip/modeling_x_clip.py
+1
-1
No files found.
src/transformers/models/align/modeling_align.py
View file @
49e812d1
...
...
@@ -1444,7 +1444,7 @@ class AlignModel(AlignPreTrainedModel):
         self.vision_model = AlignVisionModel(vision_config)
         self.text_projection = nn.Linear(self.text_embed_dim, self.projection_dim)
-        self.temperature = nn.Parameter(torch.ones([]) * self.config.temperature_init_value)
+        self.temperature = nn.Parameter(torch.tensor(self.config.temperature_init_value))
         # Initialize weights and apply final processing
         self.post_init()
...
...
src/transformers/models/altclip/modeling_altclip.py
View file @
49e812d1
...
...
@@ -1506,7 +1506,7 @@ class AltCLIPModel(AltCLIPPreTrainedModel):
         self.visual_projection = nn.Linear(self.vision_embed_dim, self.projection_dim, bias=False)
         self.text_projection = nn.Linear(self.text_embed_dim, self.projection_dim, bias=False)
-        self.logit_scale = nn.Parameter(torch.ones([]) * self.config.logit_scale_init_value)
+        self.logit_scale = nn.Parameter(torch.tensor(self.config.logit_scale_init_value))
         # Initialize weights and apply final processing
         self.post_init()
...
...
src/transformers/models/blip/modeling_blip.py
View file @
49e812d1
...
...
@@ -743,7 +743,7 @@ class BlipModel(BlipPreTrainedModel):
         self.visual_projection = nn.Linear(self.vision_embed_dim, self.projection_dim, bias=False)
         self.text_projection = nn.Linear(self.text_embed_dim, self.projection_dim, bias=False)
-        self.logit_scale = nn.Parameter(torch.ones([]) * self.config.logit_scale_init_value)
+        self.logit_scale = nn.Parameter(torch.tensor(self.config.logit_scale_init_value))
         # Initialize weights and apply final processing
         self.post_init()
...
...
src/transformers/models/bridgetower/modeling_bridgetower.py
View file @
49e812d1
...
...
@@ -1778,7 +1778,7 @@ class BridgeTowerForContrastiveLearning(BridgeTowerPreTrainedModel):
         self.itc_image_head = BridgeTowerContrastiveHead(config.hidden_size, config.contrastive_hidden_size)
         self.itc_cross_modal_head = BridgeTowerContrastiveHead(config.hidden_size * 2, config.contrastive_hidden_size)
-        self.logit_scale = nn.Parameter(torch.ones([]) * self.config.logit_scale_init_value)
+        self.logit_scale = nn.Parameter(torch.tensor(self.config.logit_scale_init_value))
         # Initialize weights and apply final processing
         self.post_init()
...
...
src/transformers/models/chinese_clip/modeling_chinese_clip.py
View file @
49e812d1
...
...
@@ -1376,7 +1376,7 @@ class ChineseCLIPModel(ChineseCLIPPreTrainedModel):
         self.visual_projection = nn.Linear(self.vision_embed_dim, self.projection_dim, bias=False)
         self.text_projection = nn.Linear(self.text_embed_dim, self.projection_dim, bias=False)
-        self.logit_scale = nn.Parameter(torch.ones([]) * self.config.logit_scale_init_value)
+        self.logit_scale = nn.Parameter(torch.tensor(self.config.logit_scale_init_value))
         # Initialize weights and apply final processing
         self.post_init()
...
...
src/transformers/models/clap/modeling_clap.py
View file @
49e812d1
...
...
@@ -1956,8 +1956,8 @@ class ClapModel(ClapPreTrainedModel):
         text_config = config.text_config
         audio_config = config.audio_config
-        self.logit_scale_a = nn.Parameter(torch.ones([]) * np.log(config.logit_scale_init_value))
-        self.logit_scale_t = nn.Parameter(torch.ones([]) * np.log(config.logit_scale_init_value))
+        self.logit_scale_a = nn.Parameter(torch.tensor(np.log(config.logit_scale_init_value)))
+        self.logit_scale_t = nn.Parameter(torch.tensor(np.log(config.logit_scale_init_value)))
         self.projection_dim = config.projection_dim
...
...
src/transformers/models/clip/modeling_clip.py
View file @
49e812d1
...
...
@@ -977,7 +977,7 @@ class CLIPModel(CLIPPreTrainedModel):
         self.visual_projection = nn.Linear(self.vision_embed_dim, self.projection_dim, bias=False)
         self.text_projection = nn.Linear(self.text_embed_dim, self.projection_dim, bias=False)
-        self.logit_scale = nn.Parameter(torch.ones([]) * self.config.logit_scale_init_value)
+        self.logit_scale = nn.Parameter(torch.tensor(self.config.logit_scale_init_value))
         # Initialize weights and apply final processing
         self.post_init()
...
...
src/transformers/models/clipseg/modeling_clipseg.py
View file @
49e812d1
...
...
@@ -979,7 +979,7 @@ class CLIPSegModel(CLIPSegPreTrainedModel):
         self.visual_projection = nn.Linear(self.vision_embed_dim, self.projection_dim, bias=False)
         self.text_projection = nn.Linear(self.text_embed_dim, self.projection_dim, bias=False)
-        self.logit_scale = nn.Parameter(torch.ones([]) * self.config.logit_scale_init_value)
+        self.logit_scale = nn.Parameter(torch.tensor(self.config.logit_scale_init_value))
         # Initialize weights and apply final processing
         self.post_init()
...
...
src/transformers/models/flava/modeling_flava.py
View file @
49e812d1
...
...
@@ -1229,7 +1229,7 @@ class FlavaModel(FlavaPreTrainedModel):
         self.image_projection = nn.Linear(self.image_hidden_size, self.projection_dim)
         self.text_projection = nn.Linear(self.text_hidden_size, self.projection_dim)
-        self.logit_scale = nn.Parameter(torch.ones([]) * self.config.logit_scale_init_value)
+        self.logit_scale = nn.Parameter(torch.tensor(self.config.logit_scale_init_value))
         self.image_to_mm_projection = nn.Linear(self.image_hidden_size, self.mm_hidden_size)
         self.text_to_mm_projection = nn.Linear(self.text_hidden_size, self.mm_hidden_size)
...
...
src/transformers/models/groupvit/modeling_groupvit.py
View file @
49e812d1
...
...
@@ -1368,7 +1368,7 @@ class GroupViTModel(GroupViTPreTrainedModel):
             nn.ReLU(inplace=True),
             nn.Linear(self.projection_intermediate_dim, self.projection_dim, bias=True),
         )
-        self.logit_scale = nn.Parameter(torch.ones([]) * self.config.logit_scale_init_value)
+        self.logit_scale = nn.Parameter(torch.tensor(self.config.logit_scale_init_value))
         # Initialize weights and apply final processing
         self.post_init()
...
...
src/transformers/models/oneformer/modeling_oneformer.py
View file @
49e812d1
...
...
@@ -399,7 +399,7 @@ class OneFormerLoss(nn.Module):
         self.importance_sample_ratio = importance_sample_ratio
         self.contrastive_temperature = contrastive_temperature
         if self.contrastive_temperature is not None:
-            self.logit_scale = nn.Parameter(torch.ones([]) * np.log(1 / contrastive_temperature))
+            self.logit_scale = nn.Parameter(torch.tensor(np.log(1 / contrastive_temperature)))
     def _max_by_axis(self, the_list: List[List[int]]) -> List[int]:
         maxes = the_list[0]
...
...
src/transformers/models/owlvit/modeling_owlvit.py
View file @
49e812d1
...
...
@@ -1065,7 +1065,7 @@ class OwlViTModel(OwlViTPreTrainedModel):
         self.visual_projection = nn.Linear(self.vision_embed_dim, self.projection_dim, bias=False)
         self.text_projection = nn.Linear(self.text_embed_dim, self.projection_dim, bias=False)
-        self.logit_scale = nn.Parameter(torch.ones([]) * config.logit_scale_init_value)
+        self.logit_scale = nn.Parameter(torch.tensor(config.logit_scale_init_value))
         # Initialize weights and apply final processing
         self.post_init()
...
...
src/transformers/models/vision_text_dual_encoder/modeling_vision_text_dual_encoder.py
View file @
49e812d1
...
...
@@ -204,7 +204,7 @@ class VisionTextDualEncoderModel(PreTrainedModel):
         self.visual_projection = nn.Linear(self.vision_embed_dim, self.projection_dim, bias=False)
         self.text_projection = nn.Linear(self.text_embed_dim, self.projection_dim, bias=False)
-        self.logit_scale = nn.Parameter(torch.ones([]) * self.config.logit_scale_init_value)
+        self.logit_scale = nn.Parameter(torch.tensor(self.config.logit_scale_init_value))
     @add_start_docstrings_to_model_forward(VISION_TEXT_DUAL_ENCODER_TEXT_INPUTS_DOCSTRING)
     def get_text_features(
...
...
src/transformers/models/x_clip/modeling_x_clip.py
View file @
49e812d1
...
...
@@ -1309,7 +1309,7 @@ class XCLIPModel(XCLIPPreTrainedModel):
         self.visual_projection = nn.Linear(self.vision_embed_dim, self.projection_dim, bias=False)
         self.text_projection = nn.Linear(self.text_embed_dim, self.projection_dim, bias=False)
-        self.logit_scale = nn.Parameter(torch.ones([]) * self.config.logit_scale_init_value)
+        self.logit_scale = nn.Parameter(torch.tensor(self.config.logit_scale_init_value))
         self.prompts_visual_layernorm = nn.LayerNorm(self.vision_embed_dim, eps=config.vision_config.layer_norm_eps)
         self.prompts_visual_projection = nn.Parameter(torch.randn(self.vision_embed_dim, self.projection_dim))
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment