Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
chenpangpang
transformers
Commits
49e812d1
Unverified
Commit
49e812d1
authored
Jun 30, 2023
by
Stas Bekman
Committed by
GitHub
Jun 30, 2023
Browse files
[several models] improve readability (#24585)
* [modeling_clip.py] improve readability * apply to other models * fix
parent
134caef3
Changes
14
Hide whitespace changes
Inline
Side-by-side
Showing
14 changed files
with
15 additions
and
15 deletions
+15
-15
src/transformers/models/align/modeling_align.py
src/transformers/models/align/modeling_align.py
+1
-1
src/transformers/models/altclip/modeling_altclip.py
src/transformers/models/altclip/modeling_altclip.py
+1
-1
src/transformers/models/blip/modeling_blip.py
src/transformers/models/blip/modeling_blip.py
+1
-1
src/transformers/models/bridgetower/modeling_bridgetower.py
src/transformers/models/bridgetower/modeling_bridgetower.py
+1
-1
src/transformers/models/chinese_clip/modeling_chinese_clip.py
...transformers/models/chinese_clip/modeling_chinese_clip.py
+1
-1
src/transformers/models/clap/modeling_clap.py
src/transformers/models/clap/modeling_clap.py
+2
-2
src/transformers/models/clip/modeling_clip.py
src/transformers/models/clip/modeling_clip.py
+1
-1
src/transformers/models/clipseg/modeling_clipseg.py
src/transformers/models/clipseg/modeling_clipseg.py
+1
-1
src/transformers/models/flava/modeling_flava.py
src/transformers/models/flava/modeling_flava.py
+1
-1
src/transformers/models/groupvit/modeling_groupvit.py
src/transformers/models/groupvit/modeling_groupvit.py
+1
-1
src/transformers/models/oneformer/modeling_oneformer.py
src/transformers/models/oneformer/modeling_oneformer.py
+1
-1
src/transformers/models/owlvit/modeling_owlvit.py
src/transformers/models/owlvit/modeling_owlvit.py
+1
-1
src/transformers/models/vision_text_dual_encoder/modeling_vision_text_dual_encoder.py
...on_text_dual_encoder/modeling_vision_text_dual_encoder.py
+1
-1
src/transformers/models/x_clip/modeling_x_clip.py
src/transformers/models/x_clip/modeling_x_clip.py
+1
-1
No files found.
src/transformers/models/align/modeling_align.py
View file @
49e812d1
...
@@ -1444,7 +1444,7 @@ class AlignModel(AlignPreTrainedModel):
...
@@ -1444,7 +1444,7 @@ class AlignModel(AlignPreTrainedModel):
self
.
vision_model
=
AlignVisionModel
(
vision_config
)
self
.
vision_model
=
AlignVisionModel
(
vision_config
)
self
.
text_projection
=
nn
.
Linear
(
self
.
text_embed_dim
,
self
.
projection_dim
)
self
.
text_projection
=
nn
.
Linear
(
self
.
text_embed_dim
,
self
.
projection_dim
)
self
.
temperature
=
nn
.
Parameter
(
torch
.
ones
([])
*
self
.
config
.
temperature_init_value
)
self
.
temperature
=
nn
.
Parameter
(
torch
.
tensor
(
self
.
config
.
temperature_init_value
)
)
# Initialize weights and apply final processing
# Initialize weights and apply final processing
self
.
post_init
()
self
.
post_init
()
...
...
src/transformers/models/altclip/modeling_altclip.py
View file @
49e812d1
...
@@ -1506,7 +1506,7 @@ class AltCLIPModel(AltCLIPPreTrainedModel):
...
@@ -1506,7 +1506,7 @@ class AltCLIPModel(AltCLIPPreTrainedModel):
self
.
visual_projection
=
nn
.
Linear
(
self
.
vision_embed_dim
,
self
.
projection_dim
,
bias
=
False
)
self
.
visual_projection
=
nn
.
Linear
(
self
.
vision_embed_dim
,
self
.
projection_dim
,
bias
=
False
)
self
.
text_projection
=
nn
.
Linear
(
self
.
text_embed_dim
,
self
.
projection_dim
,
bias
=
False
)
self
.
text_projection
=
nn
.
Linear
(
self
.
text_embed_dim
,
self
.
projection_dim
,
bias
=
False
)
self
.
logit_scale
=
nn
.
Parameter
(
torch
.
ones
([])
*
self
.
config
.
logit_scale_init_value
)
self
.
logit_scale
=
nn
.
Parameter
(
torch
.
tensor
(
self
.
config
.
logit_scale_init_value
)
)
# Initialize weights and apply final processing
# Initialize weights and apply final processing
self
.
post_init
()
self
.
post_init
()
...
...
src/transformers/models/blip/modeling_blip.py
View file @
49e812d1
...
@@ -743,7 +743,7 @@ class BlipModel(BlipPreTrainedModel):
...
@@ -743,7 +743,7 @@ class BlipModel(BlipPreTrainedModel):
self
.
visual_projection
=
nn
.
Linear
(
self
.
vision_embed_dim
,
self
.
projection_dim
,
bias
=
False
)
self
.
visual_projection
=
nn
.
Linear
(
self
.
vision_embed_dim
,
self
.
projection_dim
,
bias
=
False
)
self
.
text_projection
=
nn
.
Linear
(
self
.
text_embed_dim
,
self
.
projection_dim
,
bias
=
False
)
self
.
text_projection
=
nn
.
Linear
(
self
.
text_embed_dim
,
self
.
projection_dim
,
bias
=
False
)
self
.
logit_scale
=
nn
.
Parameter
(
torch
.
ones
([])
*
self
.
config
.
logit_scale_init_value
)
self
.
logit_scale
=
nn
.
Parameter
(
torch
.
tensor
(
self
.
config
.
logit_scale_init_value
)
)
# Initialize weights and apply final processing
# Initialize weights and apply final processing
self
.
post_init
()
self
.
post_init
()
...
...
src/transformers/models/bridgetower/modeling_bridgetower.py
View file @
49e812d1
...
@@ -1778,7 +1778,7 @@ class BridgeTowerForContrastiveLearning(BridgeTowerPreTrainedModel):
...
@@ -1778,7 +1778,7 @@ class BridgeTowerForContrastiveLearning(BridgeTowerPreTrainedModel):
self
.
itc_image_head
=
BridgeTowerContrastiveHead
(
config
.
hidden_size
,
config
.
contrastive_hidden_size
)
self
.
itc_image_head
=
BridgeTowerContrastiveHead
(
config
.
hidden_size
,
config
.
contrastive_hidden_size
)
self
.
itc_cross_modal_head
=
BridgeTowerContrastiveHead
(
config
.
hidden_size
*
2
,
config
.
contrastive_hidden_size
)
self
.
itc_cross_modal_head
=
BridgeTowerContrastiveHead
(
config
.
hidden_size
*
2
,
config
.
contrastive_hidden_size
)
self
.
logit_scale
=
nn
.
Parameter
(
torch
.
ones
([])
*
self
.
config
.
logit_scale_init_value
)
self
.
logit_scale
=
nn
.
Parameter
(
torch
.
tensor
(
self
.
config
.
logit_scale_init_value
)
)
# Initialize weights and apply final processing
# Initialize weights and apply final processing
self
.
post_init
()
self
.
post_init
()
...
...
src/transformers/models/chinese_clip/modeling_chinese_clip.py
View file @
49e812d1
...
@@ -1376,7 +1376,7 @@ class ChineseCLIPModel(ChineseCLIPPreTrainedModel):
...
@@ -1376,7 +1376,7 @@ class ChineseCLIPModel(ChineseCLIPPreTrainedModel):
self
.
visual_projection
=
nn
.
Linear
(
self
.
vision_embed_dim
,
self
.
projection_dim
,
bias
=
False
)
self
.
visual_projection
=
nn
.
Linear
(
self
.
vision_embed_dim
,
self
.
projection_dim
,
bias
=
False
)
self
.
text_projection
=
nn
.
Linear
(
self
.
text_embed_dim
,
self
.
projection_dim
,
bias
=
False
)
self
.
text_projection
=
nn
.
Linear
(
self
.
text_embed_dim
,
self
.
projection_dim
,
bias
=
False
)
self
.
logit_scale
=
nn
.
Parameter
(
torch
.
ones
([])
*
self
.
config
.
logit_scale_init_value
)
self
.
logit_scale
=
nn
.
Parameter
(
torch
.
tensor
(
self
.
config
.
logit_scale_init_value
)
)
# Initialize weights and apply final processing
# Initialize weights and apply final processing
self
.
post_init
()
self
.
post_init
()
...
...
src/transformers/models/clap/modeling_clap.py
View file @
49e812d1
...
@@ -1956,8 +1956,8 @@ class ClapModel(ClapPreTrainedModel):
...
@@ -1956,8 +1956,8 @@ class ClapModel(ClapPreTrainedModel):
text_config
=
config
.
text_config
text_config
=
config
.
text_config
audio_config
=
config
.
audio_config
audio_config
=
config
.
audio_config
self
.
logit_scale_a
=
nn
.
Parameter
(
torch
.
ones
([])
*
np
.
log
(
config
.
logit_scale_init_value
))
self
.
logit_scale_a
=
nn
.
Parameter
(
torch
.
tensor
(
np
.
log
(
config
.
logit_scale_init_value
))
)
self
.
logit_scale_t
=
nn
.
Parameter
(
torch
.
ones
([])
*
np
.
log
(
config
.
logit_scale_init_value
))
self
.
logit_scale_t
=
nn
.
Parameter
(
torch
.
tensor
(
np
.
log
(
config
.
logit_scale_init_value
))
)
self
.
projection_dim
=
config
.
projection_dim
self
.
projection_dim
=
config
.
projection_dim
...
...
src/transformers/models/clip/modeling_clip.py
View file @
49e812d1
...
@@ -977,7 +977,7 @@ class CLIPModel(CLIPPreTrainedModel):
...
@@ -977,7 +977,7 @@ class CLIPModel(CLIPPreTrainedModel):
self
.
visual_projection
=
nn
.
Linear
(
self
.
vision_embed_dim
,
self
.
projection_dim
,
bias
=
False
)
self
.
visual_projection
=
nn
.
Linear
(
self
.
vision_embed_dim
,
self
.
projection_dim
,
bias
=
False
)
self
.
text_projection
=
nn
.
Linear
(
self
.
text_embed_dim
,
self
.
projection_dim
,
bias
=
False
)
self
.
text_projection
=
nn
.
Linear
(
self
.
text_embed_dim
,
self
.
projection_dim
,
bias
=
False
)
self
.
logit_scale
=
nn
.
Parameter
(
torch
.
ones
([])
*
self
.
config
.
logit_scale_init_value
)
self
.
logit_scale
=
nn
.
Parameter
(
torch
.
tensor
(
self
.
config
.
logit_scale_init_value
)
)
# Initialize weights and apply final processing
# Initialize weights and apply final processing
self
.
post_init
()
self
.
post_init
()
...
...
src/transformers/models/clipseg/modeling_clipseg.py
View file @
49e812d1
...
@@ -979,7 +979,7 @@ class CLIPSegModel(CLIPSegPreTrainedModel):
...
@@ -979,7 +979,7 @@ class CLIPSegModel(CLIPSegPreTrainedModel):
self
.
visual_projection
=
nn
.
Linear
(
self
.
vision_embed_dim
,
self
.
projection_dim
,
bias
=
False
)
self
.
visual_projection
=
nn
.
Linear
(
self
.
vision_embed_dim
,
self
.
projection_dim
,
bias
=
False
)
self
.
text_projection
=
nn
.
Linear
(
self
.
text_embed_dim
,
self
.
projection_dim
,
bias
=
False
)
self
.
text_projection
=
nn
.
Linear
(
self
.
text_embed_dim
,
self
.
projection_dim
,
bias
=
False
)
self
.
logit_scale
=
nn
.
Parameter
(
torch
.
ones
([])
*
self
.
config
.
logit_scale_init_value
)
self
.
logit_scale
=
nn
.
Parameter
(
torch
.
tensor
(
self
.
config
.
logit_scale_init_value
)
)
# Initialize weights and apply final processing
# Initialize weights and apply final processing
self
.
post_init
()
self
.
post_init
()
...
...
src/transformers/models/flava/modeling_flava.py
View file @
49e812d1
...
@@ -1229,7 +1229,7 @@ class FlavaModel(FlavaPreTrainedModel):
...
@@ -1229,7 +1229,7 @@ class FlavaModel(FlavaPreTrainedModel):
self
.
image_projection
=
nn
.
Linear
(
self
.
image_hidden_size
,
self
.
projection_dim
)
self
.
image_projection
=
nn
.
Linear
(
self
.
image_hidden_size
,
self
.
projection_dim
)
self
.
text_projection
=
nn
.
Linear
(
self
.
text_hidden_size
,
self
.
projection_dim
)
self
.
text_projection
=
nn
.
Linear
(
self
.
text_hidden_size
,
self
.
projection_dim
)
self
.
logit_scale
=
nn
.
Parameter
(
torch
.
ones
([])
*
self
.
config
.
logit_scale_init_value
)
self
.
logit_scale
=
nn
.
Parameter
(
torch
.
tensor
(
self
.
config
.
logit_scale_init_value
)
)
self
.
image_to_mm_projection
=
nn
.
Linear
(
self
.
image_hidden_size
,
self
.
mm_hidden_size
)
self
.
image_to_mm_projection
=
nn
.
Linear
(
self
.
image_hidden_size
,
self
.
mm_hidden_size
)
self
.
text_to_mm_projection
=
nn
.
Linear
(
self
.
text_hidden_size
,
self
.
mm_hidden_size
)
self
.
text_to_mm_projection
=
nn
.
Linear
(
self
.
text_hidden_size
,
self
.
mm_hidden_size
)
...
...
src/transformers/models/groupvit/modeling_groupvit.py
View file @
49e812d1
...
@@ -1368,7 +1368,7 @@ class GroupViTModel(GroupViTPreTrainedModel):
...
@@ -1368,7 +1368,7 @@ class GroupViTModel(GroupViTPreTrainedModel):
nn
.
ReLU
(
inplace
=
True
),
nn
.
ReLU
(
inplace
=
True
),
nn
.
Linear
(
self
.
projection_intermediate_dim
,
self
.
projection_dim
,
bias
=
True
),
nn
.
Linear
(
self
.
projection_intermediate_dim
,
self
.
projection_dim
,
bias
=
True
),
)
)
self
.
logit_scale
=
nn
.
Parameter
(
torch
.
ones
([])
*
self
.
config
.
logit_scale_init_value
)
self
.
logit_scale
=
nn
.
Parameter
(
torch
.
tensor
(
self
.
config
.
logit_scale_init_value
)
)
# Initialize weights and apply final processing
# Initialize weights and apply final processing
self
.
post_init
()
self
.
post_init
()
...
...
src/transformers/models/oneformer/modeling_oneformer.py
View file @
49e812d1
...
@@ -399,7 +399,7 @@ class OneFormerLoss(nn.Module):
...
@@ -399,7 +399,7 @@ class OneFormerLoss(nn.Module):
self
.
importance_sample_ratio
=
importance_sample_ratio
self
.
importance_sample_ratio
=
importance_sample_ratio
self
.
contrastive_temperature
=
contrastive_temperature
self
.
contrastive_temperature
=
contrastive_temperature
if
self
.
contrastive_temperature
is
not
None
:
if
self
.
contrastive_temperature
is
not
None
:
self
.
logit_scale
=
nn
.
Parameter
(
torch
.
ones
([])
*
np
.
log
(
1
/
contrastive_temperature
))
self
.
logit_scale
=
nn
.
Parameter
(
torch
.
tensor
(
np
.
log
(
1
/
contrastive_temperature
))
)
def
_max_by_axis
(
self
,
the_list
:
List
[
List
[
int
]])
->
List
[
int
]:
def
_max_by_axis
(
self
,
the_list
:
List
[
List
[
int
]])
->
List
[
int
]:
maxes
=
the_list
[
0
]
maxes
=
the_list
[
0
]
...
...
src/transformers/models/owlvit/modeling_owlvit.py
View file @
49e812d1
...
@@ -1065,7 +1065,7 @@ class OwlViTModel(OwlViTPreTrainedModel):
...
@@ -1065,7 +1065,7 @@ class OwlViTModel(OwlViTPreTrainedModel):
self
.
visual_projection
=
nn
.
Linear
(
self
.
vision_embed_dim
,
self
.
projection_dim
,
bias
=
False
)
self
.
visual_projection
=
nn
.
Linear
(
self
.
vision_embed_dim
,
self
.
projection_dim
,
bias
=
False
)
self
.
text_projection
=
nn
.
Linear
(
self
.
text_embed_dim
,
self
.
projection_dim
,
bias
=
False
)
self
.
text_projection
=
nn
.
Linear
(
self
.
text_embed_dim
,
self
.
projection_dim
,
bias
=
False
)
self
.
logit_scale
=
nn
.
Parameter
(
torch
.
ones
([])
*
config
.
logit_scale_init_value
)
self
.
logit_scale
=
nn
.
Parameter
(
torch
.
tensor
(
config
.
logit_scale_init_value
)
)
# Initialize weights and apply final processing
# Initialize weights and apply final processing
self
.
post_init
()
self
.
post_init
()
...
...
src/transformers/models/vision_text_dual_encoder/modeling_vision_text_dual_encoder.py
View file @
49e812d1
...
@@ -204,7 +204,7 @@ class VisionTextDualEncoderModel(PreTrainedModel):
...
@@ -204,7 +204,7 @@ class VisionTextDualEncoderModel(PreTrainedModel):
self
.
visual_projection
=
nn
.
Linear
(
self
.
vision_embed_dim
,
self
.
projection_dim
,
bias
=
False
)
self
.
visual_projection
=
nn
.
Linear
(
self
.
vision_embed_dim
,
self
.
projection_dim
,
bias
=
False
)
self
.
text_projection
=
nn
.
Linear
(
self
.
text_embed_dim
,
self
.
projection_dim
,
bias
=
False
)
self
.
text_projection
=
nn
.
Linear
(
self
.
text_embed_dim
,
self
.
projection_dim
,
bias
=
False
)
self
.
logit_scale
=
nn
.
Parameter
(
torch
.
ones
([])
*
self
.
config
.
logit_scale_init_value
)
self
.
logit_scale
=
nn
.
Parameter
(
torch
.
tensor
(
self
.
config
.
logit_scale_init_value
)
)
@
add_start_docstrings_to_model_forward
(
VISION_TEXT_DUAL_ENCODER_TEXT_INPUTS_DOCSTRING
)
@
add_start_docstrings_to_model_forward
(
VISION_TEXT_DUAL_ENCODER_TEXT_INPUTS_DOCSTRING
)
def
get_text_features
(
def
get_text_features
(
...
...
src/transformers/models/x_clip/modeling_x_clip.py
View file @
49e812d1
...
@@ -1309,7 +1309,7 @@ class XCLIPModel(XCLIPPreTrainedModel):
...
@@ -1309,7 +1309,7 @@ class XCLIPModel(XCLIPPreTrainedModel):
self
.
visual_projection
=
nn
.
Linear
(
self
.
vision_embed_dim
,
self
.
projection_dim
,
bias
=
False
)
self
.
visual_projection
=
nn
.
Linear
(
self
.
vision_embed_dim
,
self
.
projection_dim
,
bias
=
False
)
self
.
text_projection
=
nn
.
Linear
(
self
.
text_embed_dim
,
self
.
projection_dim
,
bias
=
False
)
self
.
text_projection
=
nn
.
Linear
(
self
.
text_embed_dim
,
self
.
projection_dim
,
bias
=
False
)
self
.
logit_scale
=
nn
.
Parameter
(
torch
.
ones
([])
*
self
.
config
.
logit_scale_init_value
)
self
.
logit_scale
=
nn
.
Parameter
(
torch
.
tensor
(
self
.
config
.
logit_scale_init_value
)
)
self
.
prompts_visual_layernorm
=
nn
.
LayerNorm
(
self
.
vision_embed_dim
,
eps
=
config
.
vision_config
.
layer_norm_eps
)
self
.
prompts_visual_layernorm
=
nn
.
LayerNorm
(
self
.
vision_embed_dim
,
eps
=
config
.
vision_config
.
layer_norm_eps
)
self
.
prompts_visual_projection
=
nn
.
Parameter
(
torch
.
randn
(
self
.
vision_embed_dim
,
self
.
projection_dim
))
self
.
prompts_visual_projection
=
nn
.
Parameter
(
torch
.
randn
(
self
.
vision_embed_dim
,
self
.
projection_dim
))
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment