chenpangpang / transformers · Commits

Commit 49e812d1 (unverified)
Authored Jun 30, 2023 by Stas Bekman; committed by GitHub, Jun 30, 2023
[several models] improve readability (#24585)
* [modeling_clip.py] improve readability
* apply to other models
* fix
Parent: 134caef3
Showing 14 changed files with 15 additions and 15 deletions (+15 -15)
src/transformers/models/align/modeling_align.py (+1 -1)
src/transformers/models/altclip/modeling_altclip.py (+1 -1)
src/transformers/models/blip/modeling_blip.py (+1 -1)
src/transformers/models/bridgetower/modeling_bridgetower.py (+1 -1)
src/transformers/models/chinese_clip/modeling_chinese_clip.py (+1 -1)
src/transformers/models/clap/modeling_clap.py (+2 -2)
src/transformers/models/clip/modeling_clip.py (+1 -1)
src/transformers/models/clipseg/modeling_clipseg.py (+1 -1)
src/transformers/models/flava/modeling_flava.py (+1 -1)
src/transformers/models/groupvit/modeling_groupvit.py (+1 -1)
src/transformers/models/oneformer/modeling_oneformer.py (+1 -1)
src/transformers/models/owlvit/modeling_owlvit.py (+1 -1)
src/transformers/models/vision_text_dual_encoder/modeling_vision_text_dual_encoder.py (+1 -1)
src/transformers/models/x_clip/modeling_x_clip.py (+1 -1)
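Every hunk below applies the same readability rewrite: a scalar nn.Parameter that used to be built as torch.ones([]) * value is now built directly as torch.tensor(value) (in ClapModel and OneFormerLoss the value is an np.log(...) expression, but the rewrite is the same). A minimal standalone sketch of why the two forms are interchangeable; the 2.6592 constant and the variable names here are illustrative stand-ins, not part of the commit:

import torch
from torch import nn

value = 2.6592  # stand-in for e.g. config.logit_scale_init_value

# Old pattern: create a 0-dim tensor of ones, then scale it by the init value.
old_style = nn.Parameter(torch.ones([]) * value)
# New pattern: build the 0-dim tensor from the init value directly.
new_style = nn.Parameter(torch.tensor(value))

assert old_style.shape == new_style.shape == torch.Size([])  # both are scalar (0-dim)
assert torch.allclose(old_style, new_style)                  # same initial value

Both forms yield an identical 0-dim parameter, so downstream uses such as logit_scale.exp() behave the same; the new form simply states the initialization intent directly.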
src/transformers/models/align/modeling_align.py
@@ -1444,7 +1444,7 @@ class AlignModel(AlignPreTrainedModel):
         self.vision_model = AlignVisionModel(vision_config)
         self.text_projection = nn.Linear(self.text_embed_dim, self.projection_dim)
-        self.temperature = nn.Parameter(torch.ones([]) * self.config.temperature_init_value)
+        self.temperature = nn.Parameter(torch.tensor(self.config.temperature_init_value))
         # Initialize weights and apply final processing
         self.post_init()

src/transformers/models/altclip/modeling_altclip.py
@@ -1506,7 +1506,7 @@ class AltCLIPModel(AltCLIPPreTrainedModel):
         self.visual_projection = nn.Linear(self.vision_embed_dim, self.projection_dim, bias=False)
         self.text_projection = nn.Linear(self.text_embed_dim, self.projection_dim, bias=False)
-        self.logit_scale = nn.Parameter(torch.ones([]) * self.config.logit_scale_init_value)
+        self.logit_scale = nn.Parameter(torch.tensor(self.config.logit_scale_init_value))
         # Initialize weights and apply final processing
         self.post_init()

src/transformers/models/blip/modeling_blip.py
@@ -743,7 +743,7 @@ class BlipModel(BlipPreTrainedModel):
         self.visual_projection = nn.Linear(self.vision_embed_dim, self.projection_dim, bias=False)
         self.text_projection = nn.Linear(self.text_embed_dim, self.projection_dim, bias=False)
-        self.logit_scale = nn.Parameter(torch.ones([]) * self.config.logit_scale_init_value)
+        self.logit_scale = nn.Parameter(torch.tensor(self.config.logit_scale_init_value))
         # Initialize weights and apply final processing
         self.post_init()

src/transformers/models/bridgetower/modeling_bridgetower.py
@@ -1778,7 +1778,7 @@ class BridgeTowerForContrastiveLearning(BridgeTowerPreTrainedModel):
         self.itc_image_head = BridgeTowerContrastiveHead(config.hidden_size, config.contrastive_hidden_size)
         self.itc_cross_modal_head = BridgeTowerContrastiveHead(config.hidden_size * 2, config.contrastive_hidden_size)
-        self.logit_scale = nn.Parameter(torch.ones([]) * self.config.logit_scale_init_value)
+        self.logit_scale = nn.Parameter(torch.tensor(self.config.logit_scale_init_value))
         # Initialize weights and apply final processing
         self.post_init()

src/transformers/models/chinese_clip/modeling_chinese_clip.py
@@ -1376,7 +1376,7 @@ class ChineseCLIPModel(ChineseCLIPPreTrainedModel):
         self.visual_projection = nn.Linear(self.vision_embed_dim, self.projection_dim, bias=False)
         self.text_projection = nn.Linear(self.text_embed_dim, self.projection_dim, bias=False)
-        self.logit_scale = nn.Parameter(torch.ones([]) * self.config.logit_scale_init_value)
+        self.logit_scale = nn.Parameter(torch.tensor(self.config.logit_scale_init_value))
         # Initialize weights and apply final processing
         self.post_init()

src/transformers/models/clap/modeling_clap.py
@@ -1956,8 +1956,8 @@ class ClapModel(ClapPreTrainedModel):
         text_config = config.text_config
         audio_config = config.audio_config
-        self.logit_scale_a = nn.Parameter(torch.ones([]) * np.log(config.logit_scale_init_value))
-        self.logit_scale_t = nn.Parameter(torch.ones([]) * np.log(config.logit_scale_init_value))
+        self.logit_scale_a = nn.Parameter(torch.tensor(np.log(config.logit_scale_init_value)))
+        self.logit_scale_t = nn.Parameter(torch.tensor(np.log(config.logit_scale_init_value)))
         self.projection_dim = config.projection_dim

src/transformers/models/clip/modeling_clip.py
@@ -977,7 +977,7 @@ class CLIPModel(CLIPPreTrainedModel):
         self.visual_projection = nn.Linear(self.vision_embed_dim, self.projection_dim, bias=False)
         self.text_projection = nn.Linear(self.text_embed_dim, self.projection_dim, bias=False)
-        self.logit_scale = nn.Parameter(torch.ones([]) * self.config.logit_scale_init_value)
+        self.logit_scale = nn.Parameter(torch.tensor(self.config.logit_scale_init_value))
         # Initialize weights and apply final processing
         self.post_init()

src/transformers/models/clipseg/modeling_clipseg.py
@@ -979,7 +979,7 @@ class CLIPSegModel(CLIPSegPreTrainedModel):
         self.visual_projection = nn.Linear(self.vision_embed_dim, self.projection_dim, bias=False)
         self.text_projection = nn.Linear(self.text_embed_dim, self.projection_dim, bias=False)
-        self.logit_scale = nn.Parameter(torch.ones([]) * self.config.logit_scale_init_value)
+        self.logit_scale = nn.Parameter(torch.tensor(self.config.logit_scale_init_value))
         # Initialize weights and apply final processing
         self.post_init()

src/transformers/models/flava/modeling_flava.py
@@ -1229,7 +1229,7 @@ class FlavaModel(FlavaPreTrainedModel):
         self.image_projection = nn.Linear(self.image_hidden_size, self.projection_dim)
         self.text_projection = nn.Linear(self.text_hidden_size, self.projection_dim)
-        self.logit_scale = nn.Parameter(torch.ones([]) * self.config.logit_scale_init_value)
+        self.logit_scale = nn.Parameter(torch.tensor(self.config.logit_scale_init_value))
         self.image_to_mm_projection = nn.Linear(self.image_hidden_size, self.mm_hidden_size)
         self.text_to_mm_projection = nn.Linear(self.text_hidden_size, self.mm_hidden_size)

src/transformers/models/groupvit/modeling_groupvit.py
@@ -1368,7 +1368,7 @@ class GroupViTModel(GroupViTPreTrainedModel):
             nn.ReLU(inplace=True),
             nn.Linear(self.projection_intermediate_dim, self.projection_dim, bias=True),
         )
-        self.logit_scale = nn.Parameter(torch.ones([]) * self.config.logit_scale_init_value)
+        self.logit_scale = nn.Parameter(torch.tensor(self.config.logit_scale_init_value))
         # Initialize weights and apply final processing
         self.post_init()

src/transformers/models/oneformer/modeling_oneformer.py
@@ -399,7 +399,7 @@ class OneFormerLoss(nn.Module):
         self.importance_sample_ratio = importance_sample_ratio
         self.contrastive_temperature = contrastive_temperature
         if self.contrastive_temperature is not None:
-            self.logit_scale = nn.Parameter(torch.ones([]) * np.log(1 / contrastive_temperature))
+            self.logit_scale = nn.Parameter(torch.tensor(np.log(1 / contrastive_temperature)))

     def _max_by_axis(self, the_list: List[List[int]]) -> List[int]:
         maxes = the_list[0]

src/transformers/models/owlvit/modeling_owlvit.py
@@ -1065,7 +1065,7 @@ class OwlViTModel(OwlViTPreTrainedModel):
         self.visual_projection = nn.Linear(self.vision_embed_dim, self.projection_dim, bias=False)
         self.text_projection = nn.Linear(self.text_embed_dim, self.projection_dim, bias=False)
-        self.logit_scale = nn.Parameter(torch.ones([]) * config.logit_scale_init_value)
+        self.logit_scale = nn.Parameter(torch.tensor(config.logit_scale_init_value))
         # Initialize weights and apply final processing
         self.post_init()

src/transformers/models/vision_text_dual_encoder/modeling_vision_text_dual_encoder.py
@@ -204,7 +204,7 @@ class VisionTextDualEncoderModel(PreTrainedModel):
         self.visual_projection = nn.Linear(self.vision_embed_dim, self.projection_dim, bias=False)
         self.text_projection = nn.Linear(self.text_embed_dim, self.projection_dim, bias=False)
-        self.logit_scale = nn.Parameter(torch.ones([]) * self.config.logit_scale_init_value)
+        self.logit_scale = nn.Parameter(torch.tensor(self.config.logit_scale_init_value))

     @add_start_docstrings_to_model_forward(VISION_TEXT_DUAL_ENCODER_TEXT_INPUTS_DOCSTRING)
     def get_text_features(

src/transformers/models/x_clip/modeling_x_clip.py
@@ -1309,7 +1309,7 @@ class XCLIPModel(XCLIPPreTrainedModel):
         self.visual_projection = nn.Linear(self.vision_embed_dim, self.projection_dim, bias=False)
         self.text_projection = nn.Linear(self.text_embed_dim, self.projection_dim, bias=False)
-        self.logit_scale = nn.Parameter(torch.ones([]) * self.config.logit_scale_init_value)
+        self.logit_scale = nn.Parameter(torch.tensor(self.config.logit_scale_init_value))
         self.prompts_visual_layernorm = nn.LayerNorm(self.vision_embed_dim, eps=config.vision_config.layer_norm_eps)
         self.prompts_visual_projection = nn.Parameter(torch.randn(self.vision_embed_dim, self.projection_dim))