Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
chenpangpang
transformers
Commits
73a73b41
Unverified
Commit
73a73b41
authored
Mar 21, 2024
by
Arthur
Committed by
GitHub
Mar 21, 2024
Browse files
[`LlavaNext`] Fix llava next unsafe imports (#29773)
* path llava-next * styling * styling
parent
2ddceef9
Changes
4
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
41 additions
and
41 deletions
+41
-41
src/transformers/image_processing_utils.py
src/transformers/image_processing_utils.py
+38
-0
src/transformers/models/auto/image_processing_auto.py
src/transformers/models/auto/image_processing_auto.py
+1
-1
src/transformers/models/llava_next/image_processing_llava_next.py
src/transformers/models/llava_next/image_processing_llava_next.py
+1
-39
src/transformers/models/llava_next/modeling_llava_next.py
src/transformers/models/llava_next/modeling_llava_next.py
+1
-1
No files found.
src/transformers/image_processing_utils.py
View file @
73a73b41
...
...
@@ -748,6 +748,44 @@ def get_size_dict(
return
size_dict
def select_best_resolution(original_size: tuple, possible_resolutions: list) -> tuple:
    """
    Pick the candidate resolution that best accommodates an image of the given size.

    Each candidate is scored by two quantities. The effective resolution is the area of the image once it has
    been scaled, with its aspect ratio preserved, to fit inside the candidate, capped at the original image area.
    The wasted resolution is the candidate area that remains after subtracting the effective resolution. The
    candidate with the largest effective resolution wins; among candidates with equal effective resolution, the
    one with the smallest wasted resolution wins. If candidates are still tied, the earliest one in the list is kept.

    Args:
        original_size (tuple):
            Size of the source image in the format (height, width).
        possible_resolutions (list):
            Candidate resolutions in the format [(height1, width1), (height2, width2), ...].

    Returns:
        tuple: The selected resolution in the format (height, width). `None` is returned when no candidate is
        selected (for example when `possible_resolutions` is empty).
    """
    original_height, original_width = original_size

    chosen = None
    top_effective = 0
    least_wasted = float("inf")

    for candidate_height, candidate_width in possible_resolutions:
        # Largest uniform scale factor at which the image still fits inside the candidate.
        ratio = min(candidate_width / original_width, candidate_height / original_height)
        fitted_width = int(original_width * ratio)
        fitted_height = int(original_height * ratio)

        # The effective area never exceeds the area of the original image.
        effective = min(fitted_width * fitted_height, original_width * original_height)
        wasted = candidate_width * candidate_height - effective

        strictly_better = effective > top_effective
        tie_with_less_waste = effective == top_effective and wasted < least_wasted
        if not (strictly_better or tie_with_less_waste):
            continue

        chosen = (candidate_height, candidate_width)
        top_effective = effective
        least_wasted = wasted

    return chosen
ImageProcessingMixin
.
push_to_hub
=
copy_func
(
ImageProcessingMixin
.
push_to_hub
)
if
ImageProcessingMixin
.
push_to_hub
.
__doc__
is
not
None
:
ImageProcessingMixin
.
push_to_hub
.
__doc__
=
ImageProcessingMixin
.
push_to_hub
.
__doc__
.
format
(
...
...
src/transformers/models/auto/image_processing_auto.py
View file @
73a73b41
...
...
@@ -77,7 +77,7 @@ IMAGE_PROCESSOR_MAPPING_NAMES = OrderedDict(
(
"layoutlmv3"
,
"LayoutLMv3ImageProcessor"
),
(
"levit"
,
"LevitImageProcessor"
),
(
"llava"
,
"CLIPImageProcessor"
),
(
"llava_next"
,
"
CLIP
ImageProcessor"
),
(
"llava_next"
,
"
LlavaNext
ImageProcessor"
),
(
"mask2former"
,
"Mask2FormerImageProcessor"
),
(
"maskformer"
,
"MaskFormerImageProcessor"
),
(
"mgp-str"
,
"ViTImageProcessor"
),
...
...
src/transformers/models/llava_next/image_processing_llava_next.py
View file @
73a73b41
...
...
@@ -19,7 +19,7 @@ from typing import Dict, List, Optional, Union
import
numpy
as
np
from
...image_processing_utils
import
BaseImageProcessor
,
BatchFeature
,
get_size_dict
from
...image_processing_utils
import
BaseImageProcessor
,
BatchFeature
,
get_size_dict
,
select_best_resolution
from
...image_transforms
import
(
convert_to_rgb
,
get_resize_output_image_size
,
...
...
@@ -51,44 +51,6 @@ if is_vision_available():
from
PIL
import
Image
def select_best_resolution(original_size: tuple, possible_resolutions: list) -> tuple:
    """
    Pick the candidate resolution that best accommodates an image of the given size.

    Each candidate is scored by two quantities. The effective resolution is the area of the image once it has
    been scaled, with its aspect ratio preserved, to fit inside the candidate, capped at the original image area.
    The wasted resolution is the candidate area that remains after subtracting the effective resolution. The
    candidate with the largest effective resolution wins; among candidates with equal effective resolution, the
    one with the smallest wasted resolution wins. If candidates are still tied, the earliest one in the list is kept.

    Args:
        original_size (tuple):
            Size of the source image in the format (height, width).
        possible_resolutions (list):
            Candidate resolutions in the format [(height1, width1), (height2, width2), ...].

    Returns:
        tuple: The selected resolution in the format (height, width). `None` is returned when no candidate is
        selected (for example when `possible_resolutions` is empty).
    """
    original_height, original_width = original_size

    chosen = None
    top_effective = 0
    least_wasted = float("inf")

    for candidate_height, candidate_width in possible_resolutions:
        # Largest uniform scale factor at which the image still fits inside the candidate.
        ratio = min(candidate_width / original_width, candidate_height / original_height)
        fitted_width = int(original_width * ratio)
        fitted_height = int(original_height * ratio)

        # The effective area never exceeds the area of the original image.
        effective = min(fitted_width * fitted_height, original_width * original_height)
        wasted = candidate_width * candidate_height - effective

        strictly_better = effective > top_effective
        tie_with_less_waste = effective == top_effective and wasted < least_wasted
        if not (strictly_better or tie_with_less_waste):
            continue

        chosen = (candidate_height, candidate_width)
        top_effective = effective
        least_wasted = wasted

    return chosen
def
divide_to_patches
(
image
:
np
.
array
,
patch_size
:
int
,
input_data_format
)
->
List
[
np
.
array
]:
"""
Divides an image into patches of a specified size.
...
...
src/transformers/models/llava_next/modeling_llava_next.py
View file @
73a73b41
...
...
@@ -24,6 +24,7 @@ from torch import nn
from
...
import
PreTrainedModel
from
...activations
import
ACT2FN
from
...cache_utils
import
Cache
from
...image_processing_utils
import
select_best_resolution
from
...modeling_outputs
import
ModelOutput
from
...utils
import
(
add_start_docstrings
,
...
...
@@ -33,7 +34,6 @@ from ...utils import (
)
from
..auto
import
AutoModel
,
AutoModelForCausalLM
from
.configuration_llava_next
import
LlavaNextConfig
from
.image_processing_llava_next
import
select_best_resolution
logger
=
logging
.
get_logger
(
__name__
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment