Unverified commit e1eb45d3, authored by Tyler Michael Smith, committed by GitHub
Browse files

[Bugfix] Fix precommit - line too long in pixtral.py (#14960)


Signed-off-by: Tyler Michael Smith <tyler@neuralmagic.com>
Co-authored-by: DarkLight1337 <tlleungac@connect.ust.hk>
parent 89fca671
...@@ -235,7 +235,7 @@ mbstrdecoder==1.1.3 ...@@ -235,7 +235,7 @@ mbstrdecoder==1.1.3
# typepy # typepy
mdurl==0.1.2 mdurl==0.1.2
# via markdown-it-py # via markdown-it-py
mistral-common==1.5.1 mistral-common==1.5.4
# via -r requirements/test.in # via -r requirements/test.in
more-itertools==10.5.0 more-itertools==10.5.0
# via lm-eval # via lm-eval
......
...@@ -73,7 +73,7 @@ class PixtralImagePixelInputs(TypedDict): ...@@ -73,7 +73,7 @@ class PixtralImagePixelInputs(TypedDict):
""" """
A boolean mask indicating which image embeddings correspond A boolean mask indicating which image embeddings correspond
to patch tokens. to patch tokens.
Shape: `(batch_size, num_images, num_embeds)` Shape: `(batch_size, num_images, num_embeds)`
""" """
...@@ -849,10 +849,10 @@ class VisionTransformer(nn.Module): ...@@ -849,10 +849,10 @@ class VisionTransformer(nn.Module):
) -> torch.Tensor: ) -> torch.Tensor:
""" """
Args: Args:
images: list of N_img images of variable sizes, images: list of N_img images of variable sizes,
each of shape (C, H, W) each of shape (C, H, W)
Returns: Returns:
image_features: tensor of token features for image_features: tensor of token features for
all tokens of all images of shape (N_toks, D) all tokens of all images of shape (N_toks, D)
""" """
# pass images through initial convolution independently # pass images through initial convolution independently
...@@ -935,7 +935,8 @@ class PatchMerger(nn.Module): ...@@ -935,7 +935,8 @@ class PatchMerger(nn.Module):
# x is (N, vision_encoder_dim) # x is (N, vision_encoder_dim)
x = self.permute(x, image_sizes) x = self.permute(x, image_sizes)
# x is (N / spatial_merge_size ** 2, vision_encoder_dim * spatial_merge_size ** 2) # x is (N / spatial_merge_size ** 2,
# vision_encoder_dim * spatial_merge_size ** 2)
x = self.merging_layer(x) x = self.merging_layer(x)
# x is (N / spatial_merge_size ** 2, vision_encoder_dim) # x is (N / spatial_merge_size ** 2, vision_encoder_dim)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment