from typing import Union
import torch
from .internvl import load_internvl
from .japanese_clip import load_japanese_clip
from .open_clip import load_open_clip
# loading function must return (model, transform, tokenizer)
TYPE2FUNC = {
'open_clip': load_open_clip,
'ja_clip': load_japanese_clip,
'internvl': load_internvl,
}
MODEL_TYPES = list(TYPE2FUNC.keys())
def load_clip(
model_type: str,
model_name: str,
pretrained: str,
cache_dir: str,
device: Union[str, torch.device] = 'cuda'
):
assert model_type in MODEL_TYPES, f'model_type={model_type} is invalid!'
load_func = TYPE2FUNC[model_type]
return load_func(model_name=model_name, pretrained=pretrained, cache_dir=cache_dir, device=device)
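# Usage sketch, not part of the benchmark code: every registered loader above returns the
# (model, transform, tokenizer) triple noted in the comment. The model_name and checkpoint
# path below are placeholders for illustration only.
if __name__ == '__main__':
    model, transform, tokenizer = load_clip(
        model_type='internvl',
        model_name='internvl_c_retrieval_hf',   # assumed name; see the internvl loader below
        pretrained='/path/to/checkpoint',       # placeholder checkpoint path
        cache_dir='./cache',
        device='cuda',
    )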
# --------------------------------------------------------
# InternVL
# Copyright (c) 2023 OpenGVLab
# Licensed under The MIT License [see LICENSE for details]
# --------------------------------------------------------
import os
from typing import Union
from transformers.configuration_utils import PretrainedConfig
from transformers.utils import logging
logger = logging.get_logger(__name__)
class InternVisionConfig(PretrainedConfig):
r"""
This is the configuration class to store the configuration of a [`InternVisionModel`]. It is used to
instantiate a vision encoder according to the specified arguments, defining the model architecture.
Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
documentation from [`PretrainedConfig`] for more information.
Args:
num_channels (`int`, *optional*, defaults to 3):
Number of color channels in the input images (e.g., 3 for RGB).
patch_size (`int`, *optional*, defaults to 14):
The size (resolution) of each patch.
image_size (`int`, *optional*, defaults to 224):
The size (resolution) of each image.
qkv_bias (`bool`, *optional*, defaults to `False`):
Whether to add a bias to the queries, keys and values in the self-attention layers.
hidden_size (`int`, *optional*, defaults to 3200):
Dimensionality of the encoder layers and the pooler layer.
num_attention_heads (`int`, *optional*, defaults to 25):
Number of attention heads for each attention layer in the Transformer encoder.
intermediate_size (`int`, *optional*, defaults to 12800):
Dimensionality of the "intermediate" (i.e., feed-forward) layer in the Transformer encoder.
qk_normalization (`bool`, *optional*, defaults to `True`):
Whether to normalize the queries and keys in the self-attention layers.
num_hidden_layers (`int`, *optional*, defaults to 48):
Number of hidden layers in the Transformer encoder.
use_flash_attn (`bool`, *optional*, defaults to `True`):
Whether to use the flash attention mechanism.
hidden_act (`str` or `function`, *optional*, defaults to `"gelu"`):
The non-linear activation function (function or string) in the encoder and pooler. If string, `"gelu"`,
`"relu"`, `"selu"` and `"gelu_new"` are supported.
layer_norm_eps (`float`, *optional*, defaults to 1e-6):
The epsilon used by the layer normalization layers.
dropout (`float`, *optional*, defaults to 0.0):
The dropout probability for all fully connected layers in the embeddings, encoder, and pooler.
drop_path_rate (`float`, *optional*, defaults to 0.0):
Dropout rate for stochastic depth.
attention_dropout (`float`, *optional*, defaults to 0.0):
The dropout ratio for the attention probabilities.
initializer_range (`float`, *optional*, defaults to 0.02):
The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
initializer_factor (`float`, *optional*, defaults to 0.1):
A factor for layer scale.
"""
model_type = 'intern_vit_6b'
def __init__(
self,
num_channels=3,
patch_size=14,
image_size=224,
qkv_bias=False,
hidden_size=3200,
num_attention_heads=25,
intermediate_size=12800,
qk_normalization=True,
num_hidden_layers=48,
use_flash_attn=True,
hidden_act='gelu',
layer_norm_eps=1e-6,
dropout=0.0,
drop_path_rate=0.0,
attention_dropout=0.0,
initializer_range=0.02,
initializer_factor=0.1,
**kwargs,
):
super().__init__(**kwargs)
self.hidden_size = hidden_size
self.intermediate_size = intermediate_size
self.dropout = dropout
self.drop_path_rate = drop_path_rate
self.num_hidden_layers = num_hidden_layers
self.num_attention_heads = num_attention_heads
self.num_channels = num_channels
self.patch_size = patch_size
self.image_size = image_size
self.initializer_range = initializer_range
self.initializer_factor = initializer_factor
self.attention_dropout = attention_dropout
self.layer_norm_eps = layer_norm_eps
self.hidden_act = hidden_act
self.qkv_bias = qkv_bias
self.qk_normalization = qk_normalization
self.use_flash_attn = use_flash_attn
@classmethod
def from_pretrained(cls, pretrained_model_name_or_path: Union[str, os.PathLike], **kwargs) -> 'PretrainedConfig':
config_dict, kwargs = cls.get_config_dict(pretrained_model_name_or_path, **kwargs)
if 'vision_config' in config_dict:
config_dict = config_dict['vision_config']
if 'model_type' in config_dict and hasattr(cls, 'model_type') and config_dict['model_type'] != cls.model_type:
logger.warning(
f"You are using a model of type {config_dict['model_type']} to instantiate a model of type "
f'{cls.model_type}. This is not supported for all configurations of models and can yield errors.'
)
return cls.from_dict(config_dict, **kwargs)
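# Minimal sketch: building the config with its defaults and overriding two fields.
# The override values are illustrative only and do not correspond to a released checkpoint.
if __name__ == '__main__':
    config = InternVisionConfig(image_size=448, drop_path_rate=0.1)
    print(config.hidden_size, config.num_hidden_layers)  # 3200 48 (defaults kept)
    print(config.image_size, config.drop_path_rate)      # 448 0.1 (overridden)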
import torch
import torch.nn as nn
from einops import rearrange
try: # v1
from flash_attn.flash_attn_interface import \
flash_attn_unpadded_qkvpacked_func
except ImportError:  # v2
from flash_attn.flash_attn_interface import flash_attn_varlen_qkvpacked_func as flash_attn_unpadded_qkvpacked_func
from flash_attn.bert_padding import pad_input, unpad_input
class FlashAttention(nn.Module):
"""Implement the scaled dot product attention with softmax.
Arguments
---------
softmax_scale: The temperature to use for the softmax attention.
(default: 1/sqrt(d_keys) where d_keys is computed at
runtime)
attention_dropout: The dropout rate to apply to the attention
(default: 0.0)
"""
def __init__(self, softmax_scale=None, attention_dropout=0.0, device=None, dtype=None):
super().__init__()
self.softmax_scale = softmax_scale
self.dropout_p = attention_dropout
def forward(self, qkv, key_padding_mask=None, causal=False, cu_seqlens=None,
max_s=None, need_weights=False):
"""Implements the multihead softmax attention.
Arguments
---------
qkv: The tensor containing the query, key, and value. (B, S, 3, H, D) if key_padding_mask is None
if unpadded: (nnz, 3, h, d)
key_padding_mask: a bool tensor of shape (B, S)
"""
assert not need_weights
assert qkv.dtype in [torch.float16, torch.bfloat16]
assert qkv.is_cuda
if cu_seqlens is None:
batch_size = qkv.shape[0]
seqlen = qkv.shape[1]
if key_padding_mask is None:
qkv = rearrange(qkv, 'b s ... -> (b s) ...')
max_s = seqlen
cu_seqlens = torch.arange(0, (batch_size + 1) * seqlen, step=seqlen, dtype=torch.int32,
device=qkv.device)
output = flash_attn_unpadded_qkvpacked_func(
qkv, cu_seqlens, max_s, self.dropout_p if self.training else 0.0,
softmax_scale=self.softmax_scale, causal=causal
)
output = rearrange(output, '(b s) ... -> b s ...', b=batch_size)
else:
nheads = qkv.shape[-2]
x = rearrange(qkv, 'b s three h d -> b s (three h d)')
x_unpad, indices, cu_seqlens, max_s = unpad_input(x, key_padding_mask)
x_unpad = rearrange(x_unpad, 'nnz (three h d) -> nnz three h d', three=3, h=nheads)
output_unpad = flash_attn_unpadded_qkvpacked_func(
x_unpad, cu_seqlens, max_s, self.dropout_p if self.training else 0.0,
softmax_scale=self.softmax_scale, causal=causal
)
output = rearrange(pad_input(rearrange(output_unpad, 'nnz h d -> nnz (h d)'),
indices, batch_size, seqlen),
'b s (h d) -> b s h d', h=nheads)
else:
assert max_s is not None
output = flash_attn_unpadded_qkvpacked_func(
qkv, cu_seqlens, max_s, self.dropout_p if self.training else 0.0,
softmax_scale=self.softmax_scale, causal=causal
)
return output, None
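# Rough usage sketch for the packed-QKV path above. FlashAttention requires a CUDA tensor in
# fp16/bf16 (see the asserts in forward); the shapes follow the (B, S, 3, H, D) layout.
if __name__ == '__main__' and torch.cuda.is_available():
    attn = FlashAttention(attention_dropout=0.0).eval()
    qkv = torch.randn(2, 257, 3, 25, 128, dtype=torch.float16, device='cuda')
    out, _ = attn(qkv)
    print(out.shape)  # torch.Size([2, 257, 25, 128])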
# --------------------------------------------------------
# InternVL
# Copyright (c) 2023 OpenGVLab
# Licensed under The MIT License [see LICENSE for details]
# --------------------------------------------------------
from typing import Optional, Tuple, Union
import torch
import torch.nn.functional as F
import torch.utils.checkpoint
from einops import rearrange
from timm.models.layers import DropPath
from torch import nn
from transformers.activations import ACT2FN
from transformers.modeling_outputs import (BaseModelOutput,
BaseModelOutputWithPooling)
from transformers.modeling_utils import PreTrainedModel
from transformers.utils import logging
from .configuration_intern_vit import InternVisionConfig
try:
from .flash_attention import FlashAttention
has_flash_attn = True
except ImportError:
print('FlashAttention is not installed.')
has_flash_attn = False
logger = logging.get_logger(__name__)
class InternRMSNorm(nn.Module):
def __init__(self, hidden_size, eps=1e-6):
super().__init__()
self.weight = nn.Parameter(torch.ones(hidden_size))
self.variance_epsilon = eps
def forward(self, hidden_states):
input_dtype = hidden_states.dtype
hidden_states = hidden_states.to(torch.float32)
variance = hidden_states.pow(2).mean(-1, keepdim=True)
hidden_states = hidden_states * torch.rsqrt(variance + self.variance_epsilon)
return self.weight * hidden_states.to(input_dtype)
try:
from apex.normalization import FusedRMSNorm
InternRMSNorm = FusedRMSNorm # noqa
logger.info('Discovered apex.normalization.FusedRMSNorm - will use it instead of InternRMSNorm')
except ImportError:
# using the normal InternRMSNorm
pass
except Exception:
logger.warning('discovered apex but it failed to load, falling back to InternRMSNorm')
pass
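# Quick sanity sketch for the RMSNorm above (only meaningful when the pure-PyTorch
# InternRMSNorm is in use, i.e. apex is absent): with the default all-ones weight,
# each token of the output has a root-mean-square close to 1.
def _rmsnorm_unit_rms_check():
    norm = InternRMSNorm(8)
    x = torch.randn(2, 4, 8)
    y = norm(x)
    return y.pow(2).mean(-1)  # values close to 1.0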
class InternVisionEmbeddings(nn.Module):
def __init__(self, config: InternVisionConfig):
super().__init__()
self.config = config
self.embed_dim = config.hidden_size
self.image_size = config.image_size
self.patch_size = config.patch_size
self.class_embedding = nn.Parameter(
torch.randn(1, 1, self.embed_dim),
)
self.patch_embedding = nn.Conv2d(
in_channels=3, out_channels=self.embed_dim, kernel_size=self.patch_size, stride=self.patch_size
)
self.num_patches = (self.image_size // self.patch_size) ** 2
self.num_positions = self.num_patches + 1
self.position_embedding = nn.Parameter(torch.randn(1, self.num_positions, self.embed_dim))
def forward(self, pixel_values: torch.FloatTensor) -> torch.Tensor:
batch_size = pixel_values.shape[0]
target_dtype = self.patch_embedding.weight.dtype
patch_embeds = self.patch_embedding(pixel_values) # shape = [*, width, grid, grid]
patch_embeds = patch_embeds.flatten(2).transpose(1, 2)
class_embeds = self.class_embedding.expand(batch_size, 1, -1).to(target_dtype)
embeddings = torch.cat([class_embeds, patch_embeds], dim=1)
embeddings = embeddings + self.position_embedding.to(target_dtype)
return embeddings
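# Shape sketch for the embedding module: with the 224/14 defaults there are
# (224 // 14) ** 2 = 256 patches plus one class token, i.e. 257 positions.
# The tiny hidden_size below is illustrative only.
def _embedding_shape_check():
    emb = InternVisionEmbeddings(InternVisionConfig(hidden_size=64))
    out = emb(torch.randn(1, 3, 224, 224))
    return out.shape  # torch.Size([1, 257, 64])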
class InternAttention(nn.Module):
"""Multi-headed attention from 'Attention Is All You Need' paper"""
def __init__(self, config: InternVisionConfig):
super().__init__()
self.config = config
self.embed_dim = config.hidden_size
self.num_heads = config.num_attention_heads
self.use_flash_attn = config.use_flash_attn and has_flash_attn
if config.use_flash_attn and not has_flash_attn:
print('Warning: Flash Attention is not available, use_flash_attn is set to False.')
self.head_dim = self.embed_dim // self.num_heads
if self.head_dim * self.num_heads != self.embed_dim:
raise ValueError(
f'embed_dim must be divisible by num_heads (got `embed_dim`: {self.embed_dim} and `num_heads`:'
f' {self.num_heads}).'
)
self.scale = self.head_dim ** -0.5
self.qkv = nn.Linear(self.embed_dim, 3 * self.embed_dim, bias=config.qkv_bias)
self.attn_drop = nn.Dropout(config.attention_dropout)
self.proj_drop = nn.Dropout(config.dropout)
self.qk_normalization = config.qk_normalization
if self.qk_normalization:
self.q_norm = InternRMSNorm(self.embed_dim, eps=config.layer_norm_eps)
self.k_norm = InternRMSNorm(self.embed_dim, eps=config.layer_norm_eps)
if self.use_flash_attn:
self.inner_attn = FlashAttention(attention_dropout=config.attention_dropout)
self.proj = nn.Linear(self.embed_dim, self.embed_dim)
def _naive_attn(self, x):
B, N, C = x.shape
qkv = self.qkv(x).reshape(B, N, 3, self.num_heads, C // self.num_heads).permute(2, 0, 3, 1, 4)
q, k, v = qkv.unbind(0) # make torchscript happy (cannot use tensor as tuple)
if self.qk_normalization:
B_, H_, N_, D_ = q.shape
q = self.q_norm(q.transpose(1, 2).flatten(-2, -1)).view(B_, N_, H_, D_).transpose(1, 2)
k = self.k_norm(k.transpose(1, 2).flatten(-2, -1)).view(B_, N_, H_, D_).transpose(1, 2)
attn = ((q * self.scale) @ k.transpose(-2, -1))
attn = attn.softmax(dim=-1)
attn = self.attn_drop(attn)
x = (attn @ v).transpose(1, 2).reshape(B, N, C)
x = self.proj(x)
x = self.proj_drop(x)
return x
def _flash_attn(self, x, key_padding_mask=None, need_weights=False):
qkv = self.qkv(x)
qkv = rearrange(qkv, 'b s (three h d) -> b s three h d', three=3, h=self.num_heads)
if self.qk_normalization:
q, k, v = qkv.unbind(2)
q = self.q_norm(q.flatten(-2, -1)).view(q.shape)
k = self.k_norm(k.flatten(-2, -1)).view(k.shape)
qkv = torch.stack([q, k, v], dim=2)
context, _ = self.inner_attn(
qkv, key_padding_mask=key_padding_mask, need_weights=need_weights, causal=False
)
outs = self.proj(rearrange(context, 'b s h d -> b s (h d)'))
outs = self.proj_drop(outs)
return outs
def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
x = self._naive_attn(hidden_states) if not self.use_flash_attn else self._flash_attn(hidden_states)
return x
class InternMLP(nn.Module):
def __init__(self, config: InternVisionConfig):
super().__init__()
self.config = config
self.act = ACT2FN[config.hidden_act]
self.fc1 = nn.Linear(config.hidden_size, config.intermediate_size)
self.fc2 = nn.Linear(config.intermediate_size, config.hidden_size)
def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
hidden_states = self.fc1(hidden_states)
hidden_states = self.act(hidden_states)
hidden_states = self.fc2(hidden_states)
return hidden_states
class InternVisionEncoderLayer(nn.Module):
def __init__(self, config: InternVisionConfig, drop_path_rate: float):
super().__init__()
self.embed_dim = config.hidden_size
self.intermediate_size = config.intermediate_size
self.attn = InternAttention(config)
self.mlp = InternMLP(config)
self.norm1 = InternRMSNorm(self.embed_dim, eps=config.layer_norm_eps)
self.norm2 = InternRMSNorm(self.embed_dim, eps=config.layer_norm_eps)
self.ls1 = nn.Parameter(config.initializer_factor * torch.ones(self.embed_dim))
self.ls2 = nn.Parameter(config.initializer_factor * torch.ones(self.embed_dim))
self.drop_path1 = DropPath(drop_path_rate) if drop_path_rate > 0. else nn.Identity()
self.drop_path2 = DropPath(drop_path_rate) if drop_path_rate > 0. else nn.Identity()
def forward(
self,
hidden_states: torch.Tensor,
) -> Tuple[torch.FloatTensor, Optional[torch.FloatTensor], Optional[Tuple[torch.FloatTensor]]]:
"""
Args:
hidden_states (`torch.FloatTensor`): input to the layer of shape `(batch, seq_len, embed_dim)`
"""
hidden_states = hidden_states + self.drop_path1(self.attn(self.norm1(hidden_states)) * self.ls1)
hidden_states = hidden_states + self.drop_path2(self.mlp(self.norm2(hidden_states)) * self.ls2)
return hidden_states
class InternVisionEncoder(nn.Module):
"""
Transformer encoder consisting of `config.num_hidden_layers` self attention layers. Each layer is a
[`InternEncoderLayer`].
Args:
config (`InternConfig`):
The corresponding vision configuration for the `InternEncoder`.
"""
def __init__(self, config: InternVisionConfig):
super().__init__()
self.config = config
# stochastic depth decay rule
dpr = [x.item() for x in torch.linspace(0, config.drop_path_rate, config.num_hidden_layers)]
self.layers = nn.ModuleList([
InternVisionEncoderLayer(config, dpr[idx]) for idx in range(config.num_hidden_layers)])
self.gradient_checkpointing = True
def forward(
self,
inputs_embeds,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
) -> Union[Tuple, BaseModelOutput]:
r"""
Args:
inputs_embeds (`torch.FloatTensor` of shape `(batch_size, sequence_length, hidden_size)`):
Embedded representation of the inputs. Should be float, not int tokens.
output_hidden_states (`bool`, *optional*):
Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors
for more detail.
return_dict (`bool`, *optional*):
Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
"""
output_hidden_states = (
output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
)
return_dict = return_dict if return_dict is not None else self.config.use_return_dict
encoder_states = () if output_hidden_states else None
hidden_states = inputs_embeds
for idx, encoder_layer in enumerate(self.layers):
if output_hidden_states:
encoder_states = encoder_states + (hidden_states,)
if self.gradient_checkpointing and self.training:
layer_outputs = torch.utils.checkpoint.checkpoint(
encoder_layer,
hidden_states)
else:
layer_outputs = encoder_layer(
hidden_states,
)
hidden_states = layer_outputs
if output_hidden_states:
encoder_states = encoder_states + (hidden_states,)
if not return_dict:
return tuple(v for v in [hidden_states, encoder_states] if v is not None)
return BaseModelOutput(
last_hidden_state=hidden_states, hidden_states=encoder_states
)
class InternVisionModel(PreTrainedModel):
main_input_name = 'pixel_values'
config_class = InternVisionConfig
def __init__(self, config: InternVisionConfig):
super().__init__(config)
self.config = config
self.embeddings = InternVisionEmbeddings(config)
self.encoder = InternVisionEncoder(config)
def resize_pos_embeddings(self, old_size, new_size, patch_size):
pos_emb = self.embeddings.position_embedding
_, num_positions, embed_dim = pos_emb.shape
cls_emb = pos_emb[:, :1, :]
pos_emb = pos_emb[:, 1:, :].reshape(1, old_size // patch_size, old_size // patch_size, -1).permute(0, 3, 1, 2)
pos_emb = F.interpolate(pos_emb.float(), size=new_size // patch_size, mode='bicubic', align_corners=False)
pos_emb = pos_emb.to(cls_emb.dtype).reshape(1, embed_dim, -1).permute(0, 2, 1)
pos_emb = torch.cat([cls_emb, pos_emb], dim=1)
self.embeddings.position_embedding = nn.Parameter(pos_emb)
logger.info('Resized position embeddings from {} to {}'.format(old_size, new_size))
def get_input_embeddings(self):
return self.embeddings
def forward(
self,
pixel_values: Optional[torch.FloatTensor] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
pixel_embeds: Optional[torch.FloatTensor] = None,
) -> Union[Tuple, BaseModelOutputWithPooling]:
output_hidden_states = (
output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
)
return_dict = return_dict if return_dict is not None else self.config.use_return_dict
if pixel_values is None and pixel_embeds is None:
raise ValueError('You have to specify pixel_values or pixel_embeds')
if pixel_embeds is not None:
hidden_states = pixel_embeds
else:
if len(pixel_values.shape) == 4:
hidden_states = self.embeddings(pixel_values)
else:
raise ValueError(f'wrong pixel_values size: {pixel_values.shape}')
encoder_outputs = self.encoder(
inputs_embeds=hidden_states,
output_hidden_states=output_hidden_states,
return_dict=return_dict,
)
last_hidden_state = encoder_outputs.last_hidden_state
pooled_output = last_hidden_state[:, 0, :]
if not return_dict:
return (last_hidden_state, pooled_output) + encoder_outputs[1:]
return BaseModelOutputWithPooling(
last_hidden_state=last_hidden_state,
pooler_output=pooled_output,
hidden_states=encoder_outputs.hidden_states,
attentions=encoder_outputs.attentions,
)
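# Minimal forward-pass sketch with a deliberately tiny config (not the released
# InternViT-6B sizes), just to illustrate the expected input/output shapes.
if __name__ == '__main__':
    tiny_cfg = InternVisionConfig(hidden_size=64, intermediate_size=128,
                                  num_attention_heads=4, num_hidden_layers=2,
                                  use_flash_attn=False)
    vit = InternVisionModel(tiny_cfg).eval()
    with torch.no_grad():
        out = vit(pixel_values=torch.randn(1, 3, 224, 224))
    print(out.last_hidden_state.shape, out.pooler_output.shape)  # (1, 257, 64) (1, 64)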
# --------------------------------------------------------
# InternVL
# Copyright (c) 2023 OpenGVLab
# Licensed under The MIT License [see LICENSE for details]
# --------------------------------------------------------
from .internvl_c_pytorch import load_internvl_c_pytorch
from .internvl_huggingface import (load_internvl_c_huggingface,
load_internvl_g_huggingface)
def load_internvl(model_name, pretrained, cache_dir, device):
if model_name == 'internvl_c_classification':
return load_internvl_c_pytorch(pretrained, device, 'classification')
elif model_name == 'internvl_c_retrieval':
return load_internvl_c_pytorch(pretrained, device, 'retrieval')
elif model_name == 'internvl_c_classification_hf':
return load_internvl_c_huggingface(pretrained, device, 'classification')
elif model_name == 'internvl_c_retrieval_hf':
return load_internvl_c_huggingface(pretrained, device, 'retrieval')
elif model_name == 'internvl_g_classification_hf':
return load_internvl_g_huggingface(pretrained, device, 'classification')
elif model_name == 'internvl_g_retrieval_hf':
return load_internvl_g_huggingface(pretrained, device, 'retrieval')
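# Dispatch sketch (the checkpoint path is a placeholder): model_name picks either the
# plain-PyTorch loader or one of the HuggingFace-format loaders registered above.
if __name__ == '__main__':
    model, transform, tokenizer = load_internvl(
        model_name='internvl_c_retrieval_hf',
        pretrained='/path/to/InternVL-14B-224px',  # placeholder checkpoint directory
        cache_dir='./cache',                        # accepted but unused by these loaders
        device='cuda',
    )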
# --------------------------------------------------------
# InternVL
# Copyright (c) 2023 OpenGVLab
# Licensed under The MIT License [see LICENSE for details]
# --------------------------------------------------------
import os
import torch
import torchvision.transforms as T
from torch import nn
from torchvision.transforms import InterpolationMode
from transformers import LlamaTokenizer
from .internvl_c import InternVL_C
try:
from .flash_attention import FlashAttention
except ImportError:
print('FlashAttention is not installed.')
class InternVLTokenizer(nn.Module):
def __init__(self, model_path):
super(InternVLTokenizer, self).__init__()
self.tokenizer = LlamaTokenizer.from_pretrained(model_path)
self.tokenizer.pad_token = ' ' # allow padding
self.tokenizer.add_eos_token = True
def forward(self, text, prefix='summarize:'):
if type(text) == str:
text = prefix + text
elif type(text) == list:
text = [prefix + item for item in text]
text = self.tokenizer(text, return_tensors='pt', max_length=80, truncation=True, padding=True).input_ids
return text
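# Usage sketch for the tokenizer wrapper above. The path is a placeholder for a directory
# containing a LLaMA tokenizer; forward prepends 'summarize:' to each input text.
def _tokenizer_usage_sketch(model_path='/path/to/chinese_alpaca_lora_7b'):
    tok = InternVLTokenizer(model_path)
    ids = tok(['a photo of a cat', 'a photo of a dog'])
    return ids.shape  # (2, padded_length), truncated at max_length=80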
def build_transform(task, image_size=224, mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]):
if task == 'retrieval':
transform = T.Compose([
T.Lambda(lambda img: img.convert('RGB') if img.mode != 'RGB' else img),
T.Resize((image_size, image_size), interpolation=InterpolationMode.BICUBIC),
T.ToTensor(),
T.Normalize(mean=mean, std=std)])
else:
transform = T.Compose([
T.Lambda(lambda img: img.convert('RGB') if img.mode != 'RGB' else img),
T.Resize(image_size, interpolation=InterpolationMode.BICUBIC),
T.CenterCrop(image_size),
T.ToTensor(),
T.Normalize(mean=mean, std=std)])
return transform
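# Transform sketch: the 'retrieval' branch resizes directly to a square of side image_size,
# any other task resizes the shorter side and center-crops; both normalize with the
# ImageNet mean/std defaults above.
def _transform_shape_sketch():
    from PIL import Image
    transform = build_transform('retrieval', image_size=224)
    pixel_values = transform(Image.new('RGB', (640, 480))).unsqueeze(0)
    return pixel_values.shape  # torch.Size([1, 3, 224, 224])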
def get_model_and_transform(task, image_size, device):
llm_path = os.path.split(os.path.realpath(__file__))[0]
llm_path = os.path.join(llm_path, 'chinese_alpaca_lora_7b')
model = InternVL_C(img_size=image_size, layerscale_force_fp32=True, llm_path=llm_path)
model = model.to(torch.float16).to(device)
transform = build_transform(task, image_size)
return model, transform
def load_internvl_c_pytorch(ckpt_path, device, task, image_size=224):
llm_path = os.path.split(os.path.realpath(__file__))[0]
llm_path = os.path.join(llm_path, 'chinese_alpaca_lora_7b')
tokenizer = InternVLTokenizer(llm_path)
model, transform = get_model_and_transform(task=task, image_size=image_size, device=device)
ckpt = torch.load(ckpt_path, map_location='cpu')
model.load_state_dict(ckpt, strict=False)
return model, transform, tokenizer
# https://github.com/Dao-AILab/flash-attention/blob/v0.2.8/flash_attn/flash_attention.py
import torch
import torch.nn as nn
from einops import rearrange
try: # v1
from flash_attn.flash_attn_interface import \
flash_attn_unpadded_qkvpacked_func
except ImportError:  # v2
from flash_attn.flash_attn_interface import flash_attn_varlen_qkvpacked_func as flash_attn_unpadded_qkvpacked_func
from flash_attn.bert_padding import pad_input, unpad_input
class FlashAttention(nn.Module):
"""Implement the scaled dot product attention with softmax.
Arguments
---------
softmax_scale: The temperature to use for the softmax attention.
(default: 1/sqrt(d_keys) where d_keys is computed at
runtime)
attention_dropout: The dropout rate to apply to the attention
(default: 0.0)
"""
def __init__(self, softmax_scale=None, attention_dropout=0.0, device=None, dtype=None):
super().__init__()
self.softmax_scale = softmax_scale
self.dropout_p = attention_dropout
def forward(self, qkv, key_padding_mask=None, causal=False, cu_seqlens=None,
max_s=None, need_weights=False):
"""Implements the multihead softmax attention.
Arguments
---------
qkv: The tensor containing the query, key, and value. (B, S, 3, H, D) if key_padding_mask is None
if unpadded: (nnz, 3, h, d)
key_padding_mask: a bool tensor of shape (B, S)
"""
assert not need_weights
assert qkv.dtype in [torch.float16, torch.bfloat16]
assert qkv.is_cuda
if cu_seqlens is None:
batch_size = qkv.shape[0]
seqlen = qkv.shape[1]
if key_padding_mask is None:
qkv = rearrange(qkv, 'b s ... -> (b s) ...')
max_s = seqlen
cu_seqlens = torch.arange(0, (batch_size + 1) * seqlen, step=seqlen, dtype=torch.int32,
device=qkv.device)
output = flash_attn_unpadded_qkvpacked_func(
qkv, cu_seqlens, max_s, self.dropout_p if self.training else 0.0,
softmax_scale=self.softmax_scale, causal=causal
)
output = rearrange(output, '(b s) ... -> b s ...', b=batch_size)
else:
nheads = qkv.shape[-2]
x = rearrange(qkv, 'b s three h d -> b s (three h d)')
x_unpad, indices, cu_seqlens, max_s = unpad_input(x, key_padding_mask)
x_unpad = rearrange(x_unpad, 'nnz (three h d) -> nnz three h d', three=3, h=nheads)
output_unpad = flash_attn_unpadded_qkvpacked_func(
x_unpad, cu_seqlens, max_s, self.dropout_p if self.training else 0.0,
softmax_scale=self.softmax_scale, causal=causal
)
output = rearrange(pad_input(rearrange(output_unpad, 'nnz h d -> nnz (h d)'),
indices, batch_size, seqlen),
'b s (h d) -> b s h d', h=nheads)
else:
assert max_s is not None
output = flash_attn_unpadded_qkvpacked_func(
qkv, cu_seqlens, max_s, self.dropout_p if self.training else 0.0,
softmax_scale=self.softmax_scale, causal=causal
)
return output, None
# --------------------------------------------------------
# InternVL
# Copyright (c) 2023 OpenGVLab
# Licensed under The MIT License [see LICENSE for details]
# --------------------------------------------------------
from functools import partial
import numpy as np
import torch
import torch.nn.functional as F
import torch.utils.checkpoint as checkpoint
from einops import rearrange
from timm.models.layers import DropPath, to_2tuple
from torch import nn
from transformers import LlamaConfig, LlamaForCausalLM
try:
from .flash_attention import FlashAttention
has_flash_attn = True
except ImportError:
print('FlashAttention is not installed.')
has_flash_attn = False
class CrossAttention(nn.Module):
def __init__(
self, dim, num_heads=8, qkv_bias=False, qk_scale=None, attn_drop=0.,
proj_drop=0., attn_head_dim=None, out_dim=None):
super().__init__()
if out_dim is None:
out_dim = dim
self.num_heads = num_heads
head_dim = dim // num_heads
if attn_head_dim is not None:
head_dim = attn_head_dim
all_head_dim = head_dim * self.num_heads
self.scale = qk_scale or head_dim ** -0.5
assert all_head_dim == dim
self.q = nn.Linear(dim, all_head_dim, bias=False)
self.k = nn.Linear(dim, all_head_dim, bias=False)
self.v = nn.Linear(dim, all_head_dim, bias=False)
if qkv_bias:
self.q_bias = nn.Parameter(torch.zeros(all_head_dim))
self.k_bias = nn.Parameter(torch.zeros(all_head_dim))
self.v_bias = nn.Parameter(torch.zeros(all_head_dim))
else:
self.q_bias = None
self.k_bias = None
self.v_bias = None
self.attn_drop = nn.Dropout(attn_drop)
self.proj = nn.Linear(all_head_dim, out_dim)
self.proj_drop = nn.Dropout(proj_drop)
def forward(self, x, k=None, v=None):
B, N, C = x.shape
N_k = k.shape[1]
N_v = v.shape[1]
q_bias, k_bias, v_bias = None, None, None
if self.q_bias is not None:
q_bias = self.q_bias
k_bias = self.k_bias
v_bias = self.v_bias
q = F.linear(input=x, weight=self.q.weight, bias=q_bias)
q = q.reshape(B, N, 1, self.num_heads, -1).permute(2, 0, 3, 1, 4).squeeze(0) # (B, N_head, N_q, dim)
k = F.linear(input=k, weight=self.k.weight, bias=k_bias)
k = k.reshape(B, N_k, 1, self.num_heads, -1).permute(2, 0, 3, 1, 4).squeeze(0)
v = F.linear(input=v, weight=self.v.weight, bias=v_bias)
v = v.reshape(B, N_v, 1, self.num_heads, -1).permute(2, 0, 3, 1, 4).squeeze(0)
q = q * self.scale
attn = (q @ k.transpose(-2, -1)) # (B, N_head, N_q, N_k)
attn = attn.softmax(dim=-1)
attn = self.attn_drop(attn)
x = (attn @ v).transpose(1, 2).reshape(B, N, -1)
x = self.proj(x)
x = self.proj_drop(x)
return x
class AttentiveBlock(nn.Module):
def __init__(self, dim, num_heads, qkv_bias=False, qk_scale=None, drop=0., attn_drop=0.,
drop_path=0., norm_layer=nn.LayerNorm, attn_head_dim=None, out_dim=None):
super().__init__()
self.norm1_q = norm_layer(dim)
self.norm1_k = norm_layer(dim)
self.norm1_v = norm_layer(dim)
self.cross_attn = CrossAttention(
dim, num_heads=num_heads, qkv_bias=qkv_bias, qk_scale=qk_scale, attn_drop=attn_drop,
proj_drop=drop, attn_head_dim=attn_head_dim, out_dim=out_dim)
self.drop_path = DropPath(drop_path) if drop_path > 0. else nn.Identity()
def forward(self, x_q, x_kv, pos_q, pos_k, bool_masked_pos, rel_pos_bias=None):
x_q = self.norm1_q(x_q + pos_q)
x_k = self.norm1_k(x_kv + pos_k)
x_v = self.norm1_v(x_kv)
x = self.cross_attn(x_q, k=x_k, v=x_v)
return x
class AttentionPoolingBlock(AttentiveBlock):
def forward(self, x):
x_q = x.mean(1, keepdim=True)
x_kv, pos_q, pos_k = x, 0, 0
x = super().forward(x_q, x_kv, pos_q, pos_k, bool_masked_pos=None, rel_pos_bias=None)
x = x.squeeze(1)
return x
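# Pooling sketch: AttentionPoolingBlock turns the mean token into a single query that
# cross-attends over all tokens, collapsing (B, N, dim) to (B, out_dim). The small
# dimensions below are illustrative only.
def _attention_pooling_shape_sketch():
    pool = AttentionPoolingBlock(dim=64, num_heads=4, qkv_bias=True, qk_scale=None,
                                 drop=0., attn_drop=0., norm_layer=nn.LayerNorm, out_dim=32)
    return pool(torch.randn(2, 50, 64)).shape  # torch.Size([2, 32])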
class RMSNorm(nn.Module):
def __init__(self, hidden_size, eps=1e-6):
super().__init__()
self.weight = nn.Parameter(torch.ones(hidden_size))
self.variance_epsilon = eps
def forward(self, hidden_states):
input_dtype = hidden_states.dtype
hidden_states = hidden_states.to(torch.float32)
variance = hidden_states.pow(2).mean(-1, keepdim=True)
hidden_states = hidden_states * torch.rsqrt(variance + self.variance_epsilon)
return self.weight * hidden_states.to(input_dtype)
try:
from apex.normalization import FusedRMSNorm
RMSNorm = FusedRMSNorm # noqa
print('Discovered apex.normalization.FusedRMSNorm - will use it instead of RMSNorm')
except ImportError:
# using the normal RMSNorm
pass
except Exception:
print('discovered apex but it failed to load, falling back to RMSNorm')
pass
class LayerScale(nn.Module):
def __init__(self, dim, init_values=1e-5, inplace=False, force_fp32=False):
super().__init__()
self.inplace = inplace
self.gamma = nn.Parameter(init_values * torch.ones(dim))
self.force_fp32 = force_fp32
@torch.cuda.amp.autocast(enabled=False)
def forward(self, x):
if self.force_fp32:
output_type = x.dtype
out = x.float().mul_(self.gamma.float()) if self.inplace else x.float() * self.gamma.float()
return out.to(dtype=output_type)
else:
out = x.mul_(self.gamma) if self.inplace else x * self.gamma
return out
class Attention(nn.Module):
def __init__(self, dim, num_heads=8, qkv_bias=False, attn_drop=0., proj_drop=0., use_flash_attn=False,
causal=False, norm_layer=nn.LayerNorm, qk_normalization=False):
super().__init__()
assert dim % num_heads == 0, 'dim should be divisible by num_heads'
self.num_heads = num_heads
head_dim = dim // num_heads
self.scale = head_dim ** -0.5
self.qkv = nn.Linear(dim, dim * 3, bias=qkv_bias)
self.attn_drop = nn.Dropout(attn_drop)
self.proj = nn.Linear(dim, dim)
self.proj_drop = nn.Dropout(proj_drop)
self.use_flash_attn = use_flash_attn
if use_flash_attn:
self.causal = causal
self.inner_attn = FlashAttention(attention_dropout=attn_drop)
self.qk_normalization = qk_normalization
self.q_norm = norm_layer(dim) if qk_normalization else nn.Identity()
self.k_norm = norm_layer(dim) if qk_normalization else nn.Identity()
def _naive_attn(self, x):
B, N, C = x.shape
qkv = self.qkv(x).reshape(B, N, 3, self.num_heads, C // self.num_heads).permute(2, 0, 3, 1, 4)
q, k, v = qkv.unbind(0) # make torchscript happy (cannot use tensor as tuple)
if self.qk_normalization:
B_, H_, N_, D_ = q.shape
q = self.q_norm(q.transpose(1, 2).flatten(-2, -1)).view(B_, N_, H_, D_).transpose(1, 2)
k = self.k_norm(k.transpose(1, 2).flatten(-2, -1)).view(B_, N_, H_, D_).transpose(1, 2)
attn = ((q * self.scale) @ k.transpose(-2, -1))
attn = attn.softmax(dim=-1)
attn = self.attn_drop(attn)
x = (attn @ v).transpose(1, 2).reshape(B, N, C)
x = self.proj(x)
x = self.proj_drop(x)
return x
def _flash_attn(self, x, key_padding_mask=None, need_weights=False):
qkv = self.qkv(x)
qkv = rearrange(qkv, 'b s (three h d) -> b s three h d', three=3, h=self.num_heads)
if self.qk_normalization:
q, k, v = qkv.unbind(2)
q = self.q_norm(q.flatten(-2, -1)).view(q.shape)
k = self.k_norm(k.flatten(-2, -1)).view(k.shape)
qkv = torch.stack([q, k, v], dim=2)
context, _ = self.inner_attn(
qkv, key_padding_mask=key_padding_mask, need_weights=need_weights, causal=self.causal
)
outs = self.proj(rearrange(context, 'b s h d -> b s (h d)'))
outs = self.proj_drop(outs)
return outs
def forward(self, x):
x = self._naive_attn(x) if not self.use_flash_attn else self._flash_attn(x)
return x
class Mlp(nn.Module):
""" MLP as used in Vision Transformer, MLP-Mixer and related networks
"""
def __init__(self, in_features, hidden_features=None, out_features=None, act_layer=nn.GELU,
bias=True, drop=0.):
super().__init__()
out_features = out_features or in_features
hidden_features = hidden_features or in_features
bias = to_2tuple(bias)
drop_probs = to_2tuple(drop)
self.fc1 = nn.Linear(in_features, hidden_features, bias=bias[0])
self.act = act_layer()
self.drop1 = nn.Dropout(drop_probs[0])
self.fc2 = nn.Linear(hidden_features, out_features, bias=bias[1])
self.drop2 = nn.Dropout(drop_probs[1])
def forward(self, x):
x = self.fc1(x)
x = self.act(x)
x = self.drop1(x)
x = self.fc2(x)
x = self.drop2(x)
return x
class Block(nn.Module):
def __init__(
self, dim, num_heads, mlp_ratio=4., qkv_bias=False, drop=0., attn_drop=0., init_values=None,
drop_path=0., act_layer=nn.GELU, norm_layer=nn.LayerNorm, use_flash_attn=False, with_cp=False,
qk_normalization=False, layerscale_force_fp32=False):
super().__init__()
self.norm1 = norm_layer(dim)
self.attn = Attention(dim, num_heads=num_heads, qkv_bias=qkv_bias, attn_drop=attn_drop, proj_drop=drop,
use_flash_attn=use_flash_attn, causal=False, norm_layer=norm_layer,
qk_normalization=qk_normalization)
self.ls1 = LayerScale(dim, init_values=init_values,
force_fp32=layerscale_force_fp32) if init_values else nn.Identity()
# NOTE: drop path for stochastic depth, we shall see if this is better than dropout here
self.drop_path1 = DropPath(drop_path) if drop_path > 0. else nn.Identity()
self.norm2 = norm_layer(dim)
mlp_hidden_dim = int(dim * mlp_ratio)
self.mlp = Mlp(in_features=dim, hidden_features=mlp_hidden_dim, act_layer=act_layer, drop=drop)
self.ls2 = LayerScale(dim, init_values=init_values,
force_fp32=layerscale_force_fp32) if init_values else nn.Identity()
self.drop_path2 = DropPath(drop_path) if drop_path > 0. else nn.Identity()
self.with_cp = with_cp
def forward(self, x):
def _inner_forward(x):
x = x + self.drop_path1(self.ls1(self.attn(self.norm1(x))))
x = x + self.drop_path2(self.ls2(self.mlp(self.norm2(x))))
return x
if self.with_cp:
return checkpoint.checkpoint(_inner_forward, x)
else:
return _inner_forward(x)
class PatchEmbed(nn.Module):
""" 2D Image to Patch Embedding
"""
def __init__(self, img_size=224, patch_size=16, in_chans=3, embed_dim=768, norm_layer=None, flatten=True):
super().__init__()
img_size = to_2tuple(img_size)
patch_size = to_2tuple(patch_size)
num_patches = (img_size[1] // patch_size[1]) * (img_size[0] // patch_size[0])
self.patch_shape = (img_size[0] // patch_size[0], img_size[1] // patch_size[1])
self.img_size = img_size
self.patch_size = patch_size
self.num_patches = num_patches
self.flatten = flatten
self.proj = nn.Conv2d(in_chans, embed_dim, kernel_size=patch_size, stride=patch_size)
self.norm = norm_layer(embed_dim) if norm_layer else nn.Identity()
def forward(self, x, **kwargs):
x = self.proj(x)
_, _, H, W = x.shape
if self.flatten:
x = x.flatten(2).transpose(1, 2) # BCHW -> BNC
x = self.norm(x)
return x, H, W
class InternVL_C(nn.Module):
def __init__(self, in_chans=3, patch_size=14, img_size=224, qkv_bias=False, drop_path_rate=0.0,
embed_dim=3200, num_heads=25, mlp_ratio=4, init_values=0.1, qk_normalization=True, depth=48,
use_flash_attn=True, with_cp=True, layerscale_force_fp32=False, context_length: int = 80,
transformer_width=4096, llm_path=None, attn_pool_num_heads=16, clip_embed_dim=768):
super().__init__()
use_flash_attn = use_flash_attn and has_flash_attn
if use_flash_attn and not has_flash_attn:
print('Warning: Flash Attention is not available, use_flash_attn is set to False.')
self.use_flash_attn = use_flash_attn
self.context_length = context_length
self.embed_dim = embed_dim
self.transformer_width = transformer_width
""" text encoder of InternVL """
llama_config = LlamaConfig.from_pretrained(llm_path)
model = LlamaForCausalLM(llama_config)
self.transformer = model.model
self.transformer.gradient_checkpointing = True
self.text_projection = nn.Parameter(torch.empty(transformer_width, clip_embed_dim))
self.logit_scale = nn.Parameter(torch.ones([]) * np.log(1 / 0.07))
""" image encoder of InternVL """
norm_layer_for_blocks = partial(RMSNorm, eps=1e-6)
self.norm_layer_for_blocks = norm_layer_for_blocks
self.patch_embed = PatchEmbed(img_size, patch_size, in_chans, embed_dim)
num_patches = self.patch_embed.num_patches
self.num_patches = num_patches
self.pos_embed = nn.Parameter(torch.zeros(1, num_patches + 1, embed_dim))
self.cls_token = nn.Parameter(torch.zeros(1, 1, embed_dim))
dpr = [x.item() for x in torch.linspace(0, drop_path_rate, depth)]
self.blocks = nn.ModuleList([
Block(embed_dim, num_heads, mlp_ratio, qkv_bias=qkv_bias,
norm_layer=norm_layer_for_blocks,
drop_path=dpr[i], init_values=init_values, attn_drop=0.,
use_flash_attn=use_flash_attn,
with_cp=with_cp,
qk_normalization=qk_normalization,
layerscale_force_fp32=layerscale_force_fp32)
for i in range(depth)])
self.clip_projector = AttentionPoolingBlock(
dim=embed_dim, num_heads=attn_pool_num_heads, qkv_bias=True, qk_scale=None,
drop=0., attn_drop=0., norm_layer=partial(nn.LayerNorm, eps=1e-5), out_dim=clip_embed_dim)
@property
def dtype(self):
return self.patch_embed.proj.weight.dtype
def forward_features(self, x):
x, _, _ = self.patch_embed(x.type(self.dtype))
batch_size, seq_len, _ = x.size()
cls_tokens = self.cls_token.expand(batch_size, -1, -1)
x = torch.cat((cls_tokens, x), dim=1)
x = x + self.pos_embed
for idx, blk in enumerate(self.blocks):
x = blk(x)
return x
def encode_image(self, image):
x = self.forward_features(image)
x = self.clip_projector(x)
return x
def encode_text(self, text):
text_key_padding_mask = text > 0
x = self.transformer(input_ids=text, attention_mask=text_key_padding_mask).last_hidden_state
x = x[torch.arange(x.shape[0]), text_key_padding_mask.sum(1) - 1]
x = x @ self.text_projection
return x
def forward(self, image, text):
image_features = self.encode_image(image)
text_features = self.encode_text(text)
# normalized features
image_features = image_features / image_features.norm(dim=1, keepdim=True)
text_features = text_features / text_features.norm(dim=1, keepdim=True)
# cosine similarity as logits
logit_scale = self.logit_scale.exp()
logits_per_image = logit_scale * image_features @ text_features.t()
logits_per_text = logits_per_image.t()
return logits_per_image, logits_per_text
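# Contrastive-head sketch: forward() above L2-normalizes both embeddings and scales the
# cosine similarities by exp(logit_scale); the same arithmetic on dummy features looks like this.
if __name__ == '__main__':
    image_features = F.normalize(torch.randn(4, 768), dim=1)
    text_features = F.normalize(torch.randn(4, 768), dim=1)
    logit_scale = torch.tensor(np.log(1 / 0.07)).exp()
    logits_per_image = logit_scale * image_features @ text_features.t()
    print(logits_per_image.shape)  # torch.Size([4, 4]); diagonal entries are the matched pairs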
# --------------------------------------------------------
# InternVL
# Copyright (c) 2023 OpenGVLab
# Licensed under The MIT License [see LICENSE for details]
# --------------------------------------------------------
import torch
import torch.nn as nn
import torchvision.transforms as T
from torchvision.transforms import InterpolationMode
from transformers import LlamaTokenizer
from .configuration_intern_vit import InternVisionConfig
from .configuration_internvl import InternVLConfig
from .modeling_intern_vit import InternVisionModel
from .modeling_internvl import InternVL_C, InternVL_G, InternVLModel
__all__ = ['InternVisionConfig', 'InternVisionModel', 'InternVLConfig',
'InternVLModel', 'InternVL_C', 'InternVL_G']
# Prefix the text "summarize:"
class InternVLTokenizer(nn.Module):
def __init__(self, model_path):
super(InternVLTokenizer, self).__init__()
self.tokenizer = LlamaTokenizer.from_pretrained(model_path)
self.tokenizer.pad_token = ' ' # allow padding
self.tokenizer.add_eos_token = True
def forward(self, text, prefix='summarize:'):
if type(text) == str:
text = prefix + text
elif type(text) == list:
text = [prefix + item for item in text]
text = self.tokenizer(text, return_tensors='pt', max_length=80, truncation=True, padding='max_length').input_ids
return text
def build_transform(task, image_size=224, mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]):
if task == 'retrieval':
transform = T.Compose([
T.Lambda(lambda img: img.convert('RGB') if img.mode != 'RGB' else img),
T.Resize((image_size, image_size), interpolation=InterpolationMode.BICUBIC),
T.ToTensor(),
T.Normalize(mean=mean, std=std)])
else:
transform = T.Compose([
T.Lambda(lambda img: img.convert('RGB') if img.mode != 'RGB' else img),
T.Resize(image_size, interpolation=InterpolationMode.BICUBIC),
T.CenterCrop(image_size),
T.ToTensor(),
T.Normalize(mean=mean, std=std)])
return transform
def load_internvl_c_huggingface(ckpt_path, device, task):
model = InternVL_C.from_pretrained(ckpt_path, torch_dtype=torch.float16).to(device)
if model.config.use_backbone_lora:
model.vision_model.merge_and_unload()
model.vision_model = model.vision_model.model
if model.config.use_qllama_lora:
model.qllama.merge_and_unload()
model.qllama = model.qllama.model
if model.config.force_image_size is not None:
image_size = model.config.force_image_size
else:
image_size = model.config.vision_config.image_size
transform = build_transform(task, image_size)
tokenizer = InternVLTokenizer(ckpt_path)
return model, transform, tokenizer
def load_internvl_g_huggingface(ckpt_path, device, task):
model = InternVL_G.from_pretrained(ckpt_path, torch_dtype=torch.float16).to(device)
if model.config.use_backbone_lora:
model.vision_model.merge_and_unload()
model.vision_model = model.vision_model.model
if model.config.use_qllama_lora:
model.qllama.merge_and_unload()
model.qllama = model.qllama.model
if model.config.force_image_size is not None:
image_size = model.config.force_image_size
else:
image_size = model.config.vision_config.image_size
transform = build_transform(task, image_size)
tokenizer = InternVLTokenizer(ckpt_path)
return model, transform, tokenizer
# --------------------------------------------------------
# InternVL
# Copyright (c) 2023 OpenGVLab
# Licensed under The MIT License [see LICENSE for details]
# --------------------------------------------------------
import os
from typing import Union
from transformers.configuration_utils import PretrainedConfig
from transformers.utils import logging
logger = logging.get_logger(__name__)
class InternVisionConfig(PretrainedConfig):
r"""
This is the configuration class to store the configuration of a [`InternVisionModel`]. It is used to
instantiate a vision encoder according to the specified arguments, defining the model architecture.
Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
documentation from [`PretrainedConfig`] for more information.
Args:
num_channels (`int`, *optional*, defaults to 3):
Number of color channels in the input images (e.g., 3 for RGB).
patch_size (`int`, *optional*, defaults to 14):
The size (resolution) of each patch.
image_size (`int`, *optional*, defaults to 224):
The size (resolution) of each image.
qkv_bias (`bool`, *optional*, defaults to `False`):
Whether to add a bias to the queries, keys and values in the self-attention layers.
hidden_size (`int`, *optional*, defaults to 3200):
Dimensionality of the encoder layers and the pooler layer.
num_attention_heads (`int`, *optional*, defaults to 25):
Number of attention heads for each attention layer in the Transformer encoder.
intermediate_size (`int`, *optional*, defaults to 12800):
Dimensionality of the "intermediate" (i.e., feed-forward) layer in the Transformer encoder.
qk_normalization (`bool`, *optional*, defaults to `True`):
Whether to normalize the queries and keys in the self-attention layers.
num_hidden_layers (`int`, *optional*, defaults to 48):
Number of hidden layers in the Transformer encoder.
use_flash_attn (`bool`, *optional*, defaults to `True`):
Whether to use the flash attention mechanism.
hidden_act (`str` or `function`, *optional*, defaults to `"gelu"`):
The non-linear activation function (function or string) in the encoder and pooler. If string, `"gelu"`,
`"relu"`, `"selu"` and `"gelu_new"` are supported.
layer_norm_eps (`float`, *optional*, defaults to 1e-6):
The epsilon used by the layer normalization layers.
dropout (`float`, *optional*, defaults to 0.0):
The dropout probability for all fully connected layers in the embeddings, encoder, and pooler.
drop_path_rate (`float`, *optional*, defaults to 0.0):
Dropout rate for stochastic depth.
attention_dropout (`float`, *optional*, defaults to 0.0):
The dropout ratio for the attention probabilities.
initializer_range (`float`, *optional*, defaults to 0.02):
The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
initializer_factor (`float`, *optional*, defaults to 0.1):
A factor for layer scale.
"""
model_type = 'intern_vit_6b'
def __init__(
self,
num_channels=3,
patch_size=14,
image_size=224,
qkv_bias=False,
hidden_size=3200,
num_attention_heads=25,
intermediate_size=12800,
qk_normalization=True,
num_hidden_layers=48,
use_flash_attn=True,
hidden_act='gelu',
layer_norm_eps=1e-6,
dropout=0.0,
drop_path_rate=0.0,
attention_dropout=0.0,
initializer_range=0.02,
initializer_factor=0.1,
**kwargs,
):
super().__init__(**kwargs)
self.hidden_size = hidden_size
self.intermediate_size = intermediate_size
self.dropout = dropout
self.drop_path_rate = drop_path_rate
self.num_hidden_layers = num_hidden_layers
self.num_attention_heads = num_attention_heads
self.num_channels = num_channels
self.patch_size = patch_size
self.image_size = image_size
self.initializer_range = initializer_range
self.initializer_factor = initializer_factor
self.attention_dropout = attention_dropout
self.layer_norm_eps = layer_norm_eps
self.hidden_act = hidden_act
self.qkv_bias = qkv_bias
self.qk_normalization = qk_normalization
self.use_flash_attn = use_flash_attn
@classmethod
def from_pretrained(cls, pretrained_model_name_or_path: Union[str, os.PathLike], **kwargs) -> 'PretrainedConfig':
config_dict, kwargs = cls.get_config_dict(pretrained_model_name_or_path, **kwargs)
if 'vision_config' in config_dict:
config_dict = config_dict['vision_config']
if 'model_type' in config_dict and hasattr(cls, 'model_type') and config_dict['model_type'] != cls.model_type:
logger.warning(
f"You are using a model of type {config_dict['model_type']} to instantiate a model of type "
f'{cls.model_type}. This is not supported for all configurations of models and can yield errors.'
)
return cls.from_dict(config_dict, **kwargs)
# --------------------------------------------------------
# InternVL
# Copyright (c) 2023 OpenGVLab
# Licensed under The MIT License [see LICENSE for details]
# --------------------------------------------------------
import copy
from transformers import LlamaConfig
from transformers.configuration_utils import PretrainedConfig
from transformers.utils import logging
from .configuration_intern_vit import InternVisionConfig
logger = logging.get_logger(__name__)
class InternVLConfig(PretrainedConfig):
r"""
[`InternVLConfig`] is the configuration class to store the configuration of a
[`InternVLModel`]. It is used to instantiate an InternVLModel according to the specified
arguments, defining the InternViT-6B and QLLaMA configs. Instantiating a configuration with
the defaults will yield a similar configuration to that of the InternVL architecture.
Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
documentation from [`PretrainedConfig`] for more information.
Args:
vision_config (`dict`, *optional*):
Dictionary of configuration options used to initialize [`InternVisionConfig`].
qllama_config (`dict`, *optional*):
Dictionary of configuration options used to initialize [`LLaMAConfig`].
clip_embed_dim (`int`, *optional*, defaults to 768):
Size of the embeddings from the CLIP model.
attn_pool_num_heads (`int`, *optional*, defaults to 16):
Number of attention heads used in the attention pooling layers.
num_query_token (`int`, *optional*, defaults to 96):
Number of query tokens used in the transformer.
label_smoothing (`float`, *optional*, defaults to 0.0):
The amount of label smoothing to apply.
cross_attention_frequency (`int`, *optional*, defaults to 2):
The frequency of cross-attention layers in the model.
use_backbone_lora (`int`, *optional*, defaults to 0):
If non-zero, indicates the use of LoRA in the backbone of the model.
use_qllama_lora (`int`, *optional*, defaults to 0):
If non-zero, indicates the use of LoRA in the QLLaMA of the model.
force_image_size (`int` or `None`, *optional*):
If not None, forces the model to use this specific image size.
initializer_range (`float`, *optional*, defaults to 0.02):
The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
kwargs (*optional*):
Dictionary of additional keyword arguments.
"""
model_type = 'internvl'
is_composition = True
def __init__(
self,
vision_config=None,
qllama_config=None,
clip_embed_dim=768,
attn_pool_num_heads=16,
num_query_token=96,
label_smoothing=0.0,
cross_attention_frequency=2,
use_backbone_lora=0,
use_qllama_lora=0,
force_image_size=None,
initializer_range=0.02,
**kwargs):
super().__init__(**kwargs)
if vision_config is None:
vision_config = {}
logger.info('vision_config is None. Initializing the InternVisionConfig with default values.')
if qllama_config is None:
qllama_config = {}
logger.info(
'qllama_config is None. Initializing the qllama_config with default values (`LlamaConfig`).')
self.vision_config = InternVisionConfig(**vision_config)
self.qllama_config = LlamaConfig(**qllama_config)
self.qllama_config.num_query_token = num_query_token
self.qllama_config.cross_attention_frequency = cross_attention_frequency
self.hidden_size = self.qllama_config.hidden_size
self.clip_embed_dim = clip_embed_dim
self.attn_pool_num_heads = attn_pool_num_heads
self.num_query_token = num_query_token
self.label_smoothing = label_smoothing
self.use_backbone_lora = use_backbone_lora
self.use_qllama_lora = use_qllama_lora
self.force_image_size = force_image_size
self.initializer_range = initializer_range
def to_dict(self):
"""
Serializes this instance to a Python dictionary. Override the default [`~PretrainedConfig.to_dict`].
Returns:
`Dict[str, any]`: Dictionary of all the attributes that make up this configuration instance.
"""
output = copy.deepcopy(self.__dict__)
output['vision_config'] = self.vision_config.to_dict()
output['qllama_config'] = self.qllama_config.to_dict()
output['model_type'] = self.__class__.model_type
return output
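# Composition sketch: with no sub-configs supplied, the vision and QLLaMA parts fall back
# to their own defaults, and to_dict() re-nests them into the serialized dictionary.
if __name__ == '__main__':
    cfg = InternVLConfig(clip_embed_dim=768, num_query_token=96)
    d = cfg.to_dict()
    print(d['model_type'])                    # 'internvl'
    print(d['vision_config']['hidden_size'])  # 3200 (InternVisionConfig default)
    print(d['qllama_config']['hidden_size'])  # 4096 (LlamaConfig default)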
# https://github.com/Dao-AILab/flash-attention/blob/v0.2.8/flash_attn/flash_attention.py
import torch
import torch.nn as nn
from einops import rearrange
try: # v1
from flash_attn.flash_attn_interface import \
flash_attn_unpadded_qkvpacked_func
except ImportError:  # v2
from flash_attn.flash_attn_interface import flash_attn_varlen_qkvpacked_func as flash_attn_unpadded_qkvpacked_func
from flash_attn.bert_padding import pad_input, unpad_input
class FlashAttention(nn.Module):
"""Implement the scaled dot product attention with softmax.
Arguments
---------
softmax_scale: The temperature to use for the softmax attention.
(default: 1/sqrt(d_keys) where d_keys is computed at
runtime)
attention_dropout: The dropout rate to apply to the attention
(default: 0.0)
"""
def __init__(self, softmax_scale=None, attention_dropout=0.0, device=None, dtype=None):
super().__init__()
self.softmax_scale = softmax_scale
self.dropout_p = attention_dropout
def forward(self, qkv, key_padding_mask=None, causal=False, cu_seqlens=None,
max_s=None, need_weights=False):
"""Implements the multihead softmax attention.
Arguments
---------
qkv: The tensor containing the query, key, and value. (B, S, 3, H, D) if key_padding_mask is None
if unpadded: (nnz, 3, h, d)
key_padding_mask: a bool tensor of shape (B, S)
"""
assert not need_weights
assert qkv.dtype in [torch.float16, torch.bfloat16]
assert qkv.is_cuda
if cu_seqlens is None:
batch_size = qkv.shape[0]
seqlen = qkv.shape[1]
if key_padding_mask is None:
qkv = rearrange(qkv, 'b s ... -> (b s) ...')
max_s = seqlen
cu_seqlens = torch.arange(0, (batch_size + 1) * seqlen, step=seqlen, dtype=torch.int32,
device=qkv.device)
output = flash_attn_unpadded_qkvpacked_func(
qkv, cu_seqlens, max_s, self.dropout_p if self.training else 0.0,
softmax_scale=self.softmax_scale, causal=causal
)
output = rearrange(output, '(b s) ... -> b s ...', b=batch_size)
else:
nheads = qkv.shape[-2]
x = rearrange(qkv, 'b s three h d -> b s (three h d)')
x_unpad, indices, cu_seqlens, max_s = unpad_input(x, key_padding_mask)
x_unpad = rearrange(x_unpad, 'nnz (three h d) -> nnz three h d', three=3, h=nheads)
output_unpad = flash_attn_unpadded_qkvpacked_func(
x_unpad, cu_seqlens, max_s, self.dropout_p if self.training else 0.0,
softmax_scale=self.softmax_scale, causal=causal
)
output = rearrange(pad_input(rearrange(output_unpad, 'nnz h d -> nnz (h d)'),
indices, batch_size, seqlen),
'b s (h d) -> b s h d', h=nheads)
else:
assert max_s is not None
output = flash_attn_unpadded_qkvpacked_func(
qkv, cu_seqlens, max_s, self.dropout_p if self.training else 0.0,
softmax_scale=self.softmax_scale, causal=causal
)
return output, None
# --------------------------------------------------------
# InternVL
# Copyright (c) 2023 OpenGVLab
# Licensed under The MIT License [see LICENSE for details]
# --------------------------------------------------------
from typing import Optional, Tuple, Union
import torch
import torch.nn.functional as F
import torch.utils.checkpoint
from einops import rearrange
from timm.models.layers import DropPath
from torch import nn
from transformers.activations import ACT2FN
from transformers.modeling_outputs import (BaseModelOutput,
BaseModelOutputWithPooling)
from transformers.modeling_utils import PreTrainedModel
from transformers.utils import logging
from .configuration_intern_vit import InternVisionConfig
try:
from .flash_attention import FlashAttention
has_flash_attn = True
except ImportError:
print('FlashAttention is not installed.')
has_flash_attn = False
logger = logging.get_logger(__name__)
class InternRMSNorm(nn.Module):
def __init__(self, hidden_size, eps=1e-6):
super().__init__()
self.weight = nn.Parameter(torch.ones(hidden_size))
self.variance_epsilon = eps
def forward(self, hidden_states):
input_dtype = hidden_states.dtype
hidden_states = hidden_states.to(torch.float32)
variance = hidden_states.pow(2).mean(-1, keepdim=True)
hidden_states = hidden_states * torch.rsqrt(variance + self.variance_epsilon)
return self.weight * hidden_states.to(input_dtype)
try:
from apex.normalization import FusedRMSNorm
InternRMSNorm = FusedRMSNorm # noqa
logger.info('Discovered apex.normalization.FusedRMSNorm - will use it instead of InternRMSNorm')
except ImportError:
# using the normal InternRMSNorm
pass
except Exception:
logger.warning('discovered apex but it failed to load, falling back to InternRMSNorm')
pass
class InternVisionEmbeddings(nn.Module):
def __init__(self, config: InternVisionConfig):
super().__init__()
self.config = config
self.embed_dim = config.hidden_size
self.image_size = config.image_size
self.patch_size = config.patch_size
self.class_embedding = nn.Parameter(
torch.randn(1, 1, self.embed_dim),
)
self.patch_embedding = nn.Conv2d(
in_channels=3, out_channels=self.embed_dim, kernel_size=self.patch_size, stride=self.patch_size
)
self.num_patches = (self.image_size // self.patch_size) ** 2
self.num_positions = self.num_patches + 1
self.position_embedding = nn.Parameter(torch.randn(1, self.num_positions, self.embed_dim))
def forward(self, pixel_values: torch.FloatTensor) -> torch.Tensor:
batch_size = pixel_values.shape[0]
target_dtype = self.patch_embedding.weight.dtype
patch_embeds = self.patch_embedding(pixel_values) # shape = [*, width, grid, grid]
patch_embeds = patch_embeds.flatten(2).transpose(1, 2)
class_embeds = self.class_embedding.expand(batch_size, 1, -1).to(target_dtype)
embeddings = torch.cat([class_embeds, patch_embeds], dim=1)
embeddings = embeddings + self.position_embedding.to(target_dtype)
return embeddings
class InternAttention(nn.Module):
"""Multi-headed attention from 'Attention Is All You Need' paper"""
def __init__(self, config: InternVisionConfig):
super().__init__()
self.config = config
self.embed_dim = config.hidden_size
self.num_heads = config.num_attention_heads
self.use_flash_attn = config.use_flash_attn and has_flash_attn
if config.use_flash_attn and not has_flash_attn:
print('Warning: Flash Attention is not available, use_flash_attn is set to False.')
self.head_dim = self.embed_dim // self.num_heads
if self.head_dim * self.num_heads != self.embed_dim:
raise ValueError(
f'embed_dim must be divisible by num_heads (got `embed_dim`: {self.embed_dim} and `num_heads`:'
f' {self.num_heads}).'
)
self.scale = self.head_dim ** -0.5
self.qkv = nn.Linear(self.embed_dim, 3 * self.embed_dim, bias=config.qkv_bias)
self.attn_drop = nn.Dropout(config.attention_dropout)
self.proj_drop = nn.Dropout(config.dropout)
self.qk_normalization = config.qk_normalization
if self.qk_normalization:
self.q_norm = InternRMSNorm(self.embed_dim, eps=config.layer_norm_eps)
self.k_norm = InternRMSNorm(self.embed_dim, eps=config.layer_norm_eps)
if self.use_flash_attn:
self.inner_attn = FlashAttention(attention_dropout=config.attention_dropout)
self.proj = nn.Linear(self.embed_dim, self.embed_dim)
def _naive_attn(self, x):
B, N, C = x.shape
qkv = self.qkv(x).reshape(B, N, 3, self.num_heads, C // self.num_heads).permute(2, 0, 3, 1, 4)
q, k, v = qkv.unbind(0) # make torchscript happy (cannot use tensor as tuple)
if self.qk_normalization:
B_, H_, N_, D_ = q.shape
q = self.q_norm(q.transpose(1, 2).flatten(-2, -1)).view(B_, N_, H_, D_).transpose(1, 2)
k = self.k_norm(k.transpose(1, 2).flatten(-2, -1)).view(B_, N_, H_, D_).transpose(1, 2)
attn = ((q * self.scale) @ k.transpose(-2, -1))
attn = attn.softmax(dim=-1)
attn = self.attn_drop(attn)
x = (attn @ v).transpose(1, 2).reshape(B, N, C)
x = self.proj(x)
x = self.proj_drop(x)
return x
def _flash_attn(self, x, key_padding_mask=None, need_weights=False):
qkv = self.qkv(x)
qkv = rearrange(qkv, 'b s (three h d) -> b s three h d', three=3, h=self.num_heads)
if self.qk_normalization:
q, k, v = qkv.unbind(2)
q = self.q_norm(q.flatten(-2, -1)).view(q.shape)
k = self.k_norm(k.flatten(-2, -1)).view(k.shape)
qkv = torch.stack([q, k, v], dim=2)
context, _ = self.inner_attn(
qkv, key_padding_mask=key_padding_mask, need_weights=need_weights, causal=False
)
outs = self.proj(rearrange(context, 'b s h d -> b s (h d)'))
outs = self.proj_drop(outs)
return outs
def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
x = self._naive_attn(hidden_states) if not self.use_flash_attn else self._flash_attn(hidden_states)
return x
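# Note (illustrative): both attention paths share the same forward interface. The flash path packs
# q/k/v as (b, s, 3, h, d) for FlashAttention, while the naive path materialises the full
# (B, heads, N, N) attention matrix. Assuming `config` is an InternVisionConfig:
#
#     attn = InternAttention(config)
#     y = attn(torch.randn(2, 257, config.hidden_size))  # -> (2, 257, config.hidden_size)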
class InternMLP(nn.Module):
def __init__(self, config: InternVisionConfig):
super().__init__()
self.config = config
self.act = ACT2FN[config.hidden_act]
self.fc1 = nn.Linear(config.hidden_size, config.intermediate_size)
self.fc2 = nn.Linear(config.intermediate_size, config.hidden_size)
def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
hidden_states = self.fc1(hidden_states)
hidden_states = self.act(hidden_states)
hidden_states = self.fc2(hidden_states)
return hidden_states
class InternVisionEncoderLayer(nn.Module):
def __init__(self, config: InternVisionConfig, drop_path_rate: float):
super().__init__()
self.embed_dim = config.hidden_size
self.intermediate_size = config.intermediate_size
self.attn = InternAttention(config)
self.mlp = InternMLP(config)
self.norm1 = InternRMSNorm(self.embed_dim, eps=config.layer_norm_eps)
self.norm2 = InternRMSNorm(self.embed_dim, eps=config.layer_norm_eps)
self.ls1 = nn.Parameter(config.initializer_factor * torch.ones(self.embed_dim))
self.ls2 = nn.Parameter(config.initializer_factor * torch.ones(self.embed_dim))
self.drop_path1 = DropPath(drop_path_rate) if drop_path_rate > 0. else nn.Identity()
self.drop_path2 = DropPath(drop_path_rate) if drop_path_rate > 0. else nn.Identity()
def forward(
self,
hidden_states: torch.Tensor,
    ) -> torch.Tensor:
        """
        Args:
            hidden_states (`torch.FloatTensor`): input to the layer of shape `(batch, seq_len, embed_dim)`.
        """
hidden_states = hidden_states + self.drop_path1(self.attn(self.norm1(hidden_states)) * self.ls1)
hidden_states = hidden_states + self.drop_path2(self.mlp(self.norm2(hidden_states)) * self.ls2)
return hidden_states
class InternVisionEncoder(nn.Module):
"""
Transformer encoder consisting of `config.num_hidden_layers` self attention layers. Each layer is a
[`InternEncoderLayer`].
Args:
config (`InternConfig`):
The corresponding vision configuration for the `InternEncoder`.
"""
def __init__(self, config: InternVisionConfig):
super().__init__()
self.config = config
# stochastic depth decay rule
dpr = [x.item() for x in torch.linspace(0, config.drop_path_rate, config.num_hidden_layers)]
self.layers = nn.ModuleList([
InternVisionEncoderLayer(config, dpr[idx]) for idx in range(config.num_hidden_layers)])
self.gradient_checkpointing = True
def forward(
self,
inputs_embeds,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
) -> Union[Tuple, BaseModelOutput]:
r"""
Args:
inputs_embeds (`torch.FloatTensor` of shape `(batch_size, sequence_length, hidden_size)`):
Embedded representation of the inputs. Should be float, not int tokens.
output_hidden_states (`bool`, *optional*):
Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors
for more detail.
return_dict (`bool`, *optional*):
Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
"""
output_hidden_states = (
output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
)
return_dict = return_dict if return_dict is not None else self.config.use_return_dict
encoder_states = () if output_hidden_states else None
hidden_states = inputs_embeds
for idx, encoder_layer in enumerate(self.layers):
if output_hidden_states:
encoder_states = encoder_states + (hidden_states,)
if self.gradient_checkpointing and self.training:
layer_outputs = torch.utils.checkpoint.checkpoint(
encoder_layer,
hidden_states)
else:
layer_outputs = encoder_layer(
hidden_states,
)
hidden_states = layer_outputs
if output_hidden_states:
encoder_states = encoder_states + (hidden_states,)
if not return_dict:
return tuple(v for v in [hidden_states, encoder_states] if v is not None)
return BaseModelOutput(
last_hidden_state=hidden_states, hidden_states=encoder_states
)
class InternVisionModel(PreTrainedModel):
main_input_name = 'pixel_values'
config_class = InternVisionConfig
def __init__(self, config: InternVisionConfig):
super().__init__(config)
self.config = config
self.embeddings = InternVisionEmbeddings(config)
self.encoder = InternVisionEncoder(config)
def resize_pos_embeddings(self, old_size, new_size, patch_size):
pos_emb = self.embeddings.position_embedding
_, num_positions, embed_dim = pos_emb.shape
cls_emb = pos_emb[:, :1, :]
pos_emb = pos_emb[:, 1:, :].reshape(1, old_size // patch_size, old_size // patch_size, -1).permute(0, 3, 1, 2)
pos_emb = F.interpolate(pos_emb.float(), size=new_size // patch_size, mode='bicubic', align_corners=False)
pos_emb = pos_emb.to(cls_emb.dtype).reshape(1, embed_dim, -1).permute(0, 2, 1)
pos_emb = torch.cat([cls_emb, pos_emb], dim=1)
self.embeddings.position_embedding = nn.Parameter(pos_emb)
logger.info('Resized position embeddings from {} to {}'.format(old_size, new_size))
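    # Illustrative sketch: to run a checkpoint trained at 224x224 on 448x448 inputs, the grid of
    # position embeddings is bicubically interpolated while the class-token embedding is kept, e.g.
    #
    #     model.resize_pos_embeddings(old_size=224, new_size=448, patch_size=14)
    #     # position_embedding goes from (1, 1 + 16*16, C) to (1, 1 + 32*32, C)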
def get_input_embeddings(self):
return self.embeddings
def forward(
self,
pixel_values: Optional[torch.FloatTensor] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
pixel_embeds: Optional[torch.FloatTensor] = None,
) -> Union[Tuple, BaseModelOutputWithPooling]:
output_hidden_states = (
output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
)
return_dict = return_dict if return_dict is not None else self.config.use_return_dict
if pixel_values is None and pixel_embeds is None:
raise ValueError('You have to specify pixel_values or pixel_embeds')
if pixel_embeds is not None:
hidden_states = pixel_embeds
else:
if len(pixel_values.shape) == 4:
hidden_states = self.embeddings(pixel_values)
else:
raise ValueError(f'wrong pixel_values size: {pixel_values.shape}')
encoder_outputs = self.encoder(
inputs_embeds=hidden_states,
output_hidden_states=output_hidden_states,
return_dict=return_dict,
)
        last_hidden_state = encoder_outputs[0]  # index access works for both tuple and ModelOutput returns
pooled_output = last_hidden_state[:, 0, :]
if not return_dict:
return (last_hidden_state, pooled_output) + encoder_outputs[1:]
return BaseModelOutputWithPooling(
last_hidden_state=last_hidden_state,
pooler_output=pooled_output,
hidden_states=encoder_outputs.hidden_states,
attentions=encoder_outputs.attentions,
)
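# Usage sketch for the vision tower alone (illustrative; config values follow the InternVisionConfig
# defaults rather than any specific released checkpoint):
#
#     config = InternVisionConfig()
#     model = InternVisionModel(config).eval()
#     with torch.no_grad():
#         out = model(pixel_values=torch.randn(1, 3, config.image_size, config.image_size))
#     out.last_hidden_state.shape  # (1, 1 + (image_size // patch_size) ** 2, hidden_size)
#     out.pooler_output.shape      # (1, hidden_size) -- the class-token embedding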
# --------------------------------------------------------
# InternVL
# Copyright (c) 2023 OpenGVLab
# Licensed under The MIT License [see LICENSE for details]
# --------------------------------------------------------
from functools import partial
from typing import Optional
import numpy as np
import torch
import torch.nn.functional as F
import torch.utils.checkpoint
from peft import LoraConfig, get_peft_model
from timm.models.layers import DropPath
from torch import nn
from transformers import GenerationConfig
from transformers.modeling_utils import PreTrainedModel
from transformers.utils import logging
from .configuration_internvl import InternVLConfig
from .modeling_intern_vit import (InternVisionEmbeddings, InternVisionEncoder,
InternVisionModel)
from .modeling_qllama import LlamaForCausalLM, _expand_mask, _make_causal_mask
try:
from .flash_attention import FlashAttention # v1/v2
except ImportError:
print('FlashAttention is not installed.')
logger = logging.get_logger(__name__)
class InternVLPreTrainedModel(PreTrainedModel):
"""
An abstract class to handle weights initialization and a simple interface for downloading and loading pretrained
models.
"""
config_class = InternVLConfig
base_model_prefix = 'internvl'
supports_gradient_checkpointing = True
_keys_to_ignore_on_load_missing = [
r'position_ids',
]
_no_split_modules = ['InternAttention', 'LlamaDecoderLayer', 'LlamaForCausalLM']
_skip_keys_device_placement = 'past_key_values'
_keep_in_fp32_modules = ['wo']
def _init_weights(self, module):
"""Initialize the weights"""
factor = self.config.initializer_range
if isinstance(module, nn.Conv2d) or isinstance(module, nn.Embedding) or isinstance(module, nn.Linear):
module.weight.data.normal_(mean=0.0, std=factor)
if hasattr(module, 'bias') and module.bias is not None:
module.bias.data.zero_()
if isinstance(module, InternVisionEmbeddings):
if hasattr(self.config, 'vision_config'):
factor = self.config.vision_config.initializer_range
nn.init.trunc_normal_(module.position_embedding, mean=0.0, std=factor)
nn.init.trunc_normal_(module.class_embedding, mean=0.0, std=factor)
elif isinstance(module, nn.LayerNorm):
module.bias.data.zero_()
module.weight.data.fill_(1.0)
elif isinstance(module, nn.Linear) and module.bias is not None:
module.bias.data.zero_()
def _set_gradient_checkpointing(self, module, value=False):
if isinstance(module, InternVisionModel):
module.gradient_checkpointing = value
if isinstance(module, InternVisionEncoder):
module.gradient_checkpointing = value
class CrossAttention(nn.Module):
def __init__(
self, dim, num_heads=8, qkv_bias=False, qk_scale=None, attn_drop=0.,
proj_drop=0., attn_head_dim=None, out_dim=None):
super().__init__()
if out_dim is None:
out_dim = dim
self.num_heads = num_heads
head_dim = dim // num_heads
if attn_head_dim is not None:
head_dim = attn_head_dim
all_head_dim = head_dim * self.num_heads
self.scale = qk_scale or head_dim ** -0.5
assert all_head_dim == dim
self.q = nn.Linear(dim, all_head_dim, bias=False)
self.k = nn.Linear(dim, all_head_dim, bias=False)
self.v = nn.Linear(dim, all_head_dim, bias=False)
if qkv_bias:
self.q_bias = nn.Parameter(torch.zeros(all_head_dim))
self.k_bias = nn.Parameter(torch.zeros(all_head_dim))
self.v_bias = nn.Parameter(torch.zeros(all_head_dim))
else:
self.q_bias = None
self.k_bias = None
self.v_bias = None
self.attn_drop = nn.Dropout(attn_drop)
self.proj = nn.Linear(all_head_dim, out_dim)
self.proj_drop = nn.Dropout(proj_drop)
def forward(self, x, k=None, v=None):
B, N, C = x.shape
N_k = k.shape[1]
N_v = v.shape[1]
q_bias, k_bias, v_bias = None, None, None
if self.q_bias is not None:
q_bias = self.q_bias
k_bias = self.k_bias
v_bias = self.v_bias
q = F.linear(input=x, weight=self.q.weight, bias=q_bias)
q = q.reshape(B, N, 1, self.num_heads, -1).permute(2, 0, 3, 1, 4).squeeze(0) # (B, N_head, N_q, dim)
k = F.linear(input=k, weight=self.k.weight, bias=k_bias)
k = k.reshape(B, N_k, 1, self.num_heads, -1).permute(2, 0, 3, 1, 4).squeeze(0)
v = F.linear(input=v, weight=self.v.weight, bias=v_bias)
v = v.reshape(B, N_v, 1, self.num_heads, -1).permute(2, 0, 3, 1, 4).squeeze(0)
q = q * self.scale
attn = (q @ k.transpose(-2, -1)) # (B, N_head, N_q, N_k)
attn = attn.softmax(dim=-1)
attn = self.attn_drop(attn)
x = (attn @ v).transpose(1, 2).reshape(B, N, -1)
x = self.proj(x)
x = self.proj_drop(x)
return x
class AttentiveBlock(nn.Module):
def __init__(self, dim, num_heads, qkv_bias=False, qk_scale=None, drop=0., attn_drop=0.,
drop_path=0., norm_layer=nn.LayerNorm, attn_head_dim=None, out_dim=None):
super().__init__()
self.norm1_q = norm_layer(dim)
self.norm1_k = norm_layer(dim)
self.norm1_v = norm_layer(dim)
self.cross_attn = CrossAttention(
dim, num_heads=num_heads, qkv_bias=qkv_bias, qk_scale=qk_scale, attn_drop=attn_drop,
proj_drop=drop, attn_head_dim=attn_head_dim, out_dim=out_dim)
self.drop_path = DropPath(drop_path) if drop_path > 0. else nn.Identity()
def forward(self, x_q, x_kv, pos_q, pos_k, bool_masked_pos, rel_pos_bias=None):
x_q = self.norm1_q(x_q + pos_q)
x_k = self.norm1_k(x_kv + pos_k)
x_v = self.norm1_v(x_kv)
x = self.cross_attn(x_q, k=x_k, v=x_v)
return x
class AttentionPoolingBlock(AttentiveBlock):
def forward(self, x):
x_q = x.mean(1, keepdim=True)
x_kv, pos_q, pos_k = x, 0, 0
x = super().forward(x_q, x_kv, pos_q, pos_k, bool_masked_pos=None, rel_pos_bias=None)
x = x.squeeze(1)
return x
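# Illustrative sketch: AttentionPoolingBlock pools a token sequence into a single vector by letting
# the mean token cross-attend to all tokens. With hypothetical sizes dim=1024, out_dim=512:
#
#     pool = AttentionPoolingBlock(dim=1024, num_heads=16, qkv_bias=True, qk_scale=None,
#                                  drop=0., attn_drop=0., norm_layer=nn.LayerNorm, out_dim=512)
#     pool(torch.randn(2, 257, 1024)).shape  # -> (2, 512)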
class InternVLModel(InternVLPreTrainedModel):
config_class = InternVLConfig
main_input_name = 'pixel_values'
def __init__(self, config: InternVLConfig):
super().__init__(config)
text_hidden_size = config.qllama_config.hidden_size
vision_hidden_size = config.vision_config.hidden_size
clip_embed_dim = config.clip_embed_dim
attn_pool_num_heads = config.attn_pool_num_heads
config.qllama_config.num_query_token = config.num_query_token
self.num_query_token = config.num_query_token
self.label_smoothing = config.label_smoothing
self.vision_model = InternVisionModel(config.vision_config) # frozen
self.qllama = LlamaForCausalLM(config.qllama_config) # frozen
self.query_tokens = nn.Parameter( # trainable
torch.zeros(1, config.num_query_token, text_hidden_size)
)
self.text_projection = nn.Parameter(torch.empty(text_hidden_size, clip_embed_dim)) # frozen
self.logit_scale = nn.Parameter(torch.ones([]) * np.log(1 / 0.07)) # trainable
self.clip_projector = AttentionPoolingBlock( # frozen
dim=vision_hidden_size, num_heads=attn_pool_num_heads, qkv_bias=True, qk_scale=None,
drop=0., attn_drop=0., norm_layer=partial(nn.LayerNorm, eps=1e-5), out_dim=clip_embed_dim)
self.clip_projector2 = AttentionPoolingBlock( # trainable
dim=text_hidden_size, num_heads=attn_pool_num_heads, qkv_bias=True, qk_scale=None,
drop=0., attn_drop=0., norm_layer=partial(nn.LayerNorm, eps=1e-5), out_dim=clip_embed_dim)
self.itm_head = nn.Linear(text_hidden_size, 2) # trainable
self.gradient_checkpointing = True
# Initialize weights and apply final processing
# self.post_init()
if config.use_backbone_lora:
self.wrap_backbone_lora(r=config.use_backbone_lora)
if config.use_qllama_lora:
self.wrap_qllama_lora(r=config.use_qllama_lora)
if config.force_image_size:
self.vision_model.resize_pos_embeddings(
old_size=config.vision_config.image_size,
new_size=config.force_image_size,
patch_size=config.vision_config.patch_size
)
def wrap_backbone_lora(self, r=128, lora_alpha=256, lora_dropout=0.05):
lora_config = LoraConfig(
r=r,
target_modules=['attn.qkv', 'attn.proj', 'mlp.fc1', 'mlp.fc2'],
lora_alpha=lora_alpha,
lora_dropout=lora_dropout,
)
self.vision_model = get_peft_model(self.vision_model, lora_config)
self.vision_model.print_trainable_parameters()
def wrap_qllama_lora(self, r=128, lora_alpha=256, lora_dropout=0.05):
lora_config = LoraConfig(
r=r,
target_modules=['self_attn.q_proj', 'self_attn.k_proj', 'self_attn.v_proj', 'self_attn.o_proj',
'mlp.gate_proj', 'mlp.down_proj', 'mlp.up_proj'],
lora_alpha=lora_alpha,
lora_dropout=lora_dropout,
)
self.qllama = get_peft_model(self.qllama, lora_config)
self.qllama.print_trainable_parameters()
def get_input_embeddings(self):
return self.qllama.get_input_embeddings()
def set_input_embeddings(self, value):
self.qllama.set_input_embeddings(value)
def set_output_embeddings(self, new_embeddings):
self.qllama.set_output_embeddings(new_embeddings)
def get_output_embeddings(self) -> nn.Module:
return self.qllama.get_output_embeddings()
@torch.no_grad()
def generate(
self,
pixel_values: torch.FloatTensor,
input_ids: torch.FloatTensor,
attention_mask: torch.LongTensor,
generation_config: Optional[GenerationConfig] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
**generate_kwargs,
) -> torch.LongTensor:
vision_outputs = self.vision_model(
pixel_values=pixel_values,
output_hidden_states=output_hidden_states,
return_dict=return_dict)
image_embeds = vision_outputs[0]
batch_size = image_embeds.shape[0]
input_embeds = self.get_input_embeddings()(input_ids)
query_tokens = self.query_tokens.repeat(batch_size, 1, 1)
input_embeds = torch.cat([query_tokens, input_embeds], dim=1)
image_attention_mask = torch.ones(query_tokens.size()[:-1], dtype=torch.long, device=image_embeds.device)
attention_mask = torch.cat([image_attention_mask, attention_mask], dim=1)
outputs = self.qllama.generate(
inputs_embeds=input_embeds,
attention_mask=attention_mask,
vision_hidden_states=image_embeds,
generation_config=generation_config,
use_zero_attention_mask=True,
**generate_kwargs,
)
return outputs
def get_text_features(
self,
input_ids: torch.Tensor,
attention_mask: torch.Tensor,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
):
r"""
Returns:
text_outputs (`CausalLMOutputWithPast`, or `tuple(torch.FloatTensor)` if `return_dict=False`):
The language model outputs. If `return_dict=True`, the output is a [`CausalLMOutputWithPast`] that
contains the language model logits, the past key values and the hidden states if
`output_hidden_states=True`.
```"""
output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
output_hidden_states = (
output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
)
return_dict = return_dict if return_dict is not None else self.config.use_return_dict
input_embeds = self.get_input_embeddings()(input_ids)
attention_mask = _expand_mask(attention_mask, input_embeds.dtype).to(
input_embeds.device) # [bsz, 1, tgt_seq_len, src_seq_len]
attention_mask += _make_causal_mask(
(attention_mask.shape[0], attention_mask.shape[2]),
input_embeds.dtype,
device=input_embeds.device
)
        # when qllama has been wrapped with LoRA, self.qllama.model is the underlying
        # LlamaForCausalLM and the base LlamaModel sits one level deeper
        if type(self.qllama.model) == LlamaForCausalLM:
outputs = self.qllama.model.model.forward_train(
inputs_embeds=input_embeds,
vision_hidden_states=None,
attention_mask=attention_mask,
output_attentions=output_attentions,
output_hidden_states=output_hidden_states,
return_dict=return_dict,
).last_hidden_state
else:
outputs = self.qllama.model.forward_train(
inputs_embeds=input_embeds,
vision_hidden_states=None,
attention_mask=attention_mask,
output_attentions=output_attentions,
output_hidden_states=output_hidden_states,
return_dict=return_dict,
).last_hidden_state
return outputs
def get_image_features(
self,
pixel_values: torch.FloatTensor,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
):
output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
output_hidden_states = (
output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
)
return_dict = return_dict if return_dict is not None else self.config.use_return_dict
vision_outputs = self.vision_model(
pixel_values=pixel_values,
output_hidden_states=output_hidden_states,
return_dict=return_dict)
image_embeds = vision_outputs[0]
backbone_embeds = image_embeds
batch_size = image_embeds.shape[0]
input_embeds = self.query_tokens.repeat(batch_size, 1, 1)
attention_mask = torch.ones(input_embeds.size()[:-1], dtype=torch.long, device=image_embeds.device)
attention_mask = _expand_mask(attention_mask, input_embeds.dtype).to(
input_embeds.device) # [bsz, 1, tgt_seq_len, src_seq_len]
        # see the note in get_text_features: unwrap one extra level when qllama is LoRA-wrapped
        if type(self.qllama.model) == LlamaForCausalLM:
outputs = self.qllama.model.model.forward_train(
inputs_embeds=input_embeds,
vision_hidden_states=image_embeds,
attention_mask=attention_mask,
output_attentions=output_attentions,
output_hidden_states=output_hidden_states,
return_dict=return_dict,
).last_hidden_state
else:
outputs = self.qllama.model.forward_train(
inputs_embeds=input_embeds,
vision_hidden_states=image_embeds,
attention_mask=attention_mask,
output_attentions=output_attentions,
output_hidden_states=output_hidden_states,
return_dict=return_dict,
).last_hidden_state
return backbone_embeds, outputs
def encode_image(self, image, mode):
if mode == 'InternVL-C':
vision_outputs = self.vision_model(
pixel_values=image,
output_hidden_states=False,
return_dict=True)
image_embeds = vision_outputs[0]
image_embeds = self.clip_projector(image_embeds)
elif mode == 'InternVL-G':
backbone_embeds, image_embeds = self.get_image_features(
pixel_values=image,
output_hidden_states=False,
return_dict=True,
)
backbone_embeds = self.clip_projector(backbone_embeds)
image_embeds = self.clip_projector2(image_embeds)
# ensemble
backbone_embeds = backbone_embeds / backbone_embeds.norm(dim=1, keepdim=True)
image_embeds = image_embeds / image_embeds.norm(dim=1, keepdim=True)
image_embeds = image_embeds + backbone_embeds
else:
raise NotImplementedError
return image_embeds
def encode_text(self, text):
attention_mask = text > 0
text_embeds = self.get_text_features(
input_ids=text,
attention_mask=attention_mask,
output_attentions=False,
output_hidden_states=False,
return_dict=True,
)
text_embeds = text_embeds[torch.arange(text_embeds.shape[0]), attention_mask.sum(1) - 1]
text_embeds = text_embeds @ self.text_projection
return text_embeds
def forward(self, image, text, mode='InternVL-C'):
assert mode in ['InternVL-C', 'InternVL-G'], 'mode must be InternVL-C or InternVL-G'
image_features = self.encode_image(image, mode)
text_features = self.encode_text(text)
# normalized features
image_features = image_features / image_features.norm(dim=1, keepdim=True)
text_features = text_features / text_features.norm(dim=1, keepdim=True)
# cosine similarity as logits
logit_scale = self.logit_scale.exp()
logits_per_image = logit_scale * image_features @ text_features.t()
logits_per_text = logits_per_image.t()
return logits_per_image, logits_per_text
class InternVL_C(InternVLModel):
def encode_image(self, image):
vision_outputs = self.vision_model(
pixel_values=image,
output_hidden_states=False,
return_dict=True)
image_embeds = vision_outputs[0]
image_embeds = self.clip_projector(image_embeds)
return image_embeds
def encode_text(self, text):
attention_mask = text > 0
text_embeds = self.get_text_features(
input_ids=text,
attention_mask=attention_mask,
output_attentions=False,
output_hidden_states=False,
return_dict=True,
)
text_embeds = text_embeds[torch.arange(text_embeds.shape[0]), attention_mask.sum(1) - 1]
text_embeds = text_embeds @ self.text_projection
return text_embeds
def forward(self, image, text):
image_features = self.encode_image(image)
text_features = self.encode_text(text)
# normalized features
image_features = image_features / image_features.norm(dim=1, keepdim=True)
text_features = text_features / text_features.norm(dim=1, keepdim=True)
# cosine similarity as logits
logit_scale = self.logit_scale.exp()
logits_per_image = logit_scale * image_features @ text_features.t()
logits_per_text = logits_per_image.t()
return logits_per_image, logits_per_text
class InternVL_G(InternVLModel):
def encode_image(self, image):
backbone_embeds, image_embeds = self.get_image_features(
pixel_values=image,
output_hidden_states=False,
return_dict=True,
)
backbone_embeds = self.clip_projector(backbone_embeds)
image_embeds = self.clip_projector2(image_embeds)
# ensemble
backbone_embeds = backbone_embeds / backbone_embeds.norm(dim=1, keepdim=True)
image_embeds = image_embeds / image_embeds.norm(dim=1, keepdim=True)
image_embeds = image_embeds + backbone_embeds
return image_embeds
def encode_text(self, text):
attention_mask = text > 0
text_embeds = self.get_text_features(
input_ids=text,
attention_mask=attention_mask,
output_attentions=False,
output_hidden_states=False,
return_dict=True,
)
text_embeds = text_embeds[torch.arange(text_embeds.shape[0]), attention_mask.sum(1) - 1]
text_embeds = text_embeds @ self.text_projection
return text_embeds
def forward(self, image, text):
image_features = self.encode_image(image)
text_features = self.encode_text(text)
# normalized features
image_features = image_features / image_features.norm(dim=1, keepdim=True)
text_features = text_features / text_features.norm(dim=1, keepdim=True)
# cosine similarity as logits
logit_scale = self.logit_scale.exp()
logits_per_image = logit_scale * image_features @ text_features.t()
logits_per_text = logits_per_image.t()
return logits_per_image, logits_per_text
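# Usage sketch (illustrative; tokenization and image preprocessing are handled elsewhere in this
# repo, and `pixel_values` / `input_ids` are placeholder names). The retrieval heads behave like
# CLIP: image and text features are L2-normalised and combined into a temperature-scaled
# similarity matrix:
#
#     model = InternVLModel(config).eval()                        # config: InternVLConfig
#     logits_per_image, logits_per_text = model(pixel_values, input_ids, mode='InternVL-C')
#     probs = logits_per_image.softmax(dim=-1)                    # image-to-text retrieval scores
#
# 'InternVL-C' scores images with the frozen clip_projector only, while 'InternVL-G' additionally
# runs the query tokens through QLLaMA and adds the two normalised embeddings as an ensemble.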
# Copyright 2022 EleutherAI and the HuggingFace Inc. team. All rights reserved.
#
# This code is based on EleutherAI's GPT-NeoX library and the GPT-NeoX
# and OPT implementations in this library. It has been modified from its
# original forms to accommodate minor architectural differences compared
# to GPT-NeoX and OPT used by the Meta AI team that trained the model.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
""" PyTorch QLLaMA model."""
import math
from typing import List, Optional, Tuple, Union
import torch
import torch.utils.checkpoint
from torch import nn
from torch.nn import CrossEntropyLoss
from transformers import LlamaConfig
from transformers.activations import ACT2FN
from transformers.modeling_outputs import (BaseModelOutputWithPast,
CausalLMOutputWithPast)
from transformers.modeling_utils import PreTrainedModel
from transformers.utils import (add_start_docstrings,
add_start_docstrings_to_model_forward, logging,
replace_return_docstrings)
logger = logging.get_logger(__name__)
_CONFIG_FOR_DOC = 'LlamaConfig'
# Copied from transformers.models.bart.modeling_bart._make_causal_mask
def _make_causal_mask(
input_ids_shape: torch.Size, dtype: torch.dtype, device: torch.device, past_key_values_length: int = 0
):
"""
    Make causal mask used for uni-directional (causal) self-attention.
"""
bsz, tgt_len = input_ids_shape
mask = torch.full((tgt_len, tgt_len), torch.finfo(dtype).min, device=device)
mask_cond = torch.arange(mask.size(-1), device=device)
mask.masked_fill_(mask_cond < (mask_cond + 1).view(mask.size(-1), 1), 0)
mask = mask.to(dtype)
if past_key_values_length > 0:
mask = torch.cat([torch.zeros(tgt_len, past_key_values_length, dtype=dtype, device=device), mask], dim=-1)
return mask[None, None, :, :].expand(bsz, 1, tgt_len, tgt_len + past_key_values_length)
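# Illustrative example: for tgt_len=3 and no cached key/values the mask (masked entries shown as
# -inf; the code uses torch.finfo(dtype).min) looks like
#
#     [[0, -inf, -inf],
#      [0,    0, -inf],
#      [0,    0,    0]]
#
# i.e. position i may only attend to positions <= i; cached positions (past_key_values_length)
# are prepended as zero columns so they stay visible to every query.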
# Copied from transformers.models.bart.modeling_bart._expand_mask
def _expand_mask(mask: torch.Tensor, dtype: torch.dtype, tgt_len: Optional[int] = None):
"""
Expands attention_mask from `[bsz, seq_len]` to `[bsz, 1, tgt_seq_len, src_seq_len]`.
"""
bsz, src_len = mask.size()
tgt_len = tgt_len if tgt_len is not None else src_len
expanded_mask = mask[:, None, None, :].expand(bsz, 1, tgt_len, src_len).to(dtype)
inverted_mask = 1.0 - expanded_mask
return inverted_mask.masked_fill(inverted_mask.to(torch.bool), torch.finfo(dtype).min)
class LlamaRMSNorm(nn.Module):
def __init__(self, hidden_size, eps=1e-6):
"""
LlamaRMSNorm is equivalent to T5LayerNorm
"""
super().__init__()
self.weight = nn.Parameter(torch.ones(hidden_size))
self.variance_epsilon = eps
def forward(self, hidden_states):
variance = hidden_states.to(torch.float32).pow(2).mean(-1, keepdim=True)
hidden_states = hidden_states * torch.rsqrt(variance + self.variance_epsilon)
# convert into half-precision if necessary
if self.weight.dtype in [torch.float16, torch.bfloat16]:
hidden_states = hidden_states.to(self.weight.dtype)
return self.weight * hidden_states
try:
from functools import partial
from apex.normalization import FusedRMSNorm
LlamaRMSNorm = partial(FusedRMSNorm, eps=1e-6) # noqa
print('Discovered apex.normalization.FusedRMSNorm - will use it instead of LlamaRMSNorm')
except ImportError:
# using the normal LlamaRMSNorm
pass
except Exception:
print('discovered apex but it failed to load, falling back to LlamaRMSNorm')
pass
class LlamaRotaryEmbedding(torch.nn.Module):
def __init__(self, dim, max_position_embeddings=2048, base=10000, device=None):
super().__init__()
inv_freq = 1.0 / (base ** (torch.arange(0, dim, 2).float().to(device) / dim))
self.register_buffer('inv_freq', inv_freq)
# Build here to make `torch.jit.trace` work.
self.max_seq_len_cached = max_position_embeddings
t = torch.arange(self.max_seq_len_cached, device=self.inv_freq.device, dtype=self.inv_freq.dtype)
freqs = torch.einsum('i,j->ij', t, self.inv_freq)
# Different from paper, but it uses a different permutation in order to obtain the same calculation
emb = torch.cat((freqs, freqs), dim=-1)
self.register_buffer('cos_cached', emb.cos()[None, None, :, :], persistent=False)
self.register_buffer('sin_cached', emb.sin()[None, None, :, :], persistent=False)
def forward(self, x, seq_len=None):
# x: [bs, num_attention_heads, seq_len, head_size]
# This `if` block is unlikely to be run after we build sin/cos in `__init__`. Keep the logic here just in case.
if seq_len > self.max_seq_len_cached:
self.max_seq_len_cached = seq_len
t = torch.arange(self.max_seq_len_cached, device=x.device, dtype=self.inv_freq.dtype)
freqs = torch.einsum('i,j->ij', t, self.inv_freq)
# Different from paper, but it uses a different permutation in order to obtain the same calculation
emb = torch.cat((freqs, freqs), dim=-1).to(x.device)
self.register_buffer('cos_cached', emb.cos()[None, None, :, :], persistent=False)
self.register_buffer('sin_cached', emb.sin()[None, None, :, :], persistent=False)
return (
self.cos_cached[:, :, :seq_len, ...].to(dtype=x.dtype),
self.sin_cached[:, :, :seq_len, ...].to(dtype=x.dtype),
)
class FixedLlamaRotaryEmbedding(torch.nn.Module):
def __init__(self, dim, max_position_embeddings=2048, base=10000, device=None):
super().__init__()
self.dim = dim
self.max_position_embeddings = max_position_embeddings
self.base = base
self.inv_freq = 1.0 / (self.base ** (torch.arange(0, self.dim, 2).float().to(device) / self.dim))
# Build here to make `torch.jit.trace` work.
self._set_cos_sin_cache(
seq_len=max_position_embeddings, device=self.inv_freq.device, dtype=torch.get_default_dtype()
)
def _set_cos_sin_cache(self, seq_len, device, dtype):
self.max_seq_len_cached = seq_len
t = torch.arange(self.max_seq_len_cached, device=self.inv_freq.device, dtype=torch.float32)
freqs = torch.outer(t, self.inv_freq)
# Different from paper, but it uses a different permutation in order to obtain the same calculation
emb = torch.cat((freqs, freqs), dim=-1)
self.register_buffer('cos_cached', emb.cos()[None, None, :, :], persistent=False)
self.register_buffer('sin_cached', emb.sin()[None, None, :, :], persistent=False)
def forward(self, x, seq_len=None):
# x: [bs, num_attention_heads, seq_len, head_size]
if seq_len > self.max_seq_len_cached:
self._set_cos_sin_cache(seq_len=seq_len, device=x.device, dtype=x.dtype)
return (
self.cos_cached[:, :, :seq_len, ...].to(dtype=x.dtype),
self.sin_cached[:, :, :seq_len, ...].to(dtype=x.dtype),
)
LlamaRotaryEmbedding = FixedLlamaRotaryEmbedding
def rotate_half(x):
"""Rotates half the hidden dims of the input."""
x1 = x[..., : x.shape[-1] // 2]
x2 = x[..., x.shape[-1] // 2:]
return torch.cat((-x2, x1), dim=-1)
def apply_rotary_pos_emb(q, k, cos, sin, position_ids):
gather_indices = position_ids[:, None, :, None] # [bs, 1, seq_len, 1]
gather_indices = gather_indices.repeat(1, cos.shape[1], 1, cos.shape[3])
cos = torch.gather(cos.repeat(gather_indices.shape[0], 1, 1, 1), 2, gather_indices)
sin = torch.gather(sin.repeat(gather_indices.shape[0], 1, 1, 1), 2, gather_indices)
q_embed = (q * cos) + (rotate_half(q) * sin)
k_embed = (k * cos) + (rotate_half(k) * sin)
return q_embed, k_embed
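# Illustrative sketch: cos/sin come from LlamaRotaryEmbedding as (1, 1, seq_len, head_dim) tensors
# and are gathered per position before being applied to q and k, e.g.
#
#     rope = LlamaRotaryEmbedding(dim=128)
#     q = k = torch.randn(2, 32, 16, 128)                      # (bsz, heads, seq_len, head_dim)
#     cos, sin = rope(q, seq_len=16)
#     position_ids = torch.arange(16).unsqueeze(0).expand(2, -1)
#     q_rot, k_rot = apply_rotary_pos_emb(q, k, cos, sin, position_ids)  # same shapes as q, k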
class LlamaMLP(nn.Module):
def __init__(
self,
hidden_size: int,
intermediate_size: int,
hidden_act: str,
):
super().__init__()
self.gate_proj = nn.Linear(hidden_size, intermediate_size, bias=False)
self.down_proj = nn.Linear(intermediate_size, hidden_size, bias=False)
self.up_proj = nn.Linear(hidden_size, intermediate_size, bias=False)
self.act_fn = ACT2FN[hidden_act]
def forward(self, x):
return self.down_proj(self.act_fn(self.gate_proj(x)) * self.up_proj(x))
class LlamaAttention(nn.Module):
"""Multi-headed attention from 'Attention Is All You Need' paper"""
def __init__(self, config: LlamaConfig):
super().__init__()
self.config = config
self.hidden_size = config.hidden_size
self.num_heads = config.num_attention_heads
self.head_dim = self.hidden_size // self.num_heads
self.max_position_embeddings = config.max_position_embeddings
if (self.head_dim * self.num_heads) != self.hidden_size:
raise ValueError(
f'hidden_size must be divisible by num_heads (got `hidden_size`: {self.hidden_size}'
f' and `num_heads`: {self.num_heads}).'
)
self.q_proj = nn.Linear(self.hidden_size, self.num_heads * self.head_dim, bias=False)
self.k_proj = nn.Linear(self.hidden_size, self.num_heads * self.head_dim, bias=False)
self.v_proj = nn.Linear(self.hidden_size, self.num_heads * self.head_dim, bias=False)
self.o_proj = nn.Linear(self.num_heads * self.head_dim, self.hidden_size, bias=False)
self.rotary_emb = LlamaRotaryEmbedding(self.head_dim, max_position_embeddings=self.max_position_embeddings)
def _shape(self, tensor: torch.Tensor, seq_len: int, bsz: int):
return tensor.view(bsz, seq_len, self.num_heads, self.head_dim).transpose(1, 2).contiguous()
def forward(
self,
hidden_states: torch.Tensor,
attention_mask: Optional[torch.Tensor] = None,
position_ids: Optional[torch.LongTensor] = None,
past_key_value: Optional[Tuple[torch.Tensor]] = None,
output_attentions: bool = False,
use_cache: bool = False,
) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]:
bsz, q_len, _ = hidden_states.size()
query_states = self.q_proj(hidden_states).view(bsz, q_len, self.num_heads, self.head_dim).transpose(1, 2)
key_states = self.k_proj(hidden_states).view(bsz, q_len, self.num_heads, self.head_dim).transpose(1, 2)
value_states = self.v_proj(hidden_states).view(bsz, q_len, self.num_heads, self.head_dim).transpose(1, 2)
kv_seq_len = key_states.shape[-2]
if past_key_value is not None:
kv_seq_len += past_key_value[0].shape[-2]
cos, sin = self.rotary_emb(value_states, seq_len=kv_seq_len)
query_states, key_states = apply_rotary_pos_emb(query_states, key_states, cos, sin, position_ids)
# [bsz, nh, t, hd]
if past_key_value is not None:
# reuse k, v, self_attention
key_states = torch.cat([past_key_value[0], key_states], dim=2)
value_states = torch.cat([past_key_value[1], value_states], dim=2)
past_key_value = (key_states, value_states) if use_cache else None
attn_weights = torch.matmul(query_states, key_states.transpose(2, 3)) / math.sqrt(self.head_dim)
if attn_weights.size() != (bsz, self.num_heads, q_len, kv_seq_len):
raise ValueError(
                f'Attention weights should be of size {(bsz, self.num_heads, q_len, kv_seq_len)}, but is'
f' {attn_weights.size()}'
)
if attention_mask is not None:
if attention_mask.size() != (bsz, 1, q_len, kv_seq_len):
raise ValueError(
f'Attention mask should be of size {(bsz, 1, q_len, kv_seq_len)}, but is {attention_mask.size()}'
)
attn_weights = attn_weights + attention_mask
attn_weights = torch.max(attn_weights, torch.tensor(torch.finfo(attn_weights.dtype).min))
# upcast attention to fp32
attn_weights = nn.functional.softmax(attn_weights, dim=-1, dtype=torch.float32).to(query_states.dtype)
attn_output = torch.matmul(attn_weights, value_states)
if attn_output.size() != (bsz, self.num_heads, q_len, self.head_dim):
raise ValueError(
f'`attn_output` should be of size {(bsz, self.num_heads, q_len, self.head_dim)}, but is'
f' {attn_output.size()}'
)
attn_output = attn_output.transpose(1, 2)
attn_output = attn_output.reshape(bsz, q_len, self.hidden_size)
attn_output = self.o_proj(attn_output)
if not output_attentions:
attn_weights = None
return attn_output, attn_weights, past_key_value
class LlamaCrossAttention(nn.Module):
"""Multi-headed attention from 'Attention Is All You Need' paper"""
def __init__(self, config: LlamaConfig):
super().__init__()
self.config = config
self.hidden_size = config.hidden_size
self.num_heads = config.num_attention_heads
self.head_dim = self.hidden_size // self.num_heads
self.max_position_embeddings = config.max_position_embeddings
self.vision_hidden_size = 3200
if (self.head_dim * self.num_heads) != self.hidden_size:
raise ValueError(
f'hidden_size must be divisible by num_heads (got `hidden_size`: {self.hidden_size}'
f' and `num_heads`: {self.num_heads}).'
)
self.q_proj = nn.Linear(self.hidden_size, self.num_heads * self.head_dim, bias=False)
self.o_proj = nn.Linear(self.num_heads * self.head_dim, self.hidden_size, bias=False)
self.norm1 = LlamaRMSNorm(config.hidden_size, eps=config.rms_norm_eps)
self.k_proj = nn.Linear(self.vision_hidden_size, self.num_heads * self.head_dim, bias=False)
self.v_proj = nn.Linear(self.vision_hidden_size, self.num_heads * self.head_dim, bias=False)
self.norm2 = LlamaRMSNorm(self.vision_hidden_size, eps=config.rms_norm_eps)
def _shape(self, tensor: torch.Tensor, seq_len: int, bsz: int):
return tensor.view(bsz, seq_len, self.num_heads, self.head_dim).transpose(1, 2).contiguous()
def forward(
self,
hidden_states: torch.Tensor,
vision_hidden_states: torch.Tensor,
repeat_time: int = 1,
attention_mask: Optional[torch.Tensor] = None,
past_key_value: Optional[Tuple[torch.Tensor]] = None,
output_attentions: bool = False,
use_cache: bool = False,
) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]:
hidden_states = self.norm1(hidden_states)
bsz, q_len, _ = hidden_states.size()
query_states = self.q_proj(hidden_states).view(bsz, q_len, self.num_heads, self.head_dim).transpose(1, 2)
vision_hidden_states = self.norm2(vision_hidden_states)
bs_v, kv_len, _ = vision_hidden_states.size()
key_states = self.k_proj(vision_hidden_states).view(
bs_v, kv_len, self.num_heads, self.head_dim).transpose(1, 2)
value_states = self.v_proj(vision_hidden_states).view(
bs_v, kv_len, self.num_heads, self.head_dim).transpose(1, 2)
key_states = key_states.repeat(repeat_time, 1, 1, 1)
value_states = value_states.repeat(repeat_time, 1, 1, 1)
kv_seq_len = key_states.shape[-2]
if past_key_value is not None:
kv_seq_len += past_key_value[0].shape[-2]
if past_key_value is not None:
# reuse k, v, self_attention
key_states = torch.cat([past_key_value[0], key_states], dim=2)
value_states = torch.cat([past_key_value[1], value_states], dim=2)
past_key_value = (key_states, value_states) if use_cache else None
attn_weights = torch.matmul(query_states, key_states.transpose(2, 3)) / math.sqrt(self.head_dim)
if attn_weights.size() != (bsz, self.num_heads, q_len, kv_seq_len):
raise ValueError(
                f'Attention weights should be of size {(bsz, self.num_heads, q_len, kv_seq_len)}, but is'
f' {attn_weights.size()}'
)
if attention_mask is not None:
if attention_mask.size() != (bsz, 1, q_len, kv_seq_len):
raise ValueError(
f'Attention mask should be of size {(bsz, 1, q_len, kv_seq_len)}, but is {attention_mask.size()}'
)
attn_weights = attn_weights + attention_mask
attn_weights = torch.max(attn_weights, torch.tensor(torch.finfo(attn_weights.dtype).min))
# upcast attention to fp32
attn_weights = nn.functional.softmax(attn_weights, dim=-1, dtype=torch.float32).to(query_states.dtype)
attn_output = torch.matmul(attn_weights, value_states)
if attn_output.size() != (bsz, self.num_heads, q_len, self.head_dim):
raise ValueError(
f'`attn_output` should be of size {(bsz, self.num_heads, q_len, self.head_dim)}, but is'
f' {attn_output.size()}'
)
attn_output = attn_output.transpose(1, 2)
attn_output = attn_output.reshape(bsz, q_len, self.hidden_size)
attn_output = self.o_proj(attn_output)
if not output_attentions:
attn_weights = None
return attn_output, attn_weights, past_key_value
class LlamaDecoderLayer(nn.Module):
def __init__(self, config: LlamaConfig, use_cross_attn: bool):
super().__init__()
self.hidden_size = config.hidden_size
self.self_attn = LlamaAttention(config=config)
self.cross_attn = LlamaCrossAttention(config=config) if use_cross_attn else None
self.mlp = LlamaMLP(
hidden_size=self.hidden_size,
intermediate_size=config.intermediate_size,
hidden_act=config.hidden_act,
)
        self.num_query_token = 96  # hard-coded number of learnable query tokens prepended to the text sequence
self.input_layernorm = LlamaRMSNorm(config.hidden_size, eps=config.rms_norm_eps)
self.post_attention_layernorm = LlamaRMSNorm(config.hidden_size, eps=config.rms_norm_eps)
def forward(
self,
hidden_states: torch.Tensor,
vision_hidden_states: torch.Tensor,
attention_mask: Optional[torch.Tensor] = None,
position_ids: Optional[torch.LongTensor] = None,
past_key_value: Optional[Tuple[torch.Tensor]] = None,
output_attentions: Optional[bool] = False,
use_cache: Optional[bool] = False,
repeat_time: int = 1,
) -> Tuple[torch.FloatTensor, Optional[Tuple[torch.FloatTensor, torch.FloatTensor]]]:
"""
Args:
hidden_states (`torch.FloatTensor`): input to the layer of shape `(batch, seq_len, embed_dim)`
attention_mask (`torch.FloatTensor`, *optional*): attention mask of size
`(batch, 1, tgt_len, src_len)` where padding elements are indicated by very large negative values.
output_attentions (`bool`, *optional*):
Whether or not to return the attentions tensors of all attention layers. See `attentions` under
returned tensors for more detail.
use_cache (`bool`, *optional*):
If set to `True`, `past_key_values` key value states are returned and can be used to speed up decoding
(see `past_key_values`).
            past_key_value (`Tuple(torch.FloatTensor)`, *optional*): cached past key and value projection states
            vision_hidden_states (`torch.FloatTensor`, *optional*): visual features that the query tokens cross-attend to.
            repeat_time (`int`, *optional*, defaults to 1): number of times the vision keys/values are repeated along the batch dimension.
        """
residual = hidden_states
hidden_states = self.input_layernorm(hidden_states)
# Self Attention
hidden_states, self_attn_weights, present_key_value = self.self_attn(
hidden_states=hidden_states,
attention_mask=attention_mask,
position_ids=position_ids,
past_key_value=past_key_value,
output_attentions=output_attentions,
use_cache=use_cache,
)
hidden_states = residual + hidden_states
        # during cached generation the incremental hidden_states has sequence length 1 (shorter than
        # num_query_token), so the query-token cross attention is skipped
if self.cross_attn is not None and hidden_states.size(1) >= self.num_query_token \
and vision_hidden_states is not None:
query_feats = hidden_states[:, :self.num_query_token, :]
text_feats = hidden_states[:, self.num_query_token:, :]
residual = query_feats
query_feats, _, _ = self.cross_attn(
hidden_states=query_feats,
vision_hidden_states=vision_hidden_states,
attention_mask=None, # not use attention mask in cross attention
past_key_value=past_key_value,
output_attentions=output_attentions,
use_cache=use_cache,
repeat_time=repeat_time,
)
query_feats = residual + query_feats
hidden_states = torch.cat([query_feats, text_feats], dim=1)
# Fully Connected
residual = hidden_states
hidden_states = self.post_attention_layernorm(hidden_states)
hidden_states = self.mlp(hidden_states)
hidden_states = residual + hidden_states
outputs = (hidden_states,)
if output_attentions:
outputs += (self_attn_weights,)
if use_cache:
outputs += (present_key_value,)
return outputs
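# Illustrative note: in the cross-attention layers the sequence is laid out as
# [query tokens | text tokens]. Only the first num_query_token positions attend to the vision
# features; the text part passes through untouched before the two halves are re-concatenated:
#
#     hidden_states = torch.cat([query_feats, text_feats], dim=1)  # (bsz, 96 + text_len, hidden_size)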
LLAMA_START_DOCSTRING = r"""
This model inherits from [`PreTrainedModel`]. Check the superclass documentation for the generic methods the
    library implements for all its models (such as downloading or saving, resizing the input embeddings, pruning heads
etc.)
This model is also a PyTorch [torch.nn.Module](https://pytorch.org/docs/stable/nn.html#torch.nn.Module) subclass.
Use it as a regular PyTorch Module and refer to the PyTorch documentation for all matter related to general usage
and behavior.
Parameters:
config ([`LlamaConfig`]):
Model configuration class with all the parameters of the model. Initializing with a config file does not
load the weights associated with the model, only the configuration. Check out the
[`~PreTrainedModel.from_pretrained`] method to load the model weights.
"""
@add_start_docstrings(
'The bare LLaMA Model outputting raw hidden-states without any specific head on top.',
LLAMA_START_DOCSTRING,
)
class LlamaPreTrainedModel(PreTrainedModel):
config_class = LlamaConfig
base_model_prefix = 'model'
supports_gradient_checkpointing = True
_no_split_modules = ['LlamaDecoderLayer']
_keys_to_ignore_on_load_unexpected = [r'decoder\.version']
def _init_weights(self, module):
std = self.config.initializer_range
if isinstance(module, nn.Linear):
module.weight.data.normal_(mean=0.0, std=std)
if module.bias is not None:
module.bias.data.zero_()
elif isinstance(module, nn.Embedding):
module.weight.data.normal_(mean=0.0, std=std)
if module.padding_idx is not None:
module.weight.data[module.padding_idx].zero_()
def _set_gradient_checkpointing(self, module, value=False):
if isinstance(module, LlamaModel):
module.gradient_checkpointing = value
if isinstance(module, LlamaDecoderLayer):
module.gradient_checkpointing = value
LLAMA_INPUTS_DOCSTRING = r"""
Args:
input_ids (`torch.LongTensor` of shape `(batch_size, sequence_length)`):
Indices of input sequence tokens in the vocabulary. Padding will be ignored by default should you provide
it.
Indices can be obtained using [`AutoTokenizer`]. See [`PreTrainedTokenizer.encode`] and
[`PreTrainedTokenizer.__call__`] for details.
[What are input IDs?](../glossary#input-ids)
attention_mask (`torch.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
Mask to avoid performing attention on padding token indices. Mask values selected in `[0, 1]`:
- 1 for tokens that are **not masked**,
- 0 for tokens that are **masked**.
[What are attention masks?](../glossary#attention-mask)
Indices can be obtained using [`AutoTokenizer`]. See [`PreTrainedTokenizer.encode`] and
[`PreTrainedTokenizer.__call__`] for details.
If `past_key_values` is used, optionally only the last `decoder_input_ids` have to be input (see
`past_key_values`).
If you want to change padding behavior, you should read [`modeling_opt._prepare_decoder_attention_mask`]
and modify to your needs. See diagram 1 in [the paper](https://arxiv.org/abs/1910.13461) for more
information on the default strategy.
- 1 indicates the head is **not masked**,
- 0 indicates the head is **masked**.
position_ids (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
Indices of positions of each input sequence tokens in the position embeddings. Selected in the range `[0,
config.n_positions - 1]`.
[What are position IDs?](../glossary#position-ids)
past_key_values (`tuple(tuple(torch.FloatTensor))`, *optional*, returned when `use_cache=True` is passed or when `config.use_cache=True`):
Tuple of `tuple(torch.FloatTensor)` of length `config.n_layers`, with each tuple having 2 tensors of shape
`(batch_size, num_heads, sequence_length, embed_size_per_head)`) and 2 additional tensors of shape
`(batch_size, num_heads, encoder_sequence_length, embed_size_per_head)`.
Contains pre-computed hidden-states (key and values in the self-attention blocks and in the cross-attention
blocks) that can be used (see `past_key_values` input) to speed up sequential decoding.
If `past_key_values` are used, the user can optionally input only the last `decoder_input_ids` (those that
don't have their past key value states given to this model) of shape `(batch_size, 1)` instead of all
`decoder_input_ids` of shape `(batch_size, sequence_length)`.
inputs_embeds (`torch.FloatTensor` of shape `(batch_size, sequence_length, hidden_size)`, *optional*):
Optionally, instead of passing `input_ids` you can choose to directly pass an embedded representation. This
is useful if you want more control over how to convert `input_ids` indices into associated vectors than the
model's internal embedding lookup matrix.
use_cache (`bool`, *optional*):
If set to `True`, `past_key_values` key value states are returned and can be used to speed up decoding (see
`past_key_values`).
output_attentions (`bool`, *optional*):
Whether or not to return the attentions tensors of all attention layers. See `attentions` under returned
tensors for more detail.
output_hidden_states (`bool`, *optional*):
Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for
more detail.
return_dict (`bool`, *optional*):
Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
"""
@add_start_docstrings(
'The bare LLaMA Model outputting raw hidden-states without any specific head on top.',
LLAMA_START_DOCSTRING,
)
class LlamaModel(LlamaPreTrainedModel):
"""
Transformer decoder consisting of *config.num_hidden_layers* layers. Each layer is a [`LlamaDecoderLayer`]
Args:
config: LlamaConfig
"""
def __init__(self, config: LlamaConfig):
super().__init__(config)
self.padding_idx = config.pad_token_id
self.vocab_size = config.vocab_size
self.cross_attention_frequency = config.cross_attention_frequency
self.num_query_token = config.num_query_token
self.embed_tokens = nn.Embedding(config.vocab_size, config.hidden_size, self.padding_idx)
use_cross_attn = [idx % self.cross_attention_frequency == 0 for idx in range(config.num_hidden_layers)]
self.layers = nn.ModuleList(
[LlamaDecoderLayer(config, use_cross_attn[idx]) for idx in range(config.num_hidden_layers)])
self.norm = LlamaRMSNorm(config.hidden_size, eps=config.rms_norm_eps)
self.gradient_checkpointing = False
# Initialize weights and apply final processing
# self.post_init()
def get_input_embeddings(self):
return self.embed_tokens
def set_input_embeddings(self, value):
self.embed_tokens = value
# Copied from transformers.models.bart.modeling_bart.BartDecoder._prepare_decoder_attention_mask
def _prepare_decoder_attention_mask(self, attention_mask, input_shape, inputs_embeds, past_key_values_length):
# create causal mask
# [bsz, seq_len] -> [bsz, 1, tgt_seq_len, src_seq_len]
combined_attention_mask = None
if input_shape[-1] > 1:
combined_attention_mask = _make_causal_mask(
input_shape,
inputs_embeds.dtype,
device=inputs_embeds.device,
past_key_values_length=past_key_values_length,
)
if attention_mask is not None:
# [bsz, seq_len] -> [bsz, 1, tgt_seq_len, src_seq_len]
expanded_attn_mask = _expand_mask(attention_mask, inputs_embeds.dtype, tgt_len=input_shape[-1]).to(
inputs_embeds.device
)
combined_attention_mask = (
expanded_attn_mask if combined_attention_mask is None else expanded_attn_mask + combined_attention_mask
)
return combined_attention_mask
@add_start_docstrings_to_model_forward(LLAMA_INPUTS_DOCSTRING)
def forward(
self,
input_ids: torch.LongTensor = None,
attention_mask: Optional[torch.Tensor] = None,
position_ids: Optional[torch.LongTensor] = None,
past_key_values: Optional[List[torch.FloatTensor]] = None,
inputs_embeds: Optional[torch.FloatTensor] = None,
vision_hidden_states: Optional[torch.FloatTensor] = None,
repeat_time: Optional[int] = 1,
use_cache: Optional[bool] = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
use_zero_attention_mask: Optional[bool] = None,
return_dict: Optional[bool] = None,
) -> Union[Tuple, BaseModelOutputWithPast]:
output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
output_hidden_states = (
output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
)
use_cache = use_cache if use_cache is not None else self.config.use_cache
return_dict = return_dict if return_dict is not None else self.config.use_return_dict
# retrieve input_ids and inputs_embeds
if input_ids is not None and inputs_embeds is not None:
raise ValueError('You cannot specify both decoder_input_ids and decoder_inputs_embeds at the same time')
elif input_ids is not None:
batch_size, seq_length = input_ids.shape
elif inputs_embeds is not None:
batch_size, seq_length, _ = inputs_embeds.shape
else:
raise ValueError('You have to specify either decoder_input_ids or decoder_inputs_embeds')
seq_length_with_past = seq_length
past_key_values_length = 0
if past_key_values is not None:
past_key_values_length = past_key_values[0][0].shape[2]
seq_length_with_past = seq_length_with_past + past_key_values_length
if position_ids is None:
device = input_ids.device if input_ids is not None else inputs_embeds.device
position_ids = torch.arange(
past_key_values_length, seq_length + past_key_values_length, dtype=torch.long, device=device
)
position_ids = position_ids.unsqueeze(0).view(-1, seq_length)
else:
position_ids = position_ids.view(-1, seq_length).long()
if inputs_embeds is None:
inputs_embeds = self.embed_tokens(input_ids)
# embed positions
if attention_mask is None:
attention_mask = torch.ones(
(batch_size, seq_length_with_past), dtype=torch.bool, device=inputs_embeds.device
)
attention_mask = self._prepare_decoder_attention_mask(
attention_mask, (batch_size, seq_length), inputs_embeds, past_key_values_length
)
if use_zero_attention_mask:
attention_mask[:, :, :self.num_query_token, :self.num_query_token] = 0
hidden_states = inputs_embeds
if self.gradient_checkpointing and self.training:
if use_cache:
logger.warning_once(
'`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...'
)
use_cache = False
# decoder layers
all_hidden_states = () if output_hidden_states else None
all_self_attns = () if output_attentions else None
next_decoder_cache = () if use_cache else None
for idx, decoder_layer in enumerate(self.layers):
if output_hidden_states:
all_hidden_states += (hidden_states,)
past_key_value = past_key_values[idx] if past_key_values is not None else None
layer_outputs = decoder_layer(
hidden_states,
vision_hidden_states,
attention_mask=attention_mask,
position_ids=position_ids,
past_key_value=past_key_value,
output_attentions=output_attentions,
use_cache=use_cache,
repeat_time=repeat_time,
)
hidden_states = layer_outputs[0]
if use_cache:
next_decoder_cache += (layer_outputs[2 if output_attentions else 1],)
if output_attentions:
all_self_attns += (layer_outputs[1],)
hidden_states = self.norm(hidden_states)
# add hidden states from the last decoder layer
if output_hidden_states:
all_hidden_states += (hidden_states,)
next_cache = next_decoder_cache if use_cache else None
if not return_dict:
return tuple(v for v in [hidden_states, next_cache, all_hidden_states, all_self_attns] if v is not None)
return BaseModelOutputWithPast(
last_hidden_state=hidden_states,
past_key_values=next_cache,
hidden_states=all_hidden_states,
attentions=all_self_attns,
)
@add_start_docstrings_to_model_forward(LLAMA_INPUTS_DOCSTRING)
def forward_train(
self,
input_ids: torch.LongTensor = None,
attention_mask: Optional[torch.Tensor] = None,
position_ids: Optional[torch.LongTensor] = None,
past_key_values: Optional[List[torch.FloatTensor]] = None,
inputs_embeds: Optional[torch.FloatTensor] = None,
vision_hidden_states: Optional[torch.FloatTensor] = None,
repeat_time: Optional[int] = 1,
use_cache: Optional[bool] = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
) -> Union[Tuple, BaseModelOutputWithPast]:
output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
output_hidden_states = (
output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
)
use_cache = use_cache if use_cache is not None else self.config.use_cache
return_dict = return_dict if return_dict is not None else self.config.use_return_dict
# retrieve input_ids and inputs_embeds
if input_ids is not None and inputs_embeds is not None:
raise ValueError('You cannot specify both decoder_input_ids and decoder_inputs_embeds at the same time')
elif input_ids is not None:
batch_size, seq_length = input_ids.shape
elif inputs_embeds is not None:
batch_size, seq_length, _ = inputs_embeds.shape
else:
raise ValueError('You have to specify either decoder_input_ids or decoder_inputs_embeds')
seq_length_with_past = seq_length
past_key_values_length = 0
if past_key_values is not None:
past_key_values_length = past_key_values[0][0].shape[2]
seq_length_with_past = seq_length_with_past + past_key_values_length
if position_ids is None:
device = input_ids.device if input_ids is not None else inputs_embeds.device
position_ids = torch.arange(
past_key_values_length, seq_length + past_key_values_length, dtype=torch.long, device=device
)
position_ids = position_ids.unsqueeze(0).view(-1, seq_length)
else:
position_ids = position_ids.view(-1, seq_length).long()
if inputs_embeds is None:
inputs_embeds = self.embed_tokens(input_ids)
# embed positions
# if attention_mask is None:
# attention_mask = torch.ones(
# (batch_size, seq_length_with_past), dtype=torch.bool, device=inputs_embeds.device
# )
# attention_mask = self._prepare_decoder_attention_mask(
# attention_mask, (batch_size, seq_length), inputs_embeds, past_key_values_length
# )
hidden_states = inputs_embeds
if self.gradient_checkpointing and self.training:
if use_cache:
logger.warning_once(
'`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...'
)
use_cache = False
# decoder layers
all_hidden_states = () if output_hidden_states else None
all_self_attns = () if output_attentions else None
next_decoder_cache = () if use_cache else None
for idx, decoder_layer in enumerate(self.layers):
if output_hidden_states:
all_hidden_states += (hidden_states,)
past_key_value = past_key_values[idx] if past_key_values is not None else None
if self.gradient_checkpointing and self.training:
def create_custom_forward(module):
def custom_forward(*inputs):
# None for past_key_value
return module(*inputs, output_attentions, None, repeat_time)
return custom_forward
layer_outputs = torch.utils.checkpoint.checkpoint(
create_custom_forward(decoder_layer),
hidden_states,
vision_hidden_states,
attention_mask,
position_ids,
None,
)
else:
layer_outputs = decoder_layer(
hidden_states,
vision_hidden_states,
attention_mask=attention_mask,
position_ids=position_ids,
past_key_value=past_key_value,
output_attentions=output_attentions,
use_cache=use_cache,
repeat_time=repeat_time,
)
hidden_states = layer_outputs[0]
if use_cache:
next_decoder_cache += (layer_outputs[2 if output_attentions else 1],)
if output_attentions:
all_self_attns += (layer_outputs[1],)
hidden_states = self.norm(hidden_states)
# add hidden states from the last decoder layer
if output_hidden_states:
all_hidden_states += (hidden_states,)
next_cache = next_decoder_cache if use_cache else None
if not return_dict:
return tuple(v for v in [hidden_states, next_cache, all_hidden_states, all_self_attns] if v is not None)
return BaseModelOutputWithPast(
last_hidden_state=hidden_states,
past_key_values=next_cache,
hidden_states=all_hidden_states,
attentions=all_self_attns,
)
class LlamaForCausalLM(LlamaPreTrainedModel):
def __init__(self, config):
super().__init__(config)
self.model = LlamaModel(config)
self.lm_head = nn.Linear(config.hidden_size, config.vocab_size, bias=False)
# Initialize weights and apply final processing
# self.post_init()
def get_input_embeddings(self):
return self.model.embed_tokens
def set_input_embeddings(self, value):
self.model.embed_tokens = value
def get_output_embeddings(self):
return self.lm_head
def set_output_embeddings(self, new_embeddings):
self.lm_head = new_embeddings
def set_decoder(self, decoder):
self.model = decoder
def get_decoder(self):
return self.model
@add_start_docstrings_to_model_forward(LLAMA_INPUTS_DOCSTRING)
@replace_return_docstrings(output_type=CausalLMOutputWithPast, config_class=_CONFIG_FOR_DOC)
def forward(
self,
input_ids: torch.LongTensor = None,
attention_mask: Optional[torch.Tensor] = None,
position_ids: Optional[torch.LongTensor] = None,
past_key_values: Optional[List[torch.FloatTensor]] = None,
inputs_embeds: Optional[torch.FloatTensor] = None,
vision_hidden_states: Optional[torch.FloatTensor] = None,
labels: Optional[torch.LongTensor] = None,
use_cache: Optional[bool] = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
use_zero_attention_mask: Optional[bool] = None,
return_dict: Optional[bool] = None,
) -> Union[Tuple, CausalLMOutputWithPast]:
r"""
Args:
labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
Labels for computing the masked language modeling loss. Indices should either be in `[0, ...,
config.vocab_size]` or -100 (see `input_ids` docstring). Tokens with indices set to `-100` are ignored
(masked), the loss is only computed for the tokens with labels in `[0, ..., config.vocab_size]`.
Returns:
Example:
```python
>>> from transformers import AutoTokenizer, LlamaForCausalLM
>>> model = LlamaForCausalLM.from_pretrained(PATH_TO_CONVERTED_WEIGHTS)
>>> tokenizer = AutoTokenizer.from_pretrained(PATH_TO_CONVERTED_TOKENIZER)
        >>> prompt = "Hey, are you conscious? Can you talk to me?"
>>> inputs = tokenizer(prompt, return_tensors="pt")
>>> # Generate
>>> generate_ids = model.generate(inputs.input_ids, max_length=30)
>>> tokenizer.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
"Hey, are you consciours? Can you talk to me?\nI'm not consciours, but I can talk to you."
```"""
output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
output_hidden_states = (
output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
)
return_dict = return_dict if return_dict is not None else self.config.use_return_dict
# decoder outputs consists of (dec_features, layer_state, dec_hidden, dec_attn)
outputs = self.model(
input_ids=input_ids,
attention_mask=attention_mask,
position_ids=position_ids,
past_key_values=past_key_values,
inputs_embeds=inputs_embeds,
vision_hidden_states=vision_hidden_states,
use_cache=use_cache,
output_attentions=output_attentions,
output_hidden_states=output_hidden_states,
return_dict=return_dict,
use_zero_attention_mask=use_zero_attention_mask,
)
hidden_states = outputs[0]
logits = self.lm_head(hidden_states)
loss = None
if labels is not None:
# Shift so that tokens < n predict n
shift_logits = logits[..., :-1, :].contiguous()
shift_labels = labels[..., 1:].contiguous()
# Flatten the tokens
loss_fct = CrossEntropyLoss()
shift_logits = shift_logits.view(-1, self.config.vocab_size)
shift_labels = shift_labels.view(-1)
# Enable model parallelism
shift_labels = shift_labels.to(shift_logits.device)
loss = loss_fct(shift_logits, shift_labels)
if not return_dict:
output = (logits,) + outputs[1:]
return (loss,) + output if loss is not None else output
return CausalLMOutputWithPast(
loss=loss,
logits=logits,
past_key_values=outputs.past_key_values,
hidden_states=outputs.hidden_states,
attentions=outputs.attentions,
)
def prepare_inputs_for_generation(
self, input_ids, past_key_values=None, attention_mask=None, inputs_embeds=None,
vision_hidden_states=None, use_zero_attention_mask=None, **kwargs
):
if past_key_values:
input_ids = input_ids[:, -1:]
position_ids = kwargs.get('position_ids', None)
if attention_mask is not None and position_ids is None:
# create position_ids on the fly for batch generation
position_ids = attention_mask.long().cumsum(-1) - 1
position_ids.masked_fill_(attention_mask == 0, 1)
if past_key_values:
position_ids = position_ids[:, -1].unsqueeze(-1)
# if `inputs_embeds` are passed, we only want to use them in the 1st generation step
if inputs_embeds is not None and past_key_values is None:
model_inputs = {'inputs_embeds': inputs_embeds}
else:
model_inputs = {'input_ids': input_ids}
model_inputs.update(
{
'position_ids': position_ids,
'past_key_values': past_key_values,
'use_cache': kwargs.get('use_cache'),
'attention_mask': attention_mask,
'vision_hidden_states': vision_hidden_states,
'use_zero_attention_mask': use_zero_attention_mask,
}
)
return model_inputs
@staticmethod
def _reorder_cache(past_key_values, beam_idx):
reordered_past = ()
for layer_past in past_key_values:
reordered_past += (tuple(past_state.index_select(0, beam_idx) for past_state in layer_past),)
return reordered_past
from typing import Dict
import torch
class DictTensor:
"""
    Wrap a dict of tensors so that `tokenizer(texts).to(device)` works as it does for a plain tensor.
"""
def __init__(self, d: Dict[str, torch.Tensor]):
self.d = d
def to(self, device):
return {k: v.to(device) for k, v in self.d.items()}
class JaCLIPForBenchmark:
"""
    Adapt the rinna model to the benchmark interface: `encode_text(dict_tensor)` and `encode_image(image)`.
"""
def __init__(self, model):
self.model = model
def encode_text(self, dict_tensor):
return self.model.get_text_features(**dict_tensor)
def encode_image(self, image):
return self.model.get_image_features(image)
def load_japanese_clip(pretrained: str, device='cpu', **kwargs):
"""
Load Japanese CLIP/CLOOB by rinna (https://github.com/rinnakk/japanese-clip)
Remarks:
        - `model.encode_text()` only works correctly when attention_mask and position_ids are passed alongside input_ids.
"""
try:
import japanese_clip as ja_clip
except ImportError:
raise ImportError('Install `japanese_clip` by `pip install git+https://github.com/rinnakk/japanese-clip.git`')
cache_dir = kwargs.pop('cache_dir', None)
model, transform = ja_clip.load(pretrained, device=device, cache_dir=cache_dir)
class JaTokenizerForBenchmark:
        def __init__(self):
self.tokenizer = ja_clip.load_tokenizer()
def __call__(self, texts) -> Dict[str, torch.Tensor]:
inputs = ja_clip.tokenize(texts, tokenizer=self.tokenizer, device='cpu')
return DictTensor(inputs)
def __len__(self):
return len(self.tokenizer)
return JaCLIPForBenchmark(model), transform, JaTokenizerForBenchmark()
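# Minimal usage sketch for the wrappers above. The checkpoint name
# 'rinna/japanese-clip-vit-b-16' and the 224x224 dummy image are illustrative assumptions,
# not values pinned by this file. DictTensor lets the tokenizer output be moved like a
# plain tensor, and JaCLIPForBenchmark.encode_text forwards input_ids, attention_mask and
# position_ids together, as the remark in load_japanese_clip requires.
if __name__ == '__main__':
    from PIL import Image

    model, transform, tokenizer = load_japanese_clip('rinna/japanese-clip-vit-b-16', device='cpu')
    text_inputs = tokenizer(['犬の写真']).to('cpu')  # "a photo of a dog"
    image_input = transform(Image.new('RGB', (224, 224))).unsqueeze(0)
    with torch.no_grad():
        print(model.encode_text(text_inputs).shape, model.encode_image(image_input).shape)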
import open_clip
def load_open_clip(model_name: str = 'ViT-B-32-quickgelu', pretrained: str = 'laion400m_e32', cache_dir: str = None,
device='cpu'):
model, _, transform = open_clip.create_model_and_transforms(model_name, pretrained=pretrained, cache_dir=cache_dir)
model = model.to(device)
tokenizer = open_clip.get_tokenizer(model_name)
return model, transform, tokenizer
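# Minimal usage sketch, assuming the default ViT-B-32-quickgelu / laion400m_e32 weights are
# already cached or can be downloaded; it just encodes a small text batch with the returned
# (model, transform, tokenizer) triple.
if __name__ == '__main__':
    import torch

    model, transform, tokenizer = load_open_clip(device='cpu')
    tokens = tokenizer(['a photo of a dog', 'a photo of a cat'])
    with torch.no_grad():
        text_features = model.encode_text(tokens)
    print(text_features.shape)  # ViT-B-32 text embeddings are 512-dimensional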
# Convert CLIP_benchmark datasets to webdataset format
import argparse
import io
import os
import sys
import torch
import torch.utils.data
import webdataset
from tqdm import tqdm
from .datasets.builder import build_dataset
def get_parser_args():
parser = argparse.ArgumentParser(description="""
Convert a CLIP_benchmark dataset to the webdataset format (TAR files).
Datasets can be uploaded to the Huggingface Hub to allow CLIP model
evaluation from anywhere with an Internet connection.
To convert other image classification datasets, use the Python API:
>>> import clip_benchmark.webdataset_builder
>>> help(clip_benchmark.webdataset_builder.convert_dataset)
""")
# Main arguments
parser.add_argument('--dataset', '-d', required=True, type=str,
help='CLIP_benchmark compatible dataset for conversion')
parser.add_argument('--split', '-s', default='test', type=str,
help='Dataset split to use')
parser.add_argument('--dataset-root', '-r', default='data', type=str,
help='Root directory for input data')
parser.add_argument('--output', '-o', required=True, type=str,
help='Root directory for output data')
# Special dataset types
parser_special = parser.add_mutually_exclusive_group()
parser_special.add_argument('--retrieval', action='store_true',
help='Flag to signal retrieval dataset (text captions instead of classes)')
parser_special.add_argument('--multilabel', action='store_true',
help='Flag to signal multilabel classification dataset')
# Additional parameters
parser.add_argument('--image-format', default='webp', type=str,
help='Image extension for saving: (lossless) webp, png, or jpg (Default: webp)')
parser.add_argument('--max-count', default=10_000, type=int,
help='Maximum number of images per TAR shard (Default: 10_000)')
parser.add_argument('--max-size', default=1_000_000_000, type=int,
help='Maximum size in bytes per TAR shard (Default: 1_000_000_000)')
args = parser.parse_args()
return args
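# Example invocation (a sketch: 'cifar10' and the output path are placeholders, and the
# module path follows the Python API mentioned in the parser description above):
#   python -m clip_benchmark.webdataset_builder \
#       --dataset cifar10 --split test --dataset-root data --output wds/cifar10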
def main():
args = get_parser_args()
run(args)
def run(args):
# Setup dataset folder
os.makedirs(os.path.join(args.output, args.split), exist_ok=True)
# Load original dataset
dataset = build_dataset(
dataset_name=args.dataset,
root=args.dataset_root,
split=args.split,
transform=PIL_to_bytes(args.image_format),
download=True,
)
# Run conversion
if args.retrieval:
convert_retrieval_dataset(
dataset,
args.split,
args.output,
transform=None,
image_format=args.image_format,
max_count=args.max_count,
max_size=args.max_size
)
else:
convert_dataset(
dataset,
args.split,
args.output,
transform=None,
image_format=args.image_format,
max_count=args.max_count,
max_size=args.max_size,
multilabel=args.multilabel,
)
def PIL_to_bytes(image_format):
OPTIONS = {
'webp': dict(format='webp', lossless=True),
'png': dict(format='png'),
'jpg': dict(format='jpeg'),
}
def transform(image):
bytestream = io.BytesIO()
image.save(bytestream, **OPTIONS[image_format])
return bytestream.getvalue()
return transform
def path_to_bytes(filepath):
with open(filepath, 'rb') as fp:
return fp.read()
def convert_dataset(dataset, split, output_folder, *, transform=None,
image_format='webp', max_count=10_000, max_size=1_000_000_000,
multilabel=False, verbose=True):
"""
Convert an iterable `dataset` of (image, label) pairs to webdataset (.tar) format, and store in `output_folder/split`.
Images may be passed in as either:
* File paths: pass in `transform=path_to_bytes`;
* PIL images: pass in `transform=PIL_to_bytes(image_format)` where `image_format` is e.g. "webp"; or
* Raw binary data: use a PyTorch `Dataset` that supports `transform=PIL_to_bytes(image_format)`, and pass in `transform=None` here.
Be sure that the transform is not applied twice.
Copying image files directly or writing raw binary data is fastest since it allows multiprocessing;
passing in PIL images will be slower, but should work for any format of dataset.
Labels must be zero-indexed integers (for multilabel datasets, labels must be arrays/tensors).
Classnames and zero-shot classification templates can be provided as attributes of the dataset (`.classes` and `.templates`)
or filled in manually afterward. `dataset.classes` should be a list of strings indexed by the labels,
and `dataset.templates` should be a list of strings containing `{c}` to specify where classnames are to be inserted.
"""
# Create output directory
os.makedirs(os.path.join(output_folder, split), exist_ok=True)
# Multiprocessed dataloader, should work with Dataset or list
dataloader = torch.utils.data.DataLoader(
dataset,
batch_size=1,
num_workers=8,
collate_fn=lambda batch: batch[0] # No collate, only for multiprocessing
)
if verbose:
try:
print(f'Dataset size: {len(dataset)}')
except TypeError:
print('IterableDataset has no len()')
# Save classnames
if hasattr(dataset, 'classes') and dataset.classes:
classnames_fname = os.path.join(output_folder, 'classnames.txt')
with open(classnames_fname, 'w') as classnames_file:
print(*dataset.classes, sep='\n', end='\n', file=classnames_file)
if verbose:
print("Saved class names to '%s'" % classnames_fname)
elif verbose:
print('WARNING: No class names found')
# Save zeroshot templates
if hasattr(dataset, 'templates') and dataset.templates:
templates_fname = os.path.join(output_folder, 'zeroshot_classification_templates.txt')
with open(templates_fname, 'w') as templates_file:
print(*dataset.templates, sep='\n', end='\n', file=templates_file)
if verbose:
print("Saved class names to '%s'" % templates_fname)
elif verbose:
print('WARNING: No zeroshot classification templates found')
# Save dataset type
if multilabel:
type_fname = os.path.join(output_folder, 'dataset_type.txt')
with open(type_fname, 'w') as type_file:
print('multilabel', end='\n', file=type_file)
if verbose:
print("Saved dataset type to '%s'" % type_fname)
# Write to TAR files
data_fname = os.path.join(output_folder, split, r'%d.tar')
sink = webdataset.ShardWriter(
data_fname,
maxcount=max_count,
maxsize=max_size
)
nsamples = 0
label_type = 'npy' if multilabel else 'cls'
for index, (input, output) in enumerate(tqdm(dataloader, desc='Converting')):
nsamples += 1
if isinstance(input, str) and transform is path_to_bytes:
# If copying file, determine image format from extension
extension = os.path.splitext(input)[1].replace('.', '').lower().replace('jpeg', 'jpg') or image_format
else:
extension = image_format
# Convert label if necessary
if isinstance(output, torch.Tensor):
if multilabel:
output = output.detach().cpu().numpy()
else:
output = output.item()
# Write example
sink.write({
'__key__': 's%07d' % index,
extension: transform(input) if transform else input,
label_type: output,
})
num_shards = sink.shard
sink.close()
if verbose:
print("Saved dataset to '%s'" % data_fname.replace(r'%d', '{0..%d}' % (num_shards - 1)))
# Save number of shards
nshards_fname = os.path.join(output_folder, split, 'nshards.txt')
with open(nshards_fname, 'w') as nshards_file:
print(num_shards, end='\n', file=nshards_file)
if verbose:
print("Saved number of shards = %d to '%s'" % (num_shards, nshards_fname))
print('Final dataset size:', nsamples)
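# Minimal sketch of the file-path route described in the docstring above: a plain list of
# (filepath, label) pairs is a valid `dataset`, because the dataloader's collate_fn just
# unwraps single-item batches. The paths, labels, and output folder are placeholders.
def _example_convert_from_files():
    samples = [
        ('images/cat/0001.jpg', 0),  # hypothetical files on disk
        ('images/dog/0001.jpg', 1),
    ]
    convert_dataset(samples, split='test', output_folder='wds/example',
                    transform=path_to_bytes)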
def convert_retrieval_dataset(dataset, split, output_folder, *, transform=None, image_format='webp', max_count=10_000,
max_size=1_000_000_000, verbose=True):
"""
Convert an iterable `dataset` of (image, [caption1, caption2, ...]) pairs to webdataset (.tar) format, and store in `output_folder/split`.
Labels must be lists of strings, with no newlines.
Read the documentation of `convert_dataset` for more information.
"""
# Create output directory
os.makedirs(os.path.join(output_folder, split), exist_ok=True)
# Multiprocessed dataloader, should work with Dataset or list
dataloader = torch.utils.data.DataLoader(
dataset,
batch_size=1,
num_workers=8,
collate_fn=lambda batch: batch[0] # No collate, only for multiprocessing
)
if verbose:
try:
print(f'Dataset size: {len(dataset)}')
except TypeError:
print('IterableDataset has no len()')
# No classnames
# No zeroshot templates
# Save dataset type
type_fname = os.path.join(output_folder, 'dataset_type.txt')
with open(type_fname, 'w') as type_file:
print('retrieval', end='\n', file=type_file)
if verbose:
print("Saved dataset type to '%s'" % type_fname)
# Write to TAR files
data_fname = os.path.join(output_folder, split, r'%d.tar')
sink = webdataset.ShardWriter(
data_fname,
maxcount=max_count,
maxsize=max_size
)
nsamples = 0
for index, (input, output) in enumerate(tqdm(dataloader, desc='Converting')):
nsamples += 1
if isinstance(input, str) and transform is path_to_bytes:
# If copying file, determine image format from extension
extension = os.path.splitext(input)[1].replace('.', '').lower().replace('jpeg', 'jpg') or image_format
else:
extension = image_format
sink.write({
'__key__': 's%07d' % index,
extension: transform(input) if transform else input,
'txt': '\n'.join(caption.replace('\n', r'\n') for caption in output),
})
num_shards = sink.shard
sink.close()
if verbose:
print("Saved dataset to '%s'" % data_fname.replace(r'%d', '{0..%d}' % (num_shards - 1)))
# Save number of shards
nshards_fname = os.path.join(output_folder, split, 'nshards.txt')
with open(nshards_fname, 'w') as nshards_file:
print(num_shards, end='\n', file=nshards_file)
if verbose:
print("Saved number of shards = %d to '%s'" % (num_shards, nshards_fname))
print('Final dataset size:', nsamples)
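# Companion sketch for the retrieval route: each item is (image, [caption, ...]) and the
# captions end up newline-joined in the 'txt' field. The in-memory PIL image and output
# folder below are placeholders; `PIL_to_bytes('webp')` serializes the image as lossless WebP.
def _example_convert_retrieval():
    from PIL import Image
    samples = [(Image.new('RGB', (64, 64)), ['an all-black square', 'nothing to see here'])]
    convert_retrieval_dataset(samples, split='test', output_folder='wds/retrieval_example',
                              transform=PIL_to_bytes('webp'))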
if __name__ == '__main__':
sys.exit(main())
path
Coopers_Hawk/0561.jpg
Coopers_Hawk/0629.jpg
Coopers_Hawk/0717.jpg
Coopers_Hawk/1847.jpg
Northern_Goshawk/2629.jpg
Northern_Goshawk/3329.jpg
Northern_Goshawk/3387.jpg
Northern_Goshawk/3413.jpg
Northern_Goshawk/3616.jpg
Sharp_shinned_Hawk/3785.jpg
Sharp_shinned_Hawk/3786.jpg
Sharp_shinned_Hawk/4940.jpg
Sharp_shinned_Hawk/5003.jpg
Golden_Eagle/5479.jpg
Golden_Eagle/5696.jpg
Golden_Eagle/5978.jpg
Golden_Eagle/7006.jpg
White_tailed_Hawk/7839.jpg
White_tailed_Hawk/7844.jpg
White_tailed_Hawk/8062.jpg
White_tailed_Hawk/8164.jpg
Zone_tailed_Hawk/8306.jpg
Red_tailed_Hawk/9193.jpg
Red_tailed_Hawk/9201.jpg
Red_tailed_Hawk/9924.jpg
Rough_legged_Hawk/11186.jpg
Rough_legged_Hawk/11336.jpg
Rough_legged_Hawk/11897.jpg
Rough_legged_Hawk/12960.jpg
Rough_legged_Hawk/13233.jpg
Red_shouldered_Hawk/13423.jpg
Red_shouldered_Hawk/14132.jpg
Red_shouldered_Hawk/15476.jpg
Red_shouldered_Hawk/15484.jpg
Broad_winged_Hawk/15781.jpg
Broad_winged_Hawk/15914.jpg
Broad_winged_Hawk/16173.jpg
Broad_winged_Hawk/16557.jpg
Broad_winged_Hawk/16558.jpg
Swainsons_Hawk/18130.jpg
Swainsons_Hawk/18135.jpg
Swainsons_Hawk/18592.jpg
Common_Black_Hawk/20559.jpg
Common_Black_Hawk/20611.jpg
Common_Black_Hawk/20699.jpg
Common_Black_Hawk/20705.jpg
Northern_Harrier/20822.jpg
Northern_Harrier/21126.jpg
Northern_Harrier/21396.jpg
Northern_Harrier/21604.jpg
Northern_Harrier/21799.jpg
Swallow_tailed_Kite/23481.jpg
Swallow_tailed_Kite/23501.jpg
Swallow_tailed_Kite/23770.jpg
Swallow_tailed_Kite/23778.jpg
White_tailed_Kite/24002.jpg
White_tailed_Kite/24118.jpg
White_tailed_Kite/25257.jpg
White_tailed_Kite/25367.jpg
White_tailed_Kite/25517.jpg
Bald_Eagle/25676.jpg
Bald_Eagle/25922.jpg
Bald_Eagle/26131.jpg
Bald_Eagle/26336.jpg
Mississippi_Kite/28319.jpg
Mississippi_Kite/28504.jpg
Mississippi_Kite/28582.jpg
Harriss_Hawk/28755.jpg
Harriss_Hawk/30467.jpg
Snail_Kite/31366.jpg
Snail_Kite/31384.jpg
Snail_Kite/31551.jpg
Snail_Kite/31705.jpg
Snail_Kite/31847.jpg
Bushtit/32084.jpg
Bushtit/32514.jpg
Bushtit/32653.jpg
Bushtit/32768.jpg
Horned_Lark/33214.jpg
Horned_Lark/33365.jpg
Horned_Lark/33590.jpg
Horned_Lark/33863.jpg
Belted_Kingfisher/34170.jpg
Belted_Kingfisher/34307.jpg
Belted_Kingfisher/34460.jpg
Belted_Kingfisher/34974.jpg
Belted_Kingfisher/35226.jpg
Pigeon_Guillemot/35557.jpg
Pigeon_Guillemot/35777.jpg
Black_Guillemot/35966.jpg
Black_Guillemot/35978.jpg
Black_Guillemot/36346.jpg
Black_Guillemot/36741.jpg
Common_Murre/37641.jpg
Common_Murre/38269.jpg
Common_Murre/38418.jpg
Northern_Pintail/42026.jpg
Northern_Pintail/42339.jpg
Northern_Pintail/42874.jpg
Northern_Pintail/43275.jpg
American_Wigeon/43939.jpg
American_Wigeon/44572.jpg
American_Wigeon/45880.jpg
American_Wigeon/45881.jpg
Green_winged_Teal/49197.jpg
Green_winged_Teal/49207.jpg
Cinnamon_Teal/51293.jpg
Cinnamon_Teal/51300.jpg
Cinnamon_Teal/51356.jpg
Cinnamon_Teal/51906.jpg
Cinnamon_Teal/51987.jpg
Blue_winged_Teal/52323.jpg
Blue_winged_Teal/53186.jpg
Blue_winged_Teal/53522.jpg
Mottled_Duck/53971.jpg
Mottled_Duck/54152.jpg
Mottled_Duck/54243.jpg
Eurasian_Wigeon/54388.jpg
Eurasian_Wigeon/54676.jpg
Eurasian_Wigeon/55464.jpg
Mallard/56726.jpg
Mallard/57203.jpg
Mallard/57322.jpg
Mallard/58681.jpg
American_Black_Duck/59245.jpg
American_Black_Duck/59256.jpg
American_Black_Duck/59286.jpg
American_Black_Duck/59713.jpg
Gadwall/59958.jpg
Gadwall/61533.jpg
Gadwall/62294.jpg
Gadwall/62311.jpg
Lesser_Scaup/62421.jpg
Lesser_Scaup/62593.jpg
Lesser_Scaup/63628.jpg
Redhead/63952.jpg
Redhead/64063.jpg
Redhead/64730.jpg
Ring_necked_Duck/64909.jpg
Ring_necked_Duck/64935.jpg
Ring_necked_Duck/65894.jpg
Ring_necked_Duck/66469.jpg
Greater_Scaup/66983.jpg
Greater_Scaup/66991.jpg
Greater_Scaup/67058.jpg
Canvasback/68553.jpg
Canvasback/69131.jpg
Canvasback/69181.jpg
Canvasback/69198.jpg
Bufflehead/69347.jpg
Bufflehead/69648.jpg
Bufflehead/69917.jpg
Common_Goldeneye/71721.jpg
Common_Goldeneye/72137.jpg
Common_Goldeneye/72704.jpg
Barrows_Goldeneye/73549.jpg
Barrows_Goldeneye/73593.jpg
Barrows_Goldeneye/73720.jpg
Barrows_Goldeneye/73802.jpg
Muscovy_Duck/75092.jpg
Muscovy_Duck/75804.jpg
Long_tailed_Duck/77250.jpg
Long_tailed_Duck/77429.jpg
Long_tailed_Duck/78640.jpg
Hooded_Merganser/80811.jpg
Hooded_Merganser/81438.jpg
Hooded_Merganser/81601.jpg
Hooded_Merganser/82812.jpg
Black_Scoter/83217.jpg
Black_Scoter/83219.jpg
White_winged_Scoter/83255.jpg
White_winged_Scoter/83338.jpg
White_winged_Scoter/83359.jpg
White_winged_Scoter/83487.jpg
White_winged_Scoter/83616.jpg
Surf_Scoter/83789.jpg
Surf_Scoter/83977.jpg
Surf_Scoter/83980.jpg
Surf_Scoter/84003.jpg
Common_Merganser/84984.jpg
Common_Merganser/85272.jpg
Common_Merganser/85681.jpg
Common_Merganser/86615.jpg
Common_Merganser/86750.jpg
Red_breasted_Merganser/87916.jpg
Red_breasted_Merganser/89049.jpg
Red_breasted_Merganser/89453.jpg
Ruddy_Duck/90492.jpg
Ruddy_Duck/90720.jpg
Ruddy_Duck/90907.jpg
Common_Eider/91152.jpg
Common_Eider/91585.jpg
Common_Eider/92543.jpg
Common_Eider/92807.jpg
Greater_White_fronted_Goose/93924.jpg
Greater_White_fronted_Goose/95002.jpg
Brant/95943.jpg
Brant/97845.jpg
Canada_Goose/99598.jpg
Canada_Goose/99607.jpg
Canada_Goose/99882.jpg
Canada_Goose/100020.jpg
Canada_Goose/100172.jpg
Cackling_Goose/100553.jpg
Cackling_Goose/100631.jpg
Snow_Goose/102677.jpg
Rosss_Goose/103958.jpg
Trumpeter_Swan/104768.jpg
Trumpeter_Swan/105885.jpg
Trumpeter_Swan/106043.jpg
Tundra_Swan/106805.jpg
Tundra_Swan/107380.jpg
Tundra_Swan/107872.jpg
Mute_Swan/108235.jpg
Mute_Swan/108298.jpg
Mute_Swan/108300.jpg
Mute_Swan/109158.jpg
Mute_Swan/110279.jpg
Fulvous_Whistling_Duck/112390.jpg
Fulvous_Whistling_Duck/112464.jpg
Fulvous_Whistling_Duck/112722.jpg
Fulvous_Whistling_Duck/112873.jpg
Fulvous_Whistling_Duck/112945.jpg
Anhinga/113184.jpg
Anhinga/113972.jpg
Anhinga/114408.jpg
Anhinga/115393.jpg
Chimney_Swift/115602.jpg
Chimney_Swift/115603.jpg
Limpkin/116454.jpg
Limpkin/116742.jpg
Limpkin/116859.jpg
Great_Egret/117822.jpg
Great_Egret/117886.jpg
Great_Egret/118110.jpg
Great_Egret/118708.jpg
Great_Blue_Heron/119576.jpg
Great_Blue_Heron/119926.jpg
Great_Blue_Heron/120868.jpg
Great_Blue_Heron/121118.jpg
Great_Blue_Heron/121470.jpg
American_Bittern/122455.jpg
American_Bittern/122457.jpg
American_Bittern/122699.jpg
American_Bittern/122825.jpg
American_Bittern/123020.jpg
Cattle_Egret/123693.jpg
Cattle_Egret/123753.jpg
Cattle_Egret/124183.jpg
Cattle_Egret/124364.jpg
Cattle_Egret/124797.jpg
Little_Blue_Heron/129249.jpg
Little_Blue_Heron/129505.jpg
Little_Blue_Heron/130092.jpg
Little_Blue_Heron/130285.jpg
Reddish_Egret/131049.jpg
Reddish_Egret/131057.jpg
Reddish_Egret/131530.jpg
Reddish_Egret/131540.jpg
Reddish_Egret/132577.jpg
Snowy_Egret/133251.jpg
Snowy_Egret/133284.jpg
Snowy_Egret/134285.jpg
Snowy_Egret/134588.jpg
Tricolored_Heron/136511.jpg
Tricolored_Heron/136547.jpg
Tricolored_Heron/136977.jpg
Tricolored_Heron/137040.jpg
Cedar_Waxwing/137780.jpg
Cedar_Waxwing/138149.jpg
Cedar_Waxwing/139511.jpg
Bohemian_Waxwing/139598.jpg
Bohemian_Waxwing/139646.jpg
Bohemian_Waxwing/140047.jpg
Bohemian_Waxwing/140775.jpg
Bohemian_Waxwing/141717.jpg
Lapland_Longspur/142113.jpg
Lapland_Longspur/142292.jpg
Lapland_Longspur/142705.jpg
Chestnut_collared_Longspur/142766.jpg
Chestnut_collared_Longspur/142767.jpg
Chestnut_collared_Longspur/142802.jpg
Chestnut_collared_Longspur/142809.jpg
Snow_Bunting/143535.jpg
Snow_Bunting/143737.jpg
Snow_Bunting/143819.jpg
Lesser_Nighthawk/145457.jpg
Lesser_Nighthawk/145458.jpg
Lesser_Nighthawk/145521.jpg
Lesser_Nighthawk/145545.jpg
Common_Nighthawk/145652.jpg
Common_Nighthawk/145744.jpg
Common_Nighthawk/145751.jpg
Common_Nighthawk/145939.jpg
Northern_Cardinal/146223.jpg
Northern_Cardinal/146529.jpg
Northern_Cardinal/147174.jpg
Northern_Cardinal/148340.jpg
Northern_Cardinal/148486.jpg
Pyrrhuloxia/148798.jpg
Pyrrhuloxia/148908.jpg
Pyrrhuloxia/148939.jpg
Pyrrhuloxia/148955.jpg
Lazuli_Bunting/149043.jpg
Lazuli_Bunting/149250.jpg
Lazuli_Bunting/149463.jpg
Lazuli_Bunting/149543.jpg
Lazuli_Bunting/149553.jpg
Blue_Grosbeak/149697.jpg
Blue_Grosbeak/149825.jpg
Blue_Grosbeak/149978.jpg
Blue_Grosbeak/150042.jpg
Painted_Bunting/150219.jpg
Painted_Bunting/150476.jpg
Painted_Bunting/151060.jpg
Painted_Bunting/151235.jpg
Indigo_Bunting/151346.jpg
Indigo_Bunting/151895.jpg
Indigo_Bunting/152329.jpg
Indigo_Bunting/152535.jpg
Indigo_Bunting/152582.jpg
Rose_breasted_Grosbeak/153072.jpg
Rose_breasted_Grosbeak/153316.jpg
Rose_breasted_Grosbeak/153342.jpg
Rose_breasted_Grosbeak/153480.jpg
Black_headed_Grosbeak/154248.jpg
Black_headed_Grosbeak/154666.jpg
Black_headed_Grosbeak/154738.jpg
Black_headed_Grosbeak/154851.jpg
Hepatic_Tanager/155281.jpg
Hepatic_Tanager/155321.jpg
Hepatic_Tanager/155424.jpg
Hepatic_Tanager/155426.jpg
Hepatic_Tanager/155548.jpg
Western_Tanager/155609.jpg
Western_Tanager/155669.jpg
Western_Tanager/155698.jpg
Western_Tanager/156249.jpg
Western_Tanager/156307.jpg
Scarlet_Tanager/156490.jpg
Scarlet_Tanager/156719.jpg
Scarlet_Tanager/156837.jpg
Scarlet_Tanager/157181.jpg
Scarlet_Tanager/157221.jpg
Summer_Tanager/157498.jpg
Summer_Tanager/158027.jpg
Summer_Tanager/158373.jpg
Dickcissel/158550.jpg
Dickcissel/158681.jpg
Dickcissel/158752.jpg
Dickcissel/158753.jpg
Dickcissel/158755.jpg
Turkey_Vulture/159500.jpg
Turkey_Vulture/160502.jpg
Turkey_Vulture/161163.jpg
Turkey_Vulture/161552.jpg
Black_Vulture/161825.jpg
Black_Vulture/162244.jpg
Black_Vulture/163234.jpg
Black_Vulture/163235.jpg
Black_Vulture/163883.jpg
Brown_Creeper/164068.jpg
Brown_Creeper/164076.jpg
Brown_Creeper/164125.jpg
Brown_Creeper/164195.jpg
Piping_Plover/165060.jpg
Piping_Plover/165262.jpg
Piping_Plover/165946.jpg
Snowy_Plover/166309.jpg
Snowy_Plover/166357.jpg
Snowy_Plover/166358.jpg
Snowy_Plover/166463.jpg
Wilsons_Plover/166524.jpg
Wilsons_Plover/166558.jpg
Wilsons_Plover/166662.jpg
Wilsons_Plover/166874.jpg
American_Golden_Plover/166930.jpg
American_Golden_Plover/167058.jpg
American_Golden_Plover/167144.jpg
American_Golden_Plover/167343.jpg
American_Golden_Plover/167436.jpg
Pacific_Golden_Plover/168113.jpg
Pacific_Golden_Plover/168233.jpg
Pacific_Golden_Plover/168302.jpg
Black_bellied_Plover/169674.jpg
Black_bellied_Plover/170018.jpg
Black_bellied_Plover/170231.jpg
Wood_Stork/170871.jpg
Wood_Stork/170893.jpg
Wood_Stork/171841.jpg
Wood_Stork/172104.jpg
American_Dipper/172861.jpg
American_Dipper/172956.jpg
American_Dipper/173154.jpg
American_Dipper/173319.jpg
Rock_Pigeon/173531.jpg
Rock_Pigeon/174265.jpg
Rock_Pigeon/174297.jpg
Rock_Pigeon/174727.jpg
Rock_Pigeon/175183.jpg
Inca_Dove/176099.jpg
Inca_Dove/176142.jpg
Inca_Dove/176184.jpg
Inca_Dove/176392.jpg
Common_Ground_Dove/176439.jpg
Common_Ground_Dove/176477.jpg
Common_Ground_Dove/176480.jpg
Common_Ground_Dove/176583.jpg
Common_Ground_Dove/176676.jpg
Band_tailed_Pigeon/176837.jpg
Band_tailed_Pigeon/176880.jpg
Band_tailed_Pigeon/176891.jpg
Band_tailed_Pigeon/176986.jpg
Eurasian_Collared_Dove/177265.jpg
Eurasian_Collared_Dove/177603.jpg
Eurasian_Collared_Dove/179004.jpg
Eurasian_Collared_Dove/179224.jpg
White_winged_Dove/179532.jpg
White_winged_Dove/179945.jpg
White_winged_Dove/180042.jpg
White_winged_Dove/180180.jpg
White_winged_Dove/180341.jpg
Mourning_Dove/180862.jpg
Mourning_Dove/181425.jpg
Mourning_Dove/181459.jpg
Mourning_Dove/181533.jpg
Mourning_Dove/181615.jpg
Western_Scrub_Jay/182984.jpg
Western_Scrub_Jay/183344.jpg
Western_Scrub_Jay/183553.jpg
Western_Scrub_Jay/184809.jpg
Florida_Scrub_Jay/185368.jpg
Florida_Scrub_Jay/185523.jpg
Florida_Scrub_Jay/185625.jpg
American_Crow/185934.jpg
American_Crow/186786.jpg
American_Crow/186936.jpg
Common_Raven/189259.jpg
Common_Raven/189302.jpg
Common_Raven/189474.jpg
Common_Raven/190593.jpg
Chihuahuan_Raven/191301.jpg
Fish_Crow/191337.jpg
Fish_Crow/191446.jpg
Fish_Crow/191590.jpg
Blue_Jay/191761.jpg
Blue_Jay/191858.jpg
Blue_Jay/193440.jpg
Blue_Jay/193650.jpg
Blue_Jay/193947.jpg
Stellers_Jay/194900.jpg
Stellers_Jay/195166.jpg
Stellers_Jay/195417.jpg
Stellers_Jay/195993.jpg
Stellers_Jay/196018.jpg
Green_Jay/196466.jpg
Green_Jay/196814.jpg
Green_Jay/197144.jpg
Green_Jay/197160.jpg
Green_Jay/197233.jpg
Clarks_Nutcracker/197461.jpg
Clarks_Nutcracker/197770.jpg
Clarks_Nutcracker/197799.jpg
Gray_Jay/198031.jpg
Gray_Jay/198039.jpg
Gray_Jay/198840.jpg
Gray_Jay/198971.jpg
Black_billed_Magpie/199002.jpg
Black_billed_Magpie/199079.jpg
Black_billed_Magpie/199173.jpg
Black_billed_Magpie/199340.jpg
Yellow_billed_Magpie/199569.jpg
Yellow_billed_Magpie/199694.jpg
Groove_billed_Ani/199826.jpg
Groove_billed_Ani/199903.jpg
Groove_billed_Ani/200046.jpg
Groove_billed_Ani/200063.jpg
Groove_billed_Ani/200085.jpg
Yellow_billed_Cuckoo/200296.jpg
Yellow_billed_Cuckoo/200386.jpg
Yellow_billed_Cuckoo/200392.jpg
Yellow_billed_Cuckoo/200524.jpg
Yellow_billed_Cuckoo/200618.jpg
Black_billed_Cuckoo/200734.jpg
Black_billed_Cuckoo/200763.jpg
Black_billed_Cuckoo/200764.jpg
Greater_Roadrunner/201620.jpg
Greater_Roadrunner/201686.jpg
Greater_Roadrunner/201752.jpg
Greater_Roadrunner/201885.jpg
Rufous_crowned_Sparrow/202169.jpg
Rufous_crowned_Sparrow/202211.jpg
Rufous_crowned_Sparrow/202277.jpg
Saltmarsh_Sparrow/202530.jpg
Saltmarsh_Sparrow/202582.jpg
Henslows_Sparrow/202647.jpg
Henslows_Sparrow/202648.jpg
Le_Contes_Sparrow/202924.jpg
Le_Contes_Sparrow/202929.jpg
Le_Contes_Sparrow/202985.jpg
Seaside_Sparrow/203056.jpg
Seaside_Sparrow/203074.jpg
Seaside_Sparrow/203076.jpg
Seaside_Sparrow/203077.jpg
Seaside_Sparrow/203139.jpg
Nelsons_Sparrow/203238.jpg
Nelsons_Sparrow/203239.jpg
Nelsons_Sparrow/203240.jpg
Nelsons_Sparrow/203386.jpg
Grasshopper_Sparrow/203422.jpg
Grasshopper_Sparrow/203430.jpg
Grasshopper_Sparrow/203435.jpg
Grasshopper_Sparrow/203755.jpg
Grasshopper_Sparrow/203848.jpg
Black_throated_Sparrow/204020.jpg
Black_throated_Sparrow/204083.jpg
Black_throated_Sparrow/204088.jpg
Black_throated_Sparrow/204185.jpg
Black_throated_Sparrow/204241.jpg
Olive_Sparrow/204349.jpg
Olive_Sparrow/204368.jpg
Olive_Sparrow/204403.jpg
Lark_Bunting/204573.jpg
Lark_Bunting/204592.jpg
Lark_Bunting/204611.jpg
Lark_Bunting/204612.jpg
Lark_Sparrow/204909.jpg
Lark_Sparrow/204953.jpg
Lark_Sparrow/205114.jpg
Lark_Sparrow/205179.jpg
Dark_eyed_Junco/205462.jpg
Dark_eyed_Junco/206254.jpg
Dark_eyed_Junco/207033.jpg
Dark_eyed_Junco/207035.jpg
Dark_eyed_Junco/207719.jpg
Yellow_eyed_Junco/207760.jpg
Yellow_eyed_Junco/207831.jpg
Yellow_eyed_Junco/207856.jpg
Yellow_eyed_Junco/207860.jpg
Swamp_Sparrow/207984.jpg
Swamp_Sparrow/208132.jpg
Swamp_Sparrow/208138.jpg
Swamp_Sparrow/208233.jpg
Swamp_Sparrow/208316.jpg
Lincolns_Sparrow/208564.jpg
Lincolns_Sparrow/208945.jpg
Lincolns_Sparrow/208974.jpg
Song_Sparrow/209217.jpg
Song_Sparrow/209334.jpg
Song_Sparrow/210050.jpg
Song_Sparrow/211069.jpg
California_Towhee/211708.jpg
California_Towhee/211817.jpg
California_Towhee/211832.jpg
Canyon_Towhee/211861.jpg
Canyon_Towhee/211886.jpg
Fox_Sparrow/214179.jpg
Fox_Sparrow/214288.jpg
Fox_Sparrow/214681.jpg
Green_tailed_Towhee/214916.jpg
Green_tailed_Towhee/214994.jpg
Green_tailed_Towhee/215106.jpg
Eastern_Towhee/215267.jpg
Eastern_Towhee/215631.jpg
Eastern_Towhee/215956.jpg
Eastern_Towhee/216088.jpg
Spotted_Towhee/216360.jpg
Spotted_Towhee/216432.jpg
Spotted_Towhee/216677.jpg
Spotted_Towhee/217197.jpg
Vesper_Sparrow/217587.jpg
Vesper_Sparrow/217701.jpg
Vesper_Sparrow/217799.jpg
Vesper_Sparrow/217803.jpg
American_Tree_Sparrow/218041.jpg
American_Tree_Sparrow/218577.jpg
American_Tree_Sparrow/218675.jpg
American_Tree_Sparrow/218729.jpg
Black_chinned_Sparrow/218798.jpg
Brewers_Sparrow/218868.jpg
Brewers_Sparrow/218969.jpg
Brewers_Sparrow/219004.jpg
Clay_colored_Sparrow/219059.jpg
Clay_colored_Sparrow/219170.jpg
Clay_colored_Sparrow/219177.jpg
Chipping_Sparrow/219491.png
Chipping_Sparrow/219658.jpg
Chipping_Sparrow/220495.jpg
Chipping_Sparrow/220942.png
Field_Sparrow/221325.jpg
Field_Sparrow/221370.jpg
Field_Sparrow/221426.jpg
Field_Sparrow/221499.jpg
White_throated_Sparrow/221714.jpg
White_throated_Sparrow/222006.jpg
White_throated_Sparrow/223076.jpg
White_throated_Sparrow/223163.jpg
Golden_crowned_Sparrow/223833.jpg
Golden_crowned_Sparrow/224106.jpg
Golden_crowned_Sparrow/224269.jpg
Golden_crowned_Sparrow/224780.jpg
White_crowned_Sparrow/224824.jpg
White_crowned_Sparrow/224828.jpg
White_crowned_Sparrow/226522.jpg
White_crowned_Sparrow/226803.jpg
White_crowned_Sparrow/227265.jpg
Harriss_Sparrow/227350.jpg
Harriss_Sparrow/227512.jpg
Harriss_Sparrow/227527.jpg
Harriss_Sparrow/227577.jpg
Crested_Caracara/228024.jpg
Crested_Caracara/228385.jpg
Crested_Caracara/228877.jpg
Crested_Caracara/228903.jpg
Merlin/229233.jpg
Merlin/230057.jpg
Merlin/230144.jpg
Merlin/230534.jpg
Merlin/230575.jpg
Prairie_Falcon/231302.jpg
Prairie_Falcon/231323.jpg
Prairie_Falcon/231435.jpg
Prairie_Falcon/231455.jpg
Peregrine_Falcon/233275.jpg
Peregrine_Falcon/234091.jpg
American_Kestrel/235246.jpg
American_Kestrel/235253.jpg
American_Kestrel/235264.jpg
American_Kestrel/235660.jpg
American_Kestrel/236214.jpg
Magnificent_Frigatebird/236812.jpg
Magnificent_Frigatebird/237448.jpg
Magnificent_Frigatebird/237859.jpg
Magnificent_Frigatebird/238061.jpg
Magnificent_Frigatebird/238207.jpg
Common_Redpoll/238739.jpg
Common_Redpoll/239046.jpg
Common_Redpoll/239082.jpg
Common_Redpoll/239106.jpg
Common_Redpoll/239201.jpg
Hoary_Redpoll/239309.jpg
Hoary_Redpoll/239353.jpg
Hoary_Redpoll/239354.jpg
Hoary_Redpoll/239355.jpg
Evening_Grosbeak/239568.jpg
Evening_Grosbeak/239571.jpg
Evening_Grosbeak/239578.jpg
Evening_Grosbeak/239610.jpg
House_Finch/240714.jpg
House_Finch/240759.jpg
Purple_Finch/240897.jpg
Black_Rosy_Finch/240946.jpg
Black_Rosy_Finch/240955.jpg
Brown_capped_Rosy_Finch/241033.jpg
Brown_capped_Rosy_Finch/241063.jpg
Brown_capped_Rosy_Finch/241090.jpg
Brown_capped_Rosy_Finch/241092.jpg
Gray_crowned_Rosy_Finch/241263.jpg
Gray_crowned_Rosy_Finch/241395.jpg
Red_Crossbill/241564.jpg
Red_Crossbill/241937.jpg
Red_Crossbill/242149.jpg
White_winged_Crossbill/243991.jpg
White_winged_Crossbill/244623.jpg
Pine_Grosbeak/244873.jpg
Pine_Grosbeak/244874.jpg
Pine_Grosbeak/244929.jpg
Pine_Grosbeak/245002.jpg
Pine_Grosbeak/245550.jpg
Pine_Siskin/245770.jpg
Pine_Siskin/245933.jpg
Pine_Siskin/246014.jpg
Pine_Siskin/246023.jpg
Lesser_Goldfinch/246204.jpg
Lesser_Goldfinch/246306.jpg
Lesser_Goldfinch/246307.jpg
American_Goldfinch/246429.jpg
American_Goldfinch/247079.jpg
American_Goldfinch/247294.jpg
Common_Loon/248105.jpg
Common_Loon/248123.jpg
Common_Loon/248592.jpg
Common_Loon/249061.jpg
Pacific_Loon/250187.jpg
Pacific_Loon/250226.jpg
Pacific_Loon/250348.jpg
Red_throated_Loon/250714.jpg
Red_throated_Loon/250825.jpg
Red_throated_Loon/251261.jpg
Red_throated_Loon/251871.jpg
Sandhill_Crane/252873.jpg
Sandhill_Crane/253177.jpg
Sandhill_Crane/253349.jpg
Black_Oystercatcher/254613.jpg
Black_Oystercatcher/255370.jpg
Black_Oystercatcher/255850.jpg
Black_Oystercatcher/255860.jpg
American_Oystercatcher/256363.jpg
American_Oystercatcher/256433.jpg
American_Oystercatcher/256780.jpg
American_Oystercatcher/256989.jpg
American_Oystercatcher/257279.jpg
Barn_Swallow/257707.jpg
Barn_Swallow/257866.jpg
Barn_Swallow/258072.jpg
Barn_Swallow/258226.jpg
Barn_Swallow/258520.jpg
Cave_Swallow/260085.jpg
Northern_Rough_winged_Swallow/262657.jpg
Northern_Rough_winged_Swallow/262844.jpg
Northern_Rough_winged_Swallow/262854.jpg
Northern_Rough_winged_Swallow/263110.jpg
Tree_Swallow/263642.jpg
Tree_Swallow/263670.jpg
Tree_Swallow/263864.jpg
Tree_Swallow/264472.jpg
Tree_Swallow/264479.jpg
Violet_green_Swallow/265705.jpg
Violet_green_Swallow/265721.jpg
Violet_green_Swallow/266002.jpg
Violet_green_Swallow/266109.jpg
Red_winged_Blackbird/268018.jpg
Red_winged_Blackbird/268129.jpg
Red_winged_Blackbird/268500.jpg
Bobolink/268779.jpg
Bobolink/268810.jpg
Bobolink/269087.jpg
Bobolink/269217.jpg
Rusty_Blackbird/269395.jpg
Rusty_Blackbird/269425.jpg
Rusty_Blackbird/269510.jpg
Rusty_Blackbird/269565.jpg
Brewers_Blackbird/269878.jpg
Brewers_Blackbird/270185.jpg
Brewers_Blackbird/270204.jpg
Brewers_Blackbird/270263.jpg
Bullocks_Oriole/270945.jpg
Bullocks_Oriole/270969.jpg
Bullocks_Oriole/271000.jpg
Bullocks_Oriole/271423.jpg
Hooded_Oriole/271756.jpg
Hooded_Oriole/271757.jpg
Hooded_Oriole/271923.jpg
Hooded_Oriole/272024.jpg
Hooded_Oriole/272035.jpg
Baltimore_Oriole/272800.jpg
Baltimore_Oriole/273114.jpg
Baltimore_Oriole/273561.jpg
Baltimore_Oriole/273846.jpg
Audubons_Oriole/274001.jpg
Altamira_Oriole/274122.jpg
Altamira_Oriole/274270.jpg
Altamira_Oriole/274278.jpg
Scotts_Oriole/274349.jpg
Orchard_Oriole/274565.jpg
Orchard_Oriole/274791.jpg
Orchard_Oriole/274977.jpg
Orchard_Oriole/274982.jpg
Orchard_Oriole/274990.jpg
Bronzed_Cowbird/275320.jpg
Bronzed_Cowbird/275324.jpg
Brown_headed_Cowbird/275656.jpg
Brown_headed_Cowbird/275817.jpg
Brown_headed_Cowbird/275883.jpg
Brown_headed_Cowbird/275912.jpg
Brown_headed_Cowbird/276861.jpg
Boat_tailed_Grackle/277001.jpg
Boat_tailed_Grackle/277173.jpg
Boat_tailed_Grackle/277653.jpg
Boat_tailed_Grackle/277707.jpg
Great_tailed_Grackle/277971.jpg
Great_tailed_Grackle/278434.jpg
Great_tailed_Grackle/278496.jpg
Great_tailed_Grackle/278682.jpg
Great_tailed_Grackle/278789.jpg
Common_Grackle/279408.jpg
Common_Grackle/279504.jpg
Common_Grackle/279834.jpg
Common_Grackle/280459.jpg
Common_Grackle/280958.jpg
Eastern_Meadowlark/281454.jpg
Eastern_Meadowlark/281491.jpg
Eastern_Meadowlark/281734.jpg
Western_Meadowlark/282126.jpg
Western_Meadowlark/282293.jpg
Western_Meadowlark/282423.jpg
Western_Meadowlark/282900.jpg
Yellow_headed_Blackbird/283179.jpg
Yellow_headed_Blackbird/283858.jpg
Yellow_headed_Blackbird/283950.jpg
Yellow_headed_Blackbird/284113.jpg
Yellow_headed_Blackbird/284120.jpg
Northern_Shrike/284500.jpg
Northern_Shrike/285490.jpg
Northern_Shrike/285916.jpg
Loggerhead_Shrike/286064.jpg
Loggerhead_Shrike/286153.jpg
Loggerhead_Shrike/286240.jpg
Loggerhead_Shrike/286499.jpg
Bonapartes_Gull/287155.jpg
Bonapartes_Gull/287412.jpg
Bonapartes_Gull/287517.jpg
Herring_Gull/287902.jpg
Herring_Gull/288527.jpg
Herring_Gull/289004.jpg
Herring_Gull/290007.jpg
California_Gull/290151.jpg
California_Gull/290179.jpg
California_Gull/290267.jpg
California_Gull/290628.jpg
Mew_Gull/291093.jpg
Mew_Gull/291094.jpg
Mew_Gull/291317.jpg
Mew_Gull/293136.jpg
Mew_Gull/293328.jpg
Ring_billed_Gull/293666.jpg
Ring_billed_Gull/294021.jpg
Ring_billed_Gull/294083.jpg
Ring_billed_Gull/294306.jpg
Ring_billed_Gull/294857.jpg
Glaucous_winged_Gull/298433.jpg
Glaucous_winged_Gull/298616.jpg
Glaucous_winged_Gull/298626.jpg
Glaucous_winged_Gull/298801.jpg
Glaucous_winged_Gull/298833.jpg
Iceland_Gull/299515.jpg
Iceland_Gull/299516.jpg
Iceland_Gull/300417.jpg
Iceland_Gull/300418.jpg
Heermanns_Gull/300938.jpg
Heermanns_Gull/301030.jpg
Heermanns_Gull/301078.jpg
Heermanns_Gull/301104.jpg
Glaucous_Gull/301812.jpg
Glaucous_Gull/301827.jpg
Glaucous_Gull/302184.jpg
Glaucous_Gull/302567.jpg
Glaucous_Gull/302692.jpg
Great_Black_backed_Gull/303095.jpg
Great_Black_backed_Gull/303234.jpg
Great_Black_backed_Gull/303477.jpg
Great_Black_backed_Gull/303516.jpg
Great_Black_backed_Gull/304523.jpg
Western_Gull/305899.jpg
Western_Gull/306080.jpg
Western_Gull/306586.jpg
Western_Gull/306893.jpg
Western_Gull/306950.jpg
Thayers_Gull/307716.jpg
Thayers_Gull/307779.jpg
Thayers_Gull/307818.jpg
Thayers_Gull/307822.jpg
Laughing_Gull/308022.jpg
Laughing_Gull/308164.jpg
Laughing_Gull/308266.jpg
Franklins_Gull/309043.jpg
Franklins_Gull/309135.jpg
Franklins_Gull/309193.jpg
Black_legged_Kittiwake/309259.jpg
Black_legged_Kittiwake/309334.jpg
Black_legged_Kittiwake/309654.jpg
Black_legged_Kittiwake/310481.jpg
Black_Skimmer/311960.jpg
Black_Skimmer/312079.jpg
Black_Skimmer/312167.jpg
Black_Skimmer/312512.jpg
Black_Skimmer/312679.jpg
Black_Tern/314133.jpg
Black_Tern/314970.jpg
Gull_billed_Tern/315286.jpg
Gull_billed_Tern/315294.jpg
Gull_billed_Tern/315416.jpg
Gull_billed_Tern/315435.jpg
Caspian_Tern/315661.jpg
Caspian_Tern/315662.jpg
Caspian_Tern/315688.jpg
Caspian_Tern/315968.jpg
Caspian_Tern/316252.jpg
Roseate_Tern/316924.jpg
Roseate_Tern/316925.jpg
Roseate_Tern/316941.jpg
Forsters_Tern/317495.jpg
Forsters_Tern/318187.jpg
Forsters_Tern/318546.jpg
Common_Tern/318688.jpg
Common_Tern/318803.jpg
Common_Tern/319029.jpg
Common_Tern/319099.jpg
Common_Tern/319411.jpg
Arctic_Tern/320674.jpg
Arctic_Tern/321528.jpg
Arctic_Tern/322024.jpg
Arctic_Tern/322181.jpg
Arctic_Tern/322922.jpg
Least_Tern/323045.jpg
Least_Tern/323052.jpg
Least_Tern/323472.jpg
Least_Tern/323483.jpg
Royal_Tern/323513.jpg
Royal_Tern/323783.jpg
Sandwich_Tern/324321.jpg
Sandwich_Tern/324443.jpg
Sandwich_Tern/324599.jpg
Sandwich_Tern/324756.jpg
Sandwich_Tern/324769.jpg
Gray_Catbird/324859.jpg
Gray_Catbird/324927.jpg
Gray_Catbird/324971.jpg
Gray_Catbird/325400.jpg
Gray_Catbird/325934.jpg
Northern_Mockingbird/326436.jpg
Northern_Mockingbird/326445.jpg
Northern_Mockingbird/328143.jpg
Northern_Mockingbird/328292.jpg
Northern_Mockingbird/328612.jpg
Sage_Thrasher/328718.jpg
Sage_Thrasher/328775.jpg
Sage_Thrasher/328832.jpg
Sage_Thrasher/328868.jpg
Curve_billed_Thrasher/328914.jpg
Curve_billed_Thrasher/329153.jpg
Curve_billed_Thrasher/329176.jpg
Curve_billed_Thrasher/329191.jpg
Long_billed_Thrasher/329560.jpg
Long_billed_Thrasher/329561.jpg
Long_billed_Thrasher/329562.jpg
Long_billed_Thrasher/329615.jpg
California_Thrasher/329757.jpg
California_Thrasher/329794.jpg
California_Thrasher/329830.jpg
Brown_Thrasher/330163.jpg
Brown_Thrasher/330284.jpg
Brown_Thrasher/330291.jpg
Brown_Thrasher/330538.jpg
Brown_Thrasher/330580.jpg
American_Pipit/331265.jpg
American_Pipit/331545.jpg
American_Pipit/331566.jpg
American_Pipit/331569.jpg
California_Quail/331714.jpg
California_Quail/331810.jpg
California_Quail/332297.jpg
California_Quail/333262.jpg
Scaled_Quail/333395.jpg
Northern_Bobwhite/333619.jpg
Northern_Bobwhite/333770.jpg
Northern_Bobwhite/333790.jpg
Osprey/334503.jpg
Osprey/334536.jpg
Osprey/334638.jpg
Osprey/335020.jpg
Osprey/336288.jpg
Black_crested_Titmouse/336421.jpg
Black_crested_Titmouse/336453.jpg
Black_crested_Titmouse/336499.jpg
Black_crested_Titmouse/336543.jpg
Black_crested_Titmouse/336575.jpg
Tufted_Titmouse/336977.jpg
Tufted_Titmouse/338353.jpg
Tufted_Titmouse/338568.jpg
Oak_Titmouse/338683.jpg
Oak_Titmouse/338693.jpg
Bridled_Titmouse/338900.jpg
Black_capped_Chickadee/339253.jpg
Black_capped_Chickadee/339446.jpg
Black_capped_Chickadee/339713.jpg
Black_capped_Chickadee/341033.jpg
Carolina_Chickadee/341523.jpg
Carolina_Chickadee/341851.jpg
Carolina_Chickadee/341860.jpg
Carolina_Chickadee/342293.jpg
Mountain_Chickadee/342457.jpg
Mountain_Chickadee/342534.jpg
Mountain_Chickadee/342563.jpg
Mountain_Chickadee/342571.jpg
Mountain_Chickadee/342697.jpg
Boreal_Chickadee/342835.jpg
Boreal_Chickadee/342836.jpg
Boreal_Chickadee/342847.jpg
Boreal_Chickadee/342953.jpg
Boreal_Chickadee/342967.jpg
Chestnut_backed_Chickadee/343229.jpg
Chestnut_backed_Chickadee/343361.jpg
Chestnut_backed_Chickadee/343455.jpg
Chestnut_backed_Chickadee/343684.jpg
Chestnut_backed_Chickadee/343685.jpg
Canada_Warbler/343789.jpg
Canada_Warbler/343790.jpg
Wilsons_Warbler/343994.jpg
Wilsons_Warbler/343996.jpg
Wilsons_Warbler/344021.jpg
Mourning_Warbler/344176.jpg
Common_Yellowthroat/344414.jpg
Common_Yellowthroat/345188.jpg
Common_Yellowthroat/345325.jpg
Common_Yellowthroat/345451.jpg
Common_Yellowthroat/345523.jpg
Worm_eating_Warbler/346416.jpg
Yellow_breasted_Chat/346442.jpg
Yellow_breasted_Chat/346698.jpg
Yellow_breasted_Chat/346703.jpg
Yellow_breasted_Chat/346747.jpg
Yellow_breasted_Chat/346759.jpg
Black_and_white_Warbler/346908.jpg
Black_and_white_Warbler/346913.jpg
Black_and_white_Warbler/347317.jpg
Black_and_white_Warbler/347492.jpg
Black_and_white_Warbler/347648.jpg
Painted_Redstart/347978.jpg
Painted_Redstart/347980.jpg
Painted_Redstart/347982.jpg
Connecticut_Warbler/348035.jpg
Connecticut_Warbler/348036.jpg
Connecticut_Warbler/348037.jpg
Connecticut_Warbler/348067.jpg
Connecticut_Warbler/348090.jpg
Orange_crowned_Warbler/348166.jpg
Orange_crowned_Warbler/348253.jpg
Orange_crowned_Warbler/348381.jpg
Tennessee_Warbler/348538.jpg
Tennessee_Warbler/348584.jpg
Tennessee_Warbler/348598.jpg
Tennessee_Warbler/348616.jpg
Tennessee_Warbler/348638.jpg
Nashville_Warbler/348685.jpg
Nashville_Warbler/348691.jpg
Nashville_Warbler/348693.jpg
Nashville_Warbler/348695.jpg
Nashville_Warbler/348842.jpg
Louisiana_Waterthrush/348992.jpg
Northern_Waterthrush/349047.jpg
Northern_Waterthrush/349108.jpg
Northern_Waterthrush/349120.jpg
Northern_Waterthrush/349137.jpg
Prothonotary_Warbler/349311.jpg
Prothonotary_Warbler/349367.jpg
Prothonotary_Warbler/349487.jpg
Prothonotary_Warbler/349507.jpg
Prothonotary_Warbler/349511.jpg
Ovenbird/350013.jpg
Ovenbird/350075.jpg
Ovenbird/350278.jpg
Northern_Parula/350582.jpg
Northern_Parula/350585.jpg
Black_throated_Blue_Warbler/350737.jpg
Bay_breasted_Warbler/350969.jpg
Cerulean_Warbler/351071.jpg
Hooded_Warbler/351187.jpg
Hooded_Warbler/351214.jpg
Hooded_Warbler/351215.jpg
Yellow_rumped_Warbler/351333.jpg
Yellow_rumped_Warbler/351557.jpg
Yellow_rumped_Warbler/351619.jpg
Yellow_rumped_Warbler/351773.jpg
Yellow_rumped_Warbler/351788.jpg
Prairie_Warbler/352276.jpg
Prairie_Warbler/352277.jpg
Prairie_Warbler/352287.jpg
Prairie_Warbler/352288.jpg
Blackburnian_Warbler/352648.jpg
Blackburnian_Warbler/352651.jpg
Blackburnian_Warbler/352667.jpg
Magnolia_Warbler/352840.jpg
Magnolia_Warbler/352869.jpg
Magnolia_Warbler/352985.jpg
Palm_Warbler/353311.jpg
Palm_Warbler/353356.jpg
Palm_Warbler/353511.jpg
Palm_Warbler/353548.jpg
Chestnut_sided_Warbler/353600.jpg
Chestnut_sided_Warbler/353670.jpg
Chestnut_sided_Warbler/353726.jpg
Chestnut_sided_Warbler/353737.jpg
Yellow_Warbler/353938.jpg
Yellow_Warbler/354182.jpg
Yellow_Warbler/354253.jpg
Yellow_Warbler/354323.jpg
Pine_Warbler/354426.jpg
Pine_Warbler/354536.jpg
American_Redstart/354739.jpg
American_Redstart/354765.jpg
American_Redstart/354880.jpg
American_Redstart/354932.jpg
American_Redstart/355579.jpg
Blackpoll_Warbler/355744.jpg
Blackpoll_Warbler/355800.jpg
Blackpoll_Warbler/355856.jpg
Blackpoll_Warbler/355862.jpg
Cape_May_Warbler/355894.jpg
Cape_May_Warbler/355999.jpg
Townsends_Warbler/356218.jpg
Townsends_Warbler/356223.jpg
Townsends_Warbler/356228.jpg
Black_throated_Green_Warbler/356502.jpg
Black_throated_Green_Warbler/356558.jpg
Black_throated_Green_Warbler/356584.jpg
Golden_winged_Warbler/356684.jpg
Golden_winged_Warbler/356752.jpg
Golden_winged_Warbler/356806.jpg
Golden_winged_Warbler/356812.jpg
Blue_winged_Warbler/356962.jpg
Blue_winged_Warbler/356969.jpg
Blue_winged_Warbler/356971.jpg
House_Sparrow/358126.jpg
House_Sparrow/358818.jpg
House_Sparrow/359320.jpg
American_White_Pelican/359935.jpg
American_White_Pelican/360045.jpg
American_White_Pelican/360046.jpg
Brown_Pelican/362301.jpg
Brown_Pelican/363467.jpg
Brown_Pelican/363763.jpg
Brown_Pelican/363813.jpg
Brown_Pelican/364363.jpg
Double_crested_Cormorant/364541.jpg
Double_crested_Cormorant/364685.jpg
Double_crested_Cormorant/364764.jpg
Double_crested_Cormorant/365382.jpg
Neotropic_Cormorant/366868.jpg
Neotropic_Cormorant/367584.jpg
Neotropic_Cormorant/367993.jpg
Neotropic_Cormorant/368157.jpg
Neotropic_Cormorant/368195.jpg
Great_Cormorant/370495.jpg
Great_Cormorant/370500.jpg
Great_Cormorant/370516.jpg
Great_Cormorant/370524.jpg
Brandts_Cormorant/371769.jpg
Brandts_Cormorant/371880.jpg
Brandts_Cormorant/371924.jpg
Brandts_Cormorant/372203.jpg
Wild_Turkey/372322.jpg
Wild_Turkey/373031.jpg
Wild_Turkey/374274.jpg
Ring_necked_Pheasant/375137.jpg
Ring_necked_Pheasant/375192.jpg
Ring_necked_Pheasant/375875.jpg
Ring_necked_Pheasant/376251.jpg
Ring_necked_Pheasant/376882.jpg
Ruffed_Grouse/377185.jpg
Ruffed_Grouse/377286.jpg
Ruffed_Grouse/377297.jpg
Ruffed_Grouse/377354.jpg
Greater_Sage_Grouse/377841.jpg
Greater_Sage_Grouse/378031.jpg
Sooty_Grouse/378509.jpg
Dusky_Grouse/378823.jpg
Dusky_Grouse/378838.jpg
Dusky_Grouse/378865.jpg
Spruce_Grouse/379038.jpg
Spruce_Grouse/379048.jpg
Spruce_Grouse/379166.jpg
Spruce_Grouse/379230.jpg
Willow_Ptarmigan/379441.jpg
Willow_Ptarmigan/379771.jpg
Willow_Ptarmigan/380051.jpg
Rock_Ptarmigan/381514.jpg
Rock_Ptarmigan/381648.jpg
Rock_Ptarmigan/381666.jpg
Rock_Ptarmigan/381680.jpg
Greater_Prairie_Chicken/382034.jpg
Greater_Prairie_Chicken/382063.jpg
Greater_Prairie_Chicken/382089.jpg
Greater_Prairie_Chicken/382176.jpg
Sharp_tailed_Grouse/382288.jpg
Sharp_tailed_Grouse/382463.jpg
Northern_Flicker/382710.jpg
Northern_Flicker/382764.jpg
Northern_Flicker/383173.jpg
Northern_Flicker/383289.jpg
Northern_Flicker/383640.jpg
Pileated_Woodpecker/385340.jpg
Pileated_Woodpecker/386642.jpg
Pileated_Woodpecker/387058.jpg
Pileated_Woodpecker/387106.jpg
Pileated_Woodpecker/387470.jpg
Golden_fronted_Woodpecker/387841.jpg
Golden_fronted_Woodpecker/387974.jpg
Golden_fronted_Woodpecker/387982.jpg
Golden_fronted_Woodpecker/387991.jpg
Golden_fronted_Woodpecker/388025.jpg
Red_bellied_Woodpecker/388109.jpg
Red_bellied_Woodpecker/389064.jpg
Red_bellied_Woodpecker/389565.jpg
Red_bellied_Woodpecker/390104.jpg
Red_headed_Woodpecker/390678.jpg
Red_headed_Woodpecker/390766.jpg
Red_headed_Woodpecker/390955.jpg
Red_headed_Woodpecker/391283.jpg
Acorn_Woodpecker/391501.jpg
Acorn_Woodpecker/391509.jpg
Acorn_Woodpecker/391909.jpg
Acorn_Woodpecker/392295.jpg
Acorn_Woodpecker/392614.jpg
Lewiss_Woodpecker/392895.jpg
Lewiss_Woodpecker/392981.jpg
Lewiss_Woodpecker/393052.jpg
Gila_Woodpecker/393141.jpg
Gila_Woodpecker/393176.jpg
Gila_Woodpecker/393367.jpg
Gila_Woodpecker/393376.jpg
Gila_Woodpecker/393398.jpg
White_headed_Woodpecker/393523.jpg
White_headed_Woodpecker/393581.jpg
White_headed_Woodpecker/393584.jpg
White_headed_Woodpecker/393585.jpg
White_headed_Woodpecker/393590.jpg
Black_backed_Woodpecker/393627.jpg
Black_backed_Woodpecker/393667.jpg
Black_backed_Woodpecker/393725.jpg
Black_backed_Woodpecker/393726.jpg
Black_backed_Woodpecker/393744.jpg
Red_cockaded_Woodpecker/393775.jpg
Red_cockaded_Woodpecker/393829.jpg
American_Three_toed_Woodpecker/394079.jpg
American_Three_toed_Woodpecker/394085.jpg
Nuttalls_Woodpecker/394236.jpg
Nuttalls_Woodpecker/394276.jpg
Nuttalls_Woodpecker/394435.jpg
Downy_Woodpecker/394729.jpg
Downy_Woodpecker/396687.jpg
Downy_Woodpecker/396876.jpg
Ladder_backed_Woodpecker/397082.jpg
Ladder_backed_Woodpecker/397092.jpg
Ladder_backed_Woodpecker/397154.jpg
Ladder_backed_Woodpecker/397230.jpg
Ladder_backed_Woodpecker/397337.jpg
Hairy_Woodpecker/397433.jpg
Hairy_Woodpecker/397887.jpg
Hairy_Woodpecker/398004.jpg
Red_naped_Sapsucker/399031.jpg
Red_naped_Sapsucker/399042.jpg
Red_breasted_Sapsucker/399170.jpg
Red_breasted_Sapsucker/399187.jpg
Red_breasted_Sapsucker/399215.jpg
Red_breasted_Sapsucker/399229.jpg
Yellow_bellied_Sapsucker/400004.jpg
Yellow_bellied_Sapsucker/400215.jpg
Yellow_bellied_Sapsucker/400433.jpg
Yellow_bellied_Sapsucker/400435.jpg
Yellow_bellied_Sapsucker/400560.jpg
Clarks_Grebe/400990.jpg
Clarks_Grebe/401042.jpg
Clarks_Grebe/401081.jpg
Western_Grebe/401381.jpg
Western_Grebe/401437.jpg
Western_Grebe/401632.jpg
Western_Grebe/401639.jpg
Western_Grebe/402231.jpg
Horned_Grebe/402305.jpg
Horned_Grebe/402593.jpg
Horned_Grebe/403263.jpg
Horned_Grebe/403484.jpg
Horned_Grebe/404628.jpg
Red_necked_Grebe/404744.jpg
Red_necked_Grebe/404889.jpg
Red_necked_Grebe/405520.jpg
Red_necked_Grebe/405942.jpg
Eared_Grebe/406004.jpg
Eared_Grebe/406704.jpg
Eared_Grebe/407707.jpg
Pied_billed_Grebe/408163.jpg
Pied_billed_Grebe/408312.jpg
Pied_billed_Grebe/409307.jpg
Least_Grebe/410481.jpg
Least_Grebe/410673.jpg
Least_Grebe/410801.jpg
Least_Grebe/410806.jpg
Blue_gray_Gnatcatcher/410976.jpg
Blue_gray_Gnatcatcher/411244.jpg
Blue_gray_Gnatcatcher/411840.jpg
Blue_gray_Gnatcatcher/411896.jpg
Blue_gray_Gnatcatcher/411916.jpg
Monk_Parakeet/412097.jpg
Monk_Parakeet/412721.jpg
Monk_Parakeet/412815.jpg
Monk_Parakeet/413168.jpg
Monk_Parakeet/413275.jpg
Phainopepla/413538.jpg
Phainopepla/413718.jpg
Phainopepla/413726.jpg
Phainopepla/413842.jpg
American_Coot/414142.jpg
American_Coot/414618.jpg
American_Coot/416377.jpg
Common_Gallinule/416452.jpg
Common_Gallinule/416746.jpg
Common_Gallinule/416803.jpg
Purple_Gallinule/416853.jpg
Sora/417163.jpg
Sora/417702.jpg
Sora/417757.jpg
Sora/417815.jpg
King_Rail/417966.jpg
King_Rail/418026.jpg
King_Rail/418039.jpg
Virginia_Rail/418069.jpg
Virginia_Rail/418346.jpg
Virginia_Rail/418436.jpg
Virginia_Rail/418655.jpg
Clapper_Rail/418753.jpg
Clapper_Rail/419250.jpg
Clapper_Rail/419266.jpg
Black_necked_Stilt/419808.jpg
Black_necked_Stilt/419827.jpg
Black_necked_Stilt/420599.jpg
Black_necked_Stilt/420857.jpg
Black_necked_Stilt/420954.jpg
American_Avocet/421867.jpg
American_Avocet/422760.jpg
American_Avocet/422997.jpg
Ruby_crowned_Kinglet/424348.jpg
Ruby_crowned_Kinglet/425199.jpg
Ruby_crowned_Kinglet/425446.jpg
Ruby_crowned_Kinglet/425736.jpg
Ruby_crowned_Kinglet/425745.jpg
Golden_crowned_Kinglet/426168.jpg
Golden_crowned_Kinglet/426371.jpg
Golden_crowned_Kinglet/426373.jpg
Golden_crowned_Kinglet/426970.jpg
Verdin/427125.jpg
Verdin/427265.jpg
Verdin/427318.jpg
Verdin/427507.jpg
Red_Phalarope/427723.jpg
Red_Phalarope/428179.jpg
Red_Phalarope/428250.jpg
Red_necked_Phalarope/428313.jpg
Red_necked_Phalarope/428339.jpg
Red_necked_Phalarope/429019.jpg
Red_necked_Phalarope/429706.jpg
Wilsons_Phalarope/430014.jpg
Wilsons_Phalarope/430094.jpg
Wilsons_Phalarope/430255.jpg
Wilsons_Phalarope/430720.jpg
Spotted_Sandpiper/430789.jpg
Spotted_Sandpiper/431158.jpg
Spotted_Sandpiper/431252.jpg
Spotted_Sandpiper/431385.jpg
Surfbird/431576.jpg
Surfbird/431795.jpg
Surfbird/431933.jpg
Surfbird/431937.jpg
Ruddy_Turnstone/432289.jpg
Ruddy_Turnstone/432347.jpg
Ruddy_Turnstone/432697.jpg
Ruddy_Turnstone/433773.jpg
Black_Turnstone/434622.jpg
Black_Turnstone/434623.jpg
Black_Turnstone/434717.jpg
Black_Turnstone/434941.jpg
Upland_Sandpiper/435235.jpg
Sanderling/435563.jpg
Sanderling/435911.jpg
Sanderling/436010.jpg
Sanderling/436970.jpg
Dunlin/438912.jpg
Dunlin/438914.jpg
Dunlin/439094.jpg
Dunlin/439097.jpg
Bairds_Sandpiper/440405.jpg
Bairds_Sandpiper/440515.jpg
Bairds_Sandpiper/440709.jpg
Bairds_Sandpiper/440765.jpg
Red_Knot/442020.jpg
Red_Knot/442561.jpg
White_rumped_Sandpiper/442664.jpg
White_rumped_Sandpiper/442705.jpg
White_rumped_Sandpiper/442860.jpg
White_rumped_Sandpiper/443099.jpg
Stilt_Sandpiper/443281.jpg
Stilt_Sandpiper/443292.jpg
Purple_Sandpiper/443665.jpg
Purple_Sandpiper/444437.jpg
Purple_Sandpiper/444689.jpg
Purple_Sandpiper/444788.jpg
Purple_Sandpiper/445192.jpg
Western_Sandpiper/445386.jpg
Western_Sandpiper/445806.jpg
Western_Sandpiper/445822.jpg
Western_Sandpiper/445914.jpg
Pectoral_Sandpiper/446424.jpg
Pectoral_Sandpiper/446445.jpg
Pectoral_Sandpiper/446768.jpg
Pectoral_Sandpiper/447006.jpg
Pectoral_Sandpiper/447096.jpg
Least_Sandpiper/447382.jpg
Least_Sandpiper/447460.jpg
Least_Sandpiper/447787.jpg
Least_Sandpiper/448614.jpg
Semipalmated_Sandpiper/449188.jpg
Semipalmated_Sandpiper/449207.jpg
Semipalmated_Sandpiper/449214.jpg
Semipalmated_Sandpiper/449301.jpg
Wilsons_Snipe/450184.jpg
Wilsons_Snipe/450191.jpg
Wilsons_Snipe/450612.jpg
Wilsons_Snipe/450618.jpg
Short_billed_Dowitcher/450781.jpg
Short_billed_Dowitcher/450814.jpg
Short_billed_Dowitcher/450915.jpg
Short_billed_Dowitcher/451095.jpg
Long_billed_Dowitcher/451775.jpg
Long_billed_Dowitcher/452093.jpg
Long_billed_Dowitcher/452263.jpg
Long_billed_Dowitcher/452585.jpg
Marbled_Godwit/452956.jpg
Marbled_Godwit/453132.jpg
Marbled_Godwit/453768.jpg
Marbled_Godwit/453805.jpg
Marbled_Godwit/453989.jpg
Long_billed_Curlew/455138.jpg
Long_billed_Curlew/455589.jpg
Long_billed_Curlew/455670.jpg
Long_billed_Curlew/455718.jpg
Whimbrel/455809.jpg
Whimbrel/455822.jpg
Whimbrel/457287.jpg
Whimbrel/457964.jpg
Whimbrel/458080.jpg
American_Woodcock/458261.jpg
American_Woodcock/458297.jpg
Lesser_Yellowlegs/458548.jpg
Lesser_Yellowlegs/458703.jpg
Lesser_Yellowlegs/458991.jpg
Lesser_Yellowlegs/459484.jpg
Greater_Yellowlegs/460514.jpg
Greater_Yellowlegs/460579.jpg
Greater_Yellowlegs/460643.jpg
Greater_Yellowlegs/460906.jpg
Greater_Yellowlegs/461187.jpg
Willet/462408.jpg
Willet/462527.jpg
Willet/463224.jpg
Willet/463255.jpg
Willet/463496.jpg
Solitary_Sandpiper/463747.jpg
Solitary_Sandpiper/463973.jpg
Solitary_Sandpiper/463974.jpg
Solitary_Sandpiper/463977.jpg
Solitary_Sandpiper/463978.jpg
Red_breasted_Nuthatch/464738.jpg
Red_breasted_Nuthatch/464903.jpg
Red_breasted_Nuthatch/464930.jpg
Red_breasted_Nuthatch/464962.jpg
White_breasted_Nuthatch/467836.jpg
White_breasted_Nuthatch/467951.jpg
Brown_headed_Nuthatch/468871.jpg
Brown_headed_Nuthatch/468946.jpg
Brown_headed_Nuthatch/468988.jpg
Brown_headed_Nuthatch/469111.jpg
Pygmy_Nuthatch/469253.jpg
Pygmy_Nuthatch/469417.jpg
Pygmy_Nuthatch/469445.jpg
Northern_Saw_whet_Owl/469559.jpg
Northern_Saw_whet_Owl/469679.jpg
Northern_Saw_whet_Owl/469897.jpg
Northern_Saw_whet_Owl/469903.jpg
Northern_Saw_whet_Owl/470023.jpg
Short_eared_Owl/470608.jpg
Short_eared_Owl/470683.jpg
Short_eared_Owl/472205.jpg
Short_eared_Owl/472433.jpg
Long_eared_Owl/472902.jpg
Long_eared_Owl/474433.jpg
Long_eared_Owl/474456.jpg
Burrowing_Owl/475008.jpg
Burrowing_Owl/475704.jpg
Burrowing_Owl/475986.jpg
Burrowing_Owl/477034.jpg
Snowy_Owl/477601.jpg
Snowy_Owl/478223.jpg
Snowy_Owl/478432.jpg
Snowy_Owl/478923.jpg
Great_Horned_Owl/479973.jpg
Great_Horned_Owl/480079.jpg
Great_Horned_Owl/480903.jpg
Great_Horned_Owl/481305.jpg
Ferruginous_Pygmy_Owl/482387.jpg
Ferruginous_Pygmy_Owl/482443.jpg
Ferruginous_Pygmy_Owl/482596.jpg
Ferruginous_Pygmy_Owl/482669.jpg
Ferruginous_Pygmy_Owl/482698.jpg
Eastern_Screech_Owl/483406.jpg
Eastern_Screech_Owl/483482.jpg
Eastern_Screech_Owl/484269.jpg
Western_Screech_Owl/484466.jpg
Western_Screech_Owl/484467.jpg
Western_Screech_Owl/484469.jpg
Western_Screech_Owl/484533.jpg
Western_Screech_Owl/484572.jpg
Great_Gray_Owl/484839.jpg
Great_Gray_Owl/484870.jpg
Great_Gray_Owl/485472.jpg
Great_Gray_Owl/485725.jpg
Spotted_Owl/487290.jpg
Spotted_Owl/487364.jpg
Spotted_Owl/487416.jpg
Barred_Owl/487599.jpg
Barred_Owl/487606.jpg
Barred_Owl/488241.jpg
Barred_Owl/488479.jpg
Barred_Owl/488798.jpg
Northern_Hawk_Owl/490395.jpg
Northern_Hawk_Owl/490794.jpg
Northern_Hawk_Owl/490806.jpg
Northern_Hawk_Owl/491165.jpg
European_Starling/491764.jpg
European_Starling/493026.jpg
Northern_Gannet/494414.jpg
Northern_Gannet/495305.jpg
Northern_Gannet/495823.jpg
Northern_Gannet/495857.jpg
Northern_Gannet/495973.jpg
Wrentit/496257.jpg
Wrentit/496305.jpg
Wrentit/496322.jpg
Wrentit/496329.jpg
White_Ibis/498847.jpg
White_Ibis/498940.jpg
White_Ibis/499040.jpg
White_Ibis/499563.jpg
White_Ibis/499705.jpg
White_faced_Ibis/500101.jpg
White_faced_Ibis/500119.jpg
White_faced_Ibis/500360.jpg
Glossy_Ibis/500796.jpg
Glossy_Ibis/501570.jpg
Black_chinned_Hummingbird/503009.jpg
Black_chinned_Hummingbird/503286.jpg
Black_chinned_Hummingbird/503441.jpg
Ruby_throated_Hummingbird/503987.jpg
Ruby_throated_Hummingbird/505437.jpg
Ruby_throated_Hummingbird/506319.jpg
Ruby_throated_Hummingbird/506320.jpg
Ruby_throated_Hummingbird/506321.jpg
Annas_Hummingbird/506429.jpg
Annas_Hummingbird/506555.jpg
Annas_Hummingbird/507567.jpg
Annas_Hummingbird/507907.jpg
Costas_Hummingbird/509138.jpg
Costas_Hummingbird/509198.jpg
Costas_Hummingbird/509465.jpg
Broad_billed_Hummingbird/509907.jpg
Broad_billed_Hummingbird/509969.jpg
Broad_billed_Hummingbird/510069.jpg
Broad_tailed_Hummingbird/510498.jpg
Broad_tailed_Hummingbird/510627.jpg
Broad_tailed_Hummingbird/510665.jpg
Rufous_Hummingbird/511449.jpg
Rufous_Hummingbird/511958.jpg
Rufous_Hummingbird/512223.jpg
Rufous_Hummingbird/512857.jpg
Rufous_Hummingbird/512892.jpg
Allens_Hummingbird/513139.jpg
Allens_Hummingbird/513386.jpg
Cactus_Wren/514254.jpg
Cactus_Wren/514376.jpg
Cactus_Wren/514461.jpg
Cactus_Wren/514547.jpg
Canyon_Wren/514600.jpg
Canyon_Wren/514678.jpg
Canyon_Wren/514705.jpg
Canyon_Wren/514759.jpg
Marsh_Wren/514957.jpg
Marsh_Wren/515112.jpg
Marsh_Wren/515137.jpg
Marsh_Wren/515204.jpg
Marsh_Wren/515772.jpg
Sedge_Wren/515883.jpg
Sedge_Wren/516102.jpg
Rock_Wren/516341.jpg
Rock_Wren/516346.jpg
Rock_Wren/516526.jpg
Bewicks_Wren/516922.jpg
Bewicks_Wren/516971.jpg
Bewicks_Wren/517189.jpg
Carolina_Wren/517360.jpg
Carolina_Wren/517369.jpg
Carolina_Wren/517782.jpg
Carolina_Wren/518318.jpg
Carolina_Wren/518595.jpg
House_Wren/518835.jpg
House_Wren/518972.jpg
House_Wren/520400.jpg
Winter_Wren/520753.jpg
Winter_Wren/520768.jpg
Pacific_Wren/520920.jpg
Elegant_Trogon/521173.jpg
Elegant_Trogon/521208.jpg
Elegant_Trogon/521209.jpg
Veery/521269.jpg
Veery/521489.jpg
Veery/521523.jpg
Hermit_Thrush/521695.jpg
Hermit_Thrush/521730.jpg
Hermit_Thrush/522360.jpg
Hermit_Thrush/522386.jpg
Gray_cheeked_Thrush/522620.jpg
Gray_cheeked_Thrush/522623.jpg
Swainsons_Thrush/522961.jpg
Swainsons_Thrush/522984.jpg
Swainsons_Thrush/523160.jpg
Swainsons_Thrush/523260.jpg
Wood_Thrush/523354.jpg
Wood_Thrush/523364.jpg
Wood_Thrush/523379.jpg
Wood_Thrush/523541.jpg
Varied_Thrush/523597.jpg
Varied_Thrush/523805.jpg
Varied_Thrush/523985.jpg
Varied_Thrush/524094.jpg
Varied_Thrush/524099.jpg
Mountain_Bluebird/524360.jpg
Mountain_Bluebird/524561.jpg
Mountain_Bluebird/524908.jpg
Mountain_Bluebird/524983.jpg
Western_Bluebird/525428.jpg
Western_Bluebird/526007.jpg
Western_Bluebird/526459.jpg
Western_Bluebird/526460.jpg
Eastern_Bluebird/526765.jpg
Eastern_Bluebird/526852.jpg
Eastern_Bluebird/526976.jpg
Eastern_Bluebird/527918.jpg
Eastern_Bluebird/528766.jpg
American_Robin/529670.jpg
American_Robin/530040.jpg
American_Robin/530041.jpg
American_Robin/530839.jpg
Olive_sided_Flycatcher/531191.jpg
Olive_sided_Flycatcher/531208.jpg
Olive_sided_Flycatcher/531333.jpg
Olive_sided_Flycatcher/531334.jpg
Western_Wood_Pewee/531459.jpg
Western_Wood_Pewee/531481.jpg
Western_Wood_Pewee/531632.jpg
Western_Wood_Pewee/531755.jpg
Western_Wood_Pewee/531757.jpg
Eastern_Wood_Pewee/531868.jpg
Eastern_Wood_Pewee/531884.jpg
Eastern_Wood_Pewee/531987.jpg
Eastern_Wood_Pewee/532039.jpg
Eastern_Wood_Pewee/532201.jpg
Pacific_slope_Flycatcher/532532.jpg
Pacific_slope_Flycatcher/532677.jpg
Yellow_bellied_Flycatcher/532714.jpg
Yellow_bellied_Flycatcher/532819.jpg
Yellow_bellied_Flycatcher/532822.jpg
Yellow_bellied_Flycatcher/532827.jpg
Hammonds_Flycatcher/533074.jpg
Hammonds_Flycatcher/533076.jpg
Hammonds_Flycatcher/533077.jpg
Least_Flycatcher/533233.jpg
Least_Flycatcher/533340.jpg
Least_Flycatcher/533345.jpg
Least_Flycatcher/533353.jpg
Least_Flycatcher/533400.jpg
Dusky_Flycatcher/533523.jpg
Dusky_Flycatcher/533553.jpg
Dusky_Flycatcher/533558.jpg
Dusky_Flycatcher/533568.jpg
Cordilleran_Flycatcher/533658.jpg
Willow_Flycatcher/533773.jpg
Willow_Flycatcher/533774.jpg
Willow_Flycatcher/533810.jpg
Willow_Flycatcher/534012.jpg
Acadian_Flycatcher/534090.jpg
Acadian_Flycatcher/534091.jpg
Acadian_Flycatcher/534108.jpg
Gray_Flycatcher/534274.jpg
Gray_Flycatcher/534310.jpg
Gray_Flycatcher/534326.jpg
Gray_Flycatcher/534358.jpg
Gray_Flycatcher/534370.jpg
Vermilion_Flycatcher/534690.jpg
Vermilion_Flycatcher/534784.jpg
Vermilion_Flycatcher/534879.jpg
Vermilion_Flycatcher/535302.jpg
Vermilion_Flycatcher/536084.jpg
Black_Phoebe/536955.jpg
Black_Phoebe/537242.jpg
Black_Phoebe/537318.jpg
Black_Phoebe/537701.jpg
Black_Phoebe/537925.jpg
Eastern_Phoebe/538056.jpg
Eastern_Phoebe/538589.jpg
Eastern_Phoebe/539360.jpg
Eastern_Phoebe/539606.jpg
Says_Phoebe/539893.jpg
Says_Phoebe/540039.jpg
Ash_throated_Flycatcher/540459.jpg
Ash_throated_Flycatcher/540522.jpg
Ash_throated_Flycatcher/540581.jpg
Ash_throated_Flycatcher/540657.jpg
Great_Crested_Flycatcher/540705.jpg
Great_Crested_Flycatcher/540780.jpg
Great_Crested_Flycatcher/541314.gif
Great_Crested_Flycatcher/541362.jpg
Brown_crested_Flycatcher/541516.jpg
Brown_crested_Flycatcher/541638.jpg
Brown_crested_Flycatcher/541706.jpg
Brown_crested_Flycatcher/541728.jpg
Brown_crested_Flycatcher/541737.jpg
Great_Kiskadee/541839.jpg
Great_Kiskadee/542304.jpg
Great_Kiskadee/542798.jpg
Great_Kiskadee/542834.jpg
Couchs_Kingbird/544352.jpg
Couchs_Kingbird/544378.jpg
Couchs_Kingbird/544404.jpg
Gray_Kingbird/544466.jpg
Gray_Kingbird/544514.jpg
Gray_Kingbird/544558.jpg
Gray_Kingbird/544761.jpg
Gray_Kingbird/544765.jpg
Scissor_tailed_Flycatcher/544968.jpg
Scissor_tailed_Flycatcher/545061.jpg
Scissor_tailed_Flycatcher/545177.jpg
Scissor_tailed_Flycatcher/545474.jpg
Scissor_tailed_Flycatcher/545643.jpg
Tropical_Kingbird/546721.jpg
Tropical_Kingbird/546749.jpg
Tropical_Kingbird/546852.jpg
Tropical_Kingbird/547217.jpg
Eastern_Kingbird/547441.jpg
Eastern_Kingbird/547483.jpg
Eastern_Kingbird/547490.jpg
Western_Kingbird/549474.jpg
Western_Kingbird/549508.jpg
Western_Kingbird/549678.jpg
Cassins_Kingbird/550030.jpg
Bells_Vireo/552110.jpg
Cassins_Vireo/552260.jpg
Cassins_Vireo/552309.jpg
Yellow_throated_Vireo/552392.jpg
Yellow_throated_Vireo/552416.jpg
Yellow_throated_Vireo/552479.jpg
Yellow_throated_Vireo/552522.jpg
Warbling_Vireo/552597.jpg
Warbling_Vireo/552975.jpg
Warbling_Vireo/553059.jpg
Warbling_Vireo/553061.jpg
White_eyed_Vireo/553507.jpg
White_eyed_Vireo/553558.jpg
White_eyed_Vireo/553605.jpg
Huttons_Vireo/553730.jpg
Huttons_Vireo/553856.jpg
Huttons_Vireo/553888.jpg
Huttons_Vireo/553909.jpg
Red_eyed_Vireo/553944.jpg
Red_eyed_Vireo/554276.jpg
Red_eyed_Vireo/554283.jpg
Red_eyed_Vireo/554471.jpg
Red_eyed_Vireo/554535.jpg
Philadelphia_Vireo/554697.jpg
Philadelphia_Vireo/554715.jpg
Philadelphia_Vireo/554788.jpg
Philadelphia_Vireo/554792.jpg
Philadelphia_Vireo/554796.jpg
Plumbeous_Vireo/554899.jpg
Plumbeous_Vireo/554902.jpg
Blue_headed_Vireo/555157.jpg
Blue_headed_Vireo/555225.jpg
Blue_headed_Vireo/555232.jpg
Blue_headed_Vireo/555243.jpg
Black_crowned_Night_Heron/555822.jpg
Black_crowned_Night_Heron/556956.jpg
Black_crowned_Night_Heron/556959.jpg
Black_crowned_Night_Heron/557614.jpg
Semipalmated_Plover/558727.jpg
Semipalmated_Plover/558847.jpg
Semipalmated_Plover/558858.jpg
Semipalmated_Plover/559447.jpg
Killdeer/560046.jpg
Killdeer/560054.jpg
Killdeer/561534.jpg
Killdeer/562435.jpg
Cliff_Swallow/563192.jpg
Purple_Martin/563409.jpg
Townsends_Solitaire/564382.jpg
Townsends_Solitaire/564447.jpg
Townsends_Solitaire/564517.jpg
Least_Bittern/564964.jpg
Least_Bittern/564973.jpg
Yellow_crowned_Night_Heron/565315.jpg
Yellow_crowned_Night_Heron/566085.jpg
Yellow_crowned_Night_Heron/566509.jpg
Yellow_crowned_Night_Heron/566959.jpg
Cassins_Finch/567594.jpg
Cassins_Finch/567757.jpg
Gambels_Quail/567993.jpg
Gambels_Quail/568019.jpg
Gambels_Quail/568106.jpg
Gambels_Quail/568222.jpg
Zone_tailed_Hawk/568344.jpg
Chestnut_collared_Longspur/571975.jpg
Mountain_Plover/572074.jpg
Chihuahuan_Raven/575435.jpg
Saltmarsh_Sparrow/575563.jpg
Olive_Sparrow/576347.jpg
Olive_Sparrow/576482.jpg
Yellow_eyed_Junco/577170.jpg
Canyon_Towhee/577927.jpg
Cassins_Sparrow/578258.jpg
Cassins_Sparrow/578360.jpg
Cassins_Sparrow/578405.jpg
Cassins_Sparrow/578424.jpg
Cassins_Sparrow/578499.jpg
Black_chinned_Sparrow/578606.jpg
Black_chinned_Sparrow/578742.jpg
Hoary_Redpoll/579565.jpg
Purple_Finch/580791.jpg
Purple_Finch/581206.jpg
Brown_capped_Rosy_Finch/582121.jpg
Audubons_Oriole/582203.jpg
Audubons_Oriole/582216.jpg
Audubons_Oriole/582279.jpg
Audubons_Oriole/582394.jpg
Scaled_Quail/582711.jpg
Scaled_Quail/582714.jpg
Scaled_Quail/582715.jpg
Bridled_Titmouse/582901.jpg
Bridled_Titmouse/582935.jpg
Mourning_Warbler/585528.jpg
Mourning_Warbler/586285.jpg
Mourning_Warbler/586542.jpg
Worm_eating_Warbler/587083.jpg
Worm_eating_Warbler/587348.jpg
Worm_eating_Warbler/587371.jpg
Louisiana_Waterthrush/588428.jpg
Louisiana_Waterthrush/588764.jpg
Louisiana_Waterthrush/589167.jpg
Cerulean_Warbler/590344.jpg
Hooded_Warbler/590876.jpg
White_tailed_Ptarmigan/592628.jpg
Rock_Sandpiper/596107.jpg
Rock_Sandpiper/596409.jpg
Elf_Owl/597046.jpg
Elf_Owl/597072.jpg
Dusky_Flycatcher/599261.jpg
Cordilleran_Flycatcher/599594.jpg
Plumbeous_Vireo/600257.jpg
Plumbeous_Vireo/600264.jpg
Plumbeous_Vireo/600295.jpg