# print("Please build and install Nvidia apex package with option '--cuda_ext' according to https://github.com/NVIDIA/apex#from-source .")
@dataclass
class CLIPVisionCfg:
    """Configuration values for a CLIP vision model.

    Every field has a default, so ``CLIPVisionCfg()`` is a valid
    configuration. Per the field notes below, setting ``timm_model_name`` or
    ``eva_model_name`` overrides ``layers``, ``width`` and ``patch_size``.
    """

    # Either a single int or a 4-tuple of ints.
    layers: Union[Tuple[int, int, int, int], int] = 12
    width: int = 768
    head_width: int = 64
    mlp_ratio: float = 4.0
    patch_size: int = 16
    # Either a single int or a pair of ints.
    image_size: Union[Tuple[int, int], int] = 224
    # Layer scale initial value.
    ls_init_value: Optional[float] = None
    # Fraction of patches to drop out during training (0 means disabled, no
    # patches dropped); 0.5 to 0.75 recommended in the paper for optimal
    # results.
    patch_dropout: float = 0.0
    # Whether to global average pool the last embedding layer instead of
    # using the CLS token (https://arxiv.org/abs/2205.01580).
    global_average_pool: bool = False
    # Drop path rate.
    drop_path_rate: Optional[float] = None
    # A valid model name overrides layers, width, patch_size.
    timm_model_name: Optional[str] = None
    # Use (imagenet) pretrained weights for the named model.
    timm_model_pretrained: bool = False
    # Feature pooling for the timm model ('abs_attn', 'rot_attn', 'avg', '').
    timm_pool: str = "avg"
    # Linear projection for the timm model output ('linear', 'mlp', '').
    timm_proj: str = "linear"
    # Enable bias on the final projection.
    timm_proj_bias: bool = False
    # A valid eva model name overrides layers, width, patch_size.
    eva_model_name: Optional[str] = None