Commit 6f43e8fa authored by mashun1

open_clip
torchrun --nproc_per_node 8 -m open_clip_train.main \
--save-frequency 1 \
--save-most-recent \
--zeroshot-frequency 1 \
--train-data '/path/to/laion-400m' \
--dataset-type webdataset \
--lr "2.56e-5" \
--beta1 0.9 \
--beta2 0.95 \
--warmup 3072 \
--wd 0.2 \
--batch-size 1024 \
--aug-cfg scale='(0.4, 1.0)' \
--epochs 1 \
--train-num-samples 131072000 \
--workers 6 \
--model ViT-B-16-CL16 \
--pretrained '/path/to/ckpt' \
--precision 'amp_bf16' \
--ddp-static-graph \
--local-loss \
--gather-with-grad \
--grad-checkpointing \
--log-every-n-steps 256 \
--seed 0 \
--logs ./logs/ \
--imagenet-val '/path/to/imagenet/val'
torchrun --nproc_per_node 8 -m open_clip_train.main \
--save-frequency 1 \
--save-most-recent \
--zeroshot-frequency 1 \
--train-data '/path/to/laion-400m' \
--dataset-type webdataset \
--lr "2.048e-3" \
--beta1 0.9 \
--beta2 0.95 \
--warmup 782 \
--wd 0.2 \
--batch-size 8192 \
--aug-cfg scale='(0.4, 1.0)' \
--epochs 6 \
--workers 6 \
--model ViT-B-16-CL16 \
--precision 'amp_bf16' \
--ddp-static-graph \
--local-loss \
--gather-with-grad \
--force-image-size 112 \
--grad-checkpointing \
--log-every-n-steps 32 \
--seed 0 \
--logs ./logs/ \
--imagenet-val '/path/to/imagenet/val'
torchrun --nproc_per_node 8 -m open_clip_train.main \
--save-frequency 1 \
--save-most-recent \
--zeroshot-frequency 1 \
--train-data '/path/to/laion-400m' \
--dataset-type webdataset \
--lr "2.24e-5" \
--beta1 0.9 \
--beta2 0.95 \
--warmup 3571 \
--wd 0.2 \
--batch-size 896 \
--aug-cfg scale='(0.4, 1.0)' color_jitter='(0.32, 0.32, 0.32, 0.08)' color_jitter_prob=0.8 gray_scale_prob=0.2 \
--epochs 1 \
--train-num-samples 131072000 \
--workers 6 \
--model ViT-L-16-CL16-GAP \
--pretrained '/path/to/ckpt' \
--precision 'amp_bf16' \
--ddp-static-graph \
--local-loss \
--gather-with-grad \
--grad-checkpointing \
--log-every-n-steps 293 \
--seed 0 \
--logs ./logs/ \
--imagenet-val '/path/to/imagenet/val'
torchrun --nproc_per_node 8 -m open_clip_train.main \
--save-frequency 1 \
--save-most-recent \
--zeroshot-frequency 1 \
--train-data '/path/to/laion-400m' \
--dataset-type webdataset \
--lr "1.024e-3" \
--beta1 0.9 \
--beta2 0.95 \
--warmup 1563 \
--wd 0.2 \
--batch-size 4096 \
--aug-cfg scale='(0.4, 1.0)' color_jitter='(0.32, 0.32, 0.32, 0.08)' color_jitter_prob=0.8 gray_scale_prob=0.2 \
--epochs 6 \
--workers 6 \
--model ViT-L-16-CL16-GAP \
--precision 'amp_bf16' \
--ddp-static-graph \
--local-loss \
--gather-with-grad \
--force-image-size 64 \
--grad-checkpointing \
--log-every-n-steps 64 \
--seed 0 \
--logs ./logs/ \
--imagenet-val '/path/to/imagenet/val'
torchrun --nproc_per_node 8 -m open_clip_train.main \
--save-frequency 1 \
--save-most-recent \
--zeroshot-frequency 1 \
--train-data '/path/to/laion-400m' \
--dataset-type webdataset \
--lr "2.24e-5" \
--beta1 0.9 \
--beta2 0.95 \
--warmup 3571 \
--wd 0.2 \
--batch-size 896 \
--aug-cfg scale='(0.4, 1.0)' color_jitter='(0.32, 0.32, 0.32, 0.08)' color_jitter_prob=0.8 gray_scale_prob=0.2 \
--epochs 1 \
--train-num-samples 131072000 \
--workers 6 \
--model ViT-L-16-CL32-GAP \
--pretrained '/path/to/ckpt' \
--precision 'amp_bf16' \
--ddp-static-graph \
--local-loss \
--gather-with-grad \
--grad-checkpointing \
--log-every-n-steps 293 \
--seed 0 \
--logs ./logs/ \
--imagenet-val '/path/to/imagenet/val'
torchrun --nproc_per_node 8 -m open_clip_train.main \
--save-frequency 1 \
--save-most-recent \
--zeroshot-frequency 1 \
--train-data '/path/to/laion-400m' \
--dataset-type webdataset \
--lr "1.024e-3" \
--beta1 0.9 \
--beta2 0.95 \
--warmup 1563 \
--wd 0.2 \
--batch-size 4096 \
--aug-cfg scale='(0.4, 1.0)' color_jitter='(0.32, 0.32, 0.32, 0.08)' color_jitter_prob=0.8 gray_scale_prob=0.2 \
--epochs 6 \
--workers 6 \
--model ViT-L-16-CL8-Syntax-GAP \
--precision 'amp_bf16' \
--ddp-static-graph \
--local-loss \
--gather-with-grad \
--force-image-size 96 \
--grad-checkpointing \
--log-every-n-steps 64 \
--seed 0 \
--logs ./logs/ \
--imagenet-val '/path/to/imagenet/val'
# This script has not been tested; use it at your own discretion.
# The original experiment was run on a TPU v3-256 pod.
# This example script assumes 8 GPUs, each with ample memory. Tune the batch size, warmup, and learning rate accordingly if your machine setup differs (see the sketch in the comments just below).
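# A hedged sketch, not from the original authors: across these scripts the learning rate appears
# to scale linearly with the global batch size (per-GPU batch x GPU count), and the warmup looks
# like a roughly fixed number of samples rather than steps. For example, with 4 GPUs at the same
# per-GPU batch size of 4096:
#   GLOBAL_BATCH=$((4 * 4096))   # 16384 instead of 8 * 4096 = 32768
#   # lr:     5.12e-5 * 16384 / 32768 = 2.56e-5
#   # warmup: keep ~26M warmup samples -> 800 * 32768 / 16384 = 1600 steps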
torchrun --nproc_per_node 8 -m open_clip_train.main \
--save-frequency 1 \
--save-most-recent \
--zeroshot-frequency 1 \
--train-data '/path/to/laion2b_or_datacomp1b' \
--train-num-samples 131072000 \
--dataset-type webdataset \
--lr "5.12e-5" \
--beta1 0.9 \
--beta2 0.95 \
--warmup 800 \
--wd 0.2 \
--batch-size 4096 \
--aug-cfg scale='(0.4, 1.0)' color_jitter='(0.32, 0.32, 0.32, 0.08)' color_jitter_prob=0.8 gray_scale_prob=0.2 \
--epochs 4 \
--workers 6 \
--model ViT-H-14-CL32-GAP \
--pretrained '/path/to/pretrain84_ckpt' \
--precision 'amp_bf16' \
--ddp-static-graph \
--local-loss \
--gather-with-grad \
--force-image-size 224 \
--force-patch-dropout 0.3 \
--grad-checkpointing \
--log-every-n-steps 64 \
--seed 0 \
--logs ./logs/ \
--imagenet-val '/path/to/imagenet/val'
# This script has not been tested; use it at your own discretion.
# The original experiment was run on a TPU v3-256 pod.
# This example script assumes 8 GPUs, each with ample memory. Tune the batch size, warmup, and learning rate accordingly if your machine setup differs.
torchrun --nproc_per_node 8 -m open_clip_train.main \
--save-frequency 1 \
--save-most-recent \
--zeroshot-frequency 1 \
--train-data '/path/to/laion2b_or_datacomp1b' \
--train-num-samples 400000000 \
--dataset-type webdataset \
--lr "2.048e-3" \
--beta1 0.9 \
--beta2 0.95 \
--warmup 3200 \
--wd 0.2 \
--batch-size 8192 \
--aug-cfg scale='(0.4, 1.0)' color_jitter='(0.32, 0.32, 0.32, 0.08)' color_jitter_prob=0.8 gray_scale_prob=0.2 \
--epochs 32 \
--workers 6 \
--model ViT-H-14-CL8-Syntax-GAP \
--precision 'amp_bf16' \
--ddp-static-graph \
--local-loss \
--gather-with-grad \
--force-image-size 84 \
--grad-checkpointing \
--log-every-n-steps 32 \
--seed 0 \
--logs ./logs/ \
--imagenet-val '/path/to/imagenet/val'
# This script has not been tested; use it at your own discretion.
# The original experiment was run on a TPU v3-256 pod.
# This example script assumes 8 GPUs, each with ample memory. Tune the batch size, warmup, and learning rate accordingly if your machine setup differs.
torchrun --nproc_per_node 8 -m open_clip_train.main \
--save-frequency 1 \
--save-most-recent \
--zeroshot-frequency 1 \
--train-data '/path/to/laion2b_or_datacomp1b' \
--train-num-samples 131072000 \
--dataset-type webdataset \
--lr "6.4e-6" \
--beta1 0.9 \
--beta2 0.95 \
--warmup 1600 \
--wd 0.2 \
--batch-size 2048 \
--aug-cfg scale='(0.4, 1.0)' color_jitter='(0.32, 0.32, 0.32, 0.08)' color_jitter_prob=0.8 gray_scale_prob=0.2 \
--epochs 1 \
--workers 6 \
--model ViT-H-14-CL32-GAP \
--pretrained '/path/to/finetune224_ckpt' \
--precision 'amp_bf16' \
--ddp-static-graph \
--local-loss \
--gather-with-grad \
--force-image-size 336 \
--force-patch-dropout 0.4 \
--grad-checkpointing \
--log-every-n-steps 64 \
--seed 0 \
--logs ./logs/ \
--imagenet-val '/path/to/imagenet/val'
#!/bin/bash
#SBATCH --partition=g40423
#SBATCH --job-name=testopenclip
#SBATCH --nodes 30
#SBATCH --ntasks-per-node=8
#SBATCH --cpus-per-task=12
#SBATCH --output=%x_%j.out
#SBATCH --comment=laion
#SBATCH --open-mode=append
#SBATCH --exclusive
module load openmpi
module load cuda/11.7
export MASTER_ADDR=`hostname`
export MASTER_PORT=12802
export NCCL_PROTO=simple
export FI_EFA_FORK_SAFE=1
export FI_LOG_LEVEL=1
export FI_EFA_USE_DEVICE_RDMA=1
export NCCL_DEBUG=info
export PYTHONFAULTHANDLER=1
export CUDA_LAUNCH_BLOCKING=0
export OMPI_MCA_mtl_base_verbose=1
export FI_EFA_ENABLE_SHM_TRANSFER=0
export FI_PROVIDER=efa
export FI_EFA_TX_MIN_CREDITS=64
export NCCL_TREE_THRESHOLD=0
cd /admin/home-mitchellw/open_clip/src
export PYTHONPATH="$PYTHONPATH:/admin/home-mitchellw/open_clip/src"
EXP_NAME="test-B-32-laion5b-lr1e-3-bs90k"
srun --comment laion --cpu_bind=v --accel-bind=gn python -m open_clip_train.main \
--save-frequency 1 \
--train-data="pipe:aws s3 cp s3://s-datasets/laion5b/{laion2B-data/{000000..231349}.tar,laion2B-multi-data/{000000..226687}.tar,laion1B-nolang-data/{000000..127231}.tar} -" \
--train-num-samples 135646078 \
--dataset-type webdataset \
--dataset-resampled \
--warmup 2000 \
--batch-size=375 \
--epochs=97 \
--lr 1e-3 \
--workers=8 \
--report-to wandb \
--name ${EXP_NAME} \
--logs /scratch/logs/ \
--model ViT-B-32 \
--seed 0 \
--ddp-static-graph \
--local-loss \
--gather-with-grad \
--grad-checkpointing \
--precision amp_bfloat16 \
--wandb-project-name open_clip6 \
--resume "latest" \
--remote-sync s3://s-laion/mitchellw/logs
from clip_benchmark.datasets.builder import build_dataset
import pandas as pd
import os
root_path = "datasets" # set this to something meaningful
ds = build_dataset("mscoco_captions", root=root_path, split="val", task="captioning") # this downloads the dataset if it is not there already
coco = ds.coco
imgs = coco.loadImgs(coco.getImgIds())
future_df = {"filepath":[], "title":[]}
for img in imgs:
    caps = coco.imgToAnns[img["id"]]
    for cap in caps:
        future_df["filepath"].append(img["file_name"])
        future_df["title"].append(cap["caption"])
pd.DataFrame.from_dict(future_df).to_csv(
    os.path.join(root_path, "train2014.csv"), index=False, sep="\t"
)
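# The snippet above writes a tab-separated CSV with "filepath" and "title" columns, which matches
# what open_clip_train's csv dataset type expects. A minimal, untested sketch of training on the
# resulting file (COCO's file_name values are relative, so they may first need to be joined with
# the image directory):
torchrun --nproc_per_node 1 -m open_clip_train.main \
    --train-data datasets/train2014.csv \
    --dataset-type csv \
    --csv-separator $'\t' \
    --csv-img-key filepath \
    --csv-caption-key title \
    --model ViT-B-32 \
    --batch-size 64 \
    --epochs 1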
import pandas as pd
from pathlib import Path
from typing import Optional
def convert_to_csv(data_root: str,
                   image_subfolder: str = "",
                   text_subfolder: str = "",
                   save_path: str = None,
                   counts: Optional[int] = 10000):
    if not save_path.endswith(".csv"):
        save_path += ".csv"
    data_root = Path(data_root)
    images_root = data_root / image_subfolder
    texts_root = data_root / text_subfolder
    image_files = [
        *images_root.glob("*.png"), *images_root.glob("*.jpg"),
        *images_root.glob("*.jpeg"), *images_root.glob("*.bmp")
    ]
    text_files = [*texts_root.glob("*.txt")]
    # pair images and texts by their filename stem
    image_files = {image_file.stem: image_file for image_file in image_files}
    text_files = {text_file.stem: text_file for text_file in text_files}
    keys = (image_files.keys() & text_files.keys())
    keys = list(keys)[:counts]
    text_files = {k: v for k, v in text_files.items() if k in keys}
    image_files = {k: v for k, v in image_files.items() if k in keys}
    results = []
    for key in keys:
        with open(text_files[key], "r") as f:
            title = f.read().strip()
        results.append({"filepath": image_files[key], "title": title})
    df = pd.DataFrame(results)
    df.to_csv(save_path, index=False)


if __name__ == "__main__":
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument("--data_root", type=str, help="directory containing the images and texts subfolders")
    parser.add_argument("--image_subfolder", type=str, default="")
    parser.add_argument("--text_subfolder", type=str, default="")
    parser.add_argument("--save_path", type=str, help="path to save the CSV file")
    args = parser.parse_args()
    convert_to_csv(args.data_root, args.image_subfolder, args.text_subfolder, args.save_path)
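# Assuming the script above is saved as convert_to_csv.py (the filename is not given here), a
# sketch of running it over a folder of paired images and texts. Note it writes a comma-separated
# file, so pass --csv-separator ',' if you train on the result with --dataset-type csv:
python convert_to_csv.py \
    --data_root /path/to/data \
    --image_subfolder images \
    --text_subfolder texts \
    --save_path /path/to/pairs.csv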
# Unique model identifier
modelCode=999
# Model name
modelName=open_clip_pytorch
# Model description
modelDescription=Multimodal image classification
# Application scenarios
appScenario=training,inference,image classification,e-commerce,painting,transportation
# Framework type
frameType=Pytorch
[build-system]
requires = ["pdm-backend"]
build-backend = "pdm.backend"
[project]
name = "open_clip_torch"
# NOTE for full list of authors see https://github.com/mlfoundations/open_clip?tab=readme-ov-file#citing
# below covers most active / recent maintainers
authors = [
{name = "Ross Wightman", email = "ross@huggingface.co"},
{name = "Gabriel Ilharco"},
{name = "Mitchell Wortsman"},
{name = "Romain Beaumont"},
]
description = "Open reproduction of contrastive language-image pretraining (CLIP) and related."
readme = "README.md"
requires-python = ">=3.8"
keywords = ["pytorch", "clip", "image-text", "language-image", "multimodal"]
license = {text = "MIT"}
classifiers = [
'Development Status :: 4 - Beta',
'Intended Audience :: Education',
'Intended Audience :: Science/Research',
'License :: OSI Approved :: MIT License',
'Programming Language :: Python :: 3.8',
'Programming Language :: Python :: 3.9',
'Programming Language :: Python :: 3.10',
'Programming Language :: Python :: 3.11',
'Programming Language :: Python :: 3.12',
'Topic :: Scientific/Engineering',
'Topic :: Scientific/Engineering :: Artificial Intelligence',
'Topic :: Software Development',
'Topic :: Software Development :: Libraries',
'Topic :: Software Development :: Libraries :: Python Modules',
]
dependencies = [
'torch>=1.9.0',
'torchvision',
'regex',
'ftfy',
'tqdm',
'huggingface-hub',
'timm',
"transformers[sentencepiece]"
]
dynamic = ["version"]
[project.optional-dependencies]
training = [
'torch>=2.0',
'webdataset>=0.2.5',
'pandas',
'transformers[sentencepiece]',
'timm>=1.0.7',
'fsspec',
]
test = [
'pytest-split',
'pytest',
'open_clip_torch[training]'
]
[project.urls]
homepage = "https://github.com/mlfoundations/open_clip"
repository = "https://github.com/mlfoundations/open_clip"
[tool.pdm.version]
source = "file"
path = "src/open_clip/version.py"
[tool.pdm.build]
excludes = ["./**/.git", "./**/logs/*"]
package-dir = "src"
includes = ["src/open_clip", "src/open_clip_train"]
[tool.pytest.ini_options]
testpaths = ['tests']
markers = [
'regression_test'
]
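# With the optional-dependency groups defined above, an (unverified) editable install from a
# checkout of this repo would pull in the extras like so:
pip install -e '.[training]'   # adds webdataset, pandas, timm, etc. for training
pip install -e '.[test]'       # adds pytest and pytest-split on top of the training extras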
[pytest]
markers =
regression_test