Commit 9ff47a7e authored by mashun1's avatar mashun1
Browse files

latte

parents
Pipeline #792 canceled with stages
#!/usr/bin/env bash
#
# Slurm batch job: runs train.py with ./configs/ffs/ffs_train.yaml on one node,
# requesting 8 tasks and 8 GPUs.
#
# NOTE(review): stdout/stderr are written below slurm_log/; make sure that
# directory exists in the submission directory before calling sbatch.
#
# Job name
#SBATCH --job-name=Latte-ffs
#SBATCH --partition group-name
#SBATCH --nodes=1
#SBATCH --ntasks-per-node=8
# Number of GPUs to reserve on the node (8 here, matching --ntasks-per-node)
#SBATCH --gres=gpu:8
#SBATCH --cpus-per-task=16
# Maximum requested run time (HH:MM:SS)
#SBATCH --time=500:00:00
# Standard output file (%j is replaced by the job id)
#SBATCH --output=slurm_log/%j.out
# Standard error file (kept separate from the output file)
#SBATCH --error=slurm_log/%j.err

# conda is made available through the user's shell start-up file.
source ~/.bashrc
# Stop here if the environment is missing instead of launching the training
# with whatever python happens to be on PATH.
conda activate latte || {
  printf '%s\n' "error: could not activate conda environment 'latte'" >&2
  exit 1
}

srun python train.py --config ./configs/ffs/ffs_train.yaml
\ No newline at end of file
#!/usr/bin/env bash
#
# Slurm batch job: runs train.py with ./configs/sky/sky_train.yaml on one node,
# requesting 8 tasks and 8 GPUs.
#
# NOTE(review): stdout/stderr are written below slurm_log/; make sure that
# directory exists in the submission directory before calling sbatch.
#
# Job name (was the copy-pasted "Latte-ffs"; renamed to match the sky config)
#SBATCH --job-name=Latte-sky
#SBATCH --partition group-name
#SBATCH --nodes=1
#SBATCH --ntasks-per-node=8
# Number of GPUs to reserve on the node (8 here, matching --ntasks-per-node)
#SBATCH --gres=gpu:8
#SBATCH --cpus-per-task=16
# Maximum requested run time (HH:MM:SS)
#SBATCH --time=500:00:00
# Standard output file (%j is replaced by the job id)
#SBATCH --output=slurm_log/%j.out
# Standard error file (kept separate from the output file)
#SBATCH --error=slurm_log/%j.err

# conda is made available through the user's shell start-up file.
source ~/.bashrc
# Stop here if the environment is missing instead of launching the training
# with whatever python happens to be on PATH.
conda activate latte || {
  printf '%s\n' "error: could not activate conda environment 'latte'" >&2
  exit 1
}

srun python train.py --config ./configs/sky/sky_train.yaml
\ No newline at end of file
#!/usr/bin/env bash
#
# Slurm batch job: runs train.py with ./configs/taichi/taichi_train.yaml on one
# node, requesting 8 tasks and 8 GPUs.
#
# NOTE(review): stdout/stderr are written below slurm_log/; make sure that
# directory exists in the submission directory before calling sbatch.
#
# Job name (was the copy-pasted "Latte-ffs"; renamed to match the taichi config)
#SBATCH --job-name=Latte-taichi
#SBATCH --partition group-name
#SBATCH --nodes=1
#SBATCH --ntasks-per-node=8
# Number of GPUs to reserve on the node (8 here, matching --ntasks-per-node)
#SBATCH --gres=gpu:8
#SBATCH --cpus-per-task=16
# Maximum requested run time (HH:MM:SS)
#SBATCH --time=500:00:00
# Standard output file (%j is replaced by the job id)
#SBATCH --output=slurm_log/%j.out
# Standard error file (kept separate from the output file)
#SBATCH --error=slurm_log/%j.err

# conda is made available through the user's shell start-up file.
source ~/.bashrc
# Stop here if the environment is missing instead of launching the training
# with whatever python happens to be on PATH.
conda activate latte || {
  printf '%s\n' "error: could not activate conda environment 'latte'" >&2
  exit 1
}

srun python train.py --config ./configs/taichi/taichi_train.yaml
\ No newline at end of file
#!/usr/bin/env bash
#
# Slurm batch job: runs train.py with ./configs/ucf101/ucf101_train.yaml on one
# node, requesting 8 tasks and 8 GPUs.
#
# NOTE(review): stdout/stderr are written below slurm_log/; make sure that
# directory exists in the submission directory before calling sbatch.
#
# Job name (was the copy-pasted "Latte-ffs"; renamed to match the ucf101 config)
#SBATCH --job-name=Latte-ucf101
#SBATCH --partition group-name
#SBATCH --nodes=1
#SBATCH --ntasks-per-node=8
# Number of GPUs to reserve on the node (8 here, matching --ntasks-per-node)
#SBATCH --gres=gpu:8
#SBATCH --cpus-per-task=16
# Maximum requested run time (HH:MM:SS)
#SBATCH --time=500:00:00
# Standard output file (%j is replaced by the job id)
#SBATCH --output=slurm_log/%j.out
# Standard error file (kept separate from the output file)
#SBATCH --error=slurm_log/%j.err

# conda is made available through the user's shell start-up file.
source ~/.bashrc
# Stop here if the environment is missing instead of launching the training
# with whatever python happens to be on PATH.
conda activate latte || {
  printf '%s\n' "error: could not activate conda environment 'latte'" >&2
  exit 1
}

srun python train.py --config ./configs/ucf101/ucf101_train.yaml
\ No newline at end of file
# All rights reserved.
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.
"""
A minimal training script for Latte using PyTorch DDP.
"""
import torch
# NOTE: these TF32 flags may need to be set to False when training with fp16 precision.
torch.backends.cuda.matmul.allow_tf32 = True
torch.backends.cudnn.allow_tf32 = True
import io
import os
import math
import argparse
import torch.distributed as dist
from glob import glob
from time import time
from copy import deepcopy
from einops import rearrange
from models import get_models
from datasets import get_dataset
from models.clip import TextEmbedder
from diffusion import create_diffusion
from omegaconf import OmegaConf
from torch.utils.data import DataLoader
from diffusers.models import AutoencoderKL
from diffusers.optimization import get_scheduler
from torch.nn.parallel import DistributedDataParallel as DDP
from torch.utils.data.distributed import DistributedSampler
from utils import (clip_grad_norm_, create_logger, update_ema,
requires_grad, cleanup, create_tensorboard,
write_tensorboard, setup_distributed,
get_experiment_dir, text_preprocessing)
import numpy as np
from transformers import T5EncoderModel, T5Tokenizer
#################################################################################
# Training Loop #
#################################################################################
def main(args):
    """
    Train a Latte model with PyTorch DDP.

    Initialises the distributed environment, builds the model together with an
    EMA copy, the diffusion helper and a frozen VAE, optionally loads pretrained
    weights, and then runs the training loop with periodic logging and EMA
    checkpointing.

    Args:
        args: configuration object produced by ``OmegaConf.load`` on the YAML file
            passed via ``--config``. It is read through attribute access and is
            extended in place with ``latent_size``.

    Raises:
        NotImplementedError: on the first training step when ``args.extras == 78``
            (text-to-video), which this script does not support.
        FileNotFoundError: when ``args.resume_from_checkpoint`` is set but the
            checkpoint directory contains no ``*.pt`` file.
    """
    assert torch.cuda.is_available(), "Training currently requires at least one GPU."

    # Setup DDP:
    setup_distributed()
    # dist.init_process_group("nccl")
    # assert args.global_batch_size % dist.get_world_size() == 0, f"Batch size must be divisible by world size."
    # rank = dist.get_rank()
    # device = rank % torch.cuda.device_count()
    # local_rank = rank
    rank = int(os.environ["RANK"])
    local_rank = int(os.environ["LOCAL_RANK"])
    device = torch.device("cuda", local_rank)

    # Each rank gets its own seed so random draws differ between processes.
    seed = args.global_seed + rank
    torch.manual_seed(seed)
    torch.cuda.set_device(device)
    print(f"Starting rank={rank}, local rank={local_rank}, seed={seed}, world_size={dist.get_world_size()}.")

    # Setup an experiment folder (rank 0 only; the other ranks get a silent logger):
    if rank == 0:
        os.makedirs(args.results_dir, exist_ok=True)  # Make results folder (holds all experiment subfolders)
        experiment_index = len(glob(f"{args.results_dir}/*"))
        model_string_name = args.model.replace("/", "-")  # e.g., Latte-XL/2 --> Latte-XL-2 (for naming folders)
        num_frame_string = 'F' + str(args.num_frames) + 'S' + str(args.frame_interval)
        experiment_dir = f"{args.results_dir}/{experiment_index:03d}-{model_string_name}-{num_frame_string}-{args.dataset}"  # Create an experiment folder
        experiment_dir = get_experiment_dir(experiment_dir, args)
        checkpoint_dir = f"{experiment_dir}/checkpoints"  # Stores saved model checkpoints
        os.makedirs(checkpoint_dir, exist_ok=True)
        logger = create_logger(experiment_dir)
        tb_writer = create_tensorboard(experiment_dir)
        OmegaConf.save(args, os.path.join(experiment_dir, 'config.yaml'))
        logger.info(f"Experiment directory created at {experiment_dir}")
    else:
        logger = create_logger(None)
        tb_writer = None

    # Create model:
    assert args.image_size % 8 == 0, "Image size must be divisible by 8 (for the VAE encoder)."
    sample_size = args.image_size // 8
    args.latent_size = sample_size
    model = get_models(args)
    # Note that parameter initialization is done within the Latte constructor
    ema = deepcopy(model).to(device)  # Create an EMA of the model for use after training
    requires_grad(ema, False)
    diffusion = create_diffusion(timestep_respacing="")  # default: 1000 steps, linear noise schedule
    # vae = AutoencoderKL.from_pretrained(f"stabilityai/sd-vae-ft-ema").to(device)
    vae = AutoencoderKL.from_pretrained(args.pretrained_model_path, subfolder="vae").to(device)

    # Optionally initialise the model from a pretrained checkpoint:
    if args.pretrained:
        checkpoint = torch.load(args.pretrained, map_location=lambda storage, loc: storage)
        if "ema" in checkpoint:  # supports checkpoints from train.py
            logger.info('Using ema ckpt!')
            checkpoint = checkpoint["ema"]

        model_dict = model.state_dict()
        # 1. filter out unnecessary keys
        pretrained_dict = {}
        for k, v in checkpoint.items():
            if k in model_dict:
                pretrained_dict[k] = v
            else:
                logger.info('Ignoring: {}'.format(k))
        logger.info('Successfully Load {}% original pretrained model weights '.format(len(pretrained_dict) / len(checkpoint.items()) * 100))
        # 2. overwrite entries in the existing state dict
        model_dict.update(pretrained_dict)
        model.load_state_dict(model_dict)
        logger.info('Successfully load model at {}!'.format(args.pretrained))

    if args.use_compile:
        model = torch.compile(model)

    # set distributed training
    model = DDP(model.to(device), device_ids=[local_rank])

    if args.extras == 78:
        # Load the tokenizers
        tokenizer = T5Tokenizer.from_pretrained(args.pretrained_model_path, subfolder="tokenizer")
        # Load T5
        text_encoder = T5EncoderModel.from_pretrained(args.pretrained_model_path, subfolder="text_encoder")

    logger.info(f"Model Parameters: {sum(p.numel() for p in model.parameters()):,}")
    opt = torch.optim.AdamW(model.parameters(), lr=1e-4, weight_decay=0)

    # Freeze vae and text_encoder
    vae.requires_grad_(False)
    if args.extras == 78:
        text_encoder.requires_grad_(False)

    # Setup data:
    dataset = get_dataset(args)
    sampler = DistributedSampler(
        dataset,
        num_replicas=dist.get_world_size(),
        rank=rank,
        shuffle=True,
        seed=args.global_seed
    )
    loader = DataLoader(
        dataset,
        batch_size=int(args.local_batch_size),
        shuffle=False,  # shuffling is delegated to the sampler
        sampler=sampler,
        num_workers=args.num_workers,
        pin_memory=True,
        drop_last=True
    )
    logger.info(f"Dataset contains {len(dataset):,} videos ({args.data_path})")

    # Scheduler
    lr_scheduler = get_scheduler(
        name="constant",
        optimizer=opt,
        num_warmup_steps=args.lr_warmup_steps * args.gradient_accumulation_steps,
        num_training_steps=args.max_train_steps * args.gradient_accumulation_steps,
    )

    # Prepare models for training:
    update_ema(ema, model.module, decay=0)  # Ensure EMA is initialized with synced weights
    model.train()  # important! This enables embedding dropout for classifier-free guidance
    ema.eval()  # EMA model should always be in eval mode

    # Variables for monitoring/logging purposes:
    train_steps = 0
    log_steps = 0
    running_loss = 0
    first_epoch = 0
    start_time = time()

    # We need to recalculate our total training steps as the size of the training dataloader may have changed.
    num_update_steps_per_epoch = math.ceil(len(loader))
    # Afterwards we recalculate our number of training epochs
    num_train_epochs = math.ceil(args.max_train_steps / num_update_steps_per_epoch)

    # Potentially load in the weights and states from a previous save
    if args.resume_from_checkpoint:
        # TODO, need to checkout
        # NOTE(review): experiment_dir is only assigned on rank 0 above and is derived from the
        # current number of entries in results_dir, so it normally points at a new run
        # directory -- confirm how resuming is meant to locate the previous run.
        # Get the most recent checkpoint
        resume_dir = os.path.join(experiment_dir, 'checkpoints')
        dirs = os.listdir(resume_dir)
        dirs = [d for d in dirs if d.endswith("pt")]
        dirs = sorted(dirs, key=lambda x: int(x.split(".")[0]))
        if not dirs:
            raise FileNotFoundError(
                f"resume_from_checkpoint is set but no '*.pt' checkpoint was found in {resume_dir}")
        path = dirs[-1]
        logger.info(f"Resuming from checkpoint {path}")
        # Checkpoints written by this script hold a single "ema" entry (see the save step
        # in the loop below), so both the trained model and the EMA copy restart from it;
        # the optimizer state is not stored and therefore cannot be restored.
        resume_state = torch.load(os.path.join(resume_dir, path), map_location=lambda storage, loc: storage)
        model.module.load_state_dict(resume_state["ema"])
        ema.load_state_dict(resume_state["ema"])
        train_steps = int(path.split(".")[0])

        first_epoch = train_steps // num_update_steps_per_epoch
        resume_step = train_steps % num_update_steps_per_epoch

    if args.pretrained:
        # Checkpoints saved by this script are named "<train_steps>.pt"; continue the step
        # counter from that number. Any other file name leaves the counter untouched
        # instead of aborting with a ValueError.
        pretrained_stem = args.pretrained.split("/")[-1].split('.')[0]
        if pretrained_stem.isdigit():
            train_steps = int(pretrained_stem)

    for epoch in range(first_epoch, num_train_epochs):
        sampler.set_epoch(epoch)
        for step, video_data in enumerate(loader):
            # Skip steps until we reach the resumed step
            if args.resume_from_checkpoint and epoch == first_epoch and step < resume_step:
                continue

            x = video_data['video'].to(device, non_blocking=True)
            video_name = video_data['video_name']
            # x = x.to(device)
            # y = y.to(device) # y is text prompt; no need put in gpu
            with torch.no_grad():
                # Map input images to latent space + normalize latents:
                b, _, _, _, _ = x.shape
                x = rearrange(x, 'b f c h w -> (b f) c h w').contiguous()
                x = vae.encode(x).latent_dist.sample().mul_(0.18215)
                x = rearrange(x, '(b f) c h w -> b f c h w', b=b).contiguous()

            if args.extras == 78:  # text-to-video
                # Raising a plain string is itself a TypeError in Python 3; use a real exception.
                raise NotImplementedError('T2V training are Not supported at this moment!')
            elif args.extras == 2:
                model_kwargs = dict(y=video_name)
            else:
                model_kwargs = dict(y=None)

            t = torch.randint(0, diffusion.num_timesteps, (x.shape[0],), device=device)
            loss_dict = diffusion.training_losses(model, x, t, model_kwargs)
            loss = loss_dict["loss"].mean()
            loss.backward()

            # The gradient norm is always measured; clipping only starts at start_clip_iter.
            if train_steps < args.start_clip_iter:  # if train_steps >= start_clip_iter, will clip gradient
                gradient_norm = clip_grad_norm_(model.module.parameters(), args.clip_max_norm, clip_grad=False)
            else:
                gradient_norm = clip_grad_norm_(model.module.parameters(), args.clip_max_norm, clip_grad=True)

            opt.step()
            lr_scheduler.step()
            opt.zero_grad()
            update_ema(ema, model.module)

            # Log loss values:
            running_loss += loss.item()
            log_steps += 1
            train_steps += 1
            if train_steps % args.log_every == 0:
                # Measure training speed:
                torch.cuda.synchronize()
                end_time = time()
                steps_per_sec = log_steps / (end_time - start_time)
                # Reduce loss history over all processes:
                avg_loss = torch.tensor(running_loss / log_steps, device=device)
                dist.all_reduce(avg_loss, op=dist.ReduceOp.SUM)
                avg_loss = avg_loss.item() / dist.get_world_size()
                # logger.info(f"(step={train_steps:07d}) Train Loss: {avg_loss:.4f}, Train Steps/Sec: {steps_per_sec:.2f}")
                logger.info(f"(step={train_steps:07d}/epoch={epoch:04d}) Train Loss: {avg_loss:.4f}, Gradient Norm: {gradient_norm:.4f}, Train Steps/Sec: {steps_per_sec:.2f}")
                write_tensorboard(tb_writer, 'Train Loss', avg_loss, train_steps)
                write_tensorboard(tb_writer, 'Gradient Norm', gradient_norm, train_steps)
                # Reset monitoring variables:
                running_loss = 0
                log_steps = 0
                start_time = time()

            # Save Latte checkpoint:
            if train_steps % args.ckpt_every == 0 and train_steps > 0:
                if rank == 0:
                    checkpoint = {
                        # "model": model.module.state_dict(),
                        "ema": ema.state_dict(),
                        # "opt": opt.state_dict(),
                        # "args": args
                    }
                    checkpoint_path = f"{checkpoint_dir}/{train_steps:07d}.pt"
                    torch.save(checkpoint, checkpoint_path)
                    logger.info(f"Saved checkpoint to {checkpoint_path}")
                # Every rank waits here until rank 0 has finished writing.
                dist.barrier()

    model.eval()  # important! This disables randomized embedding dropout
    # do any sampling/FID calculation/etc. with ema (or model) in eval mode ...

    logger.info("Done!")
    cleanup()
if __name__ == "__main__":
    # --config is the only command-line option; the YAML file it points to is
    # loaded with OmegaConf and handed to main().
    # Default args here will train Latte with the hyperparameters we used in our paper (except training iters).
    cli = argparse.ArgumentParser()
    cli.add_argument("--config", type=str, default="./configs/train.yaml")
    config_path = cli.parse_args().config
    main(OmegaConf.load(config_path))
# Launch the ffs training config as a single python process with only GPU 5 visible.
#
# Two-process torchrun alternative, kept for reference:
# torchrun --nnodes=1 --nproc_per_node=2 --master_port=29509 train.py --config ./configs/ffs/ffs_train.yaml
export CUDA_VISIBLE_DEVICES=5

python train.py \
  --config ./configs/ffs/ffs_train.yaml
\ No newline at end of file
# Launch the sky training config with torchrun: one node, two processes,
# restricted to GPUs 4 and 5.
export CUDA_VISIBLE_DEVICES=4,5

torchrun \
  --nnodes=1 \
  --nproc_per_node=2 \
  --master_port=29509 \
  train.py --config ./configs/sky/sky_train.yaml
\ No newline at end of file
# Launch the taichi training config with torchrun: one node, two processes,
# restricted to GPUs 4 and 5.
export CUDA_VISIBLE_DEVICES=4,5

torchrun \
  --nnodes=1 \
  --nproc_per_node=2 \
  --master_port=29509 \
  train.py --config ./configs/taichi/taichi_train.yaml
\ No newline at end of file
# Launch the ucf101 training config with torchrun: one node, two processes.
# GPU pinning is disabled here; uncomment the export to restrict the run to GPUs 4 and 5.
# export CUDA_VISIBLE_DEVICES=4,5

torchrun \
  --nnodes=1 \
  --nproc_per_node=2 \
  --master_port=29509 \
  train.py --config ./configs/ucf101/ucf101_train.yaml
\ No newline at end of file
# All rights reserved.
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.
"""
A minimal training script for Latte using PyTorch DDP.
"""
import torch
# NOTE: these TF32 flags may need to be set to False when training with fp16 precision.
torch.backends.cuda.matmul.allow_tf32 = True
torch.backends.cudnn.allow_tf32 = True
import io
import os
import math
import argparse
import torch.distributed as dist
from glob import glob
from time import time
from copy import deepcopy
from einops import rearrange
from models import get_models
from datasets import get_dataset
from models.clip import TextEmbedder
from diffusion import create_diffusion
from omegaconf import OmegaConf
from torch.utils.data import DataLoader
from diffusers.models import AutoencoderKL
from diffusers.optimization import get_scheduler
from torch.nn.parallel import DistributedDataParallel as DDP
from torch.utils.data.distributed import DistributedSampler
from utils import (clip_grad_norm_, create_logger, update_ema,
requires_grad, cleanup, create_tensorboard,
write_tensorboard, setup_distributed, fetch_files_by_numbers,
get_experiment_dir, separation_content_motion,)
#################################################################################
# Training Loop #
#################################################################################
def main(args):
    """
    Train a Latte model with PyTorch DDP (variant that can also feed image data).

    Initialises the distributed environment, builds the model together with an
    EMA copy, the diffusion helper and a frozen VAE, optionally loads pretrained
    weights and freezes everything except the ``attn_temp`` modules, and then
    runs the training loop with periodic logging and EMA checkpointing.

    Args:
        args: configuration object produced by ``OmegaConf.load`` on the YAML file
            passed via ``--config``. It is read through attribute access and is
            extended in place (``latent_size`` and, for the ``webvideo2mlaion``
            dataset, ``laion_meta_files`` / ``webvideo_meta_files``).

    Raises:
        NotImplementedError: on the first training step when ``args.extras == 78``
            (text-to-video), which this script does not support.
        FileNotFoundError: when ``args.resume_from_checkpoint`` is set but the
            checkpoint directory contains no ``*.pt`` file.
    """
    assert torch.cuda.is_available(), "Training currently requires at least one GPU."

    # Setup DDP:
    setup_distributed()
    # dist.init_process_group("nccl")
    # assert args.global_batch_size % dist.get_world_size() == 0, f"Batch size must be divisible by world size."
    # rank = dist.get_rank()
    # device = rank % torch.cuda.device_count()
    # local_rank = rank
    rank = int(os.environ["RANK"])
    local_rank = int(os.environ["LOCAL_RANK"])
    device = torch.device("cuda", local_rank)

    # Each rank gets its own seed so random draws differ between processes.
    seed = args.global_seed + rank
    torch.manual_seed(seed)
    torch.cuda.set_device(device)
    print(f"Starting rank={rank}, local rank={local_rank}, seed={seed}, world_size={dist.get_world_size()}.")

    # Setup an experiment folder (rank 0 only; the other ranks get a silent logger):
    if rank == 0:
        os.makedirs(args.results_dir, exist_ok=True)  # Make results folder (holds all experiment subfolders)
        experiment_index = len(glob(f"{args.results_dir}/*"))
        model_string_name = args.model.replace("/", "-")  # e.g., Latte-XL/2 --> Latte-XL-2 (for naming folders)
        num_frame_string = 'F' + str(args.num_frames) + 'S' + str(args.frame_interval)
        experiment_dir = f"{args.results_dir}/{experiment_index:03d}-{model_string_name}-{num_frame_string}-{args.dataset}"  # Create an experiment folder
        experiment_dir = get_experiment_dir(experiment_dir, args)
        checkpoint_dir = f"{experiment_dir}/checkpoints"  # Stores saved model checkpoints
        os.makedirs(checkpoint_dir, exist_ok=True)
        logger = create_logger(experiment_dir)
        tb_writer = create_tensorboard(experiment_dir)
        OmegaConf.save(args, os.path.join(experiment_dir, 'config.yaml'))
        logger.info(f"Experiment directory created at {experiment_dir}")
    else:
        logger = create_logger(None)
        tb_writer = None

    # Create model:
    assert args.image_size % 8 == 0, "Image size must be divisible by 8 (for the VAE encoder)."
    sample_size = args.image_size // 8
    args.latent_size = sample_size
    model = get_models(args)
    # Note that parameter initialization is done within the Latte constructor
    ema = deepcopy(model).to(device)  # Create an EMA of the model for use after training
    requires_grad(ema, False)
    diffusion = create_diffusion(timestep_respacing="")  # default: 1000 steps, linear noise schedule
    # vae = AutoencoderKL.from_pretrained(f"stabilityai/sd-vae-ft-ema").to(device)
    # The VAE weights live in a different subfolder depending on the extras mode.
    if args.extras == 78:
        vae = AutoencoderKL.from_pretrained(args.pretrained_model_path, subfolder="vae").to(device)
    else:
        vae = AutoencoderKL.from_pretrained(args.pretrained_model_path, subfolder="sd-vae-ft-mse").to(device)

    # Optionally initialise the model from a pretrained checkpoint:
    if args.pretrained:
        checkpoint = torch.load(args.pretrained, map_location=lambda storage, loc: storage)
        if "ema" in checkpoint:  # supports checkpoints from train.py
            logger.info('Using ema ckpt!')
            checkpoint = checkpoint["ema"]

        model_dict = model.state_dict()
        # 1. filter out unnecessary keys
        pretrained_dict = {}
        for k, v in checkpoint.items():
            # if 'y_embedder' in k and args.dataset != 'ImageNet' and 'pretrained' in args.pretrained:
            #     logger.info('Warning: ignoring the weights from y_embedder!')
            #     continue
            # if 'y_embedder' in k:
            # # if 'y_embedder' in k and 'pretrained' in args.pretrained:
            # if 'y_embedder' in k:
            #     logger.info('Warning: ignoring the {} weights!'.format(k))
            #     continue
            if k in model_dict:
                pretrained_dict[k] = v
                # logger.info('Successfully Load weights from {}'.format(k))
            # elif 'x_embedder' in k: # replace model parameter name
            #     pretrained_dict['patch_embedder'] = v
            # elif 't_embedder' in k: # replace model parameter name
            #     pretrained_dict['timestep_embedder'] = v
            else:
                logger.info('Ignoring: {}'.format(k))
        logger.info('Successfully Load {}% original pretrained model weights '.format(len(pretrained_dict) / len(checkpoint.items()) * 100))
        # 2. overwrite entries in the existing state dict
        model_dict.update(pretrained_dict)
        model.load_state_dict(model_dict)
        logger.info('Successfully load model at {}!'.format(args.pretrained))

    if args.use_compile:
        model = torch.compile(model)

    if args.enable_xformers_memory_efficient_attention:
        logger.info("Using Xformers!")
        model.enable_xformers_memory_efficient_attention()

    if args.gradient_checkpointing:
        logger.info("Using gradient checkpointing!")
        model.enable_gradient_checkpointing()

    if args.fixed_spatial:
        # Freeze the whole model, then re-enable gradients only for modules whose
        # name ends with one of the suffixes below.
        trainable_modules = (
            "attn_temp",
        )
        model.requires_grad_(False)
        for name, module in model.named_modules():
            if name.endswith(tuple(trainable_modules)):
                for params in module.parameters():
                    logger.info("WARNING: Only train {} parametes!".format(name))
                    params.requires_grad = True
        logger.info("WARNING: Only train {} parametes!".format(trainable_modules))

    # set distributed training
    model = DDP(model.to(device), device_ids=[local_rank])

    if args.extras == 78:
        text_encoder = TextEmbedder(args.pretrained_model_path, dropout_prob=0.1).to(device)

    logger.info(f"Model Parameters: {sum(p.numel() for p in model.parameters()):,}")
    opt = torch.optim.AdamW(model.parameters(), lr=1e-4, weight_decay=0)

    # Freeze vae and text_encoder
    vae.requires_grad_(False)
    if args.extras == 78:
        text_encoder.requires_grad_(False)

    if args.dataset == 'webvideo2mlaion':
        # Setup video dataset: every rank is handed its own slice of the metadata files.
        file_list = os.listdir(args.image_data_path)  # all file format must be the same!
        file_count = int(len(file_list) / dist.get_world_size())
        args.laion_meta_files = fetch_files_by_numbers(rank * file_count, file_count, file_list)
        file_list = os.listdir(args.webvideo_data_path)  # all file format must be the same!
        file_count = int(len(file_list) / dist.get_world_size())
        args.webvideo_meta_files = fetch_files_by_numbers(rank * file_count, file_count, file_list)
        # NOTE(review): the stripped source does not show whether this check was nested
        # under the dataset branch; it is kept nested because it only overrides the
        # meta-file lists assigned just above.
        if args.test_run:
            args.laion_meta_files = ['file_000.csv']
            args.webvideo_meta_files = ['file_000.csv']

    # Setup data:
    dataset = get_dataset(args)
    if args.dataset == 'webvideo2mlaion':
        # The metadata files were already split per rank above, so the sampler must not
        # split the data a second time.
        sampler = DistributedSampler(
            dataset,
            num_replicas=1,  # important
            rank=0,  # important
            shuffle=True,
            seed=args.global_seed
        )
    else:
        sampler = DistributedSampler(
            dataset,
            num_replicas=dist.get_world_size(),
            rank=rank,
            shuffle=True,
            seed=args.global_seed
        )
    # A second, unconditional DistributedSampler assignment used to follow here; it
    # duplicated the else-branch and overrode the webvideo2mlaion sampler, so it was removed.
    loader = DataLoader(
        dataset,
        batch_size=int(args.local_batch_size),
        shuffle=False,  # shuffling is delegated to the sampler
        sampler=sampler,
        num_workers=args.num_workers,
        pin_memory=True,
        drop_last=True
    )
    logger.info(f"Dataset contains {len(dataset):,} videos ({args.webvideo_data_path})")

    # Scheduler
    lr_scheduler = get_scheduler(
        name="constant",
        optimizer=opt,
        num_warmup_steps=args.lr_warmup_steps * args.gradient_accumulation_steps,
        num_training_steps=args.max_train_steps * args.gradient_accumulation_steps,
    )

    # Prepare models for training:
    update_ema(ema, model.module, decay=0)  # Ensure EMA is initialized with synced weights
    model.train()  # important! This enables embedding dropout for classifier-free guidance
    ema.eval()  # EMA model should always be in eval mode

    # Variables for monitoring/logging purposes:
    train_steps = 0
    log_steps = 0
    running_loss = 0
    first_epoch = 0
    start_time = time()

    # We need to recalculate our total training steps as the size of the training dataloader may have changed.
    num_update_steps_per_epoch = math.ceil(len(loader))
    # Afterwards we recalculate our number of training epochs
    num_train_epochs = math.ceil(args.max_train_steps / num_update_steps_per_epoch)

    # Potentially load in the weights and states from a previous save
    if args.resume_from_checkpoint:
        # TODO, need to checkout
        # NOTE(review): experiment_dir is only assigned on rank 0 above and is derived from the
        # current number of entries in results_dir, so it normally points at a new run
        # directory -- confirm how resuming is meant to locate the previous run.
        # Get the most recent checkpoint
        resume_dir = os.path.join(experiment_dir, 'checkpoints')
        dirs = os.listdir(resume_dir)
        dirs = [d for d in dirs if d.endswith("pt")]
        dirs = sorted(dirs, key=lambda x: int(x.split(".")[0]))
        if not dirs:
            raise FileNotFoundError(
                f"resume_from_checkpoint is set but no '*.pt' checkpoint was found in {resume_dir}")
        path = dirs[-1]
        logger.info(f"Resuming from checkpoint {path}")
        # Checkpoints written by this script hold a single "ema" entry (see the save step
        # in the loop below), so both the trained model and the EMA copy restart from it;
        # the optimizer state is not stored and therefore cannot be restored.
        resume_state = torch.load(os.path.join(resume_dir, path), map_location=lambda storage, loc: storage)
        model.module.load_state_dict(resume_state["ema"])
        ema.load_state_dict(resume_state["ema"])
        train_steps = int(path.split(".")[0])

        first_epoch = train_steps // num_update_steps_per_epoch
        resume_step = train_steps % num_update_steps_per_epoch

    for epoch in range(first_epoch, num_train_epochs):
        sampler.set_epoch(epoch)
        for step, video_data in enumerate(loader):
            # Skip steps until we reach the resumed step
            if args.resume_from_checkpoint and epoch == first_epoch and step < resume_step:
                continue

            x = video_data['video'].to(device, non_blocking=True)
            video_name = video_data['video_name']
            if args.dataset == "ucf101_img":
                # Each entry is a '====='-separated list of integers; turn it into one tensor per sample.
                image_name = video_data['image_name']
                image_names = []
                for caption in image_name:
                    single_caption = [int(item) for item in caption.split('=====')]
                    image_names.append(torch.as_tensor(single_caption))
            # x = x.to(device)
            # y = y.to(device) # y is text prompt; no need put in gpu
            with torch.no_grad():
                # Map input images to latent space + normalize latents:
                b, _, _, _, _ = x.shape
                x = rearrange(x, 'b f c h w -> (b f) c h w').contiguous()
                x = vae.encode(x).latent_dist.sample().mul_(0.18215)
                x = rearrange(x, '(b f) c h w -> b f c h w', b=b).contiguous()

            if args.extras == 78:  # text-to-video
                # Raising a plain string is itself a TypeError in Python 3; use a real exception.
                raise NotImplementedError('T2V training are Not supported at this moment!')
            elif args.extras == 2:
                if args.dataset == "ucf101_img":
                    model_kwargs = dict(y=video_name, y_image=image_names, use_image_num=args.use_image_num)  # tav unet
                else:
                    model_kwargs = dict(y=video_name)  # tav unet
            else:
                model_kwargs = dict(y=None, use_image_num=args.use_image_num)

            t = torch.randint(0, diffusion.num_timesteps, (x.shape[0],), device=device)
            loss_dict = diffusion.training_losses(model, x, t, model_kwargs)
            loss = loss_dict["loss"].mean()
            loss.backward()

            # The gradient norm is always measured; clipping only starts at start_clip_iter.
            if train_steps < args.start_clip_iter:  # if train_steps >= start_clip_iter, will clip gradient
                gradient_norm = clip_grad_norm_(model.module.parameters(), args.clip_max_norm, clip_grad=False)
            else:
                gradient_norm = clip_grad_norm_(model.module.parameters(), args.clip_max_norm, clip_grad=True)

            opt.step()
            lr_scheduler.step()
            opt.zero_grad()
            update_ema(ema, model.module)

            # Log loss values:
            running_loss += loss.item()
            log_steps += 1
            train_steps += 1
            if train_steps % args.log_every == 0:
                # Measure training speed:
                torch.cuda.synchronize()
                end_time = time()
                steps_per_sec = log_steps / (end_time - start_time)
                # Reduce loss history over all processes:
                avg_loss = torch.tensor(running_loss / log_steps, device=device)
                dist.all_reduce(avg_loss, op=dist.ReduceOp.SUM)
                avg_loss = avg_loss.item() / dist.get_world_size()
                # logger.info(f"(step={train_steps:07d}) Train Loss: {avg_loss:.4f}, Train Steps/Sec: {steps_per_sec:.2f}")
                logger.info(f"(step={train_steps:07d}/epoch={epoch:04d}) Train Loss: {avg_loss:.4f}, Gradient Norm: {gradient_norm:.4f}, Train Steps/Sec: {steps_per_sec:.2f}")
                write_tensorboard(tb_writer, 'Train Loss', avg_loss, train_steps)
                write_tensorboard(tb_writer, 'Gradient Norm', gradient_norm, train_steps)
                # Reset monitoring variables:
                running_loss = 0
                log_steps = 0
                start_time = time()

            # Save Latte checkpoint:
            if train_steps % args.ckpt_every == 0 and train_steps > 0:
                if rank == 0:
                    checkpoint = {
                        # "model": model.module.state_dict(),
                        "ema": ema.state_dict(),
                        # "opt": opt.state_dict(),
                        # "args": args
                    }
                    checkpoint_path = f"{checkpoint_dir}/{train_steps:07d}.pt"
                    torch.save(checkpoint, checkpoint_path)
                    logger.info(f"Saved checkpoint to {checkpoint_path}")
                # Every rank waits here until rank 0 has finished writing.
                dist.barrier()

    model.eval()  # important! This disables randomized embedding dropout
    # do any sampling/FID calculation/etc. with ema (or model) in eval mode ...

    logger.info("Done!")
    cleanup()
if __name__ == "__main__":
    # --config is the only command-line option; the YAML file it points to is
    # loaded with OmegaConf and handed to main().
    # Default args here will train Latte-XL/2 with the hyperparameters we used in our paper (except training iters).
    cli = argparse.ArgumentParser()
    cli.add_argument("--config", type=str, default="./configs/tuneavideo.yaml")
    config_path = cli.parse_args().config
    main(OmegaConf.load(config_path))
import os
import math
import torch
import logging
import random
import subprocess
import numpy as np
import torch.distributed as dist
# from torch._six import inf
from torch import inf
from PIL import Image
from typing import Union, Iterable
from collections import OrderedDict
from torch.utils.tensorboard import SummaryWriter
from diffusers.utils import is_bs4_available, is_ftfy_available
import html
import re
import urllib.parse as ul
if is_bs4_available():
from bs4 import BeautifulSoup
if is_ftfy_available():
import ftfy
_tensor_or_tensors = Union[torch.Tensor, Iterable[torch.Tensor]]
#################################################################################
# Training Clip Gradients #
#################################################################################
def get_grad_norm(
        parameters: _tensor_or_tensors, norm_type: float = 2.0) -> torch.Tensor:
    r"""
    Measure the total gradient norm of a set of parameters.

    Adapted from torch.nn.utils.clip_grad_norm_, but read-only: the norm is
    computed over all available gradients together, as if they were
    concatenated into a single vector, and no gradient is modified.

    Args:
        parameters (Iterable[Tensor] or Tensor): an iterable of Tensors or a
            single Tensor whose ``.grad`` fields are measured; entries whose
            ``.grad`` is ``None`` are skipped
        norm_type (float or int): type of the used p-norm. Can be ``'inf'`` for
            infinity norm.

    Returns:
        Total norm of the parameter gradients (viewed as a single vector), on
        the device of the first gradient; ``torch.tensor(0.)`` when no
        parameter has a gradient.
    """
    params = [parameters] if isinstance(parameters, torch.Tensor) else parameters
    grad_list = [param.grad for param in params if param.grad is not None]
    p_norm = float(norm_type)
    if not grad_list:
        return torch.tensor(0.)

    # All partial norms are gathered on the device of the first gradient.
    target = grad_list[0].device
    if p_norm == inf:
        peaks = [grad.detach().abs().max().to(target) for grad in grad_list]
        if len(peaks) == 1:
            return peaks[0]
        return torch.max(torch.stack(peaks))

    # The p-norm of the per-tensor p-norms equals the p-norm of the concatenation.
    per_grad = [torch.norm(grad.detach(), p_norm).to(target) for grad in grad_list]
    return torch.norm(torch.stack(per_grad), p_norm)
def clip_grad_norm_(
        parameters: _tensor_or_tensors, max_norm: float, norm_type: float = 2.0,
        error_if_nonfinite: bool = False, clip_grad = True) -> torch.Tensor:
    r"""
    Compute the total gradient norm and, optionally, clip the gradients to it.

    Adapted from torch.nn.utils.clip_grad_norm_. The norm is computed over all
    available gradients together, as if they were concatenated into a single
    vector. When ``clip_grad`` is true the gradients are rescaled in-place.

    Args:
        parameters (Iterable[Tensor] or Tensor): an iterable of Tensors or a
            single Tensor that will have gradients normalized
        max_norm (float or int): max norm of the gradients
        norm_type (float or int): type of the used p-norm. Can be ``'inf'`` for
            infinity norm.
        error_if_nonfinite (bool): if True, an error is thrown if the total
            norm of the gradients from :attr:`parameters` is ``nan``,
            ``inf``, or ``-inf``. Only checked when ``clip_grad`` is true.
        clip_grad (bool): if False, the norm is only measured and the
            gradients are left untouched.

    Returns:
        Total norm of the parameter gradients (viewed as a single vector),
        measured before any clipping; ``torch.tensor(0.)`` when no parameter
        has a gradient.

    Raises:
        RuntimeError: if clipping is requested, ``error_if_nonfinite`` is set
            and the total norm is non-finite.
    """
    params = [parameters] if isinstance(parameters, torch.Tensor) else parameters
    grad_list = [param.grad for param in params if param.grad is not None]
    limit = float(max_norm)
    p_norm = float(norm_type)
    if not grad_list:
        return torch.tensor(0.)

    # All partial norms are gathered on the device of the first gradient.
    target = grad_list[0].device
    if p_norm == inf:
        peaks = [grad.detach().abs().max().to(target) for grad in grad_list]
        total_norm = peaks[0] if len(peaks) == 1 else torch.max(torch.stack(peaks))
    else:
        per_grad = [torch.norm(grad.detach(), p_norm).to(target) for grad in grad_list]
        total_norm = torch.norm(torch.stack(per_grad), p_norm)

    # Measurement-only mode: report the norm and leave the gradients alone.
    if not clip_grad:
        return total_norm

    if error_if_nonfinite and torch.logical_or(total_norm.isnan(), total_norm.isinf()):
        raise RuntimeError(
            f'The total norm of order {norm_type} for gradients from '
            '`parameters` is non-finite, so it cannot be clipped. To disable '
            'this error and scale the gradients by the non-finite norm anyway, '
            'set `error_if_nonfinite=False`')

    # Clamping the coefficient to 1 and always multiplying avoids an
    # `if coef < 1:` branch, which can force a CPU <=> device synchronization
    # when the gradients do not reside in CPU memory.
    scale = torch.clamp(limit / (total_norm + 1e-6), max=1.0)
    for grad in grad_list:
        grad.detach().mul_(scale.to(grad.device))
    return total_norm
def get_experiment_dir(root_dir, args):
    """
    Append one suffix per enabled training option to an experiment directory name.

    Args:
        root_dir (str): base path of the experiment directory.
        args: configuration object providing ``use_compile``, ``fixed_spatial``,
            ``enable_xformers_memory_efficient_attention``,
            ``gradient_checkpointing``, ``mixed_precision`` and ``image_size``.

    Returns:
        ``root_dir`` followed by the suffixes of all enabled options, in the
        order listed below.
    """
    # if args.pretrained is not None and 'Latte-XL-2-256x256.pt' not in args.pretrained:
    #     root_dir += '-WOPRE'
    tagged_options = (
        (args.use_compile, '-Compile'),  # speedup by torch compile
        (args.fixed_spatial, '-FixedSpa'),
        (args.enable_xformers_memory_efficient_attention, '-Xfor'),
        (args.gradient_checkpointing, '-Gc'),
        (args.mixed_precision, '-Amp'),
        (args.image_size == 512, '-512'),
    )
    for enabled, suffix in tagged_options:
        if enabled:
            root_dir += suffix
    return root_dir
#################################################################################
# Training Logger #
#################################################################################
def create_logger(logging_dir):
    """
    Build the process logger for DDP training.

    On rank 0 the root logging configuration is set to INFO with a stream
    handler and a file handler writing to ``<logging_dir>/log.txt``. On every
    other rank the returned logger only receives a ``NullHandler``.

    Args:
        logging_dir: directory that receives ``log.txt``; only used on rank 0.

    Returns:
        The ``logging.Logger`` named after this module.
    """
    is_main_process = dist.get_rank() == 0
    logger = logging.getLogger(__name__)
    if not is_main_process:
        # dummy logger (does nothing)
        logger.addHandler(logging.NullHandler())
        return logger

    # real logger
    log_handlers = [
        logging.StreamHandler(),
        logging.FileHandler(f"{logging_dir}/log.txt"),
    ]
    logging.basicConfig(
        level=logging.INFO,
        # format='[\033[34m%(asctime)s\033[0m] %(message)s',
        format='[%(asctime)s] %(message)s',
        datefmt='%Y-%m-%d %H:%M:%S',
        handlers=log_handlers,
    )
    return logger
def create_tensorboard(tensorboard_dir):
    """
    Create a tensorboard writer that saves losses.

    Only rank 0 creates a real ``SummaryWriter``. Every other rank explicitly
    receives ``None`` instead of relying on a local variable that is only
    assigned inside the rank-0 branch.

    Args:
        tensorboard_dir: Directory passed to ``SummaryWriter``.

    Returns:
        A ``SummaryWriter`` on rank 0, ``None`` on all other ranks.
    """
    if dist.get_rank() != 0:
        return None
    # real tensorboard
    return SummaryWriter(tensorboard_dir)
def write_tensorboard(writer, *args):
    '''
    Record one scalar on the tensorboard writer.
    Only for pytorch DDP mode: ranks other than 0 return without writing.

    Args:
        writer: Object exposing ``add_scalar``.
        *args: The first three positional values are forwarded, in order,
            to ``writer.add_scalar``.
    '''
    if dist.get_rank() != 0:
        return
    # real tensorboard
    first, second, third = args[0], args[1], args[2]
    writer.add_scalar(first, second, third)
#################################################################################
# EMA Update/ DDP Training Utils #
#################################################################################
@torch.no_grad()
def update_ema(ema_model, model, decay=0.9999):
    """
    Move every EMA parameter towards the matching parameter of ``model``.

    Each EMA tensor is updated in place as
    ``ema = decay * ema + (1 - decay) * param``; parameters are matched by
    the names yielded by ``named_parameters()``.

    Args:
        ema_model: Model holding the averaged weights (modified in place).
        model: Model providing the current weights.
        decay: Weight kept from the previous EMA value.
    """
    ema_lookup = OrderedDict(ema_model.named_parameters())
    live_lookup = OrderedDict(model.named_parameters())

    for key, live in live_lookup.items():
        # TODO: Consider applying only to params that require_grad to avoid small numerical changes of pos_embed
        shadow = ema_lookup[key]
        shadow.mul_(decay)
        shadow.add_(live.data, alpha=1 - decay)
def requires_grad(model, flag=True):
    """
    Switch gradient tracking on or off for a whole model.

    Args:
        model: Object whose ``parameters()`` are iterated.
        flag: Value assigned to ``requires_grad`` on every parameter.
    """
    for weight in model.parameters():
        weight.requires_grad = flag
def cleanup():
    """
    Finish DDP training by destroying the process group.
    """
    dist.destroy_process_group()
def setup_distributed(backend="nccl", port=None):
    """Initialize distributed training environment.
    support both slurm and torch.distributed.launch
    see torch.distributed.init_process_group() for more details

    Under SLURM (``SLURM_JOB_ID`` present) the rank and world size are read
    from ``SLURM_PROCID`` / ``SLURM_NTASKS`` and the variables ``MASTER_PORT``,
    ``MASTER_ADDR``, ``WORLD_SIZE``, ``LOCAL_RANK`` and ``RANK`` are exported.
    Otherwise ``RANK`` and ``WORLD_SIZE`` must already be in the environment.

    Args:
        backend: Backend name forwarded to ``dist.init_process_group``.
        port: Master port to export under SLURM; when ``None`` an existing
            ``MASTER_PORT`` is kept, else ``29567 + <visible GPU count>`` is used.

    Raises:
        KeyError: If a required environment variable is missing.
    """
    num_gpus = torch.cuda.device_count()

    if "SLURM_JOB_ID" in os.environ:
        rank = int(os.environ["SLURM_PROCID"])
        world_size = int(os.environ["SLURM_NTASKS"])
        node_list = os.environ["SLURM_NODELIST"]
        # First hostname of the allocation serves as the rendezvous address.
        addr = subprocess.getoutput(f"scontrol show hostname {node_list} | head -n1")
        # specify master port
        if port is not None:
            os.environ["MASTER_PORT"] = str(port)
        elif "MASTER_PORT" not in os.environ:
            # os.environ["MASTER_PORT"] = "29566"
            os.environ["MASTER_PORT"] = str(29567 + num_gpus)
        if "MASTER_ADDR" not in os.environ:
            os.environ["MASTER_ADDR"] = addr
        os.environ["WORLD_SIZE"] = str(world_size)
        # With no visible CUDA device `rank % num_gpus` would divide by zero;
        # fall back to local rank 0 in that case.
        local_rank = rank % num_gpus if num_gpus > 0 else 0
        os.environ["LOCAL_RANK"] = str(local_rank)
        os.environ["RANK"] = str(rank)
    else:
        rank = int(os.environ["RANK"])
        world_size = int(os.environ["WORLD_SIZE"])

    # torch.cuda.set_device(rank % num_gpus)

    dist.init_process_group(
        backend=backend,
        world_size=world_size,
        rank=rank,
    )
#################################################################################
# Testing Utils #
#################################################################################
def save_video_grid(video, nrow=None):
    """
    Tile a batch of videos into a single grid video.

    Args:
        video: Tensor unpacked as ``(b, t, h, w, c)`` via ``video.shape``.
        nrow: Number of tile rows; defaults to ``ceil(sqrt(b))``.

    Returns:
        A zero-initialised ``torch.uint8`` tensor of shape
        ``(t, (1 + h) * nrow + 1, (1 + w) * ncol + 1, c)`` with clip ``i``
        copied into the cell at row ``i // ncol`` and column ``i % ncol``.
        The shape of the grid is printed as a side effect.
    """
    batch, frames, height, width, channels = video.shape

    if nrow is None:
        nrow = math.ceil(math.sqrt(batch))
    ncol = math.ceil(batch / nrow)

    padding = 1
    cell_h = padding + height
    cell_w = padding + width
    grid_shape = (frames, cell_h * nrow + padding, cell_w * ncol + padding, channels)
    video_grid = torch.zeros(grid_shape, dtype=torch.uint8)

    print(video_grid.shape)
    for idx in range(batch):
        row, col = divmod(idx, ncol)
        top = cell_h * row
        left = cell_w * col
        video_grid[:, top:top + height, left:left + width] = video[idx]

    return video_grid
#################################################################################
# MMCV Utils #
#################################################################################
def collect_env():
    """Collect the information of the running environments."""
    # Copyright (c) OpenMMLab. All rights reserved.
    from mmcv.utils import collect_env as collect_base_env
    from mmcv.utils import get_git_hash

    report = collect_base_env()
    # First seven characters of the git hash, stored under this fixed key.
    report['MMClassification'] = get_git_hash()[:7]

    for key, value in report.items():
        print(f'{key}: {value}')

    print(torch.cuda.get_arch_list())
    print(torch.version.cuda)
#################################################################################
# Pixart-alpha Utils #
#################################################################################
# One or more consecutive "bad" punctuation characters. Every fragment is a raw
# string so that sequences such as `\)` or `\/` are not treated as (invalid)
# Python string escapes; the compiled pattern is unchanged.
bad_punct_regex = re.compile(
    r"["
    r"#®•©™&@·º½¾¿¡§~"
    r"\)\(\]\[\}\{\|"
    r"\\/"  # literal backslash, then forward slash
    r"\*"
    r"]{1,}"
)
def text_preprocessing(text, clean_caption=False):
    """
    Normalise one prompt or a list/tuple of prompts.

    Args:
        text: A single prompt or a list/tuple of prompts.
        clean_caption: When true and both ``is_bs4_available()`` and
            ``is_ftfy_available()`` report true, every prompt is passed through
            the module-level ``clean_caption`` function; otherwise prompts are
            only lower-cased and stripped.

    Returns:
        A list with one processed string per input prompt.
    """
    # Caption cleaning needs both optional packages; silently fall back otherwise.
    use_cleaner = bool(clean_caption) and is_bs4_available() and is_ftfy_available()

    if not isinstance(text, (tuple, list)):
        text = [text]

    def process(item: str):
        if use_cleaner:
            # The boolean parameter shadows the module-level clean_caption()
            # function in this scope, so calling the name directly would call a
            # bool. Fetch the function from the module globals instead.
            cleaner = globals()["clean_caption"]
            # The cleaner is applied twice, as the original code did.
            item = cleaner(item)
            item = cleaner(item)
        else:
            item = item.lower().strip()
        return item

    return [process(t) for t in text]
# Copied from diffusers.pipelines.deepfloyd_if.pipeline_if.IFPipeline._clean_caption
def clean_caption(caption):
    """
    Normalise a raw caption string.

    The caption is converted to ``str``, URL-unquoted, lower-cased, and then
    passed through a fixed sequence of substitutions that drop URLs, HTML
    markup, @-handles, CJK ranges, ids, file names and boilerplate phrases,
    and normalise dashes, quotes, punctuation and whitespace.

    Args:
        caption: Any object; it is converted with ``str()`` first.

    Returns:
        The cleaned caption with leading/trailing whitespace removed.
    """
    caption = str(caption)
    caption = ul.unquote_plus(caption)  # NOTE(review): `ul` is presumably urllib.parse - confirm
    caption = caption.strip().lower()
    caption = re.sub("<person>", "person", caption)
    # urls:
    caption = re.sub(
        r"\b((?:https?:(?:\/{1,3}|[a-zA-Z0-9%])|[a-zA-Z0-9.\-]+[.](?:com|co|ru|net|org|edu|gov|it)[\w/-]*\b\/?(?!@)))",  # noqa
        "",
        caption,
    )  # regex for urls
    caption = re.sub(
        r"\b((?:www:(?:\/{1,3}|[a-zA-Z0-9%])|[a-zA-Z0-9.\-]+[.](?:com|co|ru|net|org|edu|gov|it)[\w/-]*\b\/?(?!@)))",  # noqa
        "",
        caption,
    )  # regex for urls
    # html:
    caption = BeautifulSoup(caption, features="html.parser").text

    # @<nickname>
    caption = re.sub(r"@[\w\d]+\b", "", caption)

    # 31C0—31EF CJK Strokes
    # 31F0—31FF Katakana Phonetic Extensions
    # 3200—32FF Enclosed CJK Letters and Months
    # 3300—33FF CJK Compatibility
    # 3400—4DBF CJK Unified Ideographs Extension A
    # 4DC0—4DFF Yijing Hexagram Symbols
    # 4E00—9FFF CJK Unified Ideographs
    caption = re.sub(r"[\u31c0-\u31ef]+", "", caption)
    caption = re.sub(r"[\u31f0-\u31ff]+", "", caption)
    caption = re.sub(r"[\u3200-\u32ff]+", "", caption)
    caption = re.sub(r"[\u3300-\u33ff]+", "", caption)
    caption = re.sub(r"[\u3400-\u4dbf]+", "", caption)
    caption = re.sub(r"[\u4dc0-\u4dff]+", "", caption)
    caption = re.sub(r"[\u4e00-\u9fff]+", "", caption)
    #######################################################

    # all types of dash --> "-"
    caption = re.sub(
        r"[\u002D\u058A\u05BE\u1400\u1806\u2010-\u2015\u2E17\u2E1A\u2E3A\u2E3B\u2E40\u301C\u3030\u30A0\uFE31\uFE32\uFE58\uFE63\uFF0D]+",  # noqa
        "-",
        caption,
    )

    # normalise quotation marks to one standard
    caption = re.sub(r"[`´«»“”¨]", '"', caption)
    caption = re.sub(r"[‘’]", "'", caption)

    # &quot;
    caption = re.sub(r"&quot;?", "", caption)
    # &amp
    caption = re.sub(r"&amp", "", caption)

    # ip adresses:
    caption = re.sub(r"\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}", " ", caption)

    # article ids:
    caption = re.sub(r"\d:\d\d\s+$", "", caption)

    # \n
    caption = re.sub(r"\\n", " ", caption)

    # "#123"
    caption = re.sub(r"#\d{1,3}\b", "", caption)
    # "#12345.."
    caption = re.sub(r"#\d{5,}\b", "", caption)
    # "123456.."
    caption = re.sub(r"\b\d{6,}\b", "", caption)
    # filenames:
    caption = re.sub(r"[\S]+\.(?:png|jpg|jpeg|bmp|webp|eps|pdf|apk|mp4)", "", caption)

    #
    caption = re.sub(r"[\"\']{2,}", r'"', caption)  # """AUSVERKAUFT"""
    caption = re.sub(r"[\.]{2,}", r" ", caption)  # """AUSVERKAUFT"""

    caption = re.sub(bad_punct_regex, r" ", caption)  # ***AUSVERKAUFT***, #AUSVERKAUFT
    caption = re.sub(r"\s+\.\s+", r" ", caption)  # " . "

    # this-is-my-cute-cat / this_is_my_cute_cat
    regex2 = re.compile(r"(?:\-|\_)")
    if len(re.findall(regex2, caption)) > 3:
        caption = re.sub(regex2, " ", caption)

    caption = ftfy.fix_text(caption)
    caption = html.unescape(html.unescape(caption))

    caption = re.sub(r"\b[a-zA-Z]{1,3}\d{3,15}\b", "", caption)  # jc6640
    caption = re.sub(r"\b[a-zA-Z]+\d+[a-zA-Z]+\b", "", caption)  # jc6640vc
    caption = re.sub(r"\b\d+[a-zA-Z]+\d+\b", "", caption)  # 6640vc231

    caption = re.sub(r"(worldwide\s+)?(free\s+)?shipping", "", caption)
    caption = re.sub(r"(free\s)?download(\sfree)?", "", caption)
    caption = re.sub(r"\bclick\b\s(?:for|on)\s\w+", "", caption)
    caption = re.sub(r"\b(?:png|jpg|jpeg|bmp|webp|eps|pdf|apk|mp4)(\simage[s]?)?", "", caption)
    caption = re.sub(r"\bpage\s+\d+\b", "", caption)

    caption = re.sub(r"\b\d*[a-zA-Z]+\d+[a-zA-Z]+\d+[a-zA-Z\d]*\b", r" ", caption)  # j2d1a2a...

    caption = re.sub(r"\b\d+\.?\d*[xх×]\d+\.?\d*\b", "", caption)

    caption = re.sub(r"\b\s+\:\s+", r": ", caption)
    caption = re.sub(r"(\D[,\./])\b", r"\1 ", caption)
    caption = re.sub(r"\s+", " ", caption)

    # Keep the stripped value: the substitutions below are anchored with ^ / $
    # and would otherwise miss captions left with edge whitespace by the
    # removals above. (The previous bare `caption.strip()` discarded its result.)
    caption = caption.strip()

    caption = re.sub(r"^[\"\']([\w\W]+)[\"\']$", r"\1", caption)
    caption = re.sub(r"^[\'\_,\-\:;]", r"", caption)
    caption = re.sub(r"[\'\_,\-\:\-\+]$", r"", caption)
    caption = re.sub(r"^\.\S+$", "", caption)

    return caption.strip()
<svg width="956" height="480" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" overflow="hidden"><defs><clipPath id="clip0"><rect x="0" y="0" width="956" height="480"/></clipPath><image width="59" height="97" xlink:href="" preserveAspectRatio="none" id="img1"></image><image width="59" height="97" xlink:href="" preserveAspectRatio="none" id="img2"></image><image width="58" height="97" xlink:href="" preserveAspectRatio="none" id="img3"></image><image width="59" height="97" xlink:href="" preserveAspectRatio="none" id="img4"></image><image width="59" height="97" xlink:href="" preserveAspectRatio="none" id="img5"></image><image width="58" height="97" xlink:href="" preserveAspectRatio="none" id="img6"></image><image width="58" height="97" xlink:href="" preserveAspectRatio="none" id="img7"></image><image width="59" height="97" xlink:href="" preserveAspectRatio="none" id="img8"></image></defs><g clip-path="url(#clip0)"><rect x="0" y="0" width="956" height="480.084" fill="#FFFFFF"/><use width="100%" height="100%" xlink:href="#img1" fill="none" transform="translate(8 344)"></use><use width="100%" height="100%" xlink:href="#img2" fill="none" transform="translate(28 344)"></use><use width="100%" height="100%" xlink:href="#img3" fill="none" transform="translate(48 344)"></use><use width="100%" height="100%" xlink:href="#img4" fill="none" transform="translate(67 344)"></use><use width="100%" height="100%" xlink:href="#img5" fill="none" transform="translate(87 344)"></use><use width="100%" height="100%" xlink:href="#img6" fill="none" transform="translate(107 344)"></use><use width="100%" height="100%" xlink:href="#img7" fill="none" transform="translate(127 344)"></use><use width="100%" height="100%" xlink:href="#img8" fill="none" transform="translate(146 344)"></use><rect x="2.50004" y="342.56" width="207" height="98.0171" stroke="#172C51" stroke-width="1.3331" stroke-miterlimit="8" stroke-dasharray="10.6648 3.9993" fill="none"/><text 
font-family="Times New Roman,Times New Roman_MSFontService,sans-serif" font-weight="400" font-size="21" transform="matrix(1 0 0 1.00017 2.50067 468)">Video Frame Sequences</text><rect x="3" y="281.049" width="208" height="46.008" fill="#D9D9D9"/><text font-family="Times New Roman,Times New Roman_MSFontService,sans-serif" font-weight="400" font-size="21" transform="matrix(1 0 0 1.00017 32.7162 312)">Embed to Tokens</text><rect x="3.49997" y="207.536" width="208" height="56.0098" stroke="#172C51" stroke-width="1.3331" stroke-miterlimit="8" fill="#FBE5D6"/><text font-family="Times New Roman,Times New Roman_MSFontService,sans-serif" font-weight="400" font-size="21" transform="matrix(1 0 0 1.00017 21.6583 244)">Transformer Blocks</text><rect x="1.99997" y="144.025" width="208" height="46.0081" fill="#C6DBFE"/><text font-family="Times New Roman,Times New Roman_MSFontService,sans-serif" font-weight="400" font-size="21" transform="matrix(1 0 0 1.00017 53.3473 175)">Layer Norm</text><rect x="3" y="81.0142" width="208" height="46.008" fill="#C3E3BD"/><text font-family="Times New Roman,Times New Roman_MSFontService,sans-serif" font-weight="400" font-size="21" transform="matrix(1 0 0 1.00017 22.2814 112)">Linear and Reshape</text><path d="M4.00004 15.1695C4.00004 9.5535 8.5519 5.00084 14.1669 5.00084L95.8331 5.00084C101.448 5.00084 106 9.5535 106 15.1695L106 55.8429C106 61.4589 101.448 66.0116 95.8331 66.0116L14.1669 66.0116C8.5519 66.0116 4.00004 61.4589 4.00004 55.8429Z" fill="#D4D7E3" fill-rule="evenodd"/><text font-family="Times New Roman,Times New Roman_MSFontService,sans-serif" font-weight="400" font-size="21" transform="matrix(1 0 0 1.00017 29.6895 44)">Noise</text><path d="M109 15.1695C109 9.5535 113.552 5.00084 119.167 5.00084L200.833 5.00084C206.448 5.00084 211 9.5535 211 15.1695L211 55.8429C211 61.4589 206.448 66.0116 200.833 66.0116L119.167 66.0116C113.552 66.0116 109 61.4589 109 55.8429Z" fill="#D4D7E3" fill-rule="evenodd"/><text font-family="Times New Roman,Times 
New Roman_MSFontService,sans-serif" font-weight="400" font-size="21" transform="matrix(1 0 0 1.00017 122.905 44)">Variance</text><path d="M0.333275-2.30907e-06 0.333343 9.81802-0.333207 9.81803-0.333275 2.30907e-06ZM6.80234e-05 9.81803 3.99935 7.1518 0.000104969 15.1504-3.99925 7.15185Z" transform="matrix(1 0 0 -1.00017 106.5 342.71)"/><path d="M106.167 281.665 106.167 271.88 106.833 271.88 106.833 281.665ZM106.5 271.88 102.501 274.547 106.5 266.546 110.499 274.546Z"/><path d="M106.167 207.666 106.167 195.867 106.833 195.867 106.833 207.666ZM106.5 195.867 102.501 198.533 106.5 190.533 110.499 198.533Z"/><path d="M0.333275-2.04228e-06 0.333348 11.7972-0.333203 11.7972-0.333275 2.04228e-06ZM7.22921e-05 11.7972 3.99936 9.13098 0.000104969 17.1296-3.99925 9.13103Z" transform="matrix(1 0 0 -1.00017 106.5 144.655)"/><path d="M0.333275-2.31441e-06 0.333343 9.78307-0.333207 9.78307-0.333275 2.31441e-06ZM6.7938e-05 9.78307 3.99935 7.11684 0.000104969 15.1155-3.99925 7.1169Z" transform="matrix(1 0 0 -1.00017 52.5 81.6297)"/><path d="M0.333275-2.31441e-06 0.333343 9.78307-0.333207 9.78307-0.333275 2.31441e-06ZM6.7938e-05 9.78307 3.99935 7.11684 0.000104969 15.1155-3.99925 7.1169Z" transform="matrix(1 0 0 -1.00017 162.5 80.6295)"/><rect x="607.5" y="50.5088" width="162" height="348.061" stroke="#5F5F5F" stroke-width="1.3331" stroke-miterlimit="8" fill="#FBE5D6" fill-opacity="0.501961"/><rect x="627" y="351.061" width="113" height="31.0053" fill="#C6DBFE" fill-opacity="0.501961"/><text fill="#5F5F5F" font-family="Times New Roman,Times New Roman_MSFontService,sans-serif" font-weight="400" font-size="19" transform="matrix(1 0 0 1.00017 637.868 374)">Layer Norm</text><rect x="627" y="302.053" width="113" height="30.0052" fill="#FFE2BB" fill-opacity="0.501961"/><text fill="#5F5F5F" font-family="Times New Roman,Times New Roman_MSFontService,sans-serif" font-weight="400" font-size="19" transform="matrix(1 0 0 1.00017 661.864 324)">MHA</text><rect x="627" y="125.022" width="113" 
height="30.0051" fill="#C6DBFE"/><text fill="#5F5F5F" font-family="Times New Roman,Times New Roman_MSFontService,sans-serif" font-weight="400" font-size="19" transform="matrix(1 0 0 1.00017 637.868 147)">Layer Norm</text><rect x="627" y="74.0129" width="113" height="31.0054" fill="#C2E8F7" fill-opacity="0.501961"/><text fill="#5F5F5F" font-family="Times New Roman,Times New Roman_MSFontService,sans-serif" font-weight="400" font-size="19" transform="matrix(1 0 0 1.00017 664.531 96)">MLP</text><path d="M681.5 283.549 681.5 295.552M675.5 289.55 687.5 289.55" stroke="#5F5F5F" stroke-width="1.3331" stroke-miterlimit="8" fill="none" fill-rule="evenodd"/><path d="M675.5 289.55C675.5 286.236 678.186 283.549 681.5 283.549 684.814 283.549 687.5 286.236 687.5 289.55 687.5 292.865 684.814 295.552 681.5 295.552 678.186 295.552 675.5 292.865 675.5 289.55Z" stroke="#5F5F5F" stroke-width="1.3331" stroke-miterlimit="8" fill="none" fill-rule="evenodd"/><path d="M681.167 411.803 681.167 388.233 681.833 388.233 681.833 411.803ZM677.501 389.567 681.5 381.567 685.499 389.566Z" fill="#5F5F5F"/><path d="M681.167 351.456 681.167 339.225 681.833 339.225 681.833 351.456ZM677.501 340.558 681.5 332.558 685.499 340.558Z" fill="#5F5F5F"/><path d="M681.167 300.783 681.167 277.214 681.833 277.214 681.833 300.783ZM677.501 278.547 681.5 270.547 685.499 278.547Z" fill="#5F5F5F"/><path d="M681.167 125.416 681.167 113.185 681.833 113.185 681.833 125.416ZM677.501 114.519 681.5 106.519 685.499 114.519Z" fill="#5F5F5F"/><path d="M682 54.5095 682 65.5114M676.5 60.0104 687.5 60.0104" stroke="#5F5F5F" stroke-width="1.3331" stroke-miterlimit="8" fill="none" fill-rule="evenodd"/><path d="M676.5 60.0104C676.5 56.9724 678.962 54.5095 682 54.5095 685.038 54.5095 687.5 56.9724 687.5 60.0104 687.5 63.0485 685.038 65.5114 682 65.5114 678.962 65.5114 676.5 63.0485 676.5 60.0104Z" stroke="#5F5F5F" stroke-width="1.3331" stroke-miterlimit="8" fill="none" fill-rule="evenodd"/><rect x="627" y="238.041" width="113" 
height="30.0052" fill="#C6DBFE" fill-opacity="0.501961"/><text fill="#5F5F5F" font-family="Times New Roman,Times New Roman_MSFontService,sans-serif" font-weight="400" font-size="19" transform="matrix(1 0 0 1.00017 637.704 260)">Layer Norm</text><rect x="627" y="188.033" width="113" height="30.0052" fill="#FFE2BB" fill-opacity="0.501961"/><text fill="#5F5F5F" font-family="Times New Roman,Times New Roman_MSFontService,sans-serif" font-weight="400" font-size="19" transform="matrix(1 0 0 1.00017 661.7 210)">MHA</text><path d="M681.5 170.53 681.5 182.532M675.5 176.531 687.5 176.531" stroke="#5F5F5F" stroke-width="1.3331" stroke-miterlimit="8" fill="none" fill-rule="evenodd"/><path d="M675.5 176.531C675.5 173.217 678.186 170.53 681.5 170.53 684.814 170.53 687.5 173.217 687.5 176.531 687.5 179.845 684.814 182.532 681.5 182.532 678.186 182.532 675.5 179.845 675.5 176.531Z" stroke="#5F5F5F" stroke-width="1.3331" stroke-miterlimit="8" fill="none" fill-rule="evenodd"/><path d="M681.167 238.436 681.167 226.205 681.833 226.205 681.833 238.436ZM677.501 227.538 681.5 219.538 685.499 227.538Z" fill="#5F5F5F"/><rect x="633" y="415.072" width="105" height="36.0063" fill="#DAE3F3" fill-opacity="0.501961"/><text fill="#5F5F5F" font-family="Times New Roman,Times New Roman_MSFontService,sans-serif" font-weight="400" font-size="19" transform="matrix(1 0 0 1.00017 642.467 440)">Embedding</text><text fill="#5F5F5F" font-family="Cambria Math,Cambria Math_MSFontService,sans-serif" font-weight="400" font-size="19" transform="matrix(1 0 0 1.00017 642.663 410)">𝑧</text><text fill="#5F5F5F" font-family="Cambria Math,Cambria Math_MSFontService,sans-serif" font-weight="400" font-size="14" transform="matrix(1 0 0 1.00017 651.208 414)">𝑠</text><text fill="#5F5F5F" font-family="Cambria Math,Cambria Math_MSFontService,sans-serif" font-weight="400" font-size="19" transform="matrix(1 0 0 1.00017 653.351 289)">𝑧</text><text fill="#5F5F5F" font-family="Cambria Math,Cambria Math_MSFontService,sans-serif" 
font-weight="400" font-size="14" transform="matrix(1 0 0 1.00017 661.896 293)">𝑡</text><text fill="#5F5F5F" font-family="Times New Roman,Times New Roman_MSFontService,sans-serif" font-weight="400" font-size="21" transform="matrix(1 0 0 1.00017 641.612 23)">Variant 3</text><path d="M681.167 186.763 681.167 163.194 681.833 163.194 681.833 186.763ZM677.501 164.527 681.5 156.527 685.499 164.527Z" fill="#5F5F5F"/><path d="M681.167 395.634 681.167 394.714 754.271 394.714 753.938 395.047 753.938 289.55 754.271 289.884 694.448 289.884 694.448 289.217 754.604 289.217 754.604 395.38 681.5 395.38 681.833 395.047 681.833 395.634ZM695.781 293.55 687.782 289.55 695.781 285.55Z" fill="#5F5F5F"/><path d="M681.167 283.909 681.167 280.221 753.492 280.221 753.159 280.554 753.159 176.531 753.492 176.864 693.669 176.864 693.669 176.197 753.825 176.197 753.825 280.887 681.5 280.887 681.833 280.554 681.833 283.909ZM695.002 180.531 687.004 176.531 695.002 172.531Z" fill="#5F5F5F"/><path d="M681.167 170.7 681.167 167.206 753.653 167.206 753.32 167.539 753.32 59.5104 753.653 59.8437 694.33 59.8437 694.33 59.177 753.986 59.177 753.986 167.872 681.5 167.872 681.833 167.539 681.833 170.7ZM695.663 63.5104 687.664 59.5104 695.663 55.5104Z" fill="#5F5F5F"/><path d="M0.333275-7.7147e-07 0.333365 38.6809-0.333186 38.6809-0.333275 7.7147e-07ZM3.99939 37.3478 0.000104969 45.3464-3.99922 37.3478Z" fill="#5F5F5F" transform="matrix(1 0 0 -1.00017 681.5 74.8594)"/><text fill="#5F5F5F" font-family="Cambria Math,Cambria Math_MSFontService,sans-serif" font-weight="400" font-size="19" transform="matrix(1 0 0 1.00017 652.725 175)">𝑧</text><text fill="#5F5F5F" font-family="Cambria Math,Cambria Math_MSFontService,sans-serif" font-weight="400" font-size="14" transform="matrix(1 0 0 1.00017 661.27 179)">𝑠</text><rect x="790.5" y="50.5088" width="162" height="348.061" stroke="#5F5F5F" stroke-width="1.3331" stroke-miterlimit="8" fill="#FBE5D6" fill-opacity="0.501961"/><rect x="815" y="324.056" width="113" 
height="30.0052" fill="#C6DBFE" fill-opacity="0.501961"/><text fill="#5F5F5F" font-family="Times New Roman,Times New Roman_MSFontService,sans-serif" font-weight="400" font-size="19" transform="matrix(1 0 0 1.00017 825.669 346)">Layer Norm</text><rect x="803" y="268.047" width="64.0001" height="30.0052" fill="#FFE2BB" fill-opacity="0.501961"/><text fill="#5F5F5F" font-family="Times New Roman,Times New Roman_MSFontService,sans-serif" font-weight="400" font-size="19" transform="matrix(1 0 0 1.00017 813.468 290)">MHA</text><rect x="815" y="147.026" width="113" height="31.0055" fill="#C6DBFE" fill-opacity="0.501961"/><text fill="#5F5F5F" font-family="Times New Roman,Times New Roman_MSFontService,sans-serif" font-weight="400" font-size="19" transform="matrix(1 0 0 1.00017 825.669 169)">Layer Norm</text><rect x="815" y="97.0169" width="113" height="30.0052" fill="#C2E8F7" fill-opacity="0.501961"/><text fill="#5F5F5F" font-family="Times New Roman,Times New Roman_MSFontService,sans-serif" font-weight="400" font-size="19" transform="matrix(1 0 0 1.00017 852.331 118)">MLP</text><path d="M868.167 415.034 868.167 361.228 868.833 361.228 868.833 415.034ZM864.501 362.562 868.5 354.562 872.499 362.562Z" fill="#5F5F5F"/><path d="M839.167 323.229 839.167 307.219 839.833 307.219 839.833 323.229ZM835.501 308.552 839.5 300.552 843.499 308.552Z" fill="#5F5F5F"/><path d="M869.167 147.42 869.167 135.189 869.833 135.189 869.833 147.42ZM865.501 136.522 869.5 128.522 873.499 136.522Z" fill="#5F5F5F"/><path d="M869 69.5121 869 81.5142M863.5 75.5132 874.5 75.5132" stroke="#5F5F5F" stroke-width="1.3331" stroke-miterlimit="8" fill="none" fill-rule="evenodd"/><path d="M863.5 75.5132C863.5 72.1989 865.962 69.5121 869 69.5121 872.038 69.5121 874.5 72.1989 874.5 75.5132 874.5 78.8275 872.038 81.5142 869 81.5142 865.962 81.5142 863.5 78.8275 863.5 75.5132Z" stroke="#5F5F5F" stroke-width="1.3331" stroke-miterlimit="8" fill="none" fill-rule="evenodd"/><rect x="815" y="214.037" width="114" 
height="30.0052" fill="#E5D6C6" fill-opacity="0.501961"/><text fill="#5F5F5F" font-family="Times New Roman,Times New Roman_MSFontService,sans-serif" font-weight="400" font-size="19" transform="matrix(1 0 0 1.00017 846.418 236)">Fusion</text><path d="M870 194.534 870 205.536M864.5 200.035 875.5 200.035" stroke="#5F5F5F" stroke-width="1.3331" stroke-miterlimit="8" fill="none" fill-rule="evenodd"/><path d="M864.5 200.035C864.5 196.997 866.962 194.534 870 194.534 873.038 194.534 875.5 196.997 875.5 200.035 875.5 203.073 873.038 205.536 870 205.536 866.962 205.536 864.5 203.073 864.5 200.035Z" stroke="#5F5F5F" stroke-width="1.3331" stroke-miterlimit="8" fill="none" fill-rule="evenodd"/><rect x="820" y="415.072" width="106" height="36.0063" fill="#DAE3F3" fill-opacity="0.501961"/><text fill="#5F5F5F" font-family="Times New Roman,Times New Roman_MSFontService,sans-serif" font-weight="400" font-size="19" transform="matrix(1 0 0 1.00017 830.268 440)">Embedding</text><text fill="#5F5F5F" font-family="Times New Roman,Times New Roman_MSFontService,sans-serif" font-weight="400" font-size="21" transform="matrix(1 0 0 1.00017 829.413 23)">Variant</text><text fill="#5F5F5F" font-family="Times New Roman,Times New Roman_MSFontService,sans-serif" font-weight="400" font-size="21" transform="matrix(1 0 0 1.00017 896.535 23)">4</text><path d="M869.167 212.547 869.167 185.198 869.833 185.198 869.833 212.547ZM865.501 186.531 869.5 178.531 873.499 186.531Z" fill="#5F5F5F"/><path d="M0.333275-5.29008e-07 0.33337 59.4647-0.333181 59.4647-0.333275 5.29008e-07ZM3.99939 58.1316 0.000104969 66.1302-3.99921 58.1316Z" fill="#5F5F5F" transform="matrix(1 0 0 -1.00017 869.5 96.647)"/><text fill="#5F5F5F" font-family="Cambria Math,Cambria Math_MSFontService,sans-serif" font-weight="400" font-size="19" transform="matrix(1 0 0 1.00017 816.915 314)">𝑧</text><text fill="#5F5F5F" font-family="Cambria Math,Cambria Math_MSFontService,sans-serif" font-weight="400" font-size="14" transform="matrix(1 0 0 
1.00017 825.46 318)">𝑠</text><rect x="874" y="268.047" width="64.0001" height="30.0052" fill="#FFE2BB" fill-opacity="0.501961"/><text fill="#5F5F5F" font-family="Times New Roman,Times New Roman_MSFontService,sans-serif" font-weight="400" font-size="19" transform="matrix(1 0 0 1.00017 884.503 290)">MHA</text><path d="M907.167 322.229 907.167 306.219 907.833 306.219 907.833 322.229ZM903.501 307.552 907.5 299.552 911.499 307.552Z" fill="#5F5F5F"/><path d="M839.167 268.22 839.167 252.209 839.833 252.209 839.833 268.22ZM835.501 253.543 839.5 245.543 843.499 253.543Z" fill="#5F5F5F"/><path d="M907.167 268.22 907.167 252.209 907.833 252.209 907.833 268.22ZM903.501 253.543 907.5 245.543 911.499 253.543Z" fill="#5F5F5F"/><path d="M869.167 382.102 869.167 381.091 943.929 381.091 943.595 381.424 943.595 200.535 943.929 200.868 882 200.868 882 200.202 944.262 200.202 944.262 381.758 869.5 381.758 869.833 381.424 869.833 382.102ZM883.333 204.535 875.334 200.535 883.333 196.535Z" fill="#5F5F5F"/><path d="M869.833 188.339 869.833 189.427 869.5 189.094 943.985 189.094 943.652 189.427 943.652 75.5132 943.985 75.8465 881.329 75.8465 881.329 75.1798 944.318 75.1798 944.318 189.76 869.167 189.76 869.167 188.339ZM882.662 79.5132 874.664 75.5132 882.662 71.5132Z" fill="#5F5F5F"/><text fill="#5F5F5F" font-family="Cambria Math,Cambria Math_MSFontService,sans-serif" font-weight="400" font-size="19" transform="matrix(1 0 0 1.00017 887.428 314)">𝑧</text><text fill="#5F5F5F" font-family="Cambria Math,Cambria Math_MSFontService,sans-serif" font-weight="400" font-size="14" transform="matrix(1 0 0 1.00017 895.973 318)">𝑡</text><text fill="#5F5F5F" font-family="Cambria Math,Cambria Math_MSFontService,sans-serif" font-weight="400" font-size="19" transform="matrix(1 0 0 1.00017 839.115 200)">𝑧</text><text fill="#5F5F5F" font-family="Cambria Math,Cambria Math_MSFontService,sans-serif" font-weight="400" font-size="14" transform="matrix(1 0 0 1.00017 847.661 204)">𝑠</text><path d="M779.5 0.500053 
779.5 480.5" stroke="#000000" stroke-width="0.66655" stroke-miterlimit="8" stroke-dasharray="2.6662 1.99965" fill="none" fill-rule="evenodd"/><path d="M225.5 0.500053 225.5 480.5" stroke="#000000" stroke-width="0.66655" stroke-miterlimit="8" stroke-dasharray="2.6662 1.99965" fill="none" fill-rule="evenodd"/><rect x="423.5" y="50.5088" width="163" height="348.061" stroke="#5F5F5F" stroke-width="1.3331" stroke-miterlimit="8" fill="none"/><rect x="451" y="415.072" width="106" height="36.0063" fill="#DAE3F3" fill-opacity="0.501961"/><text fill="#5F5F5F" font-family="Times New Roman,Times New Roman_MSFontService,sans-serif" font-weight="400" font-size="19" transform="matrix(1 0 0 1.00017 460.755 440)">Embedding</text><rect x="443" y="350.061" width="122" height="33.0058" fill="#FBE5D6" fill-opacity="0.501961"/><text fill="#5F5F5F" font-family="Times New Roman,Times New Roman_MSFontService,sans-serif" font-weight="400" font-size="21" transform="matrix(1 0 0 1.00017 474.445 375)">Spatial</text><rect x="443" y="294.051" width="122" height="32.0056" fill="#FBE5D6" fill-opacity="0.501961"/><text fill="#5F5F5F" font-family="Times New Roman,Times New Roman_MSFontService,sans-serif" font-weight="400" font-size="21" transform="matrix(1 0 0 1.00017 474.445 318)">Spatial</text><rect x="443" y="124.022" width="122" height="32.0056" fill="#FBE5D6" fill-opacity="0.501961"/><text fill="#5F5F5F" font-family="Times New Roman,Times New Roman_MSFontService,sans-serif" font-weight="400" font-size="21" transform="matrix(1 0 0 1.00017 463.357 148)">Temporal</text><rect x="443" y="180.031" width="122" height="33.0058" fill="#FBE5D6" fill-opacity="0.501961"/><text fill="#5F5F5F" font-family="Times New Roman,Times New Roman_MSFontService,sans-serif" font-weight="400" font-size="21" transform="matrix(1 0 0 1.00017 463.357 205)">Temporal</text><rect x="443" y="237.041" width="122" height="32.0056" fill="#FBE5D6" fill-opacity="0.501961"/><text fill="#5F5F5F" font-family="Times New Roman,Times New 
Roman_MSFontService,sans-serif" font-weight="400" font-size="21" transform="matrix(1 0 0 1.00017 474.445 261)">Spatial</text><rect x="443" y="67.0117" width="122" height="33.0058" fill="#FBE5D6" fill-opacity="0.501961"/><text fill="#5F5F5F" font-family="Times New Roman,Times New Roman_MSFontService,sans-serif" font-weight="400" font-size="21" transform="matrix(1 0 0 1.00017 463.357 92)">Temporal</text><path d="M502.167 348.725 502.167 334.224 502.833 334.224 502.833 348.725ZM498.501 335.557 502.5 327.557 506.499 335.557Z" fill="#5F5F5F"/><path d="M0.333275-1.65298e-06 0.333347 14.4984-0.333203 14.4984-0.333275 1.65298e-06ZM3.99937 13.1652 0.000104969 21.1639-3.99924 13.1653Z" fill="#5F5F5F" transform="matrix(1 0 0 -1.00017 501.5 292.715)"/><path d="M0.333275-1.65315e-06 0.333347 14.4962-0.333203 14.4962-0.333275 1.65315e-06ZM3.99937 13.163 0.000104969 21.1617-3.99924 13.1631Z" fill="#5F5F5F" transform="matrix(1 0 0 -1.00017 502.5 236.703)"/><path d="M0.333275-1.65298e-06 0.333347 14.4984-0.333203 14.4984-0.333275 1.65298e-06ZM3.99937 13.1652 0.000104969 21.1639-3.99924 13.1653Z" fill="#5F5F5F" transform="matrix(1 0 0 -1.00017 502.5 178.695)"/><path d="M0.333275-1.65298e-06 0.333347 14.4984-0.333203 14.4984-0.333275 1.65298e-06ZM3.99937 13.1652 0.000104969 21.1639-3.99924 13.1653Z" fill="#5F5F5F" transform="matrix(1 0 0 -1.00017 502.5 122.685)"/><text fill="#5F5F5F" font-family="Cambria Math,Cambria Math_MSFontService,sans-serif" font-weight="400" font-size="19" transform="matrix(1 0 0 1.00017 475.168 410)">𝑧</text><text fill="#5F5F5F" font-family="Cambria Math,Cambria Math_MSFontService,sans-serif" font-weight="400" font-size="14" transform="matrix(1 0 0 1.00017 483.713 414)">𝑠</text><path d="M501.167 412.803 501.167 389.233 501.833 389.233 501.833 412.803ZM497.501 390.567 501.5 382.567 505.499 390.567Z" fill="#5F5F5F"/><text fill="#5F5F5F" font-family="Cambria Math,Cambria Math_MSFontService,sans-serif" font-weight="400" font-size="19" transform="matrix(1 0 0 
1.00017 475.793 229)">𝑧</text><text fill="#5F5F5F" font-family="Cambria Math,Cambria Math_MSFontService,sans-serif" font-weight="400" font-size="14" transform="matrix(1 0 0 1.00017 484.338 233)">𝑡</text><path d="M0.333275-9.74489e-07 0.333361 29.2337-0.33319 29.2337-0.333275 9.74489e-07ZM3.99938 27.9006 0.000104969 35.8993-3.99922 27.9007Z" fill="#5F5F5F" transform="matrix(1 0 0 -1.00017 501.5 65.4106)"/><text fill="#5F5F5F" font-family="Times New Roman,Times New Roman_MSFontService,sans-serif" font-weight="400" font-size="21" transform="matrix(1 0 0 1.00017 464.999 23)">Variant 2</text><path d="M596.5 0.500053 596.5 480.5" stroke="#000000" stroke-width="0.66655" stroke-miterlimit="8" stroke-dasharray="2.6662 1.99965" fill="none" fill-rule="evenodd"/><text fill="#5F5F5F" font-family="Times New Roman,Times New Roman_MSFontService,sans-serif" font-weight="400" font-size="21" transform="matrix(1 0 0 1.00017 491.304 474)">(b)</text><text fill="#5F5F5F" font-family="Times New Roman,Times New Roman_MSFontService,sans-serif" font-weight="400" font-size="21" transform="matrix(1 0 0 1.00017 671.614 474)">(c)</text><text fill="#5F5F5F" font-family="Times New Roman,Times New Roman_MSFontService,sans-serif" font-weight="400" font-size="21" transform="matrix(1 0 0 1.00017 862.593 474)">(d)</text><text font-family="Cambria Math,Cambria Math_MSFontService,sans-serif" font-weight="400" font-size="19" transform="matrix(1 0 0 1.00017 293.292 341)">𝑧</text><text font-family="Cambria Math,Cambria Math_MSFontService,sans-serif" font-weight="400" font-size="14" transform="matrix(1 0 0 1.00017 301.837 345)">𝑡</text><rect x="240.5" y="50.5088" width="163" height="348.061" stroke="#172C51" stroke-width="1.3331" stroke-miterlimit="8" fill="none"/><rect x="268" y="415.072" width="106" height="35.0061" fill="#DAE3F3"/><text font-family="Times New Roman,Times New Roman_MSFontService,sans-serif" font-weight="400" font-size="19" transform="matrix(1 0 0 1.00017 277.956 439)">Embedding</text><rect 
x="260" y="350.061" width="122" height="32.0056" fill="#FBE5D6"/><text font-family="Times New Roman,Times New Roman_MSFontService,sans-serif" font-weight="400" font-size="21" transform="matrix(1 0 0 1.00017 291.646 374)">Spatial</text><rect x="260" y="293.051" width="122" height="32.0056" fill="#FBE5D6"/><text font-family="Times New Roman,Times New Roman_MSFontService,sans-serif" font-weight="400" font-size="21" transform="matrix(1 0 0 1.00017 280.558 317)">Temporal</text><rect x="260" y="123.021" width="122" height="33.0058" fill="#FBE5D6"/><text font-family="Times New Roman,Times New Roman_MSFontService,sans-serif" font-weight="400" font-size="21" transform="matrix(1 0 0 1.00017 291.646 148)">Spatial</text><rect x="260" y="180.031" width="122" height="32.0056" fill="#FBE5D6"/><text font-family="Times New Roman,Times New Roman_MSFontService,sans-serif" font-weight="400" font-size="21" transform="matrix(1 0 0 1.00017 280.558 204)">Temporal</text><rect x="260" y="236.041" width="122" height="33.0058" fill="#FBE5D6"/><text font-family="Times New Roman,Times New Roman_MSFontService,sans-serif" font-weight="400" font-size="21" transform="matrix(1 0 0 1.00017 291.646 261)">Spatial</text><rect x="260" y="66.0116" width="122" height="33.0057" fill="#FBE5D6"/><text font-family="Times New Roman,Times New Roman_MSFontService,sans-serif" font-weight="400" font-size="21" transform="matrix(1 0 0 1.00017 280.558 91)">Temporal</text><path d="M319.167 347.725 319.167 333.224 319.833 333.224 319.833 347.725ZM315.501 334.557 319.5 326.557 323.499 334.557Z"/><path d="M0.333275-1.65298e-06 0.333347 14.4984-0.333203 14.4984-0.333275 1.65298e-06ZM3.99937 13.1652 0.000104969 21.1639-3.99924 13.1653Z" transform="matrix(1 0 0 -1.00017 319.5 291.715)"/><path d="M0.333275-1.65315e-06 0.333347 14.4962-0.333203 14.4962-0.333275 1.65315e-06ZM3.99937 13.163 0.000104969 21.1617-3.99924 13.1631Z" transform="matrix(1 0 0 -1.00017 319.5 235.703)"/><path d="M0.333275-1.65298e-06 0.333347 
14.4984-0.333203 14.4984-0.333275 1.65298e-06ZM3.99937 13.1652 0.000104969 21.1639-3.99924 13.1653Z" transform="matrix(1 0 0 -1.00017 319.5 178.695)"/><path d="M0.333275-1.65298e-06 0.333347 14.4984-0.333203 14.4984-0.333275 1.65298e-06ZM3.99937 13.1652 0.000104969 21.1639-3.99924 13.1653Z" transform="matrix(1 0 0 -1.00017 319.5 121.685)"/><text font-family="Cambria Math,Cambria Math_MSFontService,sans-serif" font-weight="400" font-size="19" transform="matrix(1 0 0 1.00017 292.666 409)">𝑧</text><text font-family="Cambria Math,Cambria Math_MSFontService,sans-serif" font-weight="400" font-size="14" transform="matrix(1 0 0 1.00017 301.211 413)">𝑠</text><path d="M319.167 411.803 319.167 388.233 319.833 388.233 319.833 411.803ZM315.501 389.567 319.5 381.567 323.499 389.566Z"/><path d="M0.333275-1.02863e-06 0.33336 27.3443-0.333191 27.3443-0.333275 1.02863e-06ZM3.99938 26.0112 0.000104969 34.0098-3.99922 26.0112Z" transform="matrix(1 0 0 -1.00017 319.5 65.5213)"/><text font-family="Times New Roman,Times New Roman_MSFontService,sans-serif" font-weight="400" font-size="21" transform="matrix(1 0 0 1.00017 282.2 23)">Variant 1</text><path d="M413.5 0.500053 413.5 480.5" stroke="#000000" stroke-width="0.66655" stroke-miterlimit="8" stroke-dasharray="2.6662 1.99965" fill="none" fill-rule="evenodd"/><text font-family="Times New Roman,Times New Roman_MSFontService,sans-serif" font-weight="400" font-size="21" transform="matrix(1 0 0 1.00017 309.909 473)">(a)</text></g></svg>
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment