Commit 2a934cec authored by raojy's avatar raojy
Browse files

first

parent 4b618aa3
{"prompt" : "Write a promotional copy for a beachfront villa. Interleave images of the villa's exterior, the infinity pool, and the ocean view from the master bedroom."}
{"prompt": "香氛蜡烛的产品宣传图,多图"}
{"prompt": "讲一下经典童话《卖火柴的小女孩》,但这次请给出一个温暖的平行宇宙改编版图文绘本。在最后一次擦亮火柴时,出现的不是幻象,而是一只拥有魔法的驯鹿,它载着小女孩飞向了有糖果和壁炉的城堡"}
{"prompt": "<image>\nDesign some travel diaries for my chubby orange cat,he strolled through a fragrant flower shop, took a break amidst the ink-black clouds of Huangshan Mountain, and finally gazed at the deep blue of the submarine.", "image": ["./examples/interleave/data/images/image1.png"]}
{"prompt": "Design three makeup looks suitable for Black women, each appropriate for a different occasion: work, banquet, and date."}
{"prompt": "Create a slide outlining our model for integrating beneficiary insights with long-term financial sustainability. We are prioritizing inclusive UX research—specifically moving away from extractive methods toward participatory design. This involves deploying offline-first mobile tools and pictorial feedback loops to account for low-literacy populations and the power dynamics inherent in social service delivery. These qualitative insights must be linked to our SROI framework and Theory of Change, using counterfactuals and Lean Data protocols like SMS and IVR to produce statistically defensible impact metrics. This rigorous data pipeline is what allows us to move toward trust-based philanthropy and secure multi-year unrestricted funding through real-time impact dashboards. The objective is to demonstrate that high-quality UX research directly drives superior measurable outcomes, making the organization investment-ready for institutional donors. Integrating these methodologies is the only way to bridge the reporting gap and sustain systemic change."}
\ No newline at end of file
{"prompt": "<image>\nThe scene shows a maze with a green circular agent, colored diamond-shaped keys, and colored hollow rectangular doors. Find the Yellow key and then navigate to the matching Yellow door, showing the complete movement process step by step.", "image": ["./examples/interleave/data/images_reasoning/G-45_key_door_matching_data-generator/00003/first_frame.png"]}
{"prompt": "<image>\nThe pink rectangle marked with a green border is the only object that will move. It will move horizontally to align directly below the blue circle marked with a red star. Track the movement with the green border as the object moves.", "image": ["./examples/interleave/data/images_reasoning/G-8_track_object_movement_data-generator/00004/first_frame.png"]}
{"prompt": "<image>\nIn the scene, there is a black ball and several colored balls of different sizes. The black ball can eat balls that are smaller than itself. After eating a ball, the black ball grows larger. Find the correct sequence to eat all colored balls step by step.", "image": ["./examples/interleave/data/images_reasoning/O-31_ball_eating_data-generator/00004/first_frame.png"]}
{"prompt": "<image>\nIn the scene there are two objects and their corresponding target outlines; each outline matches its object in color and shape. Move each object to its matching outline via shortest path. Show the movement step by step.", "image": ["./examples/interleave/data/images_reasoning/O-27_move_2_object_to_2_target_data-generator/00002/first_frame.png"]}
{"prompt": "<image>\nDemonstrate subtractive color mixing. The image shows two pigment colors. The center region is marked with a white rectangular outline. Predict and show what color appears in the marked mixing zone when these two pigments combine.", "image": ["./examples/interleave/data/images_reasoning/O-2_pigment_color_mixing_subtractive_data-generator/00004/first_frame.png"]}
from __future__ import annotations
import argparse
import json
import math
import random
from pathlib import Path
from typing import Sequence
import numpy as np
import torch
from PIL import Image
import sensenova_u1
from sensenova_u1.utils import (
DEFAULT_IMAGE_PATCH_SIZE,
DEFAULT_VRAM_MODE,
InferenceProfiler,
add_offload_args,
best_available_device,
load_and_merge_lora_weight_from_safetensors,
load_model_and_tokenizer,
make_offload_ctx,
seed_all_accelerators,
vram_mode_to_prefetch_count,
)
# Per-channel normalization statistics. `_denorm` inverts the normalization
# with ``x * std + mean``.
NORM_MEAN = (0.5, 0.5, 0.5)
NORM_STD = (0.5, 0.5, 0.5)
# Default value of the --seed command-line option.
DEFAULT_SEED = 42
# Aspect-ratio label -> (width, height) in pixels. `_warn_if_unsupported`
# reports any size outside this table as outside the trained resolution set.
SUPPORTED_RESOLUTIONS: dict[str, tuple[int, int]] = {
    "1:1": (1536, 1536),
    "16:9": (2048, 1152),
    "9:16": (1152, 2048),
    "3:2": (1888, 1248),
    "2:3": (1248, 1888),
    "4:3": (1760, 1312),
    "3:4": (1312, 1760),
    "1:2": (1088, 2144),
    "2:1": (2144, 1088),
    "1:3": (864, 2592),
    "3:1": (2592, 864),
}
# Default of --resolution, and the (width, height) it maps to.
DEFAULT_RESOLUTION = "16:9"
DEFAULT_WIDTH, DEFAULT_HEIGHT = SUPPORTED_RESOLUTIONS[DEFAULT_RESOLUTION]
def _warn_if_unsupported(width: int, height: int) -> None:
    """Print a warning when ``(width, height)`` is not a known resolution bucket.

    The size is compared against the values of ``SUPPORTED_RESOLUTIONS``;
    nothing is printed on an exact match. The warning lists every supported
    bucket as ``ratio->WxH``.
    """
    known_sizes = SUPPORTED_RESOLUTIONS.values()
    if (width, height) not in known_sizes:
        descriptions = [f"{ratio}->{bw}x{bh}" for ratio, (bw, bh) in SUPPORTED_RESOLUTIONS.items()]
        buckets = ", ".join(descriptions)
        print(
            f"[warn] ({width}x{height}) is outside the trained resolution set; "
            f"quality may degrade. Supported buckets: {buckets}"
        )
# Interleave inference requires a system prompt that describes the
# think / no-think protocol expected by the model during training.
# This is the default of both the --system_message option and the
# ``system_message`` parameter of ``SenseNovaU1Interleave.generate``.
DEFAULT_SYSTEM_MESSAGE = """You are a multimodal assistant capable of reasoning with both text and images. You support two modes:\n\nThink Mode: When reasoning is needed, you MUST start with a <think></think> block and place all reasoning inside it. You MUST interleave text with generated images using tags like <image1>, <image2>. Images can ONLY be generated between <think> and </think>, and may be referenced in the final answer.\n\nNon-Think Mode: When no reasoning is needed, directly provide the answer without reasoning. Do not use tags like <image1>, <image2>; present any images naturally alongside the text.\n\nAfter the think block, always provide a concise, user-facing final answer. The answer may include text, images, or both. Match the user's language in both reasoning and the final answer."""
def _set_seed(seed: int) -> None:
    """Seed python ``random``, numpy, torch and every accelerator backend.

    The seeders are invoked in a fixed order: ``random.seed``,
    ``np.random.seed``, ``torch.manual_seed``, then ``seed_all_accelerators``.
    """
    seeders = (random.seed, np.random.seed, torch.manual_seed, seed_all_accelerators)
    for apply_seed in seeders:
        apply_seed(seed)
def _round_by(n: float, factor: int) -> int:
    """Round ``n`` to the nearest multiple of ``factor`` (ties follow ``round``)."""
    return round(n / factor) * factor


def _ceil_by(n: float, factor: int) -> int:
    """Round ``n`` up to a multiple of ``factor``."""
    return math.ceil(n / factor) * factor


def _floor_by(n: float, factor: int) -> int:
    """Round ``n`` down to a multiple of ``factor``."""
    return math.floor(n / factor) * factor


def smart_resize(
    height: int,
    width: int,
    factor: int = 32,
    min_pixels: int = 512 * 512,
    max_pixels: int = (4 * 2048 * 2048) // 8,
) -> tuple[int, int]:
    """Return ``(h, w)`` that are divisible by ``factor``, keep aspect ratio,
    and fall inside ``[min_pixels, max_pixels]``.

    Adapted from the Qwen2.5-VL utility used by the training pipeline so
    generated-image sizes stay in the buckets the model saw during SFT.

    Args:
        height: Source height in pixels; must be positive.
        width: Source width in pixels; must be positive.
        factor: Both returned sides are multiples of this value.
        min_pixels: Sizes whose rounded area is below this are scaled up.
        max_pixels: Sizes whose rounded area is above this are scaled down.

    Raises:
        ValueError: If either dimension is not positive, or the ratio between
            the longer and the shorter side exceeds 200.
    """
    # Reject degenerate sizes up front: a zero side would otherwise surface as
    # a ZeroDivisionError from the ratio message or from the scale-up branch.
    if height <= 0 or width <= 0:
        raise ValueError(f"height and width must be positive, got height={height}, width={width}")
    aspect = max(height, width) / min(height, width)
    if aspect > 200:
        raise ValueError(f"absolute aspect ratio must be < 200, got {aspect}")

    h_bar = max(factor, _round_by(height, factor))
    w_bar = max(factor, _round_by(width, factor))
    if h_bar * w_bar > max_pixels:
        # Too large: shrink both sides by the same ratio, rounding down so the
        # area stays at or below max_pixels.
        beta = math.sqrt((height * width) / max_pixels)
        h_bar = max(factor, _floor_by(height / beta, factor))
        w_bar = max(factor, _floor_by(width / beta, factor))
    elif h_bar * w_bar < min_pixels:
        # Too small: grow both sides by the same ratio, rounding up so the
        # area reaches at least min_pixels.
        beta = math.sqrt(min_pixels / (height * width))
        h_bar = _ceil_by(height * beta, factor)
        w_bar = _ceil_by(width * beta, factor)
    return h_bar, w_bar
def _denorm(x: torch.Tensor) -> torch.Tensor:
    """Undo the ``NORM_MEAN`` / ``NORM_STD`` normalization and clamp to ``[0, 1]``.

    The statistics are materialized with the dtype and device of ``x`` and
    reshaped to ``(1, 3, 1, 1)`` before being applied as ``x * std + mean``.
    """
    stats_shape = (1, 3, 1, 1)
    mean = x.new_tensor(NORM_MEAN).view(*stats_shape)
    std = x.new_tensor(NORM_STD).view(*stats_shape)
    restored = x * std + mean
    return torch.clamp(restored, 0, 1)
def _to_pil(batch: torch.Tensor) -> Image.Image:
    """Convert a single [1, 3, H, W] normalized tensor to a PIL image."""
    # Denormalize in float32, then move channels last for PIL.
    unit_range = _denorm(batch.float())
    channels_last = unit_range.permute(0, 2, 3, 1).cpu().numpy()
    # Scale to 0..255 and quantize; only the first batch entry is rendered.
    as_uint8 = np.round(channels_last * 255.0).astype(np.uint8)
    return Image.fromarray(as_uint8[0])
class SenseNovaU1Interleave:
    """Thin wrapper around ``AutoModel.from_pretrained`` for interleaved text+image generation.

    Because ``sensenova_u1`` has already registered the config / model with
    transformers at import time, no ``trust_remote_code=True`` is needed.
    """

    def __init__(
        self,
        model_path: str,
        device: str = "cuda",
        dtype: torch.dtype = torch.bfloat16,
        gguf_checkpoint: str | None = None,
        device_map: str | None = None,
        max_memory: str | None = None,
        vram_mode: str = DEFAULT_VRAM_MODE,
    ) -> None:
        """Load the model and tokenizer through ``load_model_and_tokenizer``.

        ``vram_mode`` is translated to a prefetch count; a count above zero
        makes the loader prepare the model for offloading.
        """
        self.device = device
        self.vram_mode = vram_mode
        self.prefetch_count = vram_mode_to_prefetch_count(vram_mode)

        loader_options = {
            "dtype": dtype,
            "device": device,
            "gguf_checkpoint": gguf_checkpoint,
            "for_offload": self.prefetch_count > 0,
            "device_map": device_map,
            "max_memory": max_memory,
        }
        self.model, self.tokenizer = load_model_and_tokenizer(model_path, **loader_options)

    @torch.inference_mode()
    def generate(
        self,
        prompt: str,
        input_images: Sequence[Image.Image] = (),
        image_size: tuple[int, int] = (DEFAULT_WIDTH, DEFAULT_HEIGHT),
        cfg_scale: float = 4.0,
        img_cfg_scale: float = 1.0,
        timestep_shift: float = 3.0,
        cfg_interval: tuple[float, float] = (0.0, 1.0),
        num_steps: int = 50,
        think_mode: bool = True,
        system_message: str = DEFAULT_SYSTEM_MESSAGE,
        seed: int = 0,
    ) -> tuple[str, list[Image.Image]]:
        """Run one interleaved generation and return ``(text, images)``.

        All sampling options are forwarded unchanged to the model's
        ``interleave_gen`` (always with ``verbose=True``); every returned
        image tensor is converted to a PIL image with ``_to_pil``.
        """
        with make_offload_ctx(self.model, self.prefetch_count, self.device) as offloaded:
            sampling_options = {
                "images": list(input_images),
                "image_size": image_size,
                "cfg_scale": cfg_scale,
                "img_cfg_scale": img_cfg_scale,
                "timestep_shift": timestep_shift,
                "cfg_interval": cfg_interval,
                "num_steps": num_steps,
                "system_message": system_message,
                "think_mode": think_mode,
                "seed": seed,
                "verbose": True,
            }
            text, image_tensors = offloaded.interleave_gen(self.tokenizer, prompt, **sampling_options)
            pil_images = [_to_pil(tensor) for tensor in image_tensors]
            return text, pil_images
def _load_input_images(paths: Sequence[str], image_root: str = "") -> list[Image.Image]:
    """Load images from ``paths`` as RGB.

    When ``image_root`` is set, it is prepended to any non-absolute path;
    absolute paths are used as-is.

    Raises:
        FileNotFoundError: If a resolved path does not exist.
    """
    images: list[Image.Image] = []
    for p in paths:
        resolved = p if not image_root or Path(p).is_absolute() else str(Path(image_root) / p)
        if not Path(resolved).exists():
            raise FileNotFoundError(f"input image not found: {resolved}")
        # ``convert`` returns an independent in-memory copy, so the file handle
        # held by the lazily-loaded source image can be released right away.
        with Image.open(resolved) as source:
            images.append(source.convert("RGB"))
    return images
def _resolve_image_size(
    input_images: Sequence[Image.Image],
    fallback_w: int,
    fallback_h: int,
) -> tuple[int, int]:
    """Choose the generation size as ``(W, H)``.

    Without input images the caller-provided fallback is returned untouched
    (it is expected to already be one of ``SUPPORTED_RESOLUTIONS``). With
    input images the size of the first one is passed through
    ``smart_resize`` so edits stay aligned with the source picture.
    """
    if not input_images:
        return fallback_w, fallback_h
    source_w, source_h = input_images[0].size
    # smart_resize takes and returns (height, width); swap back to (W, H).
    target_h, target_w = smart_resize(source_h, source_w)
    return target_w, target_h
def _save_outputs(
    text: str,
    images: Sequence[Image.Image],
    out_dir: Path,
    stem: str,
    input_images: Sequence[Image.Image] = (),
    prompt: str = "",
) -> list[str]:
    """Write one sample's text, input images and generated images to ``out_dir``.

    Files written (``out_dir`` is created when missing):
      * ``<stem>.txt`` - the model text, preceded by a ``# PROMPT`` /
        ``# OUTPUT`` header when ``prompt`` is non-empty;
      * ``<stem>_input_<i>.png`` - each user-supplied input image;
      * ``<stem>_image_<i>.png`` - each generated image.

    Returns the file names (relative to ``out_dir``) of the saved input images.
    """
    out_dir.mkdir(parents=True, exist_ok=True)

    transcript = f"# PROMPT\n{prompt}\n\n# OUTPUT\n{text}\n" if prompt else text
    text_path = out_dir / f"{stem}.txt"
    text_path.write_text(transcript, encoding="utf-8")
    print(f"[saved] {text_path}")

    input_names: list[str] = []
    for index, picture in enumerate(input_images):
        file_name = f"{stem}_input_{index}.png"
        destination = out_dir / file_name
        picture.save(destination)
        input_names.append(file_name)
        print(f"[saved] {destination}")

    for index, picture in enumerate(images):
        destination = out_dir / f"{stem}_image_{index}.png"
        picture.save(destination)
        print(f"[saved] {destination}")

    return input_names
def _sample_images(sample: dict, image_root: str = "") -> list[Image.Image]:
    """Load the input images referenced by a JSONL sample.

    Paths are read from ``sample['image']``, falling back to
    ``sample['images']``; a missing or empty entry means no input images.
    Relative paths are resolved against ``image_root`` when it is provided.
    """
    image_paths = sample.get("image")
    if not image_paths:
        image_paths = sample.get("images")
    if not image_paths:
        image_paths = []
    return _load_input_images(image_paths, image_root=image_root)
def _extract_prompt(sample: dict) -> str:
    """Return the user prompt of a sample.

    Two layouts are understood: flat ``{"prompt": ...}`` and ShareGPT-style
    ``{"conversations": [{"from": "human", "value": ...}, ...]}``. A
    ``prompt`` key wins when both are present; otherwise the first turn whose
    ``from`` is ``"human"`` supplies the value.

    Raises:
        ValueError: If neither layout yields a prompt.
    """
    if "prompt" not in sample:
        turns = sample.get("conversations", [])
        first_human = next((turn for turn in turns if turn.get("from") == "human"), None)
        if first_human is None:
            raise ValueError("sample has no 'prompt' and no human turn in 'conversations'")
        return first_human["value"]
    return sample["prompt"]
def parse_args() -> argparse.Namespace:
    """Define the command-line interface of the interleave script and parse it.

    Exactly one of ``--prompt`` (single sample) or ``--jsonl`` (batch) must
    be given. The remaining options select the model, the output location,
    the fallback resolution, the sampling hyper-parameters and the runtime
    (device, dtype, offloading, attention backend, profiling).
    """
    parser = argparse.ArgumentParser(description="Interleaved text+image inference for SenseNova-U1.")

    # ---- model -----------------------------------------------------------
    parser.add_argument(
        "--model_path",
        required=True,
        help="HuggingFace Hub id (e.g. sensenova/SenseNova-U1-8B-MoT) or a local path.",
    )
    parser.add_argument(
        "--lora_path",
        required=False,
        default=None,
        help="HuggingFace Hub id or a local path to a lora model.",
    )

    # ---- input: one prompt or a JSONL file ---------------------------------
    jsonl_help = (
        'JSONL file, one sample per line. Required: {"prompt": ...} or '
        '{"conversations": [{"from": "human", "value": ...}, ...]}. '
        'Optional: {"image": [paths], "width": W, "height": H, "seed": S, '
        '"think_mode": bool}. '
        "If 'image' is set, output size follows the first input image "
        "(via smart_resize); 'width'/'height' are used only for "
        "text-only samples."
    )
    source_group = parser.add_mutually_exclusive_group(required=True)
    source_group.add_argument("--prompt", help="Generate from a single text prompt.")
    source_group.add_argument("--jsonl", help=jsonl_help)

    image_help = (
        "Path to an input image (repeatable). Only valid with --prompt. "
        "The prompt should contain a matching '<image>' placeholder per image."
    )
    parser.add_argument("--image", action="append", default=[], help=image_help)

    image_root_help = (
        "Directory prepended to relative image paths in --jsonl samples. "
        "Absolute paths (in the jsonl or --image) are used as-is."
    )
    parser.add_argument("--image_root", default="", help=image_root_help)

    # ---- output ------------------------------------------------------------
    parser.add_argument("--output_dir", default="outputs", help="Directory for generated text + images.")
    parser.add_argument(
        "--stem",
        default="sample",
        help="Filename stem when using --prompt. Generated files are <stem>.txt and <stem>_image_<i>.png.",
    )

    # ---- fallback resolution -------------------------------------------------
    bucket_w, bucket_h = SUPPORTED_RESOLUTIONS[DEFAULT_RESOLUTION]
    resolution_help = (
        "Aspect-ratio bucket used when no input image is provided "
        f"(default: {DEFAULT_RESOLUTION} -> "
        f"{bucket_w}x"
        f"{bucket_h}). "
        "Overridden by --width/--height when both are set."
    )
    parser.add_argument(
        "--resolution",
        default=DEFAULT_RESOLUTION,
        choices=list(SUPPORTED_RESOLUTIONS),
        help=resolution_help,
    )
    parser.add_argument(
        "--width",
        type=int,
        default=None,
        help="Explicit fallback width. Overrides --resolution when both --width and --height are set.",
    )
    parser.add_argument(
        "--height",
        type=int,
        default=None,
        help="Explicit fallback height. Overrides --resolution when both --width and --height are set.",
    )

    # ---- sampling ------------------------------------------------------------
    parser.add_argument("--cfg_scale", type=float, default=4.0)
    parser.add_argument("--img_cfg_scale", type=float, default=1.0)
    parser.add_argument("--timestep_shift", type=float, default=3.0)
    parser.add_argument(
        "--cfg_interval",
        type=float,
        nargs=2,
        default=[0.0, 1.0],
        metavar=("LO", "HI"),
    )
    parser.add_argument("--num_steps", type=int, default=50)
    parser.add_argument(
        "--think_mode",
        action=argparse.BooleanOptionalAction,
        default=True,
        help="Enable <think></think> reasoning before the final answer. On by default; pass --no-think_mode to disable.",
    )
    parser.add_argument(
        "--system_message",
        default=DEFAULT_SYSTEM_MESSAGE,
        help="Override the default interleave system prompt.",
    )
    seed_help = (
        f"Random seed for reproducible sampling (default: {DEFAULT_SEED}). "
        "In --jsonl mode, a per-sample `seed` field overrides this."
    )
    parser.add_argument("--seed", type=int, default=DEFAULT_SEED, help=seed_help)

    # ---- runtime -------------------------------------------------------------
    parser.add_argument(
        "--device",
        default=str(best_available_device()),
        help="Compute device, e.g. 'cuda', 'cuda:0', 'xpu', 'xpu:0', 'cpu'. Defaults to the best available accelerator.",
    )
    parser.add_argument(
        "--dtype",
        default="bfloat16",
        choices=["bfloat16", "float16", "float32"],
    )
    # Offload options are registered by the shared helper, between --dtype
    # and --gguf_checkpoint.
    add_offload_args(parser)

    gguf_help = (
        "Optional path to a .gguf quantized checkpoint. When set, the dequantizing "
        "diffusers GGUF Linear layer is used instead of safetensors weights. "
        "Requires the [gguf] extra (gguf>=0.10.0, diffusers>=0.30.0)."
    )
    parser.add_argument("--gguf_checkpoint", default=None, help=gguf_help)

    attn_help = "Attention kernel used by the Qwen3 layers. 'auto' picks flash-attn when importable and falls back to SDPA."
    parser.add_argument(
        "--attn_backend",
        default="auto",
        choices=["auto", "flash", "sdpa"],
        help=attn_help,
    )

    profile_help = (
        "Print timing and CUDA memory stats: model load time, average "
        "per-image generation time, peak GPU memory, and the same time "
        f"normalized per image token (patch size = {DEFAULT_IMAGE_PATCH_SIZE})."
    )
    parser.add_argument("--profile", action="store_true", help=profile_help)

    return parser.parse_args()
def main() -> None:
    """Entry point: load the model once, then run single-sample or batch inference.

    With ``--prompt`` a single generation is run and saved under
    ``<output_dir>/<stem>*``. With ``--jsonl`` every non-blank line is one
    sample; outputs are saved as ``<NNNN>_think*`` / ``<NNNN>_no_think*`` and
    one summary record per sample is appended to ``results.jsonl``.

    Per-sample ``width``/``height``, ``think_mode`` and ``seed`` fields
    override the command-line values. Reproducibility is controlled by the
    ``seed`` forwarded to ``engine.generate``.
    """
    args = parse_args()
    dtype = {"bfloat16": torch.bfloat16, "float16": torch.float16, "float32": torch.float32}[args.dtype]

    sensenova_u1.set_attn_backend(args.attn_backend)
    print(f"[attn] backend={args.attn_backend!r} (effective={sensenova_u1.effective_attn_backend()!r})")

    profiler = InferenceProfiler(
        enabled=args.profile,
        device=args.device,
        config={
            "vram_mode": args.vram_mode,
            "attn_backend": sensenova_u1.effective_attn_backend(),
            "dtype": args.dtype,
            "gguf": args.gguf_checkpoint,
        },
    )

    with profiler.time_load():
        engine = SenseNovaU1Interleave(
            args.model_path,
            device=args.device,
            dtype=dtype,
            gguf_checkpoint=args.gguf_checkpoint,
            device_map=args.device_map,
            max_memory=args.max_memory,
            vram_mode=args.vram_mode,
        )
    if args.lora_path is not None:
        print(f"load lora {args.lora_path}")
        engine.model = load_and_merge_lora_weight_from_safetensors(engine.model, args.lora_path)

    cfg_interval = tuple(args.cfg_interval)
    out_dir = Path(args.output_dir)

    if args.width is not None and args.height is not None:
        fallback_w, fallback_h = args.width, args.height
        _warn_if_unsupported(fallback_w, fallback_h)
    else:
        if (args.width is None) != (args.height is None):
            # A lone --width or --height has no effect; say so instead of
            # silently dropping it.
            print("[warn] --width and --height must be given together; falling back to --resolution.")
        fallback_w, fallback_h = SUPPORTED_RESOLUTIONS[args.resolution]

    # Single-sample inference: --prompt + optional --image (repeatable).
    if args.prompt is not None:
        print("prompt:", args.prompt)
        input_images = _load_input_images(args.image)
        w, h = _resolve_image_size(input_images, fallback_w, fallback_h)
        with profiler.time_generate(w, h, 1):
            text, images = engine.generate(
                args.prompt,
                input_images=input_images,
                image_size=(w, h),
                cfg_scale=args.cfg_scale,
                img_cfg_scale=args.img_cfg_scale,
                timestep_shift=args.timestep_shift,
                cfg_interval=cfg_interval,
                num_steps=args.num_steps,
                think_mode=args.think_mode,
                system_message=args.system_message,
                seed=args.seed,
            )
        profiler.update_last_batch(len(images))
        print(f"[text] {text}")
        _save_outputs(
            text,
            images,
            out_dir,
            args.stem,
            input_images=input_images,
            prompt=args.prompt,
        )
        profiler.report()
        return

    # Batch inference: one sample per line in --jsonl.
    if args.image:
        print("[warn] --image is ignored in --jsonl mode; list image paths in each sample's 'image' field.")
    # The samples may contain non-ASCII prompts, so do not rely on the
    # platform default encoding.
    with open(args.jsonl, encoding="utf-8") as f:
        samples = [json.loads(line) for line in f if line.strip()]

    try:
        from tqdm import tqdm
    except ImportError:

        def tqdm(x, **_kw):  # type: ignore[no-redef]
            return x

    results_path = out_dir / "results.jsonl"
    out_dir.mkdir(parents=True, exist_ok=True)
    with open(results_path, "w", encoding="utf-8") as rf:
        for i, sample in enumerate(tqdm(samples, desc="interleave")):
            prompt = _extract_prompt(sample)
            input_images = _sample_images(sample, image_root=args.image_root)

            if input_images:
                # When the sample ships input images, always follow their
                # size (via smart_resize); any per-sample width/height is
                # treated as a no-input-image fallback only.
                w, h = _resolve_image_size(input_images, fallback_w, fallback_h)
            elif "width" in sample and "height" in sample:
                w, h = int(sample["width"]), int(sample["height"])
                _warn_if_unsupported(w, h)
            else:
                w, h = fallback_w, fallback_h

            think_mode = bool(sample.get("think_mode", args.think_mode))
            # A per-sample seed overrides --seed, as the CLI help promises.
            seed = int(sample.get("seed", args.seed))

            with profiler.time_generate(w, h, 1):
                text, images = engine.generate(
                    prompt,
                    input_images=input_images,
                    image_size=(w, h),
                    cfg_scale=args.cfg_scale,
                    img_cfg_scale=args.img_cfg_scale,
                    timestep_shift=args.timestep_shift,
                    cfg_interval=cfg_interval,
                    num_steps=args.num_steps,
                    think_mode=think_mode,
                    system_message=args.system_message,
                    seed=seed,
                )
            profiler.update_last_batch(len(images))

            stem = f"{i + 1:04d}" + ("_think" if think_mode else "_no_think")
            input_names = _save_outputs(
                text,
                images,
                out_dir,
                stem,
                input_images=input_images,
                prompt=prompt,
            )
            rf.write(
                json.dumps(
                    {
                        "index": i,
                        "prompt": prompt,
                        "text": text,
                        "input_images": input_names,
                        "images": [f"{stem}_image_{j}.png" for j in range(len(images))],
                        "width": w,
                        "height": h,
                        "think_mode": think_mode,
                        "seed": seed,
                    },
                    ensure_ascii=False,
                )
                + "\n"
            )
            # Flush per sample so partial results survive an interrupted run.
            rf.flush()

    print(f"[saved] {results_path}")
    profiler.report()


if __name__ == "__main__":
    main()
#!/usr/bin/env bash
# Example invocations of examples/interleave/inference.py.
#
# Environment:
#   MODEL_PATH  (required) HuggingFace Hub id or local checkpoint path.
#   REPO_ROOT   (optional) path to the SenseNova-U1 checkout.
#   OUTPUT_DIR  (optional) where results are written
#               (default: <repo>/examples/interleave/output).

# Stop at the first failing command and treat unset variables as errors.
set -euo pipefail

repo_root=${REPO_ROOT:-path/to/SenseNova-U1}
# Fail early with a clear message instead of passing an empty --model_path.
model_path=${MODEL_PATH:?set MODEL_PATH to a HuggingFace Hub id or a local checkpoint path}
example_dir=${repo_root}/examples/interleave
output_dir=${OUTPUT_DIR:-${example_dir}/output}

# 1) Single sample, text prompt only.
#    Output resolution comes from --resolution (default 16:9 -> 2048x1152).
python "${example_dir}/inference.py" \
    --model_path "${model_path}" \
    --prompt "I want to learn how to cook tomato and egg stir-fry. Please give me a beginner-friendly illustrated tutorial." \
    --output_dir "${output_dir}/text" \
    --stem "demo_text" \
    --profile

# 2) Single sample, text prompt + one input image.
#    Each '<image>' placeholder in the prompt binds to one --image path,
#    in order. Output resolution follows the first input image
#    (via smart_resize), ignoring --resolution/--width/--height.
#    The prompt uses $'...' quoting so that \n is a real newline, matching
#    the JSONL samples; inside "..." bash would pass a literal backslash-n.
python "${example_dir}/inference.py" \
    --model_path "${model_path}" \
    --prompt $'<image>\n图文交错生成小猫游览故宫的场景' \
    --image "${example_dir}/data/images/image0.jpg" \
    --output_dir "${output_dir}/text_image" \
    --stem "demo_text_image" \
    --profile

# 3) Each line in the JSONL is one sample:
#      {"prompt": "...", "image": ["images/a.jpg", ...],
#       "width": 2048, "height": 1152, "seed": 42, "think_mode": true}
#    Relative 'image' paths are resolved against --image_root; absolute
#    paths are used as-is. If 'image' is set, the output size follows
#    the first input image; 'width'/'height' only apply to text-only samples.
python "${example_dir}/inference.py" \
    --model_path "${model_path}" \
    --jsonl "${example_dir}/data/samples.jsonl" \
    --output_dir "${output_dir}/jsonl" \
    --profile

# For example, running interleaved reasoning samples on VBVR-Bench (Image) that
# requires multi-step visual generation given an input image.
# Note: VBVR-Bench runs on "no-think" mode.
python "${example_dir}/inference.py" \
    --model_path "${model_path}" \
    --jsonl "${example_dir}/data/samples_reasoning.jsonl" \
    --image_root "${repo_root}" \
    --output_dir "${output_dir}/reasoning" \
    --no-think_mode \
    --profile
\ No newline at end of file
from __future__ import annotations
import argparse
import base64
import json
import re
from datetime import datetime
from pathlib import Path
from typing import Any
import requests
# Defaults of the --url, --api-key and --model command-line options.
DEFAULT_BASE_URL = "http://0.0.0.0:8000/v1"
DEFAULT_API_KEY = "dummy"
DEFAULT_MODEL = "sensenova-u1"
# System prompt sent by `run_interleave_stream`; it describes the
# think / non-think protocol for interleaved text+image answers.
INTERLEAVE_SYSTEM_PROMPT = (
    "You are a multimodal assistant capable of reasoning with both text and images. "
    "You support two modes:\n\n"
    "Think Mode: When reasoning is needed, you MUST start with a <think></think> block "
    "and place all reasoning inside it. You MUST interleave text with generated images "
    "using tags like <image1>, <image2>. Images can ONLY be generated between <think> and "
    "</think>, and may be referenced in the final answer.\n\n"
    "Non-Think Mode: When no reasoning is needed, directly provide the answer without reasoning. "
    "Do not use tags like <image1>, <image2>; present any images naturally alongside the text.\n\n"
    "After the think block, always provide a concise, user-facing final answer. "
    "The answer may include text, images, or both. Match the user's language in both reasoning "
    "and the final answer."
)
# System prompt sent by `run_t2i` and `run_it2i`; it covers text-to-image
# generation and image editing.
GENERATION_SYSTEM_PROMPT = (
    "You are an image generation and editing assistant that accurately understands and executes "
    "user intent.\n\nYou support two modes:\n\n1. Think Mode:\nIf the task requires reasoning, you "
    "MUST start with a <think></think> block. Put all reasoning inside the block using plain text. "
    "DO NOT include any image tags. Keep it reasonable and directly useful for producing the final "
    "image.\n\n2. Non-Think Mode:\nIf no reasoning is needed, directly produce the final image.\n\n"
    "Task Types:\n\nA. Text-to-Image Generation:\n"
    "- Generate a high-quality image based on the user's description.\n"
    "- Ensure visual clarity, semantic consistency, and completeness.\n"
    "- DO NOT introduce elements that contradict or override the user's intent.\n\n"
    "B. Image Editing:\n"
    "- Use the provided image(s) as input or reference for modification or transformation.\n"
    "- The result can be an edited image or a new image based on the reference(s).\n"
    "- Preserve all unspecified attributes unless explicitly changed.\n\n"
    "General Rules:\n"
    "- For any visible text in the image, follow the language specified for the rendered text in "
    "the user's description, not the language of the prompt. If no language is specified, use the "
    "user's input language."
)
# The bare string below is not executed as configuration; it is a reference
# copy of an aspect-ratio / size-preset -> resolution table.
# NOTE(review): presumably mirrors the table used by the serving backend and
# lists pairs as (width, height) - confirm against the server code.
"""
_aspect_ratio_to_resolution: ClassVar[dict] = {
"1:1": {"1K": (1024, 1024), "1.5K": (1536, 1536), "2K": (2048, 2048)},
"16:9": {"1.5K": (2048, 1152), "2K": (2720, 1536)},
"9:16": {"1.5K": (1152, 2048), "2K": (1536, 2720)},
"3:2": {"1.5K": (1888, 1248), "2K": (2496, 1664)},
"2:3": {"1.5K": (1248, 1888), "2K": (1664, 2496)},
"4:3": {"1.5K": (1760, 1312), "2K": (2368, 1760)},
"3:4": {"1.5K": (1312, 1760), "2K": (1760, 2368)},
"1:2": {"1.5K": (1088, 2144), "2K": (1440, 2880)},
"2:1": {"1.5K": (2144, 1088), "2K": (2880, 1440)},
"1:3": {"1.5K": (864, 2592), "2K": (1152, 3456)},
"3:1": {"1.5K": (2592, 864), "2K": (3456, 1152)},
}
"""
# Base ``image_config`` payload. `build_image_config` copies it and overrides
# aspect_ratio, image_size, seed, height and width from the command line.
IMAGE_CONFIG_DEFAULT = {
    "aspect_ratio": "16:9",
    "image_size": "2K",
    "image_type": "jpeg",
    "seed": 42,
    # If True, the generated image has the same resolution as the input image.
    # If False, the resolution is determined by image_size and aspect_ratio.
    # `build_image_config` switches this to False when both height and width
    # are positive.
    "dynamic_resolution": True,
    # To choose the resolution of the generated image yourself, set height and
    # width. The default of -1 means "not set".
    "height": -1,
    "width": -1,
}
def parse_args() -> argparse.Namespace:
    """Parse the command line of the API test client.

    ``--mode`` and ``--prompt`` are mandatory. The remaining options select
    the endpoint (``--url``, ``--api-key``, ``--model``), the output
    directory, the text sampling parameters and the image configuration
    (seed, aspect ratio, size preset or explicit height/width).
    """
    parser = argparse.ArgumentParser(description="OpenAI-compatible API test client for LightLLM + LightX2V.")
    # Both options are required, so the help must not promise an interactive
    # fallback: argparse exits with a usage error when either is missing.
    parser.add_argument(
        "--mode",
        required=True,
        choices=["t2i", "it2i", "interleave", "vqa"],
        help="Test mode to run (required).",
    )
    parser.add_argument("--prompt", required=True, help="User prompt sent to the model (required).")
    parser.add_argument(
        "--image_path",
        default=None,
        help="Input image path for it2i / interleave.",
    )
    parser.add_argument("--url", default=DEFAULT_BASE_URL)
    parser.add_argument("--api-key", default=DEFAULT_API_KEY)
    parser.add_argument("--model", default=DEFAULT_MODEL)
    parser.add_argument(
        "--out-dir",
        default="./api_test_outputs",
        help="Directory to save generated images and raw responses.",
    )
    parser.add_argument("--temperature", type=float, default=0.8)
    parser.add_argument("--top-p", type=float, default=0.95)
    parser.add_argument("--max-tokens", type=int, default=4096)
    parser.add_argument(
        "--enable-thinking",
        action=argparse.BooleanOptionalAction,
        default=True,
        help="Pass chat_template_kwargs.enable_thinking to backend.",
    )
    parser.add_argument(
        "--seed",
        type=int,
        default=IMAGE_CONFIG_DEFAULT["seed"],
        help="Sampling seed for image config / streaming request.",
    )
    parser.add_argument(
        "--aspect-ratio",
        default=IMAGE_CONFIG_DEFAULT["aspect_ratio"],
        help="Aspect ratio for generated image (e.g. 16:9, 1:1).",
    )
    parser.add_argument(
        "--image-size",
        default=IMAGE_CONFIG_DEFAULT["image_size"],
        help="Image size preset for generation (e.g. 1.5K, 2K).",
    )
    parser.add_argument(
        "--height",
        type=int,
        default=IMAGE_CONFIG_DEFAULT["height"],
        help="Manual image height. Use with --width; keep -1 for auto resolution.",
    )
    parser.add_argument(
        "--width",
        type=int,
        default=IMAGE_CONFIG_DEFAULT["width"],
        help="Manual image width. Use with --height; keep -1 for auto resolution.",
    )
    return parser.parse_args()
def build_image_config(args: argparse.Namespace) -> dict[str, Any]:
    """Assemble the ``image_config`` request field from the parsed arguments.

    Starts from a copy of ``IMAGE_CONFIG_DEFAULT`` and overrides the aspect
    ratio, size preset, seed, height and width. When both height and width
    are positive, ``dynamic_resolution`` is turned off.
    """
    image_config: dict[str, Any] = dict(IMAGE_CONFIG_DEFAULT)
    image_config.update(
        aspect_ratio=args.aspect_ratio,
        image_size=args.image_size,
        seed=args.seed,
        height=args.height,
        width=args.width,
    )
    has_manual_size = args.height > 0 and args.width > 0
    if has_manual_size:
        image_config["dynamic_resolution"] = False
    return image_config
def local_image_to_data_url(path: str) -> str:
    """Read a local image file and return it as a base64 ``data:`` URL.

    The MIME type is chosen from the lower-cased file suffix: ``.jpg`` /
    ``.jpeg`` -> ``image/jpeg``, ``.png`` -> ``image/png``, ``.webp`` ->
    ``image/webp``; any other suffix falls back to ``image/jpeg``.

    Raises:
        FileNotFoundError: If ``path`` does not exist.
    """
    image_path = Path(path)
    if not image_path.exists():
        raise FileNotFoundError(f"image not found: {image_path}")

    mime_by_suffix = {
        ".jpg": "image/jpeg",
        ".jpeg": "image/jpeg",
        ".png": "image/png",
        ".webp": "image/webp",
    }
    mime = mime_by_suffix.get(image_path.suffix.lower(), "image/jpeg")
    encoded = base64.b64encode(image_path.read_bytes()).decode("utf-8")
    return f"data:{mime};base64,{encoded}"
def save_data_url_to_file(data_url: str, path: Path) -> None:
    """Decode a base64 ``data:image/...`` URL and write the bytes to ``path``.

    Raises:
        ValueError: If ``data_url`` does not start with a
            ``data:image/<subtype>;base64,`` prefix followed by data.
    """
    data_url_pattern = r"data:image/(?P<subtype>[\w+.-]+);base64,(?P<b64>.+)"
    matched = re.match(data_url_pattern, data_url, re.DOTALL)
    if matched is None:
        raise ValueError(f"unsupported data url prefix: {data_url[:80]}...")
    payload = matched.group("b64")
    raw = base64.b64decode(payload)
    path.write_bytes(raw)
    print(f"[saved] {path} ({len(raw)} bytes)")
def save_images_from_message(message: dict[str, Any], out_dir: Path, prefix: str) -> None:
    """Save every inline image of an assistant message to ``out_dir``.

    Entries of ``message["images"]`` that are not dicts, or whose
    ``image_url.url`` is not a ``data:image/`` URL, are skipped. Files are
    named ``<prefix>_<idx>.<ext>`` where ``idx`` is the entry's position in
    the list and ``ext`` is ``jpg`` or ``webp`` for those MIME types and
    ``png`` for everything else.
    """
    for idx, item in enumerate(message.get("images") or []):
        if not isinstance(item, dict):
            continue
        image_url = (item.get("image_url") or {}).get("url", "")
        if not image_url.startswith("data:image/"):
            continue
        if image_url.startswith(("data:image/jpeg", "data:image/jpg")):
            ext = "jpg"
        elif image_url.startswith("data:image/webp"):
            ext = "webp"
        else:
            ext = "png"
        save_data_url_to_file(image_url, out_dir / f"{prefix}_{idx}.{ext}")
def build_client(base_url: str, api_key: str) -> tuple[str, dict[str, str]]:
    """Return the chat-completions URL and the HTTP headers for requests.

    Trailing slashes of ``base_url`` are dropped before ``/chat/completions``
    is appended; the headers carry a bearer token and a JSON content type.
    """
    endpoint = base_url.rstrip("/") + "/chat/completions"
    auth_headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json",
    }
    return endpoint, auth_headers
def run_t2i(args: argparse.Namespace, out_dir: Path, timestamp: str) -> None:
    """Send one non-streaming text-to-image request and store the results.

    The raw JSON response is written to ``<timestamp>_t2i_response.json``,
    the assistant text is printed, and inline images of the first choice are
    saved with the prefix ``<timestamp>_t2i``.
    """
    endpoint, request_headers = build_client(args.url, args.api_key)
    image_config = build_image_config(args)
    conversation = [
        {"role": "system", "content": GENERATION_SYSTEM_PROMPT},
        {"role": "user", "content": args.prompt},
    ]
    request_body = {
        "model": args.model,
        "messages": conversation,
        "modalities": ["image"],
        "stream": False,
        "n": 1,
        "temperature": args.temperature,
        "top_p": args.top_p,
        "max_tokens": args.max_tokens,
        "chat_template_kwargs": {"enable_thinking": args.enable_thinking},
        "image_config": image_config,
    }

    reply = requests.post(endpoint, headers=request_headers, json=request_body, timeout=600)
    reply.raise_for_status()
    parsed = reply.json()

    dump_path = out_dir / f"{timestamp}_t2i_response.json"
    dump_path.write_text(json.dumps(parsed, ensure_ascii=False, indent=2), encoding="utf-8")
    print(f"[saved] {dump_path}")

    # Tolerate a missing or empty "choices" list by falling back to {}.
    first_choice = (parsed.get("choices") or [{}])[0]
    assistant_message = first_choice.get("message") or {}
    print("\n--- assistant content ---")
    print(assistant_message.get("content", ""))
    save_images_from_message(assistant_message, out_dir=out_dir, prefix=f"{timestamp}_t2i")
def run_it2i(args: argparse.Namespace, out_dir: Path, timestamp: str) -> None:
    """Send one non-streaming image+text-to-image request and store the results.

    The input image given by ``--image_path`` is inlined as a base64 data URL
    ahead of the text prompt. The raw JSON response is written to
    ``<timestamp>_it2i_response.json``, the assistant text is printed, and
    inline images of the first choice are saved with the prefix
    ``<timestamp>_it2i``.

    Raises:
        ValueError: If no ``--image_path`` was supplied.
    """
    # Explicit check instead of ``assert`` so it also holds under ``python -O``.
    if args.image_path is None:
        raise ValueError("image_path is required")
    chat_url, headers = build_client(args.url, args.api_key)
    image_config = build_image_config(args)
    payload = {
        "model": args.model,
        "messages": [
            {"role": "system", "content": GENERATION_SYSTEM_PROMPT},
            {
                "role": "user",
                "content": [
                    # The parser stores the option as ``image_path``; there is
                    # no ``args.image`` attribute.
                    {"type": "image_url", "image_url": {"url": local_image_to_data_url(args.image_path)}},
                    {"type": "text", "text": args.prompt},
                ],
            },
        ],
        "modalities": ["image"],
        "stream": False,
        "n": 1,
        "temperature": args.temperature,
        "top_p": args.top_p,
        "max_tokens": args.max_tokens,
        "chat_template_kwargs": {"enable_thinking": args.enable_thinking},
        "image_config": image_config,
    }
    response = requests.post(chat_url, headers=headers, json=payload, timeout=600)
    response.raise_for_status()
    data = response.json()

    raw_path = out_dir / f"{timestamp}_it2i_response.json"
    raw_path.write_text(json.dumps(data, ensure_ascii=False, indent=2), encoding="utf-8")
    print(f"[saved] {raw_path}")

    # Tolerate a missing or empty "choices" list by falling back to {}.
    message = ((data.get("choices") or [{}])[0]).get("message") or {}
    print("\n--- assistant content ---")
    print(message.get("content", ""))
    save_images_from_message(message, out_dir=out_dir, prefix=f"{timestamp}_it2i")
def run_interleave_stream(args: argparse.Namespace, out_dir: Path, timestamp: str) -> None:
    """Stream an interleaved text+image generation and save its outputs.

    Builds a chat request with an optional input image plus the text prompt,
    posts it with ``stream=True``, and consumes the ``data: ...`` lines of the
    response. Text deltas are echoed to stdout as they arrive and collected;
    every image delivered as a ``data:image/`` URL is written to
    ``<out_dir>/<timestamp>_interleave_stream_<idx>.png``. After the stream
    ends, the concatenated text is written to
    ``<out_dir>/<timestamp>_interleave_stream.txt``.

    Args:
        args: Parsed CLI arguments. This function reads ``url``, ``api_key``,
            ``image_path``, ``model``, ``prompt``, ``temperature``, ``top_p``,
            ``max_tokens``, ``enable_thinking`` and ``seed``;
            ``build_image_config`` receives the whole namespace.
        out_dir: Directory that receives the images and the text file.
        timestamp: Prefix used to name every output file of this run.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    chat_url, headers = build_client(args.url, args.api_key)
    image_config = build_image_config(args)

    user_content = []
    if args.image_path:
        # Read the same attribute that is tested (the previous code read
        # ``args.image`` here, unlike run_vqa/main).
        user_content.append(
            {"type": "image_url", "image_url": {"url": local_image_to_data_url(args.image_path)}}
        )
    user_content.append({"type": "text", "text": args.prompt})

    payload = {
        "model": args.model,
        "messages": [
            {"role": "system", "content": INTERLEAVE_SYSTEM_PROMPT},
            {
                "role": "user",
                "content": user_content,
            },
        ],
        "modalities": ["text", "image"],
        "stream": True,
        "n": 1,
        "temperature": args.temperature,
        "top_p": args.top_p,
        "max_tokens": args.max_tokens,
        "chat_template_kwargs": {"enable_thinking": args.enable_thinking},
        "image_config": image_config,
        "seed": args.seed,
    }

    text_chunks: list[str] = []
    image_idx = 0

    # A streamed response holds its connection open until it is fully read or
    # closed; the context manager releases it on ``break`` and on exceptions.
    with requests.post(
        chat_url, headers=headers, json=payload, stream=True, timeout=600
    ) as response:
        response.raise_for_status()
        for line in response.iter_lines():
            if not line:
                continue
            decoded = line.decode("utf-8")
            if not decoded.startswith("data: "):
                continue
            body = decoded[6:]  # strip the "data: " prefix
            if body.strip() == "[DONE]":
                break
            try:
                chunk = json.loads(body)
            except json.JSONDecodeError:
                # Best effort: skip lines that are not valid JSON.
                continue
            choices = chunk.get("choices") or []
            if not choices:
                continue
            delta = choices[0].get("delta") or {}

            # Named distinctly from the request's content list built above.
            text_piece = delta.get("content")
            if text_piece:
                text_chunks.append(text_piece)
                print(text_piece, end="", flush=True)

            for image_item in delta.get("images") or []:
                image_url = (image_item.get("image_url") or {}).get("url", "")
                # Only inline data URLs are saved; other URL forms are ignored.
                if image_url.startswith("data:image/"):
                    out_file = out_dir / f"{timestamp}_interleave_stream_{image_idx}.png"
                    save_data_url_to_file(image_url, out_file)
                    image_idx += 1

    print("\n\n--- stream complete ---")
    final_text = "".join(text_chunks)
    text_path = out_dir / f"{timestamp}_interleave_stream.txt"
    text_path.write_text(final_text, encoding="utf-8")
    print(f"[saved] {text_path}")
def run_vqa(args: argparse.Namespace, out_dir: Path, timestamp: str) -> None:
    """Ask a question (optionally about an image) and print the text answer.

    Posts a single user message holding the optional input image followed by
    the prompt, then prints the assistant's ``content``. Nothing is written to
    disk; ``out_dir`` and ``timestamp`` are accepted but not used here.

    Args:
        args: Parsed CLI arguments; reads ``url``, ``api_key``, ``image_path``,
            ``prompt`` and ``model``.
        out_dir: Unused in this mode.
        timestamp: Unused in this mode.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    chat_url, headers = build_client(args.url, args.api_key)

    # The image part, when present, precedes the text part.
    image_parts = (
        [{"type": "image_url", "image_url": {"url": local_image_to_data_url(args.image_path)}}]
        if args.image_path
        else []
    )
    user_parts = [*image_parts, {"type": "text", "text": args.prompt}]

    request_body = {
        "model": args.model,
        "messages": [{"role": "user", "content": user_parts}],
    }
    reply = requests.post(chat_url, headers=headers, json=request_body, timeout=600)
    reply.raise_for_status()
    parsed = reply.json()

    # Fall back to empty structures when ``choices``/``message`` are absent.
    first_choice = (parsed.get("choices") or [{}])[0]
    answer = first_choice.get("message") or {}

    print("\n--- assistant content ---")
    print(answer.get("content", ""))
def main() -> None:
    """Parse CLI arguments, prepare the output directory, and run the selected mode.

    Raises:
        ValueError: If ``args.mode`` is not one of the supported modes.
    """
    args = parse_args()

    output_dir = Path(args.out_dir)
    output_dir.mkdir(parents=True, exist_ok=True)
    run_stamp = datetime.now().strftime("%Y%m%d_%H%M%S")

    # Echo the effective configuration before doing any work.
    print(f"[config] mode={args.mode}, model={args.model}, url={args.url}")
    if args.image_path is not None:
        print(f"[config] input image_path={args.image_path}")
    print(f"[config] output_dir={output_dir.resolve()}")

    # Map each mode name to the function that implements it.
    mode_handlers = {
        "t2i": run_t2i,
        "it2i": run_it2i,
        "interleave": run_interleave_stream,
        "vqa": run_vqa,
    }
    handler = mode_handlers.get(args.mode)
    if handler is None:
        raise ValueError(f"unknown mode: {args.mode}")
    handler(args, out_dir=output_dir, timestamp=run_stamp)


if __name__ == "__main__":
    main()
{"prompt": "Close portrait of an elderly woman by a farmhouse window, textured skin, gentle smile, warm natural light, emotional documentary look. The portrait should feel polished and natural, with sharp eyes, realistic skin texture, accurate facial anatomy, and premium lighting that keeps the face as the main focus.", "width": 1536, "height": 2720}
{"prompt": "A greeting card on a wooden desk with readable Chinese text \"生日快乐\", flowers beside it, simple celebratory styling. Any text in the image must be rendered exactly as written in quotation marks, with correct spelling, clean typography, and strong readability.", "width": 1536, "height": 2720}
{"prompt": "A neon bar sign that clearly reads \"OPEN LATE\", dark interior, moody reflections, easy text rendering. Any text in the image must be rendered exactly as written in quotation marks, with correct spelling, clean typography, and strong readability.", "width": 2720, "height": 1536}
{"prompt": "Tight portrait of a surfer with saltwater droplets on tan skin, sunlit face, windblown hair, natural freckles, vivid blue eyes, coastal realism.", "width": 2048, "height": 2048}
{"prompt": "Documentary-style portrait of a street boxer in a dim gym, bruised eyebrow, determined look, sweat sheen, precise facial anatomy, gritty realism.", "width": 2048, "height": 2048}
{"prompt": "An expressive portrait with mirrored reflections fragmenting the face into geometric shapes, sophisticated editorial art style.", "width": 2048, "height": 2048}
{"prompt": "Lavender fields stretching to the horizon under a pastel sunset, a small stone farmhouse, highly detailed flowers, romantic countryside scene.", "width": 2048, "height": 2048}
{"prompt": "Stormy seascape with waves crashing against a lighthouse, dramatic sky, realistic water motion, moody coastal photography.", "width": 2048, "height": 2048}
{"prompt": "Tropical beach with turquoise water, black volcanic rocks, swaying palms, bright noon sun, ultra-clean travel photography, balanced square crop.", "width": 2048, "height": 2048}
{"prompt": "A winter portrait of a traveler in a wool coat and scarf, rosy cheeks, frosty air, bright eyes, elegant vertical framing. The portrait should feel polished and natural, with sharp eyes, realistic skin texture, accurate facial anatomy, and premium lighting that keeps the face as the main focus.", "width": 1536, "height": 2720}
{"prompt": "A violinist standing beneath stage lights, elegant bow arm and grounded posture, full body visible.", "width": 1536, "height": 2720}
{"prompt": "A woman seen through rain-covered glass from head to waist, elongated reflections, emotional and refined visual poetry.", "width": 1536, "height": 2720}
{"prompt": "A cafe takeaway cup standing on a clean counter, with the sleeve text rendered clearly as \"SenseNova-U1\", realistic paper texture, morning light, cozy interior blur, and no additional readable menu boards.", "width": 1536, "height": 2720}
{"prompt": "Cherry blossom trees arching over a temple stairway, petals drifting downward, elegant spring vertical composition.", "width": 1536, "height": 2720}
{"prompt": "A quiet church interior with sunlight touching one empty chair, tall architecture, meditative artistic stillness. The final image should feel intentional and refined, with a clear artistic mood, thoughtful composition, nuanced color control, and a gallery-like sense of visual storytelling.", "width": 1536, "height": 2720}
{"prompt": "This infographic, titled \"GAMER_INTEL // 游戏动态\", presents a stylized analysis of the mobile gaming landscape through a retro-futuristic, Windows 95-inspired interface. The design features a vibrant 80s/90s cyberpunk aesthetic with a purple-to-cyan gradient background, pixelated palm trees, leaping dolphins, and classical busts rendered in glitchy, chromatic aberration effects. The overall layout mimics an old computer desktop with overlapping windows, each containing distinct data elements.\n\nThe main title \"GAMER_INTEL // 游戏动态\" is displayed prominently at the top in bold, blocky white text with cyan and magenta outlines, accompanied by vertical Japanese text on both sides: \"プロモーション\" (Promotion) on the left and \"ケロモーショーン\" (a playful, non-standard term possibly meaning \"promotion\" or \"emotion\") on the right. Below the title, the word \"PROMOTION\" appears again in English, followed by its Japanese translation.\n\nSeveral window-like panels are layered over the background:\n\n1. **Quarterly Gaming Report Window**:\n - Title: \"Global_Campaign_Stats.exe\"\n - Content: \"Quarterly Gaming Report\"\n - Description: \"Analysis of the current mobile gaming landscape. User retention is peaking due to limited-time seasonal collaborations and ‘Gacha’ mechanics optimizations. Virtual economies are shifting toward retro-aesthetic skins.\"\n - Highlighted phrase: “Peak Performance” in bright pink text.\n\n2. **System Error Alert Box**:\n - Title: \"System Error\"\n - Message: \"Alert: Stamina Low\"\n - Warning: \"WARNING: Player engagement cycles are expiring. Deploy 'Weekend Bonus' patches immediately to mitigate churn rates and maintain server load levels.\"\n\n3. **Active Campaigns Playlist Window**:\n - Title: \"Active Campaigns\" with a lightning bolt icon\n - Subtitle: \"Playlist: Active Events\"\n - List of events:\n 1. Summer_Vibe_Logins (Daily Rewards)\n 2. Cyber_Raid_Boss (Global Challenge)\n 3. Neon_Gacha_Banner (300% Drop Rate)\n 4. 
Guild_Warfare_S4 (Competitive Play)\n - Highlighted phrase: “300% Drop” in green text at the bottom.\n\n4. **File Explorer Window**:\n - Title: \"File Explorer\"\n - Path: \"D:/Market_Assets\"\n - Folder content list under \"Visual Trends\":\n - IMG_01: SSR_Limited_Character\n - IMG_02: Retro_Skin_Bundle\n - IMG_03: Expansion_Pack_v2.0\n - IMG_04: Anniversary_Lobby_Theme\n - Highlighted phrase: “Limited” in blue text.\n\n5. **COLOR_PALETTE: ROI Metrics Window**:\n - Title: \"COLOR_PALETTE: ROI Metrics\"\n - Section: \"Revenue Tones\"\n - Metrics:\n - Conversion: 12.5%\n - Retention: 45%\n - DAU Growth: “+18%”\n - A color palette grid displays 20 colored squares arranged in two rows (10 per row), including black, white, red, yellow, green, cyan, blue, magenta, purple, brown, gray, and various shades.\n\nAdditional visual elements include:\n- A progress bar labeled \"PROMOTION\" near the top center.\n- A pixelated hourglass icon near the bottom center.\n- Vertical Japanese text along the left edge: \"クフウマジテイス\" (likely a stylized or fictional term).\n- The entire composition uses a grid floor effect reminiscent of Tron, enhancing the retro-digital theme.\n\nAll textual content is preserved in its original form, including technical terms like \"Gacha\", \"SSR\", \"DAU\", and file paths. The design effectively blends nostalgic computing visuals with modern gaming industry terminology to convey data about player engagement, campaign performance, visual assets, and financial metrics.", "width": 2048, "height": 2048}
{"prompt": "The infographic, titled \"Foundational Principles for Fund Management Compliance,\" is presented in a vibrant, comic book style with bold colors, dynamic speech bubbles, and action words like \"POW!\", \"BANG!\", and \"ZAP!\" to emphasize key points. The layout is structured into three main horizontal sections, each with its own header and sub-headers, visually separated by bold borders and colorful backgrounds.\n\n**Section 1: Foundational Principles for Fund Management Compliance**\nSubtitle: \"Non-negotiable standards for all operational activities\"\nThis section is divided into three panels:\n- **Panel 1: Safety First Guiding Principle** \n Visual: Superman stands heroically beside an open treasure chest filled with gold coins, symbolizing safeguarded funds. The background is yellow with radiating lines, evoking power and protection.\n- **Panel 2: Full Lifecycle Transaction Tracking** \n Visual: A flowchart illustrates the transaction lifecycle using icons: a magnifying glass over money (initial inspection), a folder (data storage), a document with a magnifier (review), a graph (analysis), and another document (reporting). 
Arrows indicate bidirectional tracking and feedback loops.\n- **Panel 3: Independent Quarterly Audits** \n Visual: A stern-looking auditor in a suit holds a clipboard labeled \"AUDITOR\" with a red stamp reading \"CERTIFIED.\" The background is red with explosive rays, emphasizing authority and verification.\n\n**Section 2: Service Upgrade Measures for Better Beneficiary Experience**\nSubtitle: \"Blended digital and offline solutions for inclusive access\"\nThis section features three panels promoting improved service delivery:\n- **Panel 1: Full Online Service Access** \n Visual: A smiling young man in a yellow shirt uses a smartphone with heart icons and a chat bubble, indicating user satisfaction and digital engagement.\n- **Panel 2: 50% Faster Benefit Disbursement** \n Visual: A clock showing 7 days alongside stacks of cash, emphasizing speed. The text “7 DAYS” is prominently displayed in a black oval with yellow font.\n- **Panel 3: Dedicated Special Needs Service Counters** \n Visual: A female staff member in a blue uniform assists an elderly male beneficiary at a counter, highlighting personalized, accessible service.\n\n**Section 3: Multi-Layer Risk Control for Long-Term Fund Sustainability**\nSubtitle: \"Preventing fraud and misuse while increasing public trust\"\nThis section includes three panels focused on risk mitigation and transparency:\n- **Panel 1: AI Real-Time Anomaly Detection** \n Visual: A futuristic robot with glowing eyes points at a screen displaying graphs, charts, and a red warning triangle with an exclamation mark, symbolizing automated threat detection.\n- **Panel 2: Cross-Department Eligibility Verification** \n Visual: A network diagram shows multiple laptops and desktops connected to a central globe icon, illustrating data sharing and cross-departmental collaboration for verification.\n- **Panel 3: Monthly Public Transparency Reports** \n Visual: A computer monitor displays a pie chart and text, with a badge labeled \"PUBLIC 
OVERSIGHT\" in red and yellow, underscoring accountability and public reporting.\n\nOverall, the infographic employs a pop-art aesthetic with halftone dots, thick black outlines, and high-contrast colors to convey information in an engaging, memorable way. All textual content is in English, and the design effectively communicates complex compliance and service strategies through simplified, relatable visuals.", "width": 2048, "height": 2048, "seed": 42}
{"prompt": "该信息图以“理财产品配置机理:精准增值”为主题,采用生物医学与金融科技融合的视觉隐喻,系统阐述了理财产品如何通过科学配置实现财富保值增值。整体设计风格为深邃蓝紫色调,充满未来感和科技感,背景中穿插K线图、DNA双螺旋结构、细胞膜、病毒颗粒等元素,将理财过程类比为免疫系统对抗病原体的动态过程。\n\n标题位于顶部中央,以醒目的青蓝色粗体字呈现:“理财产品配置机理:精准增值”,右侧配有一个金色硬币(印有“¥”符号)被蓝色多边形网络环绕的图标,象征金融科技与资产保护。整个信息图从左上至右下形成一条清晰的逻辑流,分为五个核心步骤,每个步骤均配有编号、标题、详细说明文字及对应视觉元素。\n\n1. **外部侵蚀:通胀与波动** \n - 位置:左上角 \n - 视觉元素:多个红色冠状病毒颗粒(代表通胀与市场波动)正向一个细胞膜结构发起攻击,伴有闪电效果,象征破坏性冲击。 \n - 文本内容: \n “宏观市场中的通货膨胀如同游离的破坏因子,不断侵蚀资产的实际购买力,使缺乏保护的原始财富面临缩水危机。” \n - 功能:设定问题情境,强调通胀对财富的侵蚀作用。\n\n2. **精准工具:定制化理财因子** \n - 位置:左侧中部 \n - 视觉元素:多个发光的多面体几何结构(如八面体、十二面体)被标记为“定制化理财因子”,它们正被注入细胞结构内部,类比靶向药物治疗。 \n - 文本内容: \n “优质理财产品如同靶向药物注入财富系统。它们具备特定期限与收益结构,专门为投资者的风险偏好‘锁孔’而设计。” \n - 功能:引入解决方案——精准匹配的理财产品作为“治疗工具”。\n\n3. **风险拦截:底层资产筛选** \n - 位置:右上部,紧邻细胞膜结构 \n - 视觉元素:细胞膜被标注为“风控防线”,其表面具有选择性通透性,仅允许特定几何结构(代表稳健资产)穿过,同时阻挡红色病毒颗粒。 \n - 文本内容: \n “理财产品通过严密的风控模型(受体结合),将高风险波动隔离在外,完成资产筛选。如同净化器般,仅允许稳健收益穿透进入资金池。” \n - 功能:展示风险控制机制,确保只有低风险、高稳定性资产进入投资组合。\n\n4. **价值激活:复利传导网络** \n - 位置:中部偏右,细胞内部 \n - 视觉元素:黄金色的DNA双螺旋结构贯穿细胞内,代表“量化策略”与“股债配置”,并连接至“核心资产库”。 \n - 文本内容: \n “纯净的资金流激活了胞内的收益网络。通过大类资产配置的多样化手段,财富沿着复利通路加速运转与裂变。” \n - 功能:解释资产在安全环境下通过多元化配置与复利效应实现增值的过程。\n\n5. **最终目标:财富跨周期生长** \n - 位置:右下角,核心资产库区域 \n - 视觉元素:一个巨大的蓝色球体,表面布满二进制代码与数据流,代表“核心资产库”,中心发出耀眼光芒,象征财富增长与系统免疫力增强。 \n - 文本内容: \n “在理财因子的持续滋养下,核心本金库实现稳健增长。财富系统免疫力大幅增强,成功抵御通胀,实现长期的保值与增值。” \n - 功能:总结最终成果——实现跨越经济周期的可持续财富增长。\n\n此外,图中还包含多个辅助标签,如“股债配置”、“量化策略”、“核心资产库”,这些标签分别指向DNA结构或球体的不同部分,进一步细化了资产配置的具体手段。\n\n整体布局呈从左到右、由外而内的递进式结构:外部威胁 → 精准干预 → 风险过滤 → 内部激活 → 最终成长。视觉隐喻贯穿始终,将复杂的金融概念转化为直观易懂的生物学过程,增强了信息传达的有效性与记忆点。所有文本均为简体中文,语言专业且富有修辞色彩,适合用于金融产品宣传或投资者教育材料。", "width": 1536, "height": 2720}
{"prompt": "The infographic, titled \"Common Types of Adjudicated Labor Contract Disputes,\" is structured into three main sections arranged vertically in distinct color-coded triangular segments. The overall design employs a clean, modern flat illustration style with vibrant colors—lavender, peach, and cream—accented by decorative geometric shapes (circles, triangles, squares) and potted plants to add visual interest without distracting from content.\n\n---\n\n**Section 1: Common Types of Adjudicated Labor Contract Disputes**\n\nThis section, set against a lavender background, highlights the \"Top 4 Labor Contract Dispute Categories\" which collectively cover 92% of all court-adjudicated labor contract cases. Each category is visually represented by a stylized character illustration and includes a brief description.\n\n- **Unpaid Compensation**: Illustrated by a woman holding a dollar bill icon and pointing to a chart. Description: \"Disputes over unpaid wages, overtime pay, and performance bonuses.\"\n- **Illegal Contract Termination**: Depicted by a man in a suit holding a document and a briefcase. Description: \"Disputes over illegal termination of labor contracts and required severance pay.\"\n- **Non-Compete/Confidentiality Breach**: Shown with a man on a laptop, surrounded by a shield and bell icons. Description: \"Disputes over breach or invalidity of non-compete and confidentiality clauses.\"\n- **Unwritten Contract & Social Insurance**: Illustrated by a woman pointing to a large document. 
Description: \"Disputes over unwritten labor contracts and unfulfilled social insurance contribution obligations.\"\n\nEach category uses consistent visual elements: characters in colorful attire, small plant pots, and floating geometric shapes, reinforcing the infographic’s playful yet professional tone.\n\n---\n\n**Section 2: Court Ruling Standards for Common Claims**\n\nSet against a peach background, this section details \"Court Ruling Criteria by Claim Type.\" It breaks down the legal standards applied in three primary claim categories, each accompanied by an illustrative figure.\n\n- **Unpaid Wage Claims**: Illustrated by a hand offering a coin. Standard: \"Employer bears burden of proving full payment.\" Sub-point: \"Employers bear the full burden of proof to demonstrate wages were paid in full and on schedule.\"\n- **Illegal Termination Claims**: Illustrated by a man holding a document with a checkmark. Standard: \"Employer must prove legal grounds for dismissal.\" Sub-point: \"Employers must provide signed, documented evidence of legal grounds for termination, such as verified violation of official company policies, compliant redundancy procedures.\"\n- **Non-Compete Claims**: Illustrated by a woman gesturing with a speech bubble containing a checkmark. 
Standard: \"Enforceable only with monthly compensation to employees.\" Sub-points: \n - \"Clauses are only enforceable if the employer provides monthly financial compensation to the employee during the non-compete period.\"\n - \"The clause scope in time, geography, and industry is reasonable.\"\n\nThe layout uses bullet points for clarity, and the illustrations maintain stylistic consistency with Section 1.\n\n---\n\n**Section 3: Evidence Requirements for Successful Claims**\n\nThis final section, on a cream background, is titled \"Evidence Requirements for Successful Claims\" and specifies \"Required Evidence for Dispute Parties.\" It is divided into two columns: \"Evidence for Employees\" and \"Evidence for Employers,\" each illustrated with a relevant character.\n\n**Evidence for Employees** (illustrated by a woman holding a document):\n- signed labor contracts\n- pay stubs\n- official overtime records\n- written dismissal notices\n- verified work attendance logs\n\n**Evidence for Employers** (illustrated by a man holding a tablet near a gavel):\n- officially documented company policies with employee sign-off\n- full wage payment records\n- verified employee performance reviews\n- documented proof of labor regulation compliance\n\nBetween the two columns, a central graphic shows three documents with checkmarks and a purple folder, symbolizing documentation and verification.\n\n---\n\nThe infographic uses clear typography, with bold headings and readable body text. All information is presented in English, and the language is formal yet accessible, suitable for professionals, HR personnel, or individuals navigating labor disputes. The visual hierarchy effectively guides the viewer through the logical progression from dispute types to legal standards and finally to evidentiary requirements.", "width": 2048, "height": 2048}
{"prompt": "The infographic titled \"POLICIES AND PRACTICES FOR ADVANCING SCIENCE & TECHNOLOGY INNOVATION AND ACHIEVEMENTS TRANSFORMATION\" presents an evidence-based framework designed to accelerate S&T innovation, streamline research translation into real-world applications, and drive value for stakeholders in academia, industry, and government. The overall layout is structured as a stylized mountain climb, symbolizing progression from foundational support to peak achievements, with the journey starting at the base labeled \"START: INNOVATION ECOSYSTEM\" and culminating at the summit labeled \"CLIMAX: ACHIEVEMENTS TRANSFORMATION,\" leading to \"END: SUSTAINABLE INNOVATION & VALUE CREATION.\"\n\nThe visual style is clean and illustrative, using icons, symbols, and text labels to represent concepts. The mountain is divided into three main vertical sections:\n\n1. **Left Side: Core Policy Support for Innovation & Transformation**\n - This section describes structural incentives to remove barriers to R&D and translation.\n - It includes four key policy supports, each marked with an icon and label:\n - **R&D Tax & Grant Support**: Illustrated with coins, a tax form, a calculator, and a grant application form.\n - **IP Rights Reform**: Depicted with a patent certificate, a gavel, a key, and text \"IP OWNERSHIP.\"\n - **Fast-Track Regulatory Approval**: Shown with a stopwatch, a checklist, and a rocket.\n - **S&T Talent Incentives**: Represented by a diploma, keys, a piggy bank labeled \"STARTUP FUNDING,\" and a rocket.\n\n2. 
**Center: On-the-Ground Research Transformation Practices**\n   - Positioned at the peak of the mountain, this section highlights actionable implementation practices.\n   - Includes four illustrated components:\n     - **Joint University-Industry Research Centers**: Illustrated with a microscope, gears, and a handshake.\n     - **Dedicated Technology Transfer Offices**: Shown with a briefcase, a contract document, and a lightbulb with a dollar sign.\n     - **Real-World Pilot Testing Bases**: Depicted with a drone and a hard hat.\n     - **Researcher Commercialization Training**: Illustrated with a business plan document and a megaphone.\n\n3. **Right Side: Quantifiable Impacts of Aligned Policy & Practice**\n   - This section details measurable gains for the innovation ecosystem performance.\n   - Presents four quantified outcomes, each with an icon and data point:\n     - **35% Higher Patent Filing Rate**: Illustrated with a bar chart showing increasing bars and a patent document.\n     - **40% Higher Commercialization Success**: Shown with a pie chart labeled \"SUCCESS\" and a rocket.\n     - **28% Faster Time-to-Market**: Depicted with a timeline diagram showing a shortened path from idea to delivery (box).\n     - **22% Annual High-Tech Output Growth**: Illustrated with a line graph showing upward growth and a factory icon.\n\nThe infographic uses consistent visual encoding: circular nodes mark each step along the mountain’s ascent, connecting the left, center, and right sections. Textual content is presented in clear, sans-serif fonts, with headings in bold black uppercase letters. Icons are colorful and symbolic, enhancing comprehension without overshadowing the textual information. 
The color palette is neutral with beige, gray, and muted tones, accented with pops of color in the icons (e.g., red, blue, yellow).\n\nAll textual elements are preserved exactly as written in the original infographic, including subheadings such as “Structural incentives to remove barriers to R&D and translation” and “Measurable gains for innovation ecosystem performance.” The design effectively communicates a narrative arc from foundational policy to tangible outcomes, emphasizing the synergy between strategic support, practical implementation, and quantifiable success.", "width": 2496, "height": 1664}
{"prompt": "The infographic titled \"Academic Credentials vs. Personal Core Ability\" uses a visually striking iceberg metaphor to contrast visible academic qualifications with the deeper, less tangible personal abilities that drive long-term real-world achievement. The design is divided into three main horizontal sections: above water (visible credentials), below water (hidden drivers of success), and a bottom section for actionable takeaways.\n\nAbove the waterline, the \"Academic Credentials\" section features symbolic imagery including rolled diplomas with red ribbons, a black graduation cap with tassel, stacked books, and a transcript document. This represents the visible, formal education components. A comparative table directly contrasts \"Academic Credentials\" and \"Personal Ability,\" listing identical descriptions for both categories:\n\n- **What it measures**: Mastery of formal curriculum, adherence to structured learning standards.\n- **Validity timeframe**: Fixed at the time of graduation, may become outdated as industries evolve.\n- **Primary use case**: Initial screening for education requirements, entry-level role eligibility.\n\nDespite the identical text, the visual context implies that while credentials are formally recognized, personal ability encompasses more than just curriculum mastery.\n\nBelow the waterline, the infographic reveals the \"Drivers of Long-Term Real-World Achievement.\" This section includes icons symbolizing problem-solving (interlocking gears), innovation (lightbulb with brain inside), planning (compass), documentation (notebook), creativity (paint mug), and practical skills (toolbox). Text explains that for entry-level roles, academic credentials serve as a critical signaling tool for baseline competence and work ethic. 
For mid-career and senior roles, demonstrated ability—such as track record of results, problem-solving, and leadership—is a stronger predictor of promotion and high performance than degree pedigree.\n\nAdjacent to this, under \"What Drives Sustainable Achievement?\", the infographic cites data from the 2023 U.S. Bureau of Labor Statistics: individuals who combine strong academic foundations with continuous skill development have a 65% higher likelihood of reaching the top 10% of earners in their field. A simple formula illustrates this: \"Credentials + [lightbulb icon] = Ability,\" with the caption \"Combined Impact = Long-Term Success.\"\n\nThe bottom section, \"Actionable Takeaways for All Stakeholders,\" provides specific recommendations:\n\n- **For Learners**: Prioritize completing relevant formal education and building hands-on experience (internships, side projects, volunteer work) to maximize career prospects.\n- **For Employers**: Implement skills-based hiring practices alongside degree requirements to reduce barriers for high-ability candidates from non-traditional educational backgrounds.\n- **For Career Advancers**: Document and showcase performance results and skill growth as prominently as academic credentials on resumes and professional profiles.\n\nVisual elements supporting these takeaways include a handshake within a speech bubble, a magnifying glass over a resume, a ladder with an upward arrow, and a document with a signature.\n\nThe infographic concludes with a quote at the bottom: “Credentials open doors, but ability keeps you moving forward.”\n\nOverall, the style is clean and modern, using a cool color palette of blues, whites, and greys with pops of yellow and orange for emphasis. The iceberg structure effectively communicates the idea that while credentials are visible and necessary for initial access, sustained success relies on deeper, ongoing development of personal ability. 
The layout is well-organized, guiding the viewer from surface-level understanding to deeper insights and finally to practical application.", "width": 2368, "height": 1760}
{"prompt": "该信息图以浅蓝色为背景,配以金色边框和装饰性线条,整体风格专业、现代且具有科技感。画面长宽比为1:1(正方形,适合2048×2048像素输出)。顶部中央以大号加粗黑体字显示标题“李明轩院长:韩式精微植发”,两侧点缀有水滴状装饰元素,增强视觉美感。\n\n图中核心位置展示了一位身穿白色医生袍的男性——李明轩院长,其形象庄重、专业,位于画面中心偏右区域。背景中融入了金色几何网络线条与发光粒子效果,营造出高端医学科技氛围。右侧配有三个银色圆柱形图标,边缘带有金色光晕,可能象征技术层级或核心成果,但未附带具体说明文字。\n\n左侧区域分为上下两个模块,均采用半透明玻璃质感卡片设计,内含图标与文字:\n\n- **上部模块**:\n - 标题:“学术荣誉”,配有一个金色奖牌图标。\n - 列表内容:\n - 中国整形美容协会理事单位特聘专家\n - 东亚毛发医学论坛特邀讲者\n - 医师协会毛发健康科普专家团成员\n - 图标:左上角为听诊器图标,右上角为学士帽图标;左下角为文件夹图标,右下角为学士帽图标。\n\n- **下部模块**:\n - 标题:“核心任职”,同样配有一个金色奖牌图标。\n - 列表内容:\n - 广州南医汇医疗美容医院 院长\n - 明轩植发与头皮养护中心 院长\n - 粤港澳大湾区毛发专病会诊中心 联合主任\n\n在图像下方中央,是一个深蓝色渐变矩形区域,内含一根透明发光的毛发结构示意图,毛囊部分清晰可见,并伴有流动光效线条,突显“毛发移植”主题。右侧标注“擅长项目”标题,下方列出三项专长:\n- 微创植发\n- 艺术发际线\n- 面部轮廓协调设计\n\n左上方独立区域提供李明轩院长的基本介绍:\n- 职务:广州南医汇医疗美容医院院长\n- 教育背景:毕业于中山大学中山医学院\n- 成就:一万七千多例发际线与头顶加密综合方案案例(“一万七千多例”以橙黄色突出显示)\n\n整体布局清晰对称,信息分块明确,通过颜色、图标、字体大小和排版层次有效引导阅读顺序。视觉元素与文本内容紧密结合,旨在突出李明轩院长的专业权威性、丰富临床经验及核心技术优势,适用于医疗美容机构宣传或专家个人品牌推广。所有文字均为中文,无英文或其他语言。", "width": 2048, "height": 2048}
{"prompt": "Create an infographic in a historical archival style with sepia and parchment color tones and distressed edges. It features a title 'SOURCES D'INCENDIES DANS LES CENTRES INDUSTRIELS DU XIXE SIÈCLE' presented on two stacked horizontal banners at the top left. Below the title, a central pie chart illustrates the root causes of urban fires, divided into five distinct segments separated by thin white borders. The rightmost and largest light tan segment is labeled 'Fours industriels/Étincelles' at 42%. The bottom-left medium brown segment is labeled 'Feux de cheminée/chauffage' at 28%. The middle-left darker brown segment is labeled 'Éclairage à flamme nue' at 15%. The top-left darkest brown segment is labeled 'Incendie criminel' at 10%. The top smallest dark brown segment is labeled 'Autres causes' at 5%. Each label is placed externally around the perimeter of the chart, with a thin line connecting the text to its respective slice. The background incorporates subtle silhouettes of factory chimneys emitting smoke as decorative elements. The given data is : [{\"cause\": \"Fours industriels/Étincelles\", \"percentage\": 42}, {\"cause\": \"Feux de cheminée/chauffage\", \"percentage\": 28}, {\"cause\": \"Éclairage à flamme nue\", \"percentage\": 15}, {\"cause\": \"Incendie criminel\", \"percentage\": 10}, {\"cause\": \"Autres causes\", \"percentage\": 5}]", "width": 2048, "height": 2048}
{"prompt": "This infographic, titled \"START: ROBOT ARCADE LOGIC,\" is designed in a vibrant, pixel-art style reminiscent of classic 8-bit arcade games. The background is dark blue with scattered stars and glowing neon accents in cyan, orange, and green, creating a futuristic yet nostalgic atmosphere. The layout is organized into a 3x3 grid of nine rectangular panels, each explaining a different fundamental programming concept using the metaphor of a robot navigating an arcade game environment.\n\nThe top-left panel serves as the introduction: \"START: ROBOT ARCADE LOGIC\" in bold, gradient orange-yellow text. Below it, a smiling robot stands beside a red joystick, with the instruction \"PRESS START to learn!\" in bright cyan and green text.\n\nThe remaining eight panels are arranged in two columns and four rows, each illustrating a specific logic concept:\n\n**Column 1 (Left Side):**\n\n1. **CONDITIONALS (IF/ELSE): Making Choices**\n - Visual: A robot at a fork in the path, facing a wall on one side and a glowing bullet on the other.\n - Text: \"IF (WALL AHEAD) THEN (TURN LEFT); ELSE (MOVE FORWARD)\" followed by \"Robot checks condition to decide path.\"\n - Green arrows indicate the two possible paths.\n\n2. **FUNCTIONS: Reusable Tasks**\n - Visual: One robot presses a red button labeled \"CALL 'REPAIR_BOT()'\" while another robot holds a wrench, with a box below reading \"FUNC: REPAIR\".\n - Text: \"Group code into a named block.\"\n\n3. **INPUT / OUTPUT: Communication**\n - Visual: A human hand operates a joystick, sending a signal (represented by a waveform and sound waves) to a robot that emits light and sound.\n - Text: \"INPUT (Joystick) > PROCESS > OUTPUT (Light & Sound)\" and \"Interact with the world.\"\n\n**Column 2 (Right Side):**\n\n1. **SEQUENCE: Step-by-Step Instructions**\n - Visual: A robot moves along a path with directional arrows (forward, forward, turn right). 
On the right, a score box shows \"POINTS = 150\", a heart icon with \"HEALTH=5\", and a bullet icon with \"AMMO=10\".\n - Text: \"STEP: 3/3\", \"1. FORWARD >\", \"2. FORWARD >\", \"3. TURN RIGHT >\", and \"Store & update values (like health or ammo).\"\n\n2. **LOOPS (WHILE): Repeating Actions**\n - Visual: A robot jumps repeatedly over pits, with a large green circular arrow labeled \"REPEAT Looping...\" indicating the loop.\n - Text: \"WHILE (PIT AHEAD) DO (JUMP)\" and \"Repeats until condition is false.\"\n\n3. **LOOPS (FOR): Counting Repetitions**\n - Visual: A robot picks up three coins in sequence, with \"DING\" sounds appearing above each coin. A counter reads \"COINS: 1, 2, 3... (Done!)\".\n - Text: \"FOR (COIN_COUNT 1 TO 3) DO (PICK UP)\" and \"Runs a fixed number of times.\"\n\n4. **GAME OVER (LOGIC LEARNED!)**\n - Visual: A celebratory robot raises its arms under confetti, next to a red flag marking the finish line.\n - Text: \"CONGRATULATIONS! YOU MASTERED THE LOGIC!\", \"CONTINUE? Y / N\", and \"FINAL SCORE: 9999 (LOGIC EXPERT)\".\n\nAll panels feature consistent visual elements: the same robot character with a blue screen face, pixelated textures, and glowing outlines. The typography uses a retro digital font, enhancing the arcade theme. The infographic effectively uses color coding—orange for titles, cyan for instructions, and white for explanations—to guide the viewer through the learning process. Each concept is clearly separated, visually demonstrated, and accompanied by concise, actionable text, making complex programming ideas accessible and engaging.", "width": 1536, "height": 2720}
{"prompt": "This infographic, titled \"A Guide to Islamic Fasting Practices & Major Festivals: Observances and Celebrations,\" is presented in a visually engaging chalkboard style with a dark background, white and colored chalk-like text, and hand-drawn illustrations. The overall layout is vertically segmented into three main numbered sections, each dedicated to a core aspect of Islamic observance: Sawm (Ramadan fasting), Eid al-Fitr, and Eid al-Adha. The design uses circular flow arrows and decorative elements like stars, moons, and clock icons to guide the viewer through the content in a cyclical and thematic manner.\n\nThe title is prominently displayed at the top in large, bold white text with yellow and blue underlines for emphasis. The entire infographic is framed by a wooden border, enhancing its educational and artistic presentation.\n\n---\n\n**Section 1: Sawm – The Fasting Pillar of Ramadan**\n\nThis section describes Ramadan as \"A month of spiritual growth, discipline, and community connection.\" It features three key components illustrated with small drawings:\n\n- **Suhoor**: Depicted as a family eating a pre-dawn meal before fasting begins. Illustrated with a clock showing early morning hours and a crescent moon.\n- **Iftar**: Shown as a sunset meal to break the fast, with images of dates, water, and other food items on a plate.\n- **Charitable giving**: Emphasizes mandatory zakat and voluntary sadaqah encouraged during Ramadan, illustrated by hands exchanging coins.\n\nArrows connect these elements in a circular flow, symbolizing the daily cycle of fasting and breaking the fast. 
Illustrations include praying individuals, a family dining, and charitable acts, reinforcing themes of spirituality, sustenance, and generosity.\n\n---\n\n**Section 2: Eid al-Fitr – Festival of Breaking the Fast**\n\nSubtitled \"Celebrating the end of Ramadan,\" this section outlines three main observances:\n\n- **Communal Eid Prayer**: Illustrated with a group of people praying outside a mosque with domes and minarets.\n- **Zakat al-Fitr**: Charity given to vulnerable community members, shown as two men exchanging money and food.\n- **Eidi**: Cash gifts for children, depicted with joyful children receiving envelopes and money.\n\nThese elements are connected by curved arrows, forming a loop that emphasizes the celebratory and communal nature of the festival. The visuals use bright colors and smiling figures to convey joy and festivity.\n\n---\n\n**Section 3: Eid al-Adha – Festival of the Sacrifice**\n\nSubtitled \"Honoring devotion, generosity, and community,\" this section details four interconnected aspects:\n\n- **Qurbani**: Ritual sacrifice with equal distribution of meat. Illustrated with sheep/goats, cuts of meat, and arrows indicating distribution to low-income people, friends/family, and household.\n- **Coincides with Hajj pilgrimage to Mecca**: Shown with an illustration of pilgrims circumambulating the Kaaba.\n- **Communal feasts and gift exchanges**: Depicted with a family dining together and sharing food.\n- **Kaaba illustration**: A central image of the Kaaba connects to the Hajj element, emphasizing the religious significance.\n\nThe section uses directional arrows to show the flow from sacrifice to distribution and celebration, highlighting the values of generosity and community.\n\n---\n\n**Visual Elements and Style**\n\nThe infographic employs a consistent chalkboard aesthetic with textured black backgrounds, white and pastel-colored text, and whimsical hand-drawn illustrations. 
Decorative elements such as stars, crescents, and clocks enhance thematic relevance. The use of color-coded numbers (yellow for ①, pink for ②, blue for ③) helps segment the content clearly. All textual content is preserved exactly as written, including terms like \"Sawm,\" \"Suhoor,\" \"Iftar,\" \"Zakat al-Fitr,\" \"Qurbani,\" and \"Eidi.\"\n\nThe chart type is a conceptual flow diagram with thematic segmentation, using arrows and visual metaphors to represent processes and relationships between practices. Data encoding is primarily symbolic—illustrations represent concepts, while text provides definitions and explanations. No numerical data or statistical charts are present.\n\nIn summary, this infographic serves as an accessible, visually rich guide to the core practices and festivals of Islam, emphasizing spiritual, social, and charitable dimensions through clear organization, engaging illustrations, and structured flow.", "width": 1536, "height": 2720}
{"prompt": "Create an infographic that features the title 'PRODUCTION DE LA CHAÎNE D'ASSEMBLAGE D'ENGINS DE CHANTIER' at the top center. The lower portion of the infographic contains a vertical bar chart with five bars of varying heights styled as hydraulic cylinders of different colors, while the upper right section displays a large illustrative graphic. This illustration shows a massive yellow backhoe loader lifting earth, three yellow hard hats, and an industrial construction site backdrop in yellow and black tones. The chart features a vertical y-axis line on the left with a '0' label at the origin. For the bar chart, a production volume number is positioned directly above each hydraulic cylinder, and a text label is placed directly below it. The data is visually encoded as follows: the highest volume is represented by a black cylinder for 'Pelles' at 8500, followed by a brown cylinder for 'Tractopelles' at 4200, a blue-grey cylinder for 'Chargeuses sur pneus' at 3800, a red cylinder for 'Niveleuses' at 1500, and the lowest is a yellow cylinder for 'Grues' at 900. A row of five circular icons with golden borders is situated at the very bottom, with one icon centered beneath each corresponding text label depicting the respective construction equipment. The given data is : [{\"equipment\": \"Pelles\", \"output\": 8500}, {\"equipment\": \"Tractopelles\", \"output\": 4200}, {\"equipment\": \"Chargeuses sur pneus\", \"output\": 3800}, {\"equipment\": \"Niveleuses\", \"output\": 1500}, {\"equipment\": \"Grues\", \"output\": 900}]", "width": 2048, "height": 2048}
{"prompt": "该信息图以俯视视角呈现一张布满血迹与磨损痕迹的石质桌面,整体风格为黑暗奇幻风,营造出一种末世求生与神秘探险的氛围。顶部标题为“整备记录:在旧神领地生存的唯一凭证”,字体为白色衬线体,带有轻微阴影,居中放置于深色背景之上,下方有一条细白分割线,强化了标题的视觉突出性。\n\n画面中心是一把斜置的“破法者长剑”,剑身呈灰黑色,表面刻有发光的蓝色符文(形似古文字或魔法符号),剑柄缠绕着磨损的皮革,护手为金属材质,整体设计兼具实用与神秘感。其左侧标注文字:“破法者长剑”及“三阶附魔,有效击穿虚空护甲”,通过一条细白指引线连接至剑身。\n\n剑旁是一个打开的棕色皮质医疗包,内含两瓶绿色液体试剂和一把小刀,标签为“战地医疗组件”,并附注说明:“高浓度再生萃取液与应急止血带”。该标签位于医疗包右侧,同样由指引线连接。\n\n画面左下角铺展着一张泛黄破损的“未知区域航图”,上面绘有山脉、河流轮廓,并标有多个发光的蓝色标记点(如星形、圆圈、箭头等),代表迷雾范围与补给点位置。航图中央放置一个复古黄铜指南针,指针指向北方。标签“未知区域航图”及其说明“实时标注迷雾范围与补给点位置”位于画面右下角,通过指引线指向航图。\n\n整个场景还散布着其他细节元素:左上角散落着多枚银色硬币、干枯植物;右上角有几块岩石碎片;右侧边缘可见燃烧的篝火,火焰温暖的橙黄色调与整体冷色调形成对比,增强视觉层次;右下角还有更多硬币、水晶碎片及部分靴子边缘。\n\n所有文字均使用中文,采用清晰易读的无衬线或半衬线字体,颜色为白色或浅灰色,确保在深色背景下的可读性。图像构图对称且富有叙事感,物品布局自然,仿佛是冒险者临时整理装备的场景,传递出紧张而真实的生存压力。\n\n结构化信息如下:\n\n- **主标题**:\n - 文本:整备记录:在旧神领地生存的唯一凭证\n\n- **核心装备一**:\n - 名称:破法者长剑\n - 描述:三阶附魔,有效击穿虚空护甲\n - 视觉特征:灰黑剑身、蓝色发光符文、皮革缠绕剑柄、金属护手\n\n- **核心装备二**:\n - 名称:战地医疗组件\n - 描述:高浓度再生萃取液与应急止血带\n - 视觉特征:棕色皮质包,内含两瓶绿色药剂与一把小刀\n\n- **核心装备三**:\n - 名称:未知区域航图\n - 描述:实时标注迷雾范围与补给点位置\n - 视觉特征:泛黄地图、蓝色发光标记、中央指南针、地形轮廓\n\n- **环境元素**:\n - 硬币(多枚,分布于左上与右下)\n - 干枯植物(左上与左中)\n - 岩石碎片(右上)\n - 水晶碎片(散落在航图周围)\n - 篝火(右侧边缘,火焰明亮)\n - 靴子边缘(右下角)\n\n整体视觉编码通过光影对比、材质纹理、色彩冷暖(冷灰主导,暖橙点缀)以及发光元素(符文、标记点)来引导视线,突出关键装备。该信息图并非传统数据图表,而是以场景化叙事方式传达装备功能与生存策略,具有高度沉浸感。", "width": 1664, "height": 2496}
{"prompt": "The infographic titled \"CHEMISTRY OF SPICES & HERBS\" is presented in a chalkboard-style design with white dashed borders framing the entire layout. The title is written in bold, uppercase yellow chalk-like font at the top center, underlined with a thin yellow line. The overall style mimics a classroom blackboard with hand-drawn illustrations and colorful text to enhance visual appeal and clarity.\n\nThe content is organized into four main quadrants, each enclosed in a white rectangular border with rounded corners, arranged in a 2x2 grid. Each quadrant addresses a different category of chemical compounds found in spices and herbs, using distinct icons, colors, and bullet points for emphasis.\n\n---\n\n**Top-Left Quadrant: VOLATILE COMPOUNDS: AROMA**\n\nThis section explains that volatile compounds are responsible for aroma and are primarily composed of essential oils. An illustration of a cartoon scientist with wild gray hair, glasses, and a lab coat holding a beaker emitting steam accompanies this section. A curved arrow points from the beaker to the text \"ESSENTIAL OILS.\"\n\nKey characteristics listed in bullet points:\n- \"GIVE DISTINCT SMELLS\" (in yellow)\n- \"EASILY EVAPORATE\" (in green), accompanied by a small test tube icon with rising vapor\n\nExamples provided:\n- \"e.g., EUGENOL in cloves\" (Eugenol in orange, cloves in white)\n- \"CINNAMALDEHYDE in cinnamon\" (Cinnamaldehyde in pink, cinnamon in white)\n\n---\n\n**Top-Right Quadrant: NON-VOLATILE COMPOUNDS: FLAVOR**\n\nThis section focuses on non-volatile compounds responsible for taste. It features an illustration of a pink tongue with taste buds sticking out, with water droplets indicating saliva. 
The text emphasizes:\n- \"RESPONSIBLE FOR TASTE\" (in blue)\n- \"DO NOT EVAPORATE\" (in green), next to an illustration of red chili peppers and peppercorns\n\nExamples given:\n- \"e.g., CAPSAICIN for ‘heat’ in peppers\" (Capsaicin in red)\n- \"PIPERINE in black pepper\" (Piperine in orange)\n\n---\n\n**Bottom-Left Quadrant: BIOACTIVE PHENOLICS: HEALTH BENEFITS**\n\nThis section highlights health-promoting compounds. It includes a smiling red heart with arms and a sprig of green leaves on the left, and a yellow sun icon on the right.\n\nKey benefits listed:\n- \"ANTIOXIDANT PROPERTIES\" (in yellow)\n- \"REDUCE INFLAMMATION\" (in green)\n\nExamples provided:\n- \"e.g., CURCUMIN in turmeric\" (Curcumin in orange, turmeric in white)\n- \"GINGEROL in ginger\" (Gingerol in pink, ginger in white)\n\nA blue shield icon with a green cross is placed beside the examples, symbolizing protection or health benefits.\n\n---\n\n**Bottom-Right Quadrant: SIMPLE BREAKDOWN**\n\nThis section serves as a quick reference table linking specific compounds to their source spices/herbs. It uses bullet points with arrows to indicate association:\n\n- \"EUGENOL → CLOVES, ALLSPICE\" (Eugenol in yellow; icons: clove bud and allspice berry)\n- \"CINNAMALDEHYDE → CINNAMON\" (Cinnamaldehyde in green; icon: cinnamon stick)\n- \"CAPSAICIN → CHILIES\" (Capsaicin in red; icon: red chili pepper)\n- \"CURCUMIN → TURMERIC\" (Curcumin in orange; icon: turmeric root)\n- \"PIPERINE → PEPPER\" (Piperine in brown; icon: peppercorns)\n\n---\n\nThe infographic uses color-coding consistently across sections to differentiate compound types: yellow for eugenol, green for cinnamaldehyde, red for capsaicin, orange for curcumin, and pink for gingerol. Icons are used throughout to visually represent both compounds and their sources, enhancing comprehension.\n\nAll text is rendered in a clean, sans-serif, chalk-like font, with key terms highlighted in bold and colored text for emphasis. 
The background remains a dark charcoal gray, simulating a real chalkboard, which contrasts well with the bright colors used for text and illustrations.\n\nOverall, the infographic provides a clear, educational, and visually engaging breakdown of the chemical components of common spices and herbs, categorized by function—aroma, flavor, and health benefits—with concrete examples and simple associations.", "width": 2368, "height": 1760}
{"prompt": "该信息图以水彩风格呈现,标题为“清新手工饮品制作指南:蜂蜜柠檬百香果特饮”,整体布局清晰,色彩柔和,背景采用淡蓝、米黄与浅绿渐变水彩效果,营造出清新自然的视觉氛围。内容分为四个主要部分:所需材料、工具清单、制作步骤和制作技巧。\n\n第一部分“所需材料”位于左上角,用蓝色边框框起,内含五种食材的插画及其名称:\n- 柠檬(一个带叶完整柠檬)\n- 百香果(一个完整紫色百香果和一个切开露出籽粒的半颗)\n- 蜂蜜(一个带木盖和麻绳封口的玻璃罐)\n- 饮用水(一杯清水)\n- 冰块(三块透明冰块堆叠)\n\n第二部分“工具清单”位于右上角,用绿色边框框起,内含四种工具的插画及其名称:\n- 水果刀(一把带木质手柄的刀)\n- 砧板(一块长方形木砧板)\n- 勺子(一把金属汤匙)\n- 玻璃杯(一个空玻璃杯)\n\n第三部分为制作流程,横向排列于中部,由四个步骤组成,每个步骤配有插图和文字说明,步骤间以蓝色箭头连接,表示顺序:\n\n1. 准备食材:插图显示切好的柠檬片和切开挖出果肉的百香果。文字说明:“将柠檬切片,百香果切开挖出果肉。”\n2. 调制蜂蜜水:插图显示玻璃杯中加入蜂蜜和温水,并用勺子搅拌。文字说明:“在玻璃杯中加入蜂蜜和温水,搅拌均匀。”\n3. 加入果料与冰块:插图显示玻璃杯中放入柠檬片、百香果肉和冰块。文字说明:“放入柠檬片、百香果肉和冰块。”\n4. 搅拌完成:插图显示玻璃杯中饮料已混合,勺子在杯中搅拌,有旋转箭头表示动作。文字说明:“轻轻搅拌,即可享用清新特饮。”\n\n第四部分“制作技巧”位于底部,用浅蓝色横幅标示,包含两条建议,每条前配有点赞图标:\n- 技巧提示:可根据个人口味调整蜂蜜用量,冷藏后口感更佳。\n- 温馨建议:使用新鲜水果制作,风味更足。\n\n整体设计采用扁平化插画风格,图文结合,逻辑清晰,适合用于家庭或教学场景中的饮品制作指导。所有文字均为简体中文,语言简洁明了,无冗余信息。", "width": 2368, "height": 1760}
{"prompt": "该信息图以“公考上岸指南:稳扎稳打”为主题,采用手绘风格的视觉设计,背景为米白色纸张质感,置于木质纹理背景之上,左上角和右下角用胶带装饰,营造出温馨、亲切的学习氛围。整体布局呈循环流程图结构,四个步骤(Step 1 至 Step 4)通过棕色箭头连接,形成闭环,强调学习过程的持续性和迭代性。中心位置有一个虚线圆圈,内含“核心:坚持与反思”的文字及一个向上攀登的人物剪影和日出图案,突出备考的核心精神。\n\n标题“公考上岸指南:稳扎稳打”位于顶部中央,字体较大,黑色加粗,右侧配有一个小皇冠图标,象征成功上岸的目标。标题下方有一条棕色波浪形装饰线。\n\n四个步骤按顺时针方向排列,每个步骤均包含中英文标题、说明文字及对应插图:\n\n- **Step 1: 夯实基础 (Foundation)** \n 位于顶部偏左,配有打开的书本和嫩芽从土壤中生长的插图,象征知识积累与成长。文字内容为:“系统学习行测和申论的基础知识点。不要盲目追求做题速度,先把每个模块的底层逻辑吃透,构建完整的知识框架。这是建起万丈高楼的根基,切忌走马观花。”其中“底层逻辑”被红色圆圈标注,强调其重要性。\n\n- **Step 2: 专项刷题 (Practice)** \n 位于右侧,配有手持笔在答题卡上勾选的插图,周围有几何碎片装饰,体现练习的动态感。文字内容为:“按模块进行高强度专项训练,将理论转化为做题本能。注重总结各类题型的秒杀技巧和命题规律,逐步提升做题速度与正确率。刷题不在多,而在精和透。”其中“命题规律”被绿色圆角矩形框标注。\n\n- **Step 3: 模考复盘 (Review)** \n 位于底部偏右,配有沙漏和放大镜观察笔记本的插图,象征时间管理和细致分析。文字内容为:“严格按考试时间进行全卷模拟,寻找考场节奏与时间分配策略。最关键的是‘复盘’,深挖错题背后的认知盲区,完善自己的错题本。不复盘的模考等于白考。”其中“复盘”被橙色圆角矩形框标注。\n\n- **Step 4: 查漏补缺 (Sprint)** \n 位于左侧,配有电池满格图标和靶心插图,象征能量充足与精准突破。文字内容为:“回归错题本和基础笔记,针对薄弱环节进行最后突击。考前一周调整生物钟与心态,保持每天适量的‘手感’。拒绝精神内耗,稳住心态,你就已经赢了一半。”其中“心态”被橙色圆角矩形框标注。\n\n中心核心区域的文字为:“公考是一场孤独的马拉松。正确的方向加上持续的复盘,才是上岸的唯一捷径。”其中“上岸”被黄色圆角矩形框标注。\n\n整个信息图使用了温暖的大地色系(棕、米、绿、橙),搭配简洁的手绘图标,如日历、书本、沙漏、放大镜等,增强了可读性和亲和力。所有文字均为中文,部分关键词通过颜色或形状突出,便于快速抓取重点。整体传达了一个系统化、科学化、心理与方法并重的公务员考试备考策略。", "width": 1536, "height": 2720}
{"prompt": "The infographic titled \"Autonomous Frontiers\" presents a vision of future logistics through the evolution of autonomous delivery systems. The main title is displayed in large, bold white font at the top left, accompanied by a small orange exclamation mark icon and the text \"JetBrains Mono LOG-SYS v.2.0\" beneath it. The subtitle reads \"Future Logistics: The Evolution of Autonomous Delivery.\"\n\nThe overall layout is divided into two primary visual sections: a dark, tech-themed left side featuring detailed technical descriptions and a lighter, cityscape background on the right illustrating the broader concept of ubiquitous autonomy. The design employs a sleek, futuristic aesthetic with circuit-like line patterns along the edges, glowing blue accents, and a 3D rendering of a silver autonomous delivery robot prominently featured in the lower half, occupying the center and bottom-right of the image.\n\nThe robot is depicted with rugged black tires, a carbon-fiber textured front panel, a transparent hood revealing an onboard computing unit, and a large camera lens on its front-left corner. A bright blue LED strip runs along the lower edge of the robot’s body. The word \"DELIVERY\" is partially visible on the side panel.\n\nThree key technological components are highlighted with callout boxes and connecting lines to specific parts of the robot:\n\n1. **TECH_PERCEPTION: The Sensory Layer – Multi-Modal Sensor Fusion** \n - Positioned in the upper left quadrant. \n - Describes how the robot perceives its environment using Lidar, Ultrasonic, and HD Vision. \n - Processes 10GB of spatial data per second to generate a 360-degree real-time map. \n - Ensures collision-free navigation in dense urban environments. \n - A small orange dot connects this box to a wireframe visualization of a 3D point cloud or LiDAR scan above the robot.\n\n2. **TECH_DECISION: Edge Computing Core – Edge AI Path Optimization** \n - Located in the middle left section. 
\n - Explains that onboard neural networks calculate energy-efficient routes in milliseconds. \n - Advanced algorithms predict pedestrian behavior and traffic patterns. \n - Transforms last-mile delivery into a silent, optimized flow. \n - An orange dot links this description to the transparent computing module under the robot's hood.\n\n3. **TECH_EXECUTION: Kinetic Propulsion – All-Terrain Modular Chassis** \n - Placed in the bottom right, adjacent to the robot’s wheel and suspension. \n - Details independent suspension and high-torque electric motors for conquering curbs and uneven surfaces. \n - Mentions modular cargo bays for temperature control, preserving goods from medical supplies to perishables. \n - An orange dot points to the wheel/suspension area.\n\nOn the right side of the infographic, under the heading \"TECH_FUTURE: The Trend Horizon,\" the concept of \"Ubiquitous Autonomy\" is introduced. It states that the future of logistics involves an invisible infrastructure of movement, moving toward a “Logistics Internet” where distribution is as fluid and constant as information flow. This section features a faded grayscale image of a modern city skyline with numerous drones flying overhead, visually reinforcing the idea of widespread autonomous delivery.\n\nThe color palette consists mainly of dark grays, metallic silver, black, and vibrant blue accents, with orange used for emphasis (dots, icons, and highlights). Text elements use sans-serif fonts, varying in size and weight to establish hierarchy. No charts, graphs, or numerical data beyond explicit values like \"10GB per second\" are present. The infographic combines conceptual illustration with technical explanation to communicate the integration of perception, decision-making, and physical execution in autonomous delivery systems.", "width": 2048, "height": 2048}
{"prompt": "Create an infographic set against a dark background that presents a central line chart below a title and subtitle. The title reads \"Presiones cambiarias globales\", with the subtitle \"Períodos de máxima volatilidad USD/EUR en un horizonte de siete años\" directly beneath it. The line chart has a vertical axis on the left labeled with values 1,00, 1,05, 1,10, 1,15, 1,20, and 1,25, and a horizontal axis at the bottom with year labels: 2018, 2019, 2020, 2021, 2022, 2023, and 2024. To the upper right of the chart's center, there is an overlapping circular icon combining the Euro symbol (€) in green and the Dollar symbol ($) in white. A single glowing line plots the data series across the chart, featuring a subtle shaded area beneath it. The line begins in neon green, starting at 1.18 in 2018, dipping to a text-labeled 1.12 in 2019, and sharply rising to 1.22 in 2020, which is highlighted by a neon green rectangular box. The green line descends to 1.13 in 2021, also enclosed in a green box, before transitioning to a neon red color. Following this shift, the red line drops to its lowest point at 1.05 in 2022, emphasized with a neon red rectangular box. It then recovers to a text-labeled 1.1 in 2023, and finally dips slightly to 1.08 in 2024, enclosed in another neon red rectangular box. The given data is : [{\"value\": 1.18, \"year\": 2018}, {\"value\": 1.12, \"year\": 2019}, {\"value\": 1.22, \"year\": 2020}, {\"value\": 1.13, \"year\": 2021}, {\"value\": 1.05, \"year\": 2022}, {\"value\": 1.1, \"year\": 2023}, {\"value\": 1.08, \"year\": 2024}]", "width": 2048, "height": 2048}
{"prompt": "The infographic is titled **“SAND PAINTING MYSTERY: BASIC TECHNIQUE DECODING TUTORIAL”** with a subtitle **“Hand-drawn Learning Notes”**, presented in a notebook-style layout with lined paper background and hand-drawn illustrations. The overall design mimics a sketchbook page with orange banners for section headers, decorative doodles (stars, arrows, lightbulbs, question marks), and a warm, inviting aesthetic using soft peach, orange, teal, and black tones.\n\nThe content is organized into five main sections, arranged in two columns with clear visual separation:\n\n---\n\n### 1. STRUCTURE DEFINITION\nThis section defines what a Sand Painting Mystery (SPM) is:\n- A dynamic storytelling form using moving sand.\n- Incorporates progressive reveals of narrative elements.\n- The sequence of images tells a deeper story or solves a riddle.\n\nIllustrations include:\n- Two stacks of layered rectangles (representing sand layers).\n- Silhouettes of two people facing each other.\n- A clock icon indicating time progression.\n\n---\n\n### 2. PROBLEM ANALYSIS\nThis section analyzes the core challenges in creating SPM:\n- Understanding viewer perspective shifts.\n- Planning layered information delivery.\n- Managing transitions without losing continuity.\n- Balancing detail vs. speed.\n- Integrating key clues seamlessly.\n\nVisuals include:\n- Three colorful question marks.\n- A magnifying glass.\n- Curved arrows indicating flow or transition.\n\n---\n\n### 3. SOLVING STRATEGY\nThis section presents the “key formula for success”:\n> **“STORYTELLING + TECHNIQUE + TIMING = REVEAL!”**\n\nSupporting tips:\n- Use contrast for dramatic effect.\n- Layer textures to create depth.\n- Focus on central elements first.\n- Plan dynamic camera angles conceptually.\n\nIllustrations:\n- Gears (representing technique/process).\n- A lightbulb (idea/insight).\n\n---\n\n### 4. DECODING STEPS\nA step-by-step guide for executing an SPM:\n1. **INITIAL SETUP**: Establish the scene base layers. 
\n *(Icon: sand pouring from a container)*\n2. **CORE ELEMENTS**: Draft key characters/objects. \n *(Icon: stylized human figures)*\n3. **SHADING & TEXTURE**: Add depth with light/dark sand. \n *(Icon: shaded texture pattern)*\n4. **PROGRESSIVE REVEAL**: Change layers to uncover secrets. \n *(Icon: layered sand being moved)*\n5. **FINAL POLISH**: Refine details for clarity & impact. \n *(Icon: paintbrush refining a line)*\n\nHighlighted formula at the bottom: \n> **“Layer 1 + Layer 2 + Change = Reveal”**\n\nArrows and icons visually connect the steps, emphasizing progression.\n\n---\n\n### 5. COMMON BEGINNER MISTAKES (TIP!)\nThis section lists pitfalls to avoid, each marked with a red \"X\" icon:\n- **OVER-COMPLICATION**: Trying to show too much at once (Keep it focused!)\n- **LACK OF PLANNING**: Starting without a clear storyboard (Plan ahead!)\n- **POOR LIGHTING**: Using flat light; creates weak shadows (Use angled light!)\n- **INCONSISTENT SPEED**: Rushing sections unevenly (Maintain steady pace!)\n- **IGNORING DEPTH**: Flat designs are less engaging (Use layering techniques!)\n\nIllustrations:\n- A tangled scribble for over-complication.\n- A map with location pins for planning.\n- Lightbulbs (one bright, one dim) for lighting and ideas.\n\n---\n\nThe infographic uses a mix of text, icons, and hand-drawn visuals to convey information in an accessible, educational manner. All textual content is in English. The layout is structured, easy to follow, and visually engaging, ideal for learners interested in sand painting as a narrative art form.", "width": 2368, "height": 1760}
{"prompt": "This infographic presents a detailed character status screen for a game unit named \"Frost Zombie,\" set against a dark, icy fantasy environment with snow-covered terrain and glowing ice formations. The layout is split into two main sections: the left side contains structured textual information within a translucent blue-bordered panel titled \"CHARACTER STATUS,\" while the right side features a full-body illustration of the Frost Zombie character standing on an ice platform, wielding a glowing ice spear.\n\nThe title \"CHARACTER STATUS\" is displayed at the top center in bold, white sans-serif font. Below it, the character’s portrait appears in a square frame on the upper left, depicting a skeletal figure with icy horns, glowing blue eyes, and frost-covered armor. Adjacent to the portrait, key identifiers are listed:\n\n- NAME: FROST ZOMBIE\n- LEVEL: 35\n- RANK: ELITE (accompanied by two icons: a gold medal and a crown)\n\nBelow this, the \"VITALITY\" section displays HP and MP bars:\n\n- HP: 2800/2800 (full blue bar)\n- MP: 1450/1450 (full cyan bar)\n\nThree tabs — \"info\", \"stats\", and \"skills\" — are positioned beneath vitality, though only the \"info\" tab is active and visible.\n\nUnder the \"info\" tab, the following subsections are presented:\n\n**[ATTRIBUTES]**\n- STR 88\n- INT 105\n- DEF 74\n- AGI 42\n\n**[EQUIPMENT]**\nEach item is listed with an icon to its left:\n- WEAPON: Frostbite Spear (Ice Attack +45)\n- ARMOR: Icy Plate Mail (DEF +38)\n- HELM: Frozen Horns (MP +150)\n- BOOTS: Snow Drifter Boots (AGI +20)\n- ACCESSORY: Shard of Eternal Frost (Cold DMG +15%)\n\n**[SKILLS]**\n- Passive: Frostbite (Attacks slow enemies)\n- Active: Ice Shards, Glacial Strike, Frozen Tomb\n\n**[ULTIMATE SKILL]**\n- GROUP SHIELD\n - Description: Creates a powerful ice barrier around all allies, absorbing 5000 DMG, lasts 10s.\n - Mana: 300, Cooldown: 120s\n - Icon: A shield with silhouettes of three figures inside, surrounded by ice crystals.\n\nThe visual 
design employs a consistent icy blue color scheme with glowing effects, sharp edges, and crystalline textures. The character illustration on the right shows the Frost Zombie with a humanoid skeleton form, covered in jagged ice shards, wearing tattered brown leather armor with ice accents. He holds a large spear made of ice with a swirling energy effect, and his feet stand on a raised ice platform. The background features a nighttime snowy landscape with distant structures and floating ice crystals, enhancing the wintry theme.\n\nAll text is rendered in clean, modern fonts with white or light blue coloring for readability against the dark backdrop. The overall style suggests a mobile or digital RPG game interface, emphasizing clarity and thematic immersion.", "width": 2368, "height": 1760}
{"prompt": "The infographic titled \"Ornamental Fish: Complete Guide to Care & Appreciation\" is presented in a clean, watercolor-inspired design with a light blue and white background featuring subtle watercolor splashes and bubbles. The layout is divided into three main horizontal sections, each with distinct headings, visual icons, and explanatory text blocks, all arranged for easy readability.\n\n**Section 1: Healthy Habitat First: Tank Setup 101**\nThis section emphasizes foundational steps to avoid 80% of beginner fish loss. It features three core pre-fish steps, each with an illustrative icon and descriptive text box below:\n\n- **Tank Size Matching**: Illustrated by a rectangular aquarium with two goldfish, plants, rocks, and substrate. The accompanying text advises choosing a tank size that matches the full adult size of the selected fish species to prevent stunted growth and stress.\n \n- **2-4 Week Tank Cycling**: Depicted with a clock showing approximately 3:00 (indicating time) connected to a circular diagram of beneficial bacteria. The text instructs cycling the tank for 2 to 4 weeks before adding fish to establish nitrifying bacteria that break down toxic waste.\n\n- **Species-Specific Water Parameters**: Shown with a thermometer, a test kit vial with color gradient (pH scale), and two smaller test tubes with colored liquids. The text explains using test kits to adjust and monitor water parameters (pH, temperature, hardness) to match the specific needs of the chosen fish species.\n\n**Section 2: How to Appreciate Ornamental Fish Beyond Looks**\nThis section outlines three layers of enjoyment for hobbyists, presented as rounded rectangular boxes with illustrations and detailed descriptions:\n\n- **Aesthetic Trait Observation** (peach-colored header): Illustrated with two close-up images of colorful betta fish fins. 
The description encourages evaluating aesthetic traits such as color vibrancy, pattern symmetry, fin shape, and swimming movement.\n\n- **Natural Behavior Watching** (green header): Illustrated with two circular scenes—one showing schooling fish, the other depicting goldfish foraging and interacting. The text suggests observing natural behaviors like schooling, foraging, and courtship displays to recognize species-specific traits.\n\n- **Cultural & Origin Context** (beige header): Illustrated with a world map, a compass, a fish bowl, and a traditional pottery vessel. The description recommends researching the species’ origin, connecting its native wild habitat to unique adaptations and cultural significance in its home region.\n\n**Section 3: Quick Fixes for Common Beginner Issues**\nThis bottom section addresses immediate solutions for typical problems, with the subtitle “Resolve most problems in 24 hours or less with these steps.” It includes three illustrated problem-fix pairs:\n\n- **Cloudy Water Fix**: Illustrated with a cloudy aquarium, a bucket with a drop falling into it, and a healthy fish below. The solution involves testing for ammonia/nitrite spikes and performing 10–15% weekly partial water changes to restore balance.\n\n- **Fin Rot Treatment**: Illustrated with a fish exhibiting frayed fins and a bottle of medication. The instructions are to isolate affected fish immediately, test for high waste levels, and treat with aquarium-safe antibacterial medication as directed.\n\n- **Lethargic Fish Diagnosis**: Illustrated with a sluggish fish and a thermometer. The diagnostic steps include verifying water temperature matches species requirements, testing for nitrogen cycle imbalances, and checking for signs of bullying from tank mates.\n\nThe infographic uses consistent visual styling—hand-drawn, watercolor-style illustrations—with clear, bold headings and concise, informative subtext. 
All textual content is in English, and the overall structure promotes both practical care and deeper engagement with ornamental fish.", "width": 2720, "height": 1536}
{"prompt": "该信息图以“野生动物保护:解构与重塑”为主题,采用立体机械装置艺术风格,呈现一个由破碎玻璃、齿轮、镜头、望远镜、无人机、机械臂和植物等元素构成的复杂结构,象征科技与自然的融合与重构。整体背景为灰色混凝土墙面,营造出工业感与未来感并存的视觉氛围。画面中心是一个由金属支架支撑的多层结构,顶部是大型相机镜头,下方连接着双筒望远镜、机械臂和裂土中萌发的新芽,底部基座刻有“未来:重塑共生关系”,寓意从碎片化认知到系统性行动,最终实现人与自然和谐共生的愿景。\n\n信息图内容分为三大板块,分别对应“教育:认知碎片重组”、“实践:行动模块构建”和“未来:重塑共生关系”,三者呈自上而下、由理论到实践再到未来的逻辑递进关系。\n\n在“教育:认知碎片重组”部分,位于图像左上方,由多个透明破碎玻璃片组成,每片玻璃内嵌一个图标与文字说明,代表不同的教育方式:\n- 沉浸式体验:虚拟生态 —— 图标为戴VR眼镜的孩子与老虎互动。\n- 互动研学:知识解构 —— 图标为打开的书本与齿轮。\n- 专家讲座:思维碰撞 —— 图标为麦克风与声波。\n- 社区工坊:拼凑意识 —— 图标为拼图块与熊猫头像。\n\n这些玻璃片通过金色箭头指向中心结构,象征认知碎片被整合输入系统。\n\n在“实践:行动模块构建”部分,位于图像右下方,同样由多个透明玻璃片构成,展示具体的保护行动模块:\n- 反盗猎巡逻:技术防线 —— 图标为全副武装的巡护员与无人机。\n- 物种监测:数据追踪 —— 图标为手持GPS设备与平板电脑。\n- 公民科学:众创参与 —— 图标为多人协作使用工具(网、锤子、钳子)。\n- 栖息地修复:生态缝合 —— 图标为机械臂托举着裂土中生长的小树苗。\n\n这些模块通过齿轮、电线与机械结构相连,体现各行动模块之间的协同运作。\n\n整个结构的底部基座上刻有“未来:重塑共生关系”,作为最终目标,强调所有教育与实践努力的终极指向——建立可持续的人与自然共生关系。\n\n视觉设计上,大量使用透明玻璃、金属质感、机械零件和发光线条,结合冷色调光影,突出科技感与现代性。同时,新芽破土而出的意象注入希望与生命力,形成刚柔并济的视觉张力。信息图无传统图表或数据轴,而是通过符号化、隐喻化的视觉语言传递概念,属于概念性信息图(conceptual infographic),旨在激发思考而非提供量化数据。\n\n所有文本均为中文,包括标题、副标题、模块名称及描述,语言精炼,富有哲理与号召力。", "width": 1536, "height": 2720}
{"prompt": "Create an infographic featuring the title 'Financiación de los Niveles de Biovigilancia' at the top center. In the center, there is a large, 3D-styled pyramid diagram divided into four stacked horizontal levels, adopting a data-driven tech style with a color palette of medical blues and alert reds. To the left of the pyramid, a network of connected nodes integrates with large upward-pointing blue and red arrows. Each level of the pyramid contains illustrative icons such as data nodes, charts, and magnifying glasses, with smaller upward arrows connecting the levels internally to indicate upward flow. To the right of the pyramid, lines connect each segment to text labels describing the levels from bottom to top. The bottom two levels—a dark blue base labeled 'Departamentos Locales de Salud' and a medium blue segment labeled 'Control Regional de Enfermedades'—are grouped together by a vertical curly bracket on the far right classified as 'Financiación Operativa'. The top two levels—a light blue segment labeled 'EPA Nacional' and a red peak labeled 'Fondos Mundiales de Salud'—are grouped by another vertical curly bracket classified as 'Inversión Estratégica'. The given data is : [{\"classification\": \"Inversión Estratégica\", \"level_name\": \"Fondos Mundiales de Salud\"}, {\"classification\": \"Inversión Estratégica\", \"level_name\": \"EPA Nacional\"}, {\"classification\": \"Financiación Operativa\", \"level_name\": \"Control Regional de Enfermedades\"}, {\"classification\": \"Financiación Operativa\", \"level_name\": \"Departamentos Locales de Salud\"}]", "width": 2048, "height": 2048}
{"prompt": "The infographic titled \"The Alchemy of Nature: Food as Medicine\" is presented in a vibrant, watercolor-style illustration with a central symbolic tree labeled \"Tree of Life,\" from which four distinct quadrants radiate outward, each representing a different food category and its associated health benefit. The overall layout is circular and balanced, with the tree serving as the visual and conceptual anchor, connecting all four sections. The artistic style employs soft, blended watercolor washes, dynamic splashes, and natural motifs to evoke a sense of organic vitality and harmony.\n\nEach quadrant features a unique color palette, corresponding imagery, descriptive text block, and a highlighted \"Core Value.\" The text is rendered in a clean, legible serif font, with key phrases emphasized in bold or colored text for visual hierarchy.\n\nThe four sections are:\n\n1. **Vitality: Citrus and Immunity** (Top-Left Quadrant)\n - Visuals: Bright yellow-orange background with sunburst rays. Illustrations include whole and sliced oranges, lemons, and lime wedges, with juice splashing dynamically.\n - Text: \"Nature's defensive shield against seasonal ailments. These ingredients act as the catalyst for collagen production and cellular repair, fueling the body's natural resistance.\"\n - Core Value: **Immunity** (highlighted in orange).\n\n2. **Longevity: Berries and Protection** (Top-Right Quadrant)\n - Visuals: Deep purple and blue watercolor background with star-like sparkles. Features pomegranates (one whole, one cut open revealing seeds), blueberries, and berry splashes.\n - Text: \"Potent scavengers of free radicals that preserve cognitive function and heart health. They act as the body's time-keepers, preventing premature aging through concentrated phytonutrients.\"\n - Core Value: **Longevity** (highlighted in purple).\n\n3. 
**Renewal: Greens and Detox** (Bottom-Left Quadrant)\n - Visuals: Soft green and teal watercolor background with flowing leafy vines and bubbles. Depicts kale, spinach, parsley, and other leafy greens.\n - Text: \"The ultimate internal cleanser and blood builder. These plant-based powerhouses provide the magnesium and iron necessary for deep cellular oxygenation and metabolic rhythm.\"\n - Core Value: **Detoxification** (highlighted in green).\n\n4. **Endurance: Grains and Stability** (Bottom-Right Quadrant)\n - Visuals: Warm beige and golden brown tones with swirling patterns resembling grain spirals. Includes wheat stalks, oats, and scattered grains.\n - Text: \"The slow-release fuel for sustained mental and physical performance. High in fiber and essential proteins, these ingredients stabilize blood sugar and provide the foundation for long-term health.\"\n - Core Value: **Stability** (highlighted in brown).\n\nThe central \"Tree of Life\" has a thick, textured trunk and lush green foliage, symbolizing growth and interconnectedness. Its roots extend into the bottom-left quadrant (Greens), while its branches reach toward the top-right (Berries), visually reinforcing the flow of nourishment and balance.\n\nThis infographic uses a metaphorical and aesthetic approach to convey the concept of food as medicine, emphasizing holistic wellness through nature’s bounty. There are no charts, graphs, or numerical data; instead, information is encoded through color, imagery, and descriptive text blocks. The design is intended to be inspirational and educational, promoting a balanced diet aligned with core values: Immunity, Longevity, Detoxification, and Stability.", "width": 2048, "height": 2048}
{"prompt": "该信息图以手绘卡通风格呈现,主题为“【关于公共交通的新闻报道与信息更新】”,整体布局呈螺旋式或S形路径,从顶部开始,依次向下延伸至底部,象征着信息流动和城市发展进程。背景为浅米色,边框带有虚线和星星、云朵等装饰元素,营造出轻松、友好的视觉氛围。标题位于顶部中央,字体较大且加粗,下方配有淡蓝色波浪线装饰。\n\n信息图内容分为五个主要部分,通过一条蜿蜒的道路连接,每个部分均配有插图、标题横幅和说明文字框:\n\n1. **新线开通**(顶部左侧):\n - 插图:一列拟人化的地铁列车从隧道中驶出,车头有笑脸表情;上方有“新闻中心”对话气泡,旁边是带信号塔的云朵,象征信息传播。\n - 标题横幅:“新线开通”,黄色丝带样式。\n - 说明框:“地铁新线:连接城市新区域,缩短通勤时间。”\n - 配图:左下角为折叠地图与时钟图标,右下角为公文包与房屋图标,象征通勤与居住区连接。\n\n2. **线路优化**(顶部右侧):\n - 插图:一辆拟人化的公交车停靠在公交站台,站台上有三名乘客(成人与儿童),公交车上方有“线路优化”黄色横幅。\n - 思想气泡:一个循环箭头与爱心,象征优化带来的满意度提升。\n - 说明框:“公交调整:优化站点设置,提高覆盖率与效率。”\n - 配图:左下角为路线图与定位图标,右下角为竖起大拇指的手势图标,表示好评。\n\n3. **绿色出行倡议**(中部):\n - 插图:阳光明媚的户外场景,五位市民在绿树成荫的小路上骑行或步行,其中三人骑自行车(一人车前有篮子),两人步行。\n - 标题横幅:“绿色出行倡议”,绿色丝带样式。\n - 说明框:“低碳生活:鼓励步行与骑行,减少碳排放。”\n - 配图:左侧为两片绿叶,右侧为绿色脚印图标,强调环保理念。\n\n4. **智能支付升级**(底部左侧):\n - 插图:一只手握着智能手机,屏幕显示二维码,正对向两个闸机读卡器,周围有Wi-Fi信号和美元符号。\n - 标题横幅:“智能支付升级”,橙色圆角矩形。\n - 说明框:“便捷支付:全面支持扫码乘车,实现无感通行。”\n - 配图:左下角为手机与银行卡图标,象征多种支付方式。\n\n5. **未来展望**(底部右侧):\n - 插图:一架现代化高速列车(或飞行器)飞越城市天际线,背景有彩虹、云朵和星星,象征美好愿景。\n - 标题标签:“未来展望”,蓝色对话气泡。\n - 说明框:“智慧交通:构建互联互通、高效环保的未来出行体系。”\n - 配图:左下角为齿轮与云朵图标,代表技术与云端互联。\n\n整张信息图采用统一的可爱卡通画风,色彩柔和,以蓝、绿、黄为主色调,传达积极、环保、科技感强的城市交通发展愿景。所有文本均为简体中文,结构清晰,逻辑连贯,适合用于公共宣传或政策解读。", "width": 1664, "height": 2496}
{"prompt": "该信息图以“办公室零食分享”为主题,采用赛博朋克(Cyberpunk)风格的视觉设计,整体布局为横向六格漫画式叙事结构,每格代表一个章节,标题为“第1章”至“第6章”,顶部中央以霓虹灯效果突出主标题“办公室零食分享”。背景由电路板、发光管道、数据流和代码元素构成,营造出科技感十足的未来办公环境氛围。色彩以深蓝、紫色为主调,搭配荧光绿、粉紫、橙黄等高饱和度霓虹色,强化视觉冲击力。\n\n每一章节均包含插画、对话气泡、章节标题、摘要及统计数据,形成完整的故事线与数据支撑。\n\n---\n\n**第1章:零食的诱惑**\n- 插画:主角蓝发戴护目镜,大口吃着“未来能量条”薯片,口水四溅,表情贪婪。\n- 对话气泡:“就吃一片...真的!”\n- 摘要:“初始抵抗失败。”\n- 统计数据:\n - 意志力:0%\n - 零食库存:100%\n\n---\n\n**第2章:偷偷摸摸的尝试**\n- 插画:主角穿着连帽衫,背着电线背包,潜入办公室角落偷吃薯片,发出“唰唰”声,做“嘘~”手势。\n- 对话气泡:“没人看见...完美!”\n- 摘要:“成功潜入。”\n- 统计数据:\n - 潜行技能:95%\n - 被发现概率:5%\n\n---\n\n**第3章:被发现的尴尬**\n- 插画:老板监控系统(一个带耳机的大眼睛)发现主角偷吃,周围同事围观。主角慌张辩解。\n- 对话气泡:\n - 监控系统:“嘿!那是我的薯片!”\n - 主角:“额...我只是...测试一下口感!”\n- 摘要:“监控全面启动。”\n- 统计数据:\n - 尴尬指数:500%\n - 职业生涯危机:20%\n\n---\n\n**第4章:被迫分享的痛苦**\n- 插画:主角被一群表情狰狞、饥饿的同事围住,被迫分发薯片,自己泪流满面。\n- 标题框:“强制分享区”\n- 对话气泡:“我的心在滴血...”\n- 摘要:“资源重新分配。”\n- 统计数据:\n - 零食损失:80%\n - 怨气积累:300%\n\n---\n\n**第5章:空空如也的悲剧**\n- 插画:主角手持空袋,神情沮丧,胃部有蓝色电光闪烁,周围散落着废弃电线、绳索等杂物。\n- 思想气泡:悲伤流泪的表情符号 😢\n- 声音效果文字:“咕咕”\n- 摘要:“一切都结束了。”\n- 统计数据:\n - 饥饿感:9000%\n - 后悔程度:无限大\n\n---\n\n**第6章:复仇的计划**\n- 插画:主角双眼泛红,周身火焰环绕,正在图纸上绘制“零食保险箱2.0”的机械装置,充满决心。\n- 对话气泡:“下次...谁也别想动我的零食!”\n- 图纸下方标注:“零食保险箱2.0”\n- 摘要:“新的开始。”\n- 统计数据:\n - 决心:100%\n - 未来零食安全指数:99.9%\n\n---\n\n整体信息图通过六章漫画形式,将办公室中关于零食的私密行为转化为一场戏剧化、夸张化的“冒险旅程”,结合幽默与讽刺,揭示了职场中资源共享与个人欲望之间的冲突。每个章节不仅提供情节推进,还通过量化指标(如“意志力”、“怨气积累”、“未来零食安全指数”)赋予事件数据维度,增强表现力与趣味性。所有文字均为简体中文,符合中国网络文化语境,语言风格活泼、口语化,适合社交媒体传播。", "width": 2368, "height": 1760}
{"prompt": "该信息图以“路基动脉:沥青材质与智能养护”为主题,采用现代科技感十足的视觉设计风格,整体布局为网格式四象限结构,中央叠加一个核心模块,形成五部分协同展示的构图。背景为简洁的白色空间,带有几何线条和光影效果,营造出实验室或未来展厅的氛围。主标题位于顶部,使用粗体黑色字体,下方配有一条黄色装饰横线,点缀有小颗粒状元素,增强视觉层次。\n\n信息图通过五个主要模块分别阐述沥青路面材料与智能养护的关键技术,每个模块均包含一个由三维立体字母构成的视觉符号(ASPHALT、LASER、RECYCLE、COMPACT)或核心设备图像,并配有深灰色金属质感标签框,内含详细说明文字。\n\n1. **左上角模块:高性能沥青混合料(SMA)**\n - 视觉元素:立体大字“ASPHALT”由真实石料与沥青构成,表面呈现粗糙质感,部分沥青从字母边缘滴落,模拟真实材料状态。\n - 文本内容:\n - 标题:“高性能沥青混合料(SMA)”\n - 描述:“骨架密实结构。展示粗集料的石石嵌挤作用,高粘度改性沥青确保了高温稳定性与抗疲劳性能。”\n\n2. **右上角模块:智能激光病害检测**\n - 视觉元素:一块沥青路面样本置于展台上,上方有蓝色霓虹轮廓字“LASER”,一束蓝色激光从装置射向路面裂缝处,产生光点,同时显示路面内部的三维扫描波纹。\n - 文本内容:\n - 标题:“智能激光病害检测”\n - 描述:“实时三维重构。利用多线激光扫描技术,亚毫米级识别裂缝、坑槽,实现路面状况的数字化资产管理。”\n\n3. **中央模块:全生命周期养护**\n - 视觉元素:一块沥青板中间嵌入一个方形电子芯片,芯片发出蓝绿色光芒并辐射同心圆波纹,象征数据传感与智能监测。\n - 文本内容:\n - 标题:“全生命周期养护”\n - 描述:“数据驱动决策:从实验室研发到现场施工,再到预防性养护,科技让每一段公路都拥有‘自我感知’能力。”\n\n4. **左下角模块:RAP热再生技术**\n - 视觉元素:立体大字“RECYCLE”由开裂、风化的旧沥青材料构成,背景悬挂两幅热成像图(分别呈星形和圆形热点分布),颜色从蓝到红渐变,代表温度变化。\n - 文本内容:\n - 标题:“RAP热再生技术”\n - 描述:“循环可持续。将废旧路面铣刨、回收并添加再生剂重新利用,降低碳排放,延长路面服务寿命。”\n\n5. **右下角模块:智能压实控制系统**\n - 视觉元素:一个大型压路机滚筒正在碾压地面,前方是立体金属字“COMPACT”,具有高光泽反射效果,体现精密工程。\n - 文本内容:\n - 标题:“智能压实控制系统”\n - 描述:“精准密实。监控压实遍数与温度分布,防止漏压与过压,确保路面空隙率达到设计最优状态。”\n\n整体而言,该信息图通过高度拟物化的设计语言,结合动态光影、三维建模与科技符号,系统展示了沥青路面从材料创新、智能检测、数据管理、环保再生到施工控制的全链条智能化解决方案。各模块之间通过中央“全生命周期养护”概念串联,强调技术整合与数据驱动的核心理念。所有文本均为简体中文,无英文或其他语言混用,符合中国语境下的专业传播需求。", "width": 2048, "height": 2048}
{"prompt": "The infographic is presented as a richly detailed, oil-painting-style artwork framed in an ornate, gilded Baroque-style frame with intricate floral and scrollwork carvings. The overall aesthetic blends classical artistry with modern technological themes, creating a visual metaphor for the \"Renaissance\" of smart mobility. The background features textured canvas-like surfaces, with visible brushstrokes and paint smears, enhancing the impression of a living painting.\n\nAt the top center, the title \"The Renaissance of Smart Mobility\" is rendered in large, embossed, off-white serif typography with subtle beveling and shadow, giving it a three-dimensional appearance. Below the title, a horizontal band of thick, impasto-style paint strokes in blue, red, orange, and yellow adds dynamic visual energy, suggesting creativity and innovation.\n\nThe central image depicts a futuristic electric vehicle chassis, shown from a slightly elevated perspective, with its battery pack exposed. The chassis is mounted on four wheels and features copper-colored wiring connecting rectangular solid-state battery modules arranged in two rows. The vehicle’s interior is partially visible in the upper portion of the image, showing a modern dashboard with a digital display screen, steering wheel, and ambient lighting—rendered with realistic depth and reflections.\n\nThree key thematic sections are highlighted with callout boxes, each linked to specific parts of the vehicle or surrounding elements via thin white lines:\n\n1. **Sustainable Power Architecture** \n - Positioned on the left side, this section uses a brown, textured label resembling thick paint or clay. 
\n - Text: \"Utilizing Impasto techniques to depict the Solid-State battery modules, emphasizing the raw physical energy of future electrification.\" \n - Visual context: This label points to the battery pack in the chassis, which is rendered with raised, textured paint to simulate the impasto technique, emphasizing physicality and energy.\n\n2. **Cognitive Smart Accessories** \n - Located in the upper right, this section has a dark blue, torn-paper-style label. \n - Text: \"Building visual depth through Glazing to represent AI-driven sensors that merge human intuition with digital precision.\" \n - Visual context: The label points toward the vehicle’s dashboard, where glazing techniques (layered translucent paint) create depth and luminosity, symbolizing advanced AI-driven sensors.\n\n3. **Universal Neural Connectivity** \n - Situated in the lower right, this section features a beige, coarse-linen-textured label with torn edges. \n - Text: \"Applying coarse linen texture to the data stream visuals to ground Autonomous networks in a tangible, craftsman-like reality.\" \n - Visual context: The label points to the copper wiring and data flow within the chassis, which are depicted with a rough, fabric-like texture to represent tangible neural connectivity.\n\nAdditional artistic elements include:\n- A wooden paint palette in the bottom-left corner, holding dabs of blue, yellow, white, red, and orange paint, along with a metal palette knife.\n- A tray of paint blocks in the bottom-right, labeled with colors and one block marked \"V2X\", referencing Vehicle-to-Everything communication.\n- Two artist tools—a palette knife and a feather quill—extend into the frame from the left and right edges, respectively, reinforcing the theme of craftsmanship and creation.\n\nThe entire composition uses a dark, earthy color palette with highlights of metallic silver, copper, and vibrant paint splashes to draw attention to key technological components. 
The interplay between classical painting techniques (impasto, glazing, texture) and futuristic automotive technology creates a powerful narrative about the fusion of artistry and engineering in the evolution of smart mobility. The layout is balanced, with the central vehicle acting as the focal point, while the textual annotations guide the viewer’s eye to specific innovations.", "width": 1760, "height": 2368}
{"prompt": "This infographic, titled **\"Xiaoxiangfeng: The Timeless Tweed,\"** presents a sophisticated, narrative-driven visual guide to timeless fashion, structured into three thematic nodes. The overall design is opulent and refined, employing a soft pastel color palette dominated by champagne gold (#D4AF37), soft rose (#E6A8D7), and cream, accented with gold ornamental borders, white roses, and delicate ribbons. The layout is vertically segmented into three distinct panels, each framed with intricate golden scrollwork, creating a luxurious, vintage aesthetic reminiscent of haute couture.\n\n---\n\n### **Title Section (Top Panel)** \nThe title, “Xiaoxiangfeng: The Timeless Tweed,” is displayed in a bold, classic serif font centered at the top, flanked by decorative white roses tied with satin ribbons. The background features a subtle textured pattern, enhancing the elegance.\n\n---\n\n### **Node_01: Foundation of Style** \n**Visuals:** \n- A central illustration depicts a woman with short, styled brown hair, wearing a cream-colored tweed jacket and skirt set, accessorized with a multi-strand pearl necklace. She gently touches the front buttons of her jacket. \n- The setting is an elegant boutique or atelier, featuring pink tufted armchairs, a gilded mirror, racks of clothing, and floating sketches of garment designs and fabric swatches. \n\n**Textual Content:** \n- **Label:** Node_01 \n- **Quote:** “True style begins with the structure of the weave.” \n- **Keyword:** Foundation of Style \n\nThis node emphasizes the importance of fabric construction as the core of fashion design.\n\n---\n\n### **Node_02: Material Contrast** \n**Visuals:** \n- A close-up illustration focuses on a hand placing a double-pearl brooch onto the textured tweed fabric. The tweed displays a woven pattern with interlaced gold threads. \n- The background is blurred, drawing attention to the tactile contrast between the rough tweed and smooth pearls. 
\n\n**Textual Content:** \n- **Label:** Node_02 \n- **Quote:** “The secret lies in the harmony of textures.” \n- **Keyword:** Material Contrast \n- **Styling Tips:** \n - Color Code: #D4AF37 \n - Name: Champagne Gold \n - **Dialogue:** “Rough tweed meets smooth pearls—the perfect balance of strength and grace.” \n\nThis section highlights the aesthetic value of combining contrasting materials for visual and textural depth.\n\n---\n\n### **Node_03: The Complete Look** \n**Visuals:** \n- The woman from Node_01 is shown walking confidently down a sunlit cobblestone street lined with European-style buildings. She wears the same tweed suit, now paired with a matching long coat, exuding poise and elegance. \n- The lighting creates a warm, golden glow, reinforcing the theme of timeless sophistication. \n\n**Textual Content:** \n- **Label:** Node_03 \n- **Quote:** “Confidence is the final accessory to any outfit.” \n- **Keyword:** The Complete Look \n- **Fabric Tips:** \n - Color Code: #E6A8D7 \n - Name: Soft Rose \n - **Dialogue:** “It’s not just a jacket; it’s an attitude of timeless elegance.” \n\nThis final node elevates the outfit from mere clothing to a symbol of personal presence and enduring style.\n\n---\n\n### **Overall Design & Structure** \nThe infographic uses a **vertical triptych layout**, with each node separated by thin horizontal dividers and unified by consistent decorative elements—gold filigree frames, floral motifs, and cloud-shaped text bubbles. Text is presented in clean, readable fonts with hierarchical emphasis: titles in bold, quotes in regular weight, keywords in italicized or smaller font, and dialogue in a slightly condensed style. Color-coded sections (#D4AF37 and #E6A8D7) are explicitly labeled, providing practical styling and fabric guidance.\n\nAll textual content is in English, with no visible non-English text, confirming English as the primary language. 
The infographic functions both as an artistic fashion statement and a conceptual guide, blending visual storytelling with actionable fashion insights.", "width": 1664, "height": 2496}
{"prompt": "The infographic, titled \"Key Trends in Automotive Industry Software & Intelligent Development (2024-2030)\", is presented in a visually engaging chalkboard-style format with a wooden frame. The background mimics a black chalkboard with white, colored chalk-like text and hand-drawn illustrations, including stars, arrows, checkmarks, graphs, and erasers, giving it an educational and creative aesthetic. The title is prominently displayed at the top in bold white text, with decorative elements like a yellow star, a blue curved arrow, and a mathematical formula “fi = x = ±√(c²)/2” in the upper right corner.\n\nThe content is organized into a structured table with four main columns: **Trend Focus**, **Key Concept**, **Impact/Metric**, and **Timeline/Target**. The rows are grouped under three primary vertical categories on the left side, written vertically in bold white text: **CORE TECHNOLOGY SHIFTS**, **MARKET & REGULATORY DRIVERS**, and **END-USER EXPERIENCE EVOLUTION**. Each category has its own sub-section with distinct visual icons and color-coded highlights.\n\n---\n\n### CORE TECHNOLOGY SHIFTS\n\nThis section focuses on foundational technological advancements powering intelligent vehicles.\n\n- **Trend Focus**: \n - *Core Technology Trends Powering Intelligent Vehicles* — highlighted in a yellow circle with a red star icon. \n - Subtext: “Software-defined architecture replaces legacy hardware-first design.”\n\n- **Key Concepts**: \n 1. **Centralized E/E Architecture** — underlined in yellow. \n 2. **Generative AI Integration** — underlined in pink. \n 3. **Lifecycle OTA Updates** — underlined in blue, accompanied by a small diagram of a star → circle → checkmark.\n\n- **Impact/Metric**: \n 1. Cutting hardware redundancy by 40% on average. \n 2. In-vehicle voice assistants, predictive maintenance, autonomous driving path planning. \n 3. 
Enabling full lifecycle feature upgrades for 90% of new passenger vehicles.\n\n- **Timeline/Target**: \n - A green checkmark and a growing plant icon indicate progress. \n - “By 2028” circled in red. \n - A blue growth curve graph labeled “y” vs. “x” with dashed lines indicating thresholds.\n\n---\n\n### MARKET & REGULATORY DRIVERS\n\nThis section outlines external forces accelerating adoption.\n\n- **Trend Focus**: \n - *Market & Regulatory Push for Mass Adoption* — highlighted in yellow with orange rays and a downward arrow. \n - Subtext: “Policy and consumer demand accelerate intelligent software rollout.”\n\n- **Key Concepts**: \n 1. **$469B 2030 Market Size** — underlined in yellow, with a curved blue arrow pointing back to the concept. \n 2. **2026 ADAS Standard Mandate** — underlined in pink. \n 3. **UN R155 Cybersecurity Rule** — underlined in yellow, with a blue cube and cross symbol.\n\n- **Impact/Metric**: \n 1. Global automotive software market projected to reach $469 billion, growing at a 12.8% CAGR from 2024. \n 2. EU and China require Level 2+ advanced driver assistance systems (ADAS) as standard on all new passenger vehicles. \n 3. Mandates continuous software vulnerability monitoring for all connected vehicles sold in 54+ countries.\n\n- **Timeline/Target**: \n - “By 2030” underlined in green. \n - “By 2026” underlined in pink. \n - “Continuous” underlined in orange, with a warning triangle and a blue arrow.\n\n---\n\n### END-USER EXPERIENCE EVOLUTION\n\nThis section details innovations enhancing user interaction and personalization.\n\n- **Trend Focus**: \n - *Next-Generation In-Vehicle User Experience* — highlighted in yellow with a star icon. \n - Subtext: “Personalized, on-demand features redefine vehicle ownership.”\n\n- **Key Concepts**: \n 1. **On-Demand Feature Subscriptions** — underlined in orange. \n 2. **Context-Aware In-Vehicle Assistants** — underlined in gray. \n 3. 
**V2X Smart Connectivity** — underlined in blue, with a green checkmark.\n\n- **Impact/Metric**: \n 1. Generate $25 billion in annual OEM revenue (e.g., heated seats, advanced ADAS, infotainment packages). \n 2. Automatically adjust climate, seating, and entertainment based on driver biometrics and route data. \n 3. Enable real-time traffic signal optimization, hazard alerts, and smart city integration.\n\n- **Timeline/Target**: \n - “By 2027” underlined in green with a checkmark. \n - “Context-aware” underlined in green. \n - “By 2029” underlined in orange, with a group of stick figures in different colors (blue, yellow, orange, blue).\n\n---\n\nOverall, the infographic uses a combination of text, color-coding (yellow, pink, blue, green, orange), hand-drawn icons, and symbolic diagrams to convey complex information clearly and memorably. The visual style enhances readability and emphasizes key milestones, impacts, and timelines, making it suitable for presentations or strategic planning documents in the automotive industry. All textual content is in English, which constitutes the majority of visible text.", "width": 1536, "height": 2720}
{"prompt": "The infographic titled \"Women’s Health & Family Planning Support Programs\" is structured as a tree diagram with a central trunk branching into two main categories: \"Community Activities for Women’s Health & Family Planning Support\" on the left and \"Official Women’s & Family Planning Policy Benefits\" on the right. At the base of the tree is a third section labeled \"Access Application Workflow,\" which details the steps to access these services.\n\nThe overall layout uses a natural, organic tree design with brown branches and green leaf-shaped nodes for detailed information. The background is a light cream color, giving the infographic a warm, approachable feel. Each major section is contained in a rounded, peach-colored banner, while sub-points are in lighter beige or green leaf shapes. Illustrations of diverse women, families, healthcare professionals, and related icons (e.g., pills, stethoscopes, lactation rooms) accompany each service description, enhancing visual clarity and engagement.\n\n---\n\n**Left Branch: Community Activities for Women’s Health & Family Planning Support**\n\nThis branch highlights free, accessible community-based programs available to all women, with a note stating “No appointment or residency proof required for most services.”\n\n- **Free Gynecological Screenings**\n - Description: Free annual gynecological health screening camps for women aged 18+, held quarterly at local community health centers.\n - Visual: Two female doctors in white coats.\n\n- **Family Planning Consultations + Free Contraceptives**\n - Description: Free family planning consultation and contraceptive distribution sessions, held on the 15th of every month at community service hubs.\n - Visual: A group of three people (two men, one woman) discussing with speech bubbles containing gender symbols and a pill icon.\n\n- **Preconception & Postpartum Peer Support Groups**\n - Description: Peer support group meetings for postpartum women and families 
planning pregnancy, held biweekly to share experiences and access professional guidance.\n - Visual: Two women, one holding a baby, with heart and swirl speech bubbles.\n\n- **Adolescent Reproductive Health Workshops**\n - Description: Reproductive health and gender equality education workshops for adolescent girls and young women, hosted in local schools and community youth centers.\n - Visual: Three girls sitting at a table with a teacher pointing at a whiteboard.\n\n---\n\n**Right Branch: Official Women’s & Family Planning Policy Benefits**\n\nThis branch lists guaranteed policy benefits entitled to eligible individuals, emphasizing formal entitlements.\n\n- **Free Contraceptives + 50% Off Assisted Reproductive Care**\n - Description: All women with local residency or valid long-term residence permits are eligible for free basic contraceptive services and 50% subsidy for assisted reproductive treatment costs at designated public hospitals.\n - Visual: Pills, a medical caduceus symbol, and a single pill.\n\n- **158 Days Paid Maternity Leave + 15 Days Spousal Paternity Leave**\n - Description: Paid maternity leave of 158 days for eligible birthing parents, plus paid paternity leave of 15 days for spouses, with 100% salary coverage during the leave period.\n - Visual: A couple holding a baby.\n\n- **$800 One-Time Maternity Subsidy Low-Income Families**\n - Description: One-time maternity subsidy of $800 for low-income families meeting local eligibility criteria, disbursed within 30 days of application approval.\n - Visual: A hand receiving a dollar bill.\n\n- **Mandated Lactation Rooms in Eligible Workplaces**\n - Description: Mandate that all employers with 10+ female staff provide dedicated lactation rooms for female employees returning to work after maternity leave.\n - Visual: A sign reading “LACTATION ROOM” next to a woman breastfeeding.\n\n---\n\n**Base Section: Access Application Workflow**\n\nThis section outlines a three-step process for accessing the 
services and benefits above, presented as three distinct branches from the tree trunk.\n\n- **Step 1: Confirm Your Eligibility**\n - Instruction: Check eligibility online or in-person for free.\n - Details:\n - Visit your local community service center official social media account or in-person front desk to check eligibility for your desired service or benefit.\n - Prepare your ID card and residence permit if applying for policy-related financial benefits.\n - Note: All community activities are open to all women regardless of residency status.\n\n- **Step 2: Submit Your Registration or Application**\n - Instruction: Fill out a short form to request support.\n - Details:\n - Fill out a 1-page digital or paper registration form for your chosen service or benefit.\n - No additional documentation is required for free community activities like screenings or workshops.\n - Applications for financial benefits are processed within 15 working days of submission.\n\n- **Step 3: Access Your Support**\n - Instruction: Attend your service or receive your approved benefit.\n - Details:\n - Attend your scheduled activity at the listed community venue, or receive your financial subsidy via your registered bank account.\n - Community volunteers will send a free SMS reminder 24 hours before your scheduled activity.\n - You can reschedule your appointment for free up to 2 times if you are unable to attend as planned.\n\n---\n\nThe infographic employs a clear hierarchical structure using tree-like branching to organize information logically. Text is presented in clean, readable sans-serif fonts with varying sizes to denote headings, subheadings, and details. Color coding—peach for main sections, beige for secondary, and green for leaves—helps differentiate levels of information. Icons and illustrations are simple, cartoon-style drawings that complement the text without overwhelming it. 
The overall tone is supportive, inclusive, and informative, aimed at empowering women with knowledge about available resources and how to access them.", "width": 1536, "height": 2720}
{"prompt": "The infographic titled \"SPREADING SOCIAL WARMTH Through Charity Activities\" is a visually rich, circular-themed design that emphasizes compassion and community support. The central motif is a radiant golden heart with an intricate Chinese knot symbol, surrounded by sun-like rays, set within a glowing red circle. This core element is connected via luminous, circuit-like lines to five distinct circular vignettes arranged symmetrically around it, each representing a different charitable initiative. The overall aesthetic blends traditional East Asian artistry—featuring ink-wash mountains, stylized clouds, and classical architectural elements—with modern digital glows and circuit patterns, creating a harmonious fusion of heritage and innovation.\n\nEach of the five outer circles is framed with a glowing teal or orange border and contains both illustrative imagery and descriptive text in a consistent format: a bold title on a decorative banner, followed by a concise explanatory subtitle beneath it. The layout is radial and balanced, with visual connections from the center to each segment suggesting unity and interdependence among the causes.\n\nThe five initiatives are as follows:\n\n1. **COMMUNITY CARE & ELDERLY SUPPORT** \n - *Visuals*: An elderly person with a cane stands before a traditional Chinese pavilion, receiving a steaming bowl of soup from a hand extending from the left. The background features ornate lattice windows. \n - *Text*: \"Providing companionship, meals, and assistance to vulnerable seniors.\"\n\n2. **EDUCATION & YOUTH EMPOWERMENT** \n - *Visuals*: Two children in school uniforms stand beside an open book with a rocket launching from its pages and a lightbulb above them, symbolizing learning and inspiration. \n - *Text*: \"Donating books, supplies, and mentorship for future generations.\"\n\n3. 
**HEALTHCARE & WELLNESS ACCESS** \n - *Visuals*: A white ambulance with a red cross, a stethoscope, a traditional medicine jar, and ginseng roots are displayed against a backdrop of classic window latticework. \n - *Text*: \"Offering free clinics and health education to underserved communities.\"\n\n4. **DISASTER RELIEF & EMERGENCY AID** \n - *Visuals*: A drone delivers a box over a disaster zone with tents, a boat navigating waves, and a traditional building in the background. \n - *Text*: \"Delivering swift, essential supplies and support to affected areas.\"\n\n5. **ENVIRONMENTAL STEWARDSHIP** \n - *Visuals*: A hand cradles a seedling with a recycling symbol above it; in the background, a serene landscape includes a river, a stone bridge, trees, and mountains. \n - *Text*: \"Organizing clean-ups and tree planting for a healthier planet.\"\n\nThe background features soft, misty mountain ranges rendered in traditional Chinese painting style, with swirling clouds adding depth and movement. The color palette combines warm golds, deep reds, and cool teals, evoking both warmth and tranquility. The use of glowing outlines and circuit-like connectors suggests a modern, interconnected approach to philanthropy, while the cultural motifs ground the message in values of harmony and care.\n\nAll textual content is presented in English, with no other languages visible. The infographic uses a combination of symbolic illustration, clear typography, and structured layout to communicate its message effectively, making it accessible and emotionally resonant.", "width": 1536, "height": 2720}
{"prompt": "该信息图以工程蓝图(blueprint)风格呈现,背景为深蓝色网格,白色线条勾勒出汽车制动系统的各个组成部分及其工作原理。整体布局采用中心辐射式结构,围绕核心的制动盘和卡钳展开,同时通过多个子图和标注区域详细解释关键部件与技术参数。标题位于顶部中央,字体粗大醒目:“安全基石:制动系统解构”,右侧配有“SAFETY CERTIFIED STAMP”圆形认证印章,强调内容的专业性与安全性。\n\n---\n\n**主图结构与分区详解:**\n\n1. **液压逻辑 (Hydraulic Logic)**\n - 位于左上角,展示制动液管路系统示意图。\n - 标注①:主缸(Master Cylinder),连接真空助力器。\n - 标注②:制动液管路,箭头指示液体流动方向。\n - 标注③:制动液含水量 >3% 必须更换。\n - 文字说明:“制动的‘神经系统’。踏板压力通过真空助力器放大,驱动制动液在密闭管路中传递,确保四个轮端的压力均衡分配。”\n - 引入“帕斯卡定律”作为理论基础。\n\n2. **磨损极限 (Wear Limit)**\n - 位于右上角,展示刹车片剖面图。\n - 标注④:刹车片厚度测量范围。\n - 正常:>5mm\n - 警戒:3-5mm\n - 更换临界:<3mm\n - 图中标注“Wear indicator sensor”(磨损指示传感器)。\n - 文字说明:“安全预防的指标。刹车片摩擦层厚度低于2-3mm时必须更换。非正常的沟槽或偏磨反映了卡钳滑动销的润滑失效。”\n\n3. **能量转化 (Energy Conversion)**\n - 位于中央偏右,围绕制动盘和卡钳结构。\n - 文字说明:“制动系统的核心逻辑。通过摩擦力将车辆的动能转化为热能,强制改变物理状态。其设计的核心在于散热效能与抗热衰退能力。”\n - 旁边标注“制动距离公式:v² / 2μg”。\n\n4. **通风结构 (Venting)**\n - 位于左下角,展示通风刹车盘的剖面结构。\n - 标注①:内侧叶片;标注②:外侧叶片;标注④:总高度。\n - 箭头表示空气对流路径。\n - 文字说明:“抑制热衰退的关键。中空叶道设计加速空气对流,防止高温导致刹车油气化。它是剧烈驾驶下制动力的最后防线。”\n\n5. **材料成分 (Material)**\n - 位于右下角,展示刹车片材料微观结构。\n - 图中分解为四部分:\n - 陶瓷纤维\n - 树脂基体\n - 软金属颗粒\n - 润滑剂\n - 文字说明:“性能的微观保障。通过陶瓷纤维、树脂和软金属的配比,在静音、低粉尘与高温摩擦系数(Mu)之间寻求完美平衡。”\n\n6. **精密维护 (Precision Maintenance)**\n - 位于中央偏下,围绕制动盘。\n - 标注“公差: ±0.05mm”,强调制造精度。\n - 标注“轮毂螺栓扭矩: 120 Nm”,提示安装规范。\n\n7. **核心部件图示**\n - 中央主体为制动盘与卡钳组合的透视剖面图。\n - 制动盘带有通风孔设计,卡钳夹持于两侧。\n - 多个箭头环绕制动盘,示意热量散发路径与空气流动。\n\n---\n\n**视觉元素与数据编码方法:**\n- 使用标准工程制图符号:虚线表示内部结构,实线表示外部轮廓,箭头指示方向。\n- 所有尺寸、数值、公差均以精确数字标注,如“120 Nm”、“±0.05mm”、“>3%”、“v²/2μg”等。\n- 颜色编码:主要使用白色线条在蓝色背景上,重要术语如“帕斯卡定律”、“精密维护”、“能量转化”等用浅青色高亮,增强可读性。\n- 文字排版清晰,标题与正文分层,技术术语中英对照(如“Wear indicator sensor”、“Venting”、“Material”)。\n- 布局层次分明,从宏观系统到微观材料逐级深入,逻辑连贯。\n\n---\n\n**总结:**\n本信息图系统性地拆解了汽车制动系统的安全架构,涵盖液压传动、能量转换、材料科学、散热设计与维护标准五大维度。通过工程蓝图的形式,结合精准的技术参数与直观的结构示意图,全面揭示了现代制动系统如何在极端工况下保障行车安全。所有文本均为中文,符合任务要求的主语言判定。", "width": 1536, "height": 2720}
{"prompt": "The infographic titled \"SPREADING SOCIAL WARMTH Through Charity Activities\" is a visually rich, circular-themed design that emphasizes compassion and community support. The central motif is a radiant golden heart with an intricate Chinese knot symbol, surrounded by sun-like rays, set within a glowing red circle. This core element is connected via luminous, circuit-like lines to five distinct circular vignettes arranged symmetrically around it, each representing a different charitable initiative. The overall aesthetic blends traditional East Asian artistry—featuring ink-wash mountains, stylized clouds, and classical architectural elements—with modern digital glows and circuit patterns, creating a harmonious fusion of heritage and innovation.\n\nEach of the five outer circles is framed with a glowing teal or orange border and contains both illustrative imagery and descriptive text in a consistent format: a bold title on a decorative banner, followed by a concise explanatory subtitle beneath it. The layout is radial and balanced, with visual connections from the center to each segment suggesting unity and interdependence among the causes.\n\nThe five initiatives are as follows:\n\n1. **COMMUNITY CARE & ELDERLY SUPPORT** \n - *Visuals*: An elderly person with a cane stands before a traditional Chinese pavilion, receiving a steaming bowl of soup from a hand extending from the left. The background features ornate lattice windows. \n - *Text*: \"Providing companionship, meals, and assistance to vulnerable seniors.\"\n\n2. **EDUCATION & YOUTH EMPOWERMENT** \n - *Visuals*: Two children in school uniforms stand beside an open book with a rocket launching from its pages and a lightbulb above them, symbolizing learning and inspiration. \n - *Text*: \"Donating books, supplies, and mentorship for future generations.\"\n\n3. 
**HEALTHCARE & WELLNESS ACCESS** \n - *Visuals*: A white ambulance with a red cross, a stethoscope, a traditional medicine jar, and ginseng roots are displayed against a backdrop of classic window latticework. \n - *Text*: \"Offering free clinics and health education to underserved communities.\"\n\n4. **DISASTER RELIEF & EMERGENCY AID** \n - *Visuals*: A drone delivers a box over a disaster zone with tents, a boat navigating waves, and a traditional building in the background. \n - *Text*: \"Delivering swift, essential supplies and support to affected areas.\"\n\n5. **ENVIRONMENTAL STEWARDSHIP** \n - *Visuals*: A hand cradles a seedling with a recycling symbol above it; in the background, a serene landscape includes a river, a stone bridge, trees, and mountains. \n - *Text*: \"Organizing clean-ups and tree planting for a healthier planet.\"\n\nThe background features soft, misty mountain ranges rendered in traditional Chinese painting style, with swirling clouds adding depth and movement. The color palette combines warm golds, deep reds, and cool teals, evoking both warmth and tranquility. The use of glowing outlines and circuit-like connectors suggests a modern, interconnected approach to philanthropy, while the cultural motifs ground the message in values of harmony and care.\n\nAll textual content is presented in English, with no other languages visible. The infographic uses a combination of symbolic illustration, clear typography, and structured layout to communicate its message effectively, making it accessible and emotionally resonant.", "width": 1536, "height": 2720}
{"prompt": "The infographic presents a comprehensive overview of the Guangdong Province Courts’ approach to civil and administrative judgments, centered around the principle of \"Human-Centered Justice.\" The design is visually structured as a radiant tree or sunburst, with a central golden circle at the core from which six colorful branches extend outward, each representing a key pillar of the judicial system. The background features soft, warm gradients of peach, cream, and light blue, with subtle leaf-like patterns emanating from the center, symbolizing growth, reach, and interconnectedness.\n\nAt the center of the infographic is a circular emblem containing an illustration of two hands cradling a classical courthouse building with a balanced scale atop it, symbolizing justice and protection. Below this image, the main title reads: \"GUANGDONG PROVINCE COURTS: CIVIL & ADMINISTRATIVE JUDGMENTS (Human-Centered Justice)\" in bold, dark brown text.\n\nEach of the six surrounding branches is color-coded and contains an icon, a heading, and a brief descriptive subtext. The branches are arranged symmetrically around the central circle:\n\n1. **Top-Left Branch (Blue)** \n - **Title**: CITIZEN ACCESS & EMPOWERMENT \n - **Description**: Simplified procedures, legal aid, digital platforms for easier filing. \n - **Visual Element**: An illustration of a diverse group of citizens — a young couple with a child, an elderly woman, and a construction worker — smiling and standing together, symbolizing inclusivity and broad public access.\n\n2. **Top-Right Branch (Green)** \n - **Title**: CIVIL DISPUTE RESOLUTION \n - **Description**: Contracts, property, family matters. Focus on mediation & fair compensation. \n - **Visual Element**: Two hands shaking over a document with a house icon, representing agreement and resolution in civil matters.\n\n3. 
**Middle-Left Branch (Purple)** \n - **Title**: SMART COURTS & INNOVATION \n - **Description**: AI assistance, online trials, improving efficiency & reach. \n - **Visual Element**: A tablet displaying a gavel and network connections, indicating digital transformation and technological integration in court operations.\n\n4. **Middle-Right Branch (Yellow)** \n - **Title**: ADMINISTRATIVE OVERSIGHT \n - **Description**: Challenging government actions. Ensuring lawful & reasonable administration. \n - **Visual Element**: A person facing a government building with a shield bearing a checkmark, signifying accountability and protection against unlawful administrative acts.\n\n5. **Bottom-Left Branch (Pink/Red)** \n - **Title**: RESTORING HARMONY & TRUST \n - **Description**: Enforcing rights, resolving conflicts, strengthening social stability. \n - **Visual Element**: A peaceful scene of people walking across a bridge over water, surrounded by trees and smiling faces, evoking community well-being and social cohesion.\n\n6. **Bottom-Right Branch (Orange)** \n - **Title**: TRANSPARENT & FAIR PROCESS \n - **Description**: Open hearings, clear timelines, impartial adjudication. \n - **Visual Element**: A gavel striking a surface, with a speech bubble and two hands shaking along a winding path, illustrating communication, fairness, and procedural integrity.\n\nThe overall chart type is a radial or hub-and-spoke diagram, where the central theme connects to six supporting pillars. Data encoding is primarily textual and symbolic, using icons and illustrations to convey abstract concepts. The visual style is clean, modern, and illustrative, employing flat design elements with soft shadows and rounded corners to create a friendly and accessible tone. All text is presented in English, with no other languages visible. 
The infographic effectively communicates a holistic, citizen-focused judicial framework emphasizing accessibility, innovation, fairness, and social harmony.", "width": 1536, "height": 2720}
{"prompt": "The infographic titled \"The Art of Strategic Reporting\" with the subtitle \"Tactical Analysis of Conflict: Balancing Military Tech and Social Impact\" presents a structured, futuristic visual model for strategic conflict reporting. The overall layout is vertically organized into three primary layers, labeled as LAYER_TOP, LAYER_MIDDLE, and LAYER_BOTTOM, arranged in a descending stack that visually represents a process flow from observation to action. A fourth element, MINDSET_SHIFT: Ethics, is positioned separately to the right, emphasizing its foundational role across all layers.\n\nThe design employs a dark, high-tech aesthetic with a deep blue-gray background, accented by glowing neon blue and green elements, giving it a sci-fi or digital interface appearance. Central to the composition is a 3D-rendered stack of three geometric platforms, each representing one of the three layers. The top platform is a translucent blue sphere with hexagonal patterns and data nodes, symbolizing global surveillance or awareness. Below it sits a metallic gray octagonal plate with a cross-shaped cutout, representing analysis. The bottom platform glows with a soft green light and features a circular icon resembling a lightning bolt, symbolizing resilience or actionable energy. Beneath this stack, a small compass-like dial points toward the lower-right, suggesting direction or navigation.\n\nEach layer is accompanied by a text box with a dark gray background, rounded corners, and white sans-serif font, containing detailed descriptions. The boxes are labeled with capitalized identifiers (e.g., LAYER_TOP) and include subheadings, explanations, insights, and actions.\n\n---\n\n### Structural Breakdown:\n\n#### **LAYER_TOP: Awareness**\n- **Layer 1: Objective Observation (The Tech Scan)**\n - Description: Identify technical shifts and military movements without bias. 
Analyze the 'what' and 'how' before the 'why.'\n - Insight: *'Data-driven reporting minimizes emotional volatility in social discourse.'*\n- Visual Elements: Positioned above the blue sphere, with icons of a satellite dish and wireless signals floating nearby, reinforcing themes of surveillance and data collection.\n\n#### **LAYER_MIDDLE: Analysis**\n- **Layer 2: Social Impact Assessment (The Human Filter)**\n - Description: Evaluate how technological advancements affect social structures. Filter noise from signal using proven sociopolitical frameworks.\n - Action: *'Bridge the gap between hardware capabilities and human consequences.'* (with \"human consequences\" highlighted in orange)\n- Visual Elements: Adjacent to the metallic middle platform, with a wireless signal icon and a shield icon partially overlapping the sphere, symbolizing protection and filtering.\n\n#### **LAYER_BOTTOM: Resilience**\n- **Layer 3: Constructive Dialogue (The Solution Path)**\n - Description: Provide actionable reporting that fosters stability. Transition from conflict reporting to resilience building.\n - Output: *'Deliver clarity that empowers decision-makers and informs the public.'*\n- Visual Elements: Positioned below the green platform, with icons of a heartbeat waveform and a lighthouse, symbolizing vitality and guidance.\n\n#### **MINDSET_SHIFT: Ethics**\n- **Title:** *Neutrality is the Ultimate Weapon*\n- **Description:** In an era of information warfare, the most effective defense is verified truth. Reporting is not about taking sides; it's about illuminating the terrain.\n- Visual Elements: Placed to the right of the central stack, with a balance scale icon and binary code in the background, reinforcing themes of fairness, objectivity, and digital context.\n\n---\n\n### Chart Type & Data Encoding:\nThis is not a traditional chart but a conceptual diagram using spatial hierarchy and symbolic visualization. 
The vertical stacking encodes progression: Awareness → Analysis → Resilience. Color encoding distinguishes layers (blue for top, gray for middle, green for bottom), while icons serve as metaphors for each layer’s function. Textual content is presented in clear, segmented blocks with consistent formatting for headings, body text, and italicized quotes.\n\nThe infographic uses no numerical data or quantitative scales but relies on qualitative concepts and principles. All textual content is in English, with consistent terminology and phrasing that suggests a professional, analytical tone aimed at journalists, policymakers, or military strategists.\n\nThe overall message emphasizes a shift from reactive, emotionally charged reporting to a methodical, ethically grounded approach that integrates technological insight with human-centered analysis to build societal resilience.", "width": 1536, "height": 2720}
This source diff could not be displayed because it is too large. You can view the blob instead.
{"prompt": "The playful craft that embodies Russian cultural charm", "width": 2048, "height": 2048}
{"prompt": "A typical dish from the country where Naples is located", "width": 2048, "height": 2048}
{"prompt": "A gigantic bubble in the immediate foreground with a small town barely visible inside", "width": 2048, "height": 2048}
{"prompt": "A chocolate bar left in direct sunlight, highlighting the state of the chocolate", "width": 2048, "height": 2048}
{"prompt": "A solution of calcium carbonate reacting with acetic acid", "width": 2048, "height": 2048}
from __future__ import annotations
import argparse
import json
from pathlib import Path
from typing import Sequence
import numpy as np
import torch
from PIL import Image
import sensenova_u1
from sensenova_u1.utils import (
DEFAULT_IMAGE_PATCH_SIZE,
DEFAULT_VRAM_MODE,
InferenceProfiler,
add_offload_args,
best_available_device,
load_and_merge_lora_weight_from_safetensors,
load_model_and_tokenizer,
make_offload_ctx,
vram_mode_to_prefetch_count,
)
# Per-channel mean / std of the ``(img - mean) / std`` normalization that the
# generated tensors are denormalized from.
NORM_MEAN = (0.5, 0.5, 0.5)
NORM_STD = (0.5, 0.5, 0.5)

# Seed the CLI falls back to when none is given.
DEFAULT_SEED = 42

# Aspect-ratio label -> (width, height) in pixels. Sizes outside this table
# only trigger a warning ("outside the trained resolution set").
SUPPORTED_RESOLUTIONS: dict[str, tuple[int, int]] = {
    "1:1": (2048, 2048),
    "16:9": (2720, 1536),
    "9:16": (1536, 2720),
    "3:2": (2496, 1664),
    "2:3": (1664, 2496),
    "4:3": (2368, 1760),
    "3:4": (1760, 2368),
    "1:2": (1440, 2880),
    "2:1": (2880, 1440),
    "1:3": (1152, 3456),
    "3:1": (3456, 1152),
}

# The square bucket doubles as the default output size.
DEFAULT_WIDTH, DEFAULT_HEIGHT = SUPPORTED_RESOLUTIONS["1:1"]
def _warn_if_unsupported(width: int, height: int) -> None:
    """Print a warning when ``width`` x ``height`` is not a known bucket.

    Sizes listed in ``SUPPORTED_RESOLUTIONS`` produce no output. Any other
    size is only reported, never rejected.
    """
    if (width, height) not in SUPPORTED_RESOLUTIONS.values():
        listing = ", ".join(
            f"{ratio}->{bucket_w}x{bucket_h}"
            for ratio, (bucket_w, bucket_h) in SUPPORTED_RESOLUTIONS.items()
        )
        print(
            f"[warn] ({width}x{height}) is outside the trained resolution set; "
            f"quality may degrade. Supported buckets: {listing}"
        )
def _denorm(x: torch.Tensor) -> torch.Tensor:
    """Undo the ``(img - mean) / std`` normalization and clamp to [0, 1].

    ``NORM_MEAN`` / ``NORM_STD`` are created on ``x``'s device and dtype and
    viewed as ``(1, 3, 1, 1)`` so they broadcast against ``x``.
    """
    like_x = {"device": x.device, "dtype": x.dtype}
    shift = torch.tensor(NORM_MEAN, **like_x).view(1, 3, 1, 1)
    scale = torch.tensor(NORM_STD, **like_x).view(1, 3, 1, 1)
    return torch.clamp(x * scale + shift, min=0, max=1)
def _to_pil(batch: torch.Tensor) -> list[Image.Image]:
    """Turn a normalized ``[B, 3, H, W]`` float tensor into one PIL image per item."""
    unit_range = _denorm(batch.float())
    # Move channels last and onto the host before handing the data to NumPy.
    channels_last = unit_range.permute(0, 2, 3, 1).cpu().numpy()
    pixels = (channels_last * 255.0).round().astype(np.uint8)
    return list(map(Image.fromarray, pixels))
class SenseNovaU1T2I:
    """Text-to-image engine around the pair returned by ``load_model_and_tokenizer``.

    Note carried over from the original author: ``sensenova_u1`` registers its
    config / model with transformers at import time, so the underlying
    ``AutoModel.from_pretrained`` load needs no ``trust_remote_code=True``.
    """

    def __init__(
        self,
        model_path: str,
        device: str = "cuda",
        dtype: torch.dtype = torch.bfloat16,
        gguf_checkpoint: str | None = None,
        vram_mode: str = DEFAULT_VRAM_MODE,
        device_map: str | None = None,
        max_memory: str | None = None,
    ) -> None:
        """Load the model / tokenizer and remember the offload settings.

        ``vram_mode`` is translated into a prefetch count. The loader is told
        to prepare for offload whenever that count is positive. All other
        arguments are forwarded to ``load_model_and_tokenizer`` unchanged.
        """
        self.device = device
        self.vram_mode = vram_mode
        self._last_think_text: str = ""
        prefetch = vram_mode_to_prefetch_count(vram_mode)
        self.prefetch_count = prefetch
        loaded = load_model_and_tokenizer(
            model_path,
            dtype=dtype,
            device=device,
            gguf_checkpoint=gguf_checkpoint,
            for_offload=prefetch > 0,
            device_map=device_map,
            max_memory=max_memory,
        )
        self.model, self.tokenizer = loaded

    def _offload_ctx(self):
        """Return the context manager from ``make_offload_ctx`` for ``self.model``."""
        return make_offload_ctx(self.model, self.prefetch_count, self.device)

    @property
    def last_think_text(self) -> str:
        """Think text stored by the most recent ``generate`` call.

        Empty unless that call ran with ``think_mode=True``.
        """
        return self._last_think_text

    @torch.inference_mode()
    def generate(
        self,
        prompt: str,
        image_size: tuple[int, int] = (DEFAULT_WIDTH, DEFAULT_HEIGHT),
        cfg_scale: float = 4.0,
        cfg_norm: str = "none",
        timestep_shift: float = 3.0,
        cfg_interval: tuple[float, float] = (0.0, 1.0),
        num_steps: int = 50,
        batch_size: int = 1,
        seed: int = 0,
        think_mode: bool = False,
    ) -> list[Image.Image]:
        """Generate images for ``prompt`` and return them as PIL images.

        All sampling options are passed straight to the model's
        ``t2i_generate``. With ``think_mode=True`` the model returns
        ``(tensor, think_text)`` and the text is kept for
        :attr:`last_think_text`; otherwise the stored text is reset to ``""``.
        """
        sampling = {
            "image_size": image_size,
            "cfg_scale": cfg_scale,
            "cfg_norm": cfg_norm,
            "timestep_shift": timestep_shift,
            "cfg_interval": cfg_interval,
            "num_steps": num_steps,
            "batch_size": batch_size,
            "seed": seed,
            "think_mode": think_mode,
        }
        with self._offload_ctx() as runner:
            result = runner.t2i_generate(self.tokenizer, prompt, **sampling)
        # Normalize both return shapes to (tensor, think_text).
        tensor, self._last_think_text = result if think_mode else (result, "")
        return _to_pil(tensor)
def _resolve_size(sample: dict, default_width: int, default_height: int) -> tuple[int, int]:
"""Pick output (W, H) for a sample.
If the sample JSON provides ``width`` and ``height`` they take precedence.
Otherwise fall back to the CLI defaults (``--width`` / ``--height``).
"""
if "width" in sample and "height" in sample:
return int(sample["width"]), int(sample["height"])
return default_width, default_height
def _save_images(
    images: Sequence[Image.Image],
    out_path: Path,
) -> None:
    """Save ``images`` at or next to ``out_path`` and print each saved path.

    A single image is written to ``out_path`` itself. Any other count is
    written as ``<stem>_<index><suffix>`` siblings of ``out_path``. The parent
    directory is created first if needed.
    """
    out_path.parent.mkdir(parents=True, exist_ok=True)
    if len(images) == 1:
        targets = [out_path]
    else:
        targets = [
            out_path.with_name(f"{out_path.stem}_{idx}{out_path.suffix}")
            for idx in range(len(images))
        ]
    for image, target in zip(images, targets):
        image.save(target)
        print(f"[saved] {target}")
def parse_args(argv: Sequence[str] | None = None) -> argparse.Namespace:
    """Build the CLI parser for T2I inference and parse ``argv``.

    Args:
        argv: Argument list to parse. ``None`` (the default) makes argparse
            read ``sys.argv[1:]``, which is what the script entry point uses.

    Returns:
        The parsed namespace. Exactly one of ``--prompt`` / ``--jsonl`` is
        set (they form a required, mutually exclusive group).
        ``add_offload_args`` contributes further options; ``main`` reads
        ``vram_mode``, ``device_map`` and ``max_memory``, which are not
        declared here.
    """
    parser = argparse.ArgumentParser(description="T2I inference for SenseNova-U1.")
    parser.add_argument(
        "--model_path",
        required=True,
        help="HuggingFace Hub id (e.g. sensenova/SenseNova-U1-8B-MoT) or a local path.",
    )
    parser.add_argument(
        "--lora_path",
        required=False,
        default=None,
        help="HuggingFace Hub id or a local path to a lora model.",
    )

    # Input source: a single prompt or a JSONL batch, never both.
    src = parser.add_mutually_exclusive_group(required=True)
    src.add_argument("--prompt", help="Generate from a single prompt.")
    src.add_argument(
        "--jsonl",
        help='JSONL file, one sample per line. Required: {"prompt": ...}. '
        'Optional: {"width": W, "height": H, "seed": S, "think": BOOL, "type": TAG}.',
    )

    parser.add_argument("--output", default="output.png", help="Output path when using --prompt.")
    parser.add_argument("--output_dir", default="outputs", help="Output directory when using --jsonl.")
    parser.add_argument(
        "--width",
        type=int,
        default=DEFAULT_WIDTH,
        help=(
            f"Output image width (default: {DEFAULT_WIDTH}). For --jsonl, this is the "
            "fallback when a sample does not specify its own width/height. "
            f"Trained buckets: {sorted(set(SUPPORTED_RESOLUTIONS.values()))}."
        ),
    )
    parser.add_argument(
        "--height",
        type=int,
        default=DEFAULT_HEIGHT,
        help=f"Output image height (default: {DEFAULT_HEIGHT}). See --width for supported values.",
    )

    # Sampling options.
    parser.add_argument("--cfg_scale", type=float, default=4.0)
    parser.add_argument(
        "--cfg_norm",
        default="none",
        choices=["none", "global", "channel", "cfg_zero_star"],
        help=(
            # Each fragment ends with a space: adjacent literals are joined
            # verbatim, so without it the clauses run together in --help.
            "Classifier-free guidance rescaling mode. 'none' (default) is classical CFG; "
            "'global'/'channel' rescale the CFG output back to the conditional norm (globally / per-channel); "
            "'cfg_zero_star' is CFG-Zero*-style guidance."
        ),
    )
    parser.add_argument("--timestep_shift", type=float, default=3.0)
    parser.add_argument(
        "--cfg_interval",
        type=float,
        nargs=2,
        default=[0.0, 1.0],
        metavar=("LO", "HI"),
    )
    parser.add_argument("--num_steps", type=int, default=50)
    parser.add_argument(
        "--batch_size",
        type=int,
        default=1,
        help=(
            "Number of images to generate with --prompt (default: 1). "
            "Ignored with --jsonl, which always generates one image per sample."
        ),
    )
    parser.add_argument(
        "--seed",
        type=int,
        default=DEFAULT_SEED,
        help=(
            f"Random seed for reproducible sampling (default: {DEFAULT_SEED}). "
            "In --jsonl mode, a per-sample `seed` field in the JSONL overrides this."
        ),
    )

    # Runtime / hardware options.
    parser.add_argument(
        "--device",
        default=str(best_available_device()),
        help="Compute device, e.g. 'cuda', 'cuda:0', 'xpu', 'xpu:0', 'cpu'. Defaults to the best available accelerator.",
    )
    parser.add_argument(
        "--dtype",
        default="bfloat16",
        choices=["bfloat16", "float16", "float32"],
    )
    add_offload_args(parser)
    parser.add_argument(
        "--gguf_checkpoint",
        default=None,
        help=(
            "Optional path to a .gguf quantized checkpoint. When set, the dequantizing "
            "diffusers GGUF Linear layer is used instead of safetensors weights. "
            "Requires the [gguf] extra (gguf>=0.10.0, diffusers>=0.30.0)."
        ),
    )
    parser.add_argument(
        "--attn_backend",
        default="auto",
        choices=["auto", "flash", "sdpa"],
        help=(
            "Attention kernel used by the Qwen3 layers. "
            "'auto' picks flash-attn when it's importable and falls back to SDPA "
            "otherwise. 'flash' hard-requires flash-attn; 'sdpa' forces torch SDPA "
            "even when flash-attn is installed (useful for A/B-ing outputs)."
        ),
    )
    parser.add_argument(
        "--profile",
        action="store_true",
        help=(
            "Print timing and CUDA memory stats: model load time, average "
            "per-image generation time, peak GPU memory, and the same time "
            f"normalized per image token (patch size = {DEFAULT_IMAGE_PATCH_SIZE})."
        ),
    )

    # Prompt enhancement.
    parser.add_argument(
        "--enhance",
        action="store_true",
        help=(
            "Run the user prompt through an LLM enhancer before T2I inference. "
            "Helpful for short / loose prompts, especially infographic-style "
            "generation. Configure via U1_ENHANCE_{BACKEND,ENDPOINT,API_KEY,MODEL} "
            "env vars; defaults target Gemini 3.1 Pro. "
            "See docs/prompt_enhancement.md for details."
        ),
    )
    parser.add_argument(
        "--print_enhance",
        action="store_true",
        help="With --enhance: also print the enhanced prompt for debugging.",
    )

    # Reasoning (think) mode.
    parser.add_argument(
        "--think",
        action="store_true",
        help=(
            "Enable T2I reasoning (think) mode: the model first generates a "
            "<think>...</think> block, then runs image generation."
        ),
    )
    parser.add_argument(
        "--think_output",
        type=str,
        default=None,
        help=(
            "When using --prompt with --think: path to save the reasoning text. "
            "Default: ``<output_stem>.think.txt`` next to --output."
        ),
    )
    parser.add_argument(
        "--print_think",
        action="store_true",
        help="With --think: also print the reasoning block to stdout.",
    )
    return parser.parse_args(argv)
def _build_enhancer(args: argparse.Namespace):
"""Instantiate :class:`PromptEnhancer` + a dedicated event loop iff
``--enhance`` was passed.
We keep a single event loop for the whole run so the underlying
:class:`httpx.AsyncClient` inside the adapter can actually pool
connections across samples – spawning a fresh ``asyncio.run`` per
sample would otherwise tear the pool down every time.
Returns:
``(enhancer, loop)`` or ``(None, None)``.
"""
if not args.enhance:
return None, None
import asyncio
from dotenv import load_dotenv
from sensenova_u1.prompt_enhance import PromptEnhancer
load_dotenv()
enhancer = PromptEnhancer.from_env(style="infographic")
loop = asyncio.new_event_loop()
return enhancer, loop
def _maybe_enhance(enhancer, loop, prompt: str, *, verbose: bool) -> str:
"""Send ``prompt`` through the enhancer (if configured) and return the result."""
if enhancer is None:
return prompt
enhanced = loop.run_until_complete(enhancer.aenhance(prompt))
if verbose:
print(f"[enhance] original : {prompt}")
print(f"[enhance] enhanced : {enhanced}")
return enhanced
def main() -> None:
    """CLI entry point: load the model, then run one prompt or a JSONL batch.

    With ``--prompt`` the images are saved at ``--output`` and, in think
    mode, the reasoning text is saved next to them. With ``--jsonl`` each
    sample produces one image under ``--output_dir``, named
    ``<index>[_<type>]_<W>x<H>.png``, plus a matching ``.think.txt`` when
    think mode is on for that sample. The enhancer and its event loop, if
    any, are always closed on exit.
    """
    args = parse_args()
    dtype = {"bfloat16": torch.bfloat16, "float16": torch.float16, "float32": torch.float32}[args.dtype]
    sensenova_u1.set_attn_backend(args.attn_backend)
    print(f"[attn] backend={args.attn_backend!r} (effective={sensenova_u1.effective_attn_backend()!r})")
    profiler = InferenceProfiler(
        enabled=args.profile,
        device=args.device,
        config={
            "vram_mode": args.vram_mode,
            "attn_backend": sensenova_u1.effective_attn_backend(),
            "dtype": args.dtype,
            "gguf": args.gguf_checkpoint,
        },
    )
    enhancer, loop = _build_enhancer(args)
    try:
        with profiler.time_load():
            engine = SenseNovaU1T2I(
                args.model_path,
                device=args.device,
                dtype=dtype,
                gguf_checkpoint=args.gguf_checkpoint,
                vram_mode=args.vram_mode,
                device_map=args.device_map,
                max_memory=args.max_memory,
            )
        if args.lora_path is not None:
            print(f"load lora {args.lora_path}")
            engine.model = load_and_merge_lora_weight_from_safetensors(engine.model, args.lora_path)
        # argparse yields a list for nargs=2; generate() is annotated with a tuple.
        cfg_interval = tuple(args.cfg_interval)

        # ---- single-prompt mode ----
        if args.prompt is not None:
            prompt = _maybe_enhance(enhancer, loop, args.prompt, verbose=args.print_enhance)
            _warn_if_unsupported(args.width, args.height)
            with profiler.time_generate(args.width, args.height, args.batch_size):
                images = engine.generate(
                    prompt,
                    image_size=(args.width, args.height),
                    cfg_scale=args.cfg_scale,
                    cfg_norm=args.cfg_norm,
                    timestep_shift=args.timestep_shift,
                    cfg_interval=cfg_interval,
                    num_steps=args.num_steps,
                    batch_size=args.batch_size,
                    seed=args.seed,
                    think_mode=args.think,
                )
            _save_images(images, Path(args.output))
            if args.think:
                think_path = (
                    Path(args.think_output) if args.think_output else Path(args.output).with_suffix(".think.txt")
                )
                think_path.parent.mkdir(parents=True, exist_ok=True)
                think_path.write_text(engine.last_think_text, encoding="utf-8")
                print(f"[saved] {think_path}")
                if args.print_think:
                    print("--- think ---")
                    print(engine.last_think_text)
                    print("--- end think ---")
            profiler.report()
            return

        # ---- JSONL batch mode ----
        out_dir = Path(args.output_dir)
        out_dir.mkdir(parents=True, exist_ok=True)
        # Read as UTF-8 explicitly: prompts may be non-ASCII and the default
        # encoding of open() depends on the platform locale.
        with open(args.jsonl, encoding="utf-8") as f:
            samples = [json.loads(line) for line in f if line.strip()]
        try:
            from tqdm import tqdm
        except ImportError:
            # tqdm is optional; fall back to plain iteration without a progress bar.
            def tqdm(x, **_kw):  # type: ignore[no-redef]
                return x

        for i, sample in enumerate(tqdm(samples, desc="T2I")):
            w, h = _resolve_size(sample, args.width, args.height)
            _warn_if_unsupported(w, h)
            # Per-sample fields override the CLI-wide settings.
            seed_i = int(sample.get("seed", args.seed))
            think_i = bool(sample["think"]) if "think" in sample else args.think
            prompt = _maybe_enhance(enhancer, loop, sample["prompt"], verbose=args.print_enhance)
            with profiler.time_generate(w, h, 1):
                images = engine.generate(
                    prompt,
                    image_size=(w, h),
                    cfg_scale=args.cfg_scale,
                    cfg_norm=args.cfg_norm,
                    timestep_shift=args.timestep_shift,
                    cfg_interval=cfg_interval,
                    num_steps=args.num_steps,
                    batch_size=1,
                    seed=seed_i,
                    think_mode=think_i,
                )
            tag = sample.get("type")
            # Build both file names from one base so a ".png" inside the tag
            # cannot corrupt the think-file name.
            base = f"{i + 1:04d}" + (f"_{tag}" if tag else "") + f"_{w}x{h}"
            images[0].save(out_dir / f"{base}.png")
            if think_i:
                think_stem = f"{base}.think.txt"
                (out_dir / think_stem).write_text(engine.last_think_text, encoding="utf-8")
                if args.print_think:
                    print(f"[think] sample {i + 1} -> {think_stem}")
        profiler.report()
    finally:
        if enhancer is not None:
            try:
                loop.run_until_complete(enhancer.aclose())
            finally:
                # Close the loop even if shutting the enhancer down fails.
                loop.close()


if __name__ == "__main__":
    main()
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment