Vertex AI Imagen for Automated Image Generation
Why Vertex AI Imagen
For production image generation at scale, I have found Vertex AI Imagen to be the most reliable option. It offers consistent quality, reasonable pricing, strong safety filters, and the enterprise reliability you expect from Google Cloud. I use it for generating blog thumbnails, social media graphics, and product imagery across multiple client projects.
Setting Up the Client
import json
import logging
import os
import tempfile
from datetime import datetime, timezone

import vertexai
from google.cloud import aiplatform
from vertexai.preview.vision_models import ImageGenerationModel
def setup_vertex(
    project: str = "your-project-id",
    location: str = "us-central1",
    model_name: str = "imagen-3.0-generate-001",
):
    """Initialise the Vertex AI SDK and load an Imagen generation model.

    The defaults reproduce the previously hard-coded values, so existing
    zero-argument calls behave exactly as before.

    Args:
        project: Google Cloud project ID passed to ``vertexai.init``.
        location: Region passed to ``vertexai.init``.
        model_name: Model identifier passed to
            ``ImageGenerationModel.from_pretrained``.

    Returns:
        The ``ImageGenerationModel`` instance for ``model_name``.
    """
    vertexai.init(project=project, location=location)
    return ImageGenerationModel.from_pretrained(model_name)
# Module-level Imagen client, created at import time; generate_image() calls it.
model = setup_vertex()
def generate_image(
    prompt: str,
    negative_prompt: str = "",
    aspect_ratio: str = "1:1",
    num_images: int = 1
) -> list:
    """Generate images for ``prompt`` with the module-level Imagen ``model``.

    Args:
        prompt: Text description of the desired image.
        negative_prompt: Text describing what the image should not contain.
        aspect_ratio: Aspect ratio string forwarded to the model.
        num_images: Number of candidates to request.

    Returns:
        The ``images`` attribute of the model response.
    """
    # Fixed policy settings live next to the per-call options so the full
    # request is visible in one place.
    generation_options = {
        "negative_prompt": negative_prompt,
        "number_of_images": num_images,
        "aspect_ratio": aspect_ratio,
        "safety_filter_level": "block_some",
        "person_generation": "allow_adult",
    }
    result = model.generate_images(prompt=prompt, **generation_options)
    return result.images
Prompt Optimization for Imagen
Every image generation model has its own prompt language quirks. Imagen responds well to specific patterns that I have identified through extensive testing.
class ImagenPromptOptimizer:
    """Build positive and negative prompts from a subject and a style key."""

    # Generic quality descriptors; optimize() appends the first two.
    QUALITY_SUFFIXES = [
        "professional photography",
        "high quality",
        "sharp focus",
        "well-lit"
    ]

    # Style key -> descriptive phrase inserted into the prompt.
    STYLE_MAP = {
        "photo": "photorealistic, professional photograph, DSLR quality",
        "illustration": "digital illustration, clean lines, modern style",
        "minimal": "minimalist design, clean background, simple composition",
        "3d": "3D rendered, ray tracing, realistic lighting, studio quality",
        "watercolor": "watercolor painting, soft edges, artistic, hand-painted feel"
    }

    def optimize(self, subject: str, style: str = "photo", extras: str = "") -> str:
        """Return ``subject`` followed by style and quality descriptors.

        Args:
            subject: What the image should depict.
            style: A key of ``STYLE_MAP``; an unknown value is used verbatim
                as the style description.
            extras: Optional free-form text appended at the end.

        Returns:
            The comma-separated prompt string.
        """
        style_phrase = self.STYLE_MAP.get(style, style)
        quality_phrase = ", ".join(self.QUALITY_SUFFIXES[:2])
        core = f"{subject}, {style_phrase}, {quality_phrase}"
        return f"{core}, {extras}" if extras else core

    def build_negative(self, style: str = "photo") -> str:
        """Return the negative prompt for ``style``.

        Every style gets the common quality exclusions; "photo" and
        "illustration" additionally exclude the opposite rendering style.
        """
        common = "blurry, low quality, distorted, watermark, text overlay"
        style_specific = {
            "photo": "cartoon, illustration, painting",
            "illustration": "photorealistic, photograph",
        }
        addition = style_specific.get(style)
        if addition is None:
            return common
        return f"{common}, {addition}"
Batch Generation Pipeline
For generating hundreds of images, I built a pipeline that handles queuing, generation, quality checks, and storage.
import asyncio
from google.cloud import storage
from dataclasses import dataclass
@dataclass
class ImageRequest:
    """One image to generate as part of a batch."""

    # Request identifier; used in result records and the uploaded object name.
    id: str
    # What the image should depict; passed to the prompt optimizer.
    subject: str
    # Style key passed to the prompt optimizer for both prompts.
    style: str
    # Aspect ratio string forwarded to image generation.
    aspect_ratio: str
    # Label for the intended use of the image.
    use_case: str
class ImagePipeline:
    """Batch pipeline: build prompts, generate candidates, pick one, upload it.

    Each request is processed independently; a failure is logged and recorded
    in the result list instead of aborting the batch.
    """

    def __init__(self, bucket_name: str):
        """Create the prompt optimizer and the Cloud Storage bucket handle.

        Args:
            bucket_name: Name of the bucket that receives generated images.
        """
        # Generation goes through generate_image(), which uses the
        # module-level model; this attribute is kept for callers that read it.
        self.model = setup_vertex()
        self.optimizer = ImagenPromptOptimizer()
        self.storage = storage.Client()
        self.bucket = self.storage.bucket(bucket_name)

    def process_batch(
        self,
        requests: "list[ImageRequest]",
        num_candidates: int = 2,
    ) -> "list[dict]":
        """Generate, select and upload one image per request.

        Args:
            requests: Requests to process, in order.
            num_candidates: Images generated per request before the best one
                is selected. Defaults to 2, the previously hard-coded value.

        Returns:
            One dict per request. Successful entries contain ``id``,
            ``status`` ("success"), ``url`` and ``prompt``; failed entries
            contain ``id``, ``status`` ("failed") and ``error``.
        """
        results = []
        for req in requests:
            prompt = self.optimizer.optimize(req.subject, req.style)
            negative = self.optimizer.build_negative(req.style)
            try:
                images = generate_image(
                    prompt=prompt,
                    negative_prompt=negative,
                    aspect_ratio=req.aspect_ratio,
                    num_images=num_candidates,
                )
                best = self._select_best(images, req.subject)
                url = self._upload(best, req.id)
            except Exception as exc:
                # Batch boundary: one bad request must not stop the others,
                # but the failure is logged with its traceback.
                logging.getLogger(__name__).exception(
                    "Image request %s failed", req.id
                )
                results.append({
                    "id": req.id,
                    "status": "failed",
                    "error": str(exc)
                })
            else:
                results.append({
                    "id": req.id,
                    "status": "success",
                    "url": url,
                    "prompt": prompt
                })
        return results

    def _select_best(self, images, subject: str = ""):
        """Return the highest-scoring candidate from ``images``.

        Candidates are scored with ``score_image``. Scoring is best-effort:
        a candidate that cannot be scored is skipped with a warning, and if
        none can be scored the first candidate is returned.

        Args:
            images: Candidate images returned by generation.
            subject: Intended subject, forwarded to ``score_image``.

        Raises:
            ValueError: If ``images`` is empty.
        """
        candidates = list(images)
        if not candidates:
            raise ValueError("no images were generated for this request")
        if len(candidates) == 1:
            # Nothing to compare; skip the scoring calls.
            return candidates[0]

        best, best_score = candidates[0], float("-inf")
        for candidate in candidates:
            try:
                score = score_image(self._read_image_bytes(candidate), subject)
            except Exception as exc:
                logging.getLogger(__name__).warning(
                    "Could not score candidate image: %s", exc
                )
                continue
            # Strict comparison keeps the earliest candidate on ties.
            if score > best_score:
                best, best_score = candidate, score
        return best

    @staticmethod
    def _read_image_bytes(image) -> bytes:
        """Return the PNG bytes of ``image`` by saving it to a temp file."""
        with tempfile.TemporaryDirectory() as tmp_dir:
            path = os.path.join(tmp_dir, "candidate.png")
            image.save(path)
            with open(path, "rb") as handle:
                return handle.read()

    def _upload(self, image, request_id: str) -> str:
        """Upload ``image`` as ``generated/{request_id}.png`` and return its URL.

        The image is staged in a temporary directory that is removed
        afterwards, so no files accumulate on disk and a ``request_id``
        containing path separators cannot affect the local path.

        NOTE(review): the returned ``public_url`` is presumably only
        reachable if the object or bucket is publicly readable; confirm the
        bucket's access settings.
        """
        blob = self.bucket.blob(f"generated/{request_id}.png")
        with tempfile.TemporaryDirectory() as tmp_dir:
            # Keep the .png suffix on the staged file.
            local_path = os.path.join(tmp_dir, "image.png")
            image.save(local_path)
            blob.upload_from_filename(local_path)
        return blob.public_url
Quality Scoring with Vision Models
When generating multiple candidates per request, I use a vision model to score each image and select the best one.
from vertexai.generative_models import GenerativeModel, Part
import base64
def score_image(image_bytes: bytes, intended_subject: str) -> float:
    """Ask a Gemini vision model to rate an image and return the overall score.

    Args:
        image_bytes: PNG-encoded image data.
        intended_subject: What the image is supposed to depict.

    Returns:
        The overall score reported by the model, the mean of the individual
        scores when no overall value is present, or 5.0 when the reply
        contains no numeric scores.

    Raises:
        ValueError: If the reply is not a JSON object
            (``json.JSONDecodeError`` is a subclass of ``ValueError``).
    """
    vision_model = GenerativeModel("gemini-2.0-flash")
    image_part = Part.from_data(image_bytes, mime_type="image/png")
    # The key names are spelled out so the reply matches what the parser reads.
    instructions = (
        "Score this image on a scale of 1-10 for each criterion:\n"
        "1. Visual quality (sharpness, lighting, composition)\n"
        f"2. Subject accuracy (does it depict: {intended_subject})\n"
        "3. Aesthetic appeal (is it visually pleasing)\n"
        "4. Professional quality (suitable for commercial use)\n"
        'Return only a JSON object with the numeric keys "visual_quality", '
        '"subject_accuracy", "aesthetic_appeal", "professional_quality" and '
        '"overall" (the average of the four scores).'
    )
    response = vision_model.generate_content([image_part, instructions])
    return _extract_overall_score(response.text)


def _extract_overall_score(text: str, default: float = 5.0) -> float:
    """Parse a model reply into a single overall score.

    Text outside the outermost braces (for example Markdown code fences) is
    ignored before parsing.
    """
    start, end = text.find("{"), text.rfind("}")
    payload = text[start:end + 1] if 0 <= start < end else text.strip()
    scores = json.loads(payload)
    if not isinstance(scores, dict):
        raise ValueError("score response is not a JSON object")

    def _is_number(value) -> bool:
        # bool is a subclass of int but is never a valid score.
        return isinstance(value, (int, float)) and not isinstance(value, bool)

    overall = scores.get("overall")
    if _is_number(overall):
        return float(overall)
    numeric = [float(value) for value in scores.values() if _is_number(value)]
    return sum(numeric) / len(numeric) if numeric else default
Handling Safety Filters
Imagen's safety filters are aggressive, which is generally good but can block legitimate content. When a prompt triggers a filter, I have a fallback strategy:
def generate_with_fallback(prompt: str, **kwargs) -> list:
    """Generate images, retrying once with a rephrased prompt on safety errors.

    Args:
        prompt: The original image prompt.
        **kwargs: Forwarded unchanged to ``generate_image`` on both attempts.

    Returns:
        The images from the first attempt, or from the retry when the first
        attempt raised an error whose message mentions "safety".

    Raises:
        Exception: Any first-attempt error that does not mention "safety",
            and any error raised by the retry.
    """
    try:
        images = generate_image(prompt, **kwargs)
    except Exception as exc:
        blocked_by_safety = "safety" in str(exc).lower()
        if not blocked_by_safety:
            raise
        # Rephrase the prompt to be more generic
        return generate_image(sanitize_prompt(prompt), **kwargs)
    return images
def sanitize_prompt(prompt: str) -> str:
    """Rephrase ``prompt`` with Gemini so it is less likely to trip a filter.

    The model is told to return only the rewritten prompt, and the reply is
    trimmed of whitespace and wrapping double quotes, because the result is
    fed straight back into image generation.

    Args:
        prompt: The image prompt to rephrase.

    Returns:
        The rephrased prompt text.
    """
    # Named `rewriter` to avoid shadowing the module-level Imagen `model`.
    rewriter = GenerativeModel("gemini-2.0-flash")
    response = rewriter.generate_content(
        "Rephrase this image generation prompt to be safe and appropriate "
        "while keeping the core visual concept intact. Return only the "
        "rephrased prompt, with no explanation and no surrounding quotation "
        f"marks: {prompt}"
    )
    return response.text.strip().strip('"').strip()
Cost Tracking
Imagen pricing is per image generated. At scale, tracking costs per project and per use case is essential.
class CostTracker:
    """In-memory log of generated images and their estimated cost."""

    PRICE_PER_IMAGE = 0.04  # Approximate, check current pricing

    def __init__(self):
        """Start with an empty log."""
        self.log = []

    def record(self, request_id: str, num_images: int, project: str):
        """Append one log entry for a generation request.

        Args:
            request_id: Identifier of the request that produced the images.
            num_images: Number of images generated (all are billed).
            project: Project label used for filtering in ``report``.
        """
        self.log.append({
            "request_id": request_id,
            "num_images": num_images,
            "cost": num_images * self.PRICE_PER_IMAGE,
            "project": project,
            # Timezone-aware UTC timestamp; datetime.utcnow() is deprecated
            # and returns a naive value.
            "timestamp": datetime.now(timezone.utc).isoformat()
        })

    def report(self, project: "str | None" = None) -> dict:
        """Summarise logged usage, optionally for a single project.

        Args:
            project: Only count entries with this project label. ``None`` or
                an empty string counts every entry.

        Returns:
            A dict with ``total_images`` and ``total_cost``.
        """
        if project:
            entries = [e for e in self.log if e["project"] == project]
        else:
            entries = self.log
        return {
            "total_images": sum(e["num_images"] for e in entries),
            "total_cost": sum(e["cost"] for e in entries)
        }
Lessons Learned
- Always generate 2-4 candidates and select the best. The quality variance between generations is significant.
- Negative prompts matter more than you think. Spending time on negative prompts prevents the most common quality issues.
- Aspect ratio should match your target platform exactly. Do not generate a square image and crop it later.
- Cache prompts and results. If you need the same type of image again, check the cache first.
Vertex AI Imagen is my default choice for production image generation. The API reliability, consistent quality, and Google Cloud integration make it ideal for automated pipelines that need to produce hundreds of images without manual intervention.