diff --git a/apps/docs/components/ui/icon-mapping.ts b/apps/docs/components/ui/icon-mapping.ts index 312eb8893ae..39c22dc25ff 100644 --- a/apps/docs/components/ui/icon-mapping.ts +++ b/apps/docs/components/ui/icon-mapping.ts @@ -264,6 +264,8 @@ export const blockTypeToIconMap: Record = { extend_v2: ExtendIcon, fathom: FathomIcon, file: DocumentIcon, + file_v2: DocumentIcon, + file_v3: DocumentIcon, file_v4: DocumentIcon, findymail: FindymailIcon, firecrawl: FirecrawlIcon, @@ -309,6 +311,7 @@ export const blockTypeToIconMap: Record = { iam: IAMIcon, identity_center: IdentityCenterIcon, image_generator: ImageIcon, + image_generator_v2: ImageIcon, imap: MailServerIcon, incidentio: IncidentioIcon, infisical: InfisicalIcon, @@ -341,6 +344,7 @@ export const blockTypeToIconMap: Record = { microsoft_planner: MicrosoftPlannerIcon, microsoft_teams: MicrosoftTeamsIcon, mistral_parse: MistralIcon, + mistral_parse_v2: MistralIcon, mistral_parse_v3: MistralIcon, monday: MondayIcon, mongodb: MongoDBIcon, @@ -421,6 +425,7 @@ export const blockTypeToIconMap: Record = { vercel: VercelIcon, video_generator: VideoIcon, video_generator_v2: VideoIcon, + video_generator_v3: VideoIcon, vision: EyeIcon, vision_v2: EyeIcon, wealthbox: WealthboxIcon, diff --git a/apps/docs/content/docs/en/tools/image_generator.mdx b/apps/docs/content/docs/en/tools/image_generator.mdx index 7e1f25bc642..f44eba6f1f6 100644 --- a/apps/docs/content/docs/en/tools/image_generator.mdx +++ b/apps/docs/content/docs/en/tools/image_generator.mdx @@ -6,63 +6,75 @@ description: Generate images import { BlockInfoCard } from "@/components/ui/block-info-card" {/* MANUAL-CONTENT-START:intro */} -[DALL-E](https://openai.com/dall-e-3) is OpenAI's advanced AI system designed to generate realistic images and art from natural language descriptions. 
As a state-of-the-art image generation model, DALL-E can create detailed and creative visuals based on text prompts, allowing users to transform their ideas into visual content without requiring artistic skills. +The Image Generator block creates images from text prompts using leading image generation providers. Choose OpenAI for GPT Image models, Google Gemini for Nano Banana models, or Fal.ai for a multi-model catalog that includes Nano Banana, GPT Image, Seedream, FLUX, and Grok Imagine. -With DALL-E, you can: +Use it to: -- **Generate realistic images**: Create photorealistic visuals from textual descriptions -- **Design conceptual art**: Transform abstract ideas into visual representations -- **Produce variations**: Generate multiple interpretations of the same prompt -- **Control artistic style**: Specify artistic styles, mediums, and visual aesthetics -- **Create detailed scenes**: Describe complex scenes with multiple elements and relationships -- **Visualize products**: Generate product mockups and design concepts -- **Illustrate ideas**: Turn written concepts into visual illustrations +- **Generate production images**: Create polished visuals from workflow prompts +- **Choose the right provider**: Route requests to OpenAI, Gemini, or Fal.ai based on model availability and cost +- **Control output shape**: Set provider-specific size, aspect ratio, resolution, quality, background, and output format options +- **Use advanced Fal.ai features**: Configure safety tolerance, safety checking, web search grounding, seeds, and thinking level when supported +- **Pass generated files downstream**: Use the returned image file or URL in later workflow steps -In Sim, the DALL-E integration enables your agents to generate images programmatically as part of their workflows. This allows for powerful automation scenarios such as content creation, visual design, and creative ideation. 
Your agents can formulate detailed prompts, generate corresponding images, and incorporate these visuals into their outputs or downstream processes. This integration bridges the gap between natural language processing and visual content creation, enabling your agents to communicate not just through text but also through compelling imagery. By connecting Sim with DALL-E, you can create agents that produce visual content on demand, illustrate concepts, generate design assets, and enhance user experiences with rich visual elements - all without requiring human intervention in the creative process. +In Sim, the Image Generator block lets agents create visual assets programmatically as part of automated workflows. This is useful for content creation, design mockups, product visuals, creative ideation, and any flow that needs generated imagery without a manual handoff. {/* MANUAL-CONTENT-END */} ## Usage Instructions -Integrate Image Generator into the workflow. Can generate images using DALL-E 3, GPT Image 1, or GPT Image 2. +Generate images using OpenAI GPT Image, Google Nano Banana, or Fal.ai image models. ## Tools -### `openai_image` +### `image_generate` -Generate images using OpenAI +Generate images with OpenAI GPT Image, Google Nano Banana, or Fal.ai image models #### Input | Parameter | Type | Required | Description | | --------- | ---- | -------- | ----------- | -| `model` | string | Yes | The model to use \(dall-e-3, gpt-image-1, or gpt-image-2\) | -| `prompt` | string | Yes | A text description of the desired image | -| `size` | string | Yes | Image size. dall-e-3: 1024x1024, 1024x1792, or 1792x1024. gpt-image-1: auto, 1024x1024, 1536x1024, or 1024x1536. gpt-image-2: auto or any size with edges ≤3840px and multiples of 16 \(e.g. 1024x1024, 1536x1024, 1024x1536, 2560x1440, 3840x2160\). | -| `quality` | string | No | Quality. dall-e-3: standard\|hd. 
gpt-image-1/gpt-image-2: auto\|low\|medium\|high | -| `style` | string | No | The style of the image \(vivid or natural\), only for dall-e-3 | -| `background` | string | No | Background. gpt-image-1: auto\|transparent\|opaque. gpt-image-2: auto\|opaque \(transparent not supported\) | -| `outputFormat` | string | No | Output image format \(png, jpeg, webp\), only for gpt-image-1 and gpt-image-2 | -| `moderation` | string | No | Moderation level \(auto or low\), only for gpt-image-1 and gpt-image-2 | -| `n` | number | No | The number of images to generate \(1-10\) | -| `apiKey` | string | Yes | Your OpenAI API key | +| `provider` | string | Yes | Image generation provider: openai, gemini, or falai | +| `apiKey` | string | Yes | Provider API key | +| `model` | string | Yes | Provider model ID, such as gpt-image-1.5, gemini-3.1-flash-image-preview, or nano-banana-2 | +| `prompt` | string | Yes | Text prompt describing the image to generate | +| `size` | string | No | Provider-specific image size | +| `aspectRatio` | string | No | Aspect ratio, such as auto, 1:1, 16:9, or 9:16 | +| `resolution` | string | No | Provider-specific image resolution, such as 1K, 2K, 4K, 1k, or 2k | +| `quality` | string | No | Provider-specific image quality | +| `background` | string | No | Background setting when supported | +| `outputFormat` | string | No | Output image format: png, jpeg, or webp where supported | +| `moderation` | string | No | OpenAI moderation level: auto or low | +| `safetyTolerance` | string | No | Fal.ai safety tolerance when supported | +| `numImages` | number | No | Number of images to generate, subject to provider limits | +| `seed` | number | No | Random seed when supported | +| `enableSafetyChecker` | boolean | No | Enable the Fal.ai safety checker when supported | +| `enableWebSearch` | boolean | No | Enable web search grounding when supported by the selected Fal.ai model | +| `thinkingLevel` | string | No | Fal.ai thinking level when supported: minimal or 
high | #### Output | Parameter | Type | Description | | --------- | ---- | ----------- | -| `success` | boolean | Operation success status | -| `output` | object | Generated image data | -| ↳ `content` | string | Image URL or identifier | -| ↳ `image` | string | Base64 encoded image data | -| ↳ `metadata` | object | Image generation metadata | -| ↳ `model` | string | Model used for image generation | +| `content` | string | Generated image URL or identifier | +| `image` | file | Generated image file | +| `imageUrl` | string | Generated image URL | +| `provider` | string | Provider used | +| `model` | string | Model used | +| `metadata` | json | Generation metadata | +| ↳ `provider` | string | Provider used | +| ↳ `model` | string | Model used | +| ↳ `description` | string | Provider description | +| ↳ `revisedPrompt` | string | Revised prompt | +| ↳ `seed` | number | Seed used for generation | +| ↳ `jobId` | string | Provider job ID | +| ↳ `contentType` | string | Image MIME type | diff --git a/apps/docs/content/docs/en/tools/video_generator.mdx b/apps/docs/content/docs/en/tools/video_generator.mdx index a33492c95b9..b2e7c9fce54 100644 --- a/apps/docs/content/docs/en/tools/video_generator.mdx +++ b/apps/docs/content/docs/en/tools/video_generator.mdx @@ -6,29 +6,27 @@ description: Generate videos from text using AI import { BlockInfoCard } from "@/components/ui/block-info-card" {/* MANUAL-CONTENT-START:intro */} -Create videos from text prompts using cutting-edge AI models from top providers. Sim's Video Generator brings powerful, creative video synthesis capabilities to your workflow—supporting diverse models, aspect ratios, resolutions, camera controls, native audio, and advanced style and consistency features. +Create videos from text prompts using leading AI video providers. Sim's Video Generator supports direct provider integrations for Runway, Google Veo, Luma, and MiniMax, plus a Fal.ai multi-model provider for newer and specialized models. 
**Supported Providers & Models:** -- **[Runway Gen-4](https://research.runwayml.com/gen2/)** (Runway ML): - Runway is a pioneer in text-to-video generation, known for powerful models like Gen-2, Gen-3, and Gen-4. The latest [Gen-4](https://research.runwayml.com/gen2/) model (and Gen-4 Turbo for faster results) supports more realistic motion, greater world consistency, and visual references for character, object, style, and location. Supports 16:9, 9:16, and 1:1 aspect ratios, 5–10 second durations, up to 4K resolution, style presets, and direct upload of reference images for consistent generations. Runway powers creative tools for filmmakers, studios, and content creators worldwide. +- **[Runway Gen-4](https://docs.dev.runwayml.com/)**: Generate image-to-video clips with a required reference image, 5 or 10 second durations, and landscape, portrait, or square output. -- **[Google Veo](https://deepmind.google/technologies/veo/)** (Google DeepMind): - [Veo](https://deepmind.google/technologies/veo/) is Google’s next-generation video generation model, offering high-quality, native-audio videos up to 1080p and 16 seconds. Supports advanced motion, cinematic effects, and nuanced text understanding. Veo can generate videos with built-in sound—activating native audio as well as silent clips. Options include 16:9 aspect, variable duration, different models (veo-3, veo-3.1), and prompt-based controls. Ideal for storytelling, advertising, research, and ideation. +- **[Google Veo](https://ai.google.dev/gemini-api/docs/video)**: Generate text-to-video clips with Veo 3 and Veo 3.1 models, portrait or landscape aspect ratios, 4, 6, or 8 second durations, and 720p or 1080p output. -- **[Luma Dream Machine](https://lumalabs.ai/dream-machine)** (Luma AI): - [Dream Machine](https://lumalabs.ai/dream-machine) delivers jaw-droppingly realistic and fluid video from text. It incorporates advanced camera control, cinematography prompts, and supports both ray-1 and ray-2 models. 
Dream Machine supports precise aspect ratios (16:9, 9:16, 1:1), variable durations, and the specification of camera paths for intricate visual direction. Luma is renowned for breakthrough visual fidelity and is backed by leading AI vision researchers. +- **[Luma Dream Machine](https://docs.lumalabs.ai/docs/video-generation)**: Generate Ray 2 videos with 5 or 9 second durations, common aspect ratios, multiple resolutions, and optional camera concept controls. -- **[MiniMax Hailuo-02](https://minimax.chat/)** (via [Fal.ai](https://fal.ai/)): - [MiniMax Hailuo-02](https://minimax.chat/) is a sophisticated Chinese generative video model, available globally through [Fal.ai](https://fal.ai/). Generate videos up to 16 seconds in landscape or portrait format, with options for prompt optimization to improve clarity and creativity. Pro and standard endpoints available, supporting high resolutions (up to 1920×1080). Well-suited for creative projects needing prompt translation and optimization, commercial storytelling, and rapid prototyping of visual ideas. +- **[MiniMax Hailuo](https://platform.minimax.io/docs/api-reference/video-generation-t2v)**: Generate Hailuo 2.3 or Hailuo-02 videos through MiniMax's platform API, with standard or pro quality endpoints and prompt optimization. + +- **[Fal.ai Multi-Model](https://fal.ai/docs/model-api-reference/video-generation-api/overview)**: Access Veo 3.1, Sora 2, Seedance 2.0, Kling 3.0 and O3, MiniMax Hailuo 2.3, WAN 2.2, LTX 2.3, and previously supported Fal.ai models from one provider option. **How to Choose:** -Pick your provider and model based on your needs for quality, speed, duration, audio, cost, and unique features. Runway and Veo offer world-leading realism and cinematic capabilities; Luma excels in fluid motion and camera control; MiniMax is ideal for Chinese-language prompts and offers fast, affordable access. Consider reference support, style presets, audio requirements, and pricing when selecting your tool. 
+Pick the provider and model based on quality, speed, duration, audio support, reference image needs, resolution, and cost. Runway is best when you have a visual reference, Veo and Luma are strong general text-to-video options, MiniMax offers a direct Hailuo API path, and Fal.ai is the best choice when you need access to the broadest model catalog. For more details on features, restrictions, pricing, and model advances, see each provider’s official documentation above. {/* MANUAL-CONTENT-END */} @@ -36,7 +34,7 @@ For more details on features, restrictions, pricing, and model advances, see eac ## Usage Instructions -Generate high-quality videos from text prompts using leading AI providers. Supports multiple models, aspect ratios, resolutions, and provider-specific features like world consistency, camera controls, and audio generation. +Generate high-quality videos from text prompts using leading AI providers. Supports Runway, Google Veo, Luma, MiniMax, and Fal.ai multi-model generation with provider-specific durations, aspect ratios, resolutions, prompt optimization, and native audio controls. 
@@ -141,9 +139,10 @@ Generate videos using MiniMax Hailuo through MiniMax Platform API with advanced | --------- | ---- | -------- | ----------- | | `provider` | string | Yes | Video provider \(minimax\) | | `apiKey` | string | Yes | MiniMax API key from platform.minimax.io | -| `model` | string | No | MiniMax model: hailuo-02 \(default\) | +| `model` | string | No | MiniMax model: hailuo-2.3 \(default\) or hailuo-02 | | `prompt` | string | Yes | Text prompt describing the video to generate | | `duration` | number | No | Video duration in seconds \(6 or 10, default: 6\) | +| `endpoint` | string | No | Quality endpoint: standard \(768P\) or pro \(1080P for 6s videos\) | | `promptOptimizer` | boolean | No | Enable prompt optimization for better results \(default: true\) | #### Output @@ -161,7 +160,7 @@ Generate videos using MiniMax Hailuo through MiniMax Platform API with advanced ### `video_falai` -Generate videos using Fal.ai platform with access to multiple models including Veo 3.1, Sora 2, Kling 2.5, MiniMax Hailuo, and more +Generate videos using Fal.ai with access to Veo 3.1, Sora 2, Seedance 2.0, Kling 3.0, MiniMax Hailuo 2.3, WAN 2.2, LTX 2.3, and previously supported models #### Input @@ -169,12 +168,13 @@ Generate videos using Fal.ai platform with access to multiple models including V | --------- | ---- | -------- | ----------- | | `provider` | string | Yes | Video provider \(falai\) | | `apiKey` | string | Yes | Fal.ai API key | -| `model` | string | Yes | Fal.ai model: veo-3.1 \(Google Veo 3.1\), sora-2 \(OpenAI Sora 2\), kling-2.5-turbo-pro \(Kling 2.5 Turbo Pro\), kling-2.1-pro \(Kling 2.1 Master\), minimax-hailuo-2.3-pro \(MiniMax Hailuo Pro\), minimax-hailuo-2.3-standard \(MiniMax Hailuo Standard\), wan-2.1 \(WAN T2V\), ltxv-0.9.8 \(LTXV 13B\) | +| `model` | string | Yes | Fal.ai model: veo-3.1, veo-3.1-fast, sora-2, sora-2-pro, seedance-2.0, seedance-2.0-fast, kling-v3-pro, kling-v3-4k, kling-o3-pro, kling-o3-4k, minimax-hailuo-2.3-pro, 
minimax-hailuo-2.3-standard, wan-2.2-a14b-turbo, ltx-2.3, ltx-2.3-fast, plus previously supported model IDs | | `prompt` | string | Yes | Text prompt describing the video to generate | | `duration` | number | No | Video duration in seconds \(varies by model\) | | `aspectRatio` | string | No | Aspect ratio \(varies by model\): 16:9, 9:16, 1:1 | -| `resolution` | string | No | Video resolution \(varies by model\): 540p, 720p, 1080p | +| `resolution` | string | No | Video resolution \(varies by model\): 480p, 580p, 720p, 1080p, true_1080p, 1440p, 2160p, 4k | | `promptOptimizer` | boolean | No | Enable prompt optimization for MiniMax models \(default: true\) | +| `generateAudio` | boolean | No | Generate native audio when supported by the selected Fal.ai model | #### Output diff --git a/apps/sim/app/(landing)/integrations/data/icon-mapping.ts b/apps/sim/app/(landing)/integrations/data/icon-mapping.ts index ab6f6b1831c..5fec793c7e0 100644 --- a/apps/sim/app/(landing)/integrations/data/icon-mapping.ts +++ b/apps/sim/app/(landing)/integrations/data/icon-mapping.ts @@ -298,7 +298,7 @@ export const blockTypeToIconMap: Record = { hunter: HunterIOIcon, iam: IAMIcon, identity_center: IdentityCenterIcon, - image_generator: ImageIcon, + image_generator_v2: ImageIcon, imap: MailServerIcon, incidentio: IncidentioIcon, infisical: InfisicalIcon, @@ -398,7 +398,7 @@ export const blockTypeToIconMap: Record = { typeform: TypeformIcon, upstash: UpstashIcon, vercel: VercelIcon, - video_generator_v2: VideoIcon, + video_generator_v3: VideoIcon, vision_v2: EyeIcon, wealthbox: WealthboxIcon, webflow: WebflowIcon, diff --git a/apps/sim/app/(landing)/integrations/data/integrations.json b/apps/sim/app/(landing)/integrations/data/integrations.json index b3d8b3bc4fc..aaa3df9a1fb 100644 --- a/apps/sim/app/(landing)/integrations/data/integrations.json +++ b/apps/sim/app/(landing)/integrations/data/integrations.json @@ -6642,11 +6642,11 @@ "tags": ["enrichment", "sales-engagement"] }, { - "type": 
"image_generator", + "type": "image_generator_v2", "slug": "image-generator", "name": "Image Generator", "description": "Generate images", - "longDescription": "Integrate Image Generator into the workflow. Can generate images using DALL-E 3, GPT Image 1, or GPT Image 2.", + "longDescription": "Generate images using OpenAI GPT Image, Google Nano Banana, or Fal.ai image models.", "bgColor": "#4D5FFF", "iconName": "ImageIcon", "docsUrl": "https://docs.sim.ai/tools/image_generator", @@ -14015,14 +14015,14 @@ "tags": ["cloud", "ci-cd"] }, { - "type": "video_generator_v2", + "type": "video_generator_v3", "slug": "video-generator", "name": "Video Generator", "description": "Generate videos from text using AI", - "longDescription": "Generate high-quality videos from text prompts using leading AI providers. Supports multiple models, aspect ratios, resolutions, and provider-specific features like world consistency, camera controls, and audio generation.", + "longDescription": "Generate high-quality videos from text prompts using leading AI providers. 
Supports Runway, Google Veo, Luma, MiniMax, and Fal.ai multi-model generation with provider-specific durations, aspect ratios, resolutions, prompt optimization, and native audio controls.", "bgColor": "#181C1E", "iconName": "VideoIcon", - "docsUrl": "https://docs.sim.ai/tools/video-generator", + "docsUrl": "https://docs.sim.ai/tools/video_generator", "operations": [], "operationCount": 0, "triggers": [], diff --git a/apps/sim/app/api/tools/image/route.ts b/apps/sim/app/api/tools/image/route.ts index 8ea4af44d70..d48e5dffd80 100644 --- a/apps/sim/app/api/tools/image/route.ts +++ b/apps/sim/app/api/tools/image/route.ts @@ -1,18 +1,140 @@ import { createLogger } from '@sim/logger' -import { toError } from '@sim/utils/errors' +import { getErrorMessage, toError } from '@sim/utils/errors' +import { sleep } from '@sim/utils/helpers' import { type NextRequest, NextResponse } from 'next/server' -import { imageProxyQuerySchema } from '@/lib/api/contracts/tools/media/image' -import { getValidationErrorMessage, searchParamsToObject } from '@/lib/api/server/validation' +import { + type ImageToolBody, + type imageProviders, + imageProxyQuerySchema, + imageToolContract, +} from '@/lib/api/contracts/tools/media/image' +import { + getValidationErrorMessage, + parseRequest, + searchParamsToObject, + validationErrorResponse, +} from '@/lib/api/server' import { checkInternalAuth } from '@/lib/auth/hybrid' +import { getMaxExecutionTimeout } from '@/lib/core/execution-limits' import { secureFetchWithPinnedIP, validateUrlWithDNS, } from '@/lib/core/security/input-validation.server' import { generateRequestId } from '@/lib/core/utils/request' +import { getBaseUrl } from '@/lib/core/utils/urls' import { withRouteHandler } from '@/lib/core/utils/with-route-handler' const logger = createLogger('ImageProxyAPI') +export const dynamic = 'force-dynamic' +export const maxDuration = 600 + +type ImageProvider = (typeof imageProviders)[number] + +interface GeneratedImageResult { + buffer: Buffer + 
contentType: string + fileName: string + provider: ImageProvider + model: string + sourceUrl?: string + description?: string + revisedPrompt?: string + seed?: number + jobId?: string +} + +interface StoredImageResponse { + content: string + imageUrl: string + imageFile?: unknown + fileName: string + contentType: string + provider: ImageProvider + model: string + metadata: { + provider: ImageProvider + model: string + description?: string + revisedPrompt?: string + seed?: number + jobId?: string + contentType: string + } +} + +export const POST = withRouteHandler(async (request: NextRequest) => { + const requestId = generateRequestId() + logger.info(`[${requestId}] Image generation request started`) + + try { + const authResult = await checkInternalAuth(request, { requireWorkflowId: false }) + if (!authResult.success || !authResult.userId) { + return NextResponse.json({ error: 'Unauthorized' }, { status: 401 }) + } + + const parsed = await parseRequest( + imageToolContract, + request, + {}, + { + validationErrorResponse: (error) => { + logger.warn(`[${requestId}] Invalid image generation request:`, error.issues) + return validationErrorResponse( + error, + getValidationErrorMessage(error, 'Invalid request data') + ) + }, + } + ) + if (!parsed.success) return parsed.response + + const body = parsed.data.body + const provider = body.provider as ImageProvider + const { apiKey, model, prompt } = body + + if (prompt.length < 3 || prompt.length > 4000) { + return NextResponse.json( + { error: 'Prompt must be between 3 and 4000 characters' }, + { status: 400 } + ) + } + + logger.info(`[${requestId}] Generating image with ${provider}, model: ${model || 'default'}`) + + let imageResult: GeneratedImageResult + try { + if (provider === 'openai') { + imageResult = await generateWithOpenAI(apiKey, body, requestId, logger) + } else if (provider === 'gemini') { + imageResult = await generateWithGemini(apiKey, body, requestId, logger) + } else if (provider === 'falai') { + 
imageResult = await generateWithFalAI(apiKey, body, requestId, logger) + } else { + return NextResponse.json({ error: `Unknown provider: ${provider}` }, { status: 400 }) + } + } catch (error) { + logger.error(`[${requestId}] Image generation failed:`, error) + const errorMessage = getErrorMessage(error, 'Image generation failed') + return NextResponse.json({ error: errorMessage }, { status: 500 }) + } + + const storedImage = await storeGeneratedImage(imageResult, body, authResult.userId, requestId) + + logger.info(`[${requestId}] Image generation completed successfully`, { + provider, + model: storedImage.model, + contentType: storedImage.contentType, + }) + + return NextResponse.json(storedImage) + } catch (error) { + logger.error(`[${requestId}] Image generation route error:`, error) + const errorMessage = getErrorMessage(error, 'Unknown error') + return NextResponse.json({ error: errorMessage }, { status: 500 }) + } +}) + /** * Proxy for fetching images * This allows client-side requests to fetch images from various sources while avoiding CORS issues @@ -98,3 +220,743 @@ export const GET = withRouteHandler(async (request: NextRequest) => { }) } }) + +const OPENAI_IMAGE_MODELS = [ + 'gpt-image-2', + 'gpt-image-1.5', + 'gpt-image-1', + 'gpt-image-1-mini', +] as const +const OPENAI_IMAGE_SIZES = ['auto', '1024x1024', '1536x1024', '1024x1536'] as const +const OPENAI_IMAGE_2_SIZES = [...OPENAI_IMAGE_SIZES, '2560x1440', '3840x2160'] as const +const OPENAI_IMAGE_QUALITIES = ['auto', 'low', 'medium', 'high'] as const +const OPENAI_IMAGE_BACKGROUNDS = ['auto', 'transparent', 'opaque'] as const +const IMAGE_OUTPUT_FORMATS = ['png', 'jpeg', 'webp'] as const +const OPENAI_MODERATION_LEVELS = ['auto', 'low'] as const + +const GEMINI_IMAGE_MODELS = [ + 'gemini-3.1-flash-image-preview', + 'gemini-3-pro-image-preview', + 'gemini-2.5-flash-image', +] as const +const GEMINI_BASE_ASPECT_RATIOS = [ + '1:1', + '2:3', + '3:2', + '3:4', + '4:3', + '4:5', + '5:4', + '9:16', + '16:9', + 
'21:9', +] as const +const GEMINI_EXTREME_ASPECT_RATIOS = ['1:4', '1:8', '4:1', '8:1'] as const +const GEMINI_IMAGE_SIZES = ['512', '1K', '2K', '4K'] as const +const GEMINI_PRO_IMAGE_SIZES = ['1K', '2K', '4K'] as const + +interface FalAIImageModelConfig { + endpoint: string + defaultSize?: string + sizeOptions?: readonly string[] + defaultAspectRatio?: string + aspectRatios?: readonly string[] + defaultResolution?: string + resolutionOptions?: readonly string[] + defaultOutputFormat?: string + outputFormats?: readonly string[] + defaultQuality?: string + qualityOptions?: readonly string[] + defaultBackground?: string + backgroundOptions?: readonly string[] + defaultSafetyTolerance?: string + safetyToleranceOptions?: readonly string[] + maxNumImages?: number + supportsSeed?: boolean + supportsEnableSafetyChecker?: boolean + supportsEnableWebSearch?: boolean + supportsThinkingLevel?: boolean +} + +const FALAI_NANO_BANANA_ASPECT_RATIOS = [ + 'auto', + '21:9', + '16:9', + '3:2', + '4:3', + '5:4', + '1:1', + '4:5', + '3:4', + '2:3', + '9:16', +] as const +const FALAI_EXTREME_ASPECT_RATIOS = ['4:1', '1:4', '8:1', '1:8'] as const +const FALAI_STANDARD_IMAGE_SIZES = [ + 'square_hd', + 'square', + 'portrait_4_3', + 'portrait_16_9', + 'landscape_4_3', + 'landscape_16_9', +] as const +const FALAI_SEEDREAM_IMAGE_SIZES = [...FALAI_STANDARD_IMAGE_SIZES, 'auto_2K', 'auto_4K'] as const + +const FALAI_IMAGE_MODEL_CONFIGS: Record = { + 'nano-banana-2': { + endpoint: 'fal-ai/nano-banana-2', + defaultAspectRatio: 'auto', + aspectRatios: [...FALAI_NANO_BANANA_ASPECT_RATIOS, ...FALAI_EXTREME_ASPECT_RATIOS], + defaultResolution: '1K', + resolutionOptions: ['0.5K', '1K', '2K', '4K'], + defaultOutputFormat: 'png', + outputFormats: IMAGE_OUTPUT_FORMATS, + defaultSafetyTolerance: '4', + safetyToleranceOptions: ['1', '2', '3', '4', '5', '6'], + maxNumImages: 4, + supportsSeed: true, + supportsEnableWebSearch: true, + supportsThinkingLevel: true, + }, + 'nano-banana-pro': { + endpoint: 
'fal-ai/nano-banana-pro', + defaultAspectRatio: '1:1', + aspectRatios: FALAI_NANO_BANANA_ASPECT_RATIOS, + defaultResolution: '1K', + resolutionOptions: ['1K', '2K', '4K'], + defaultOutputFormat: 'png', + outputFormats: IMAGE_OUTPUT_FORMATS, + defaultSafetyTolerance: '4', + safetyToleranceOptions: ['1', '2', '3', '4', '5', '6'], + maxNumImages: 4, + supportsSeed: true, + supportsEnableWebSearch: true, + }, + 'nano-banana': { + endpoint: 'fal-ai/nano-banana', + defaultAspectRatio: '1:1', + aspectRatios: FALAI_NANO_BANANA_ASPECT_RATIOS.filter((ratio) => ratio !== 'auto'), + defaultOutputFormat: 'png', + outputFormats: IMAGE_OUTPUT_FORMATS, + defaultSafetyTolerance: '4', + safetyToleranceOptions: ['1', '2', '3', '4', '5', '6'], + maxNumImages: 4, + supportsSeed: true, + }, + 'gpt-image-1.5': { + endpoint: 'fal-ai/gpt-image-1.5', + defaultSize: '1024x1024', + sizeOptions: ['1024x1024', '1536x1024', '1024x1536'], + defaultQuality: 'high', + qualityOptions: ['low', 'medium', 'high'], + defaultBackground: 'auto', + backgroundOptions: OPENAI_IMAGE_BACKGROUNDS, + defaultOutputFormat: 'png', + outputFormats: IMAGE_OUTPUT_FORMATS, + maxNumImages: 4, + }, + 'seedream-v4.5': { + endpoint: 'fal-ai/bytedance/seedream/v4.5/text-to-image', + defaultSize: 'auto_2K', + sizeOptions: FALAI_SEEDREAM_IMAGE_SIZES, + maxNumImages: 6, + supportsSeed: true, + supportsEnableSafetyChecker: true, + }, + 'flux-2-pro': { + endpoint: 'fal-ai/flux-2-pro', + defaultSize: 'landscape_4_3', + sizeOptions: FALAI_STANDARD_IMAGE_SIZES, + defaultOutputFormat: 'jpeg', + outputFormats: ['jpeg', 'png'], + defaultSafetyTolerance: '2', + safetyToleranceOptions: ['1', '2', '3', '4', '5'], + supportsSeed: true, + supportsEnableSafetyChecker: true, + }, + 'grok-imagine-image': { + endpoint: 'xai/grok-imagine-image', + defaultAspectRatio: '1:1', + aspectRatios: [ + '2:1', + '20:9', + '19.5:9', + '16:9', + '4:3', + '3:2', + '1:1', + '2:3', + '3:4', + '9:16', + '9:19.5', + '9:20', + '1:2', + ], + defaultResolution: 
'1k', + resolutionOptions: ['1k', '2k'], + defaultOutputFormat: 'jpeg', + outputFormats: IMAGE_OUTPUT_FORMATS, + maxNumImages: 4, + }, +} + +function isRecord(value: unknown): value is Record { + return typeof value === 'object' && value !== null && !Array.isArray(value) +} + +function getStringProperty( + record: Record | undefined, + key: string +): string | undefined { + const value = record?.[key] + return typeof value === 'string' ? value : undefined +} + +function getNumberProperty( + record: Record | undefined, + key: string +): number | undefined { + const value = record?.[key] + return typeof value === 'number' ? value : undefined +} + +function firstRecord(value: unknown): Record | undefined { + return Array.isArray(value) ? value.find(isRecord) : undefined +} + +function pickAllowed( + value: string | undefined, + allowed: readonly string[], + fallback: string +): string { + return value && allowed.includes(value) ? value : fallback +} + +function clampInteger( + value: number | undefined, + min: number, + max: number, + fallback: number +): number { + if (typeof value !== 'number' || !Number.isInteger(value)) return fallback + return Math.min(Math.max(value, min), max) +} + +function getContentTypeForFormat(format: string | undefined): string { + if (format === 'jpeg') return 'image/jpeg' + if (format === 'webp') return 'image/webp' + return 'image/png' +} + +function extensionFromContentType(contentType: string): string { + if (contentType.includes('jpeg') || contentType.includes('jpg')) return 'jpg' + if (contentType.includes('webp')) return 'webp' + return 'png' +} + +async function bufferFromImageUrl(url: string): Promise<{ buffer: Buffer; contentType: string }> { + if (url.startsWith('data:')) { + const match = /^data:([^;]+);base64,(.+)$/u.exec(url) + if (!match) throw new Error('Invalid data URI image response') + return { + contentType: match[1], + buffer: Buffer.from(match[2], 'base64'), + } + } + + const urlValidation = await 
validateUrlWithDNS(url, 'imageUrl') + if (!urlValidation.isValid || !urlValidation.resolvedIP) { + throw new Error(urlValidation.error || 'Generated image URL failed validation') + } + + const imageResponse = await secureFetchWithPinnedIP(url, urlValidation.resolvedIP, { + method: 'GET', + }) + if (!imageResponse.ok) { + await imageResponse.text().catch(() => {}) + throw new Error(`Failed to download generated image: ${imageResponse.status}`) + } + + const contentType = imageResponse.headers.get('content-type') || 'image/png' + const arrayBuffer = await imageResponse.arrayBuffer() + return { buffer: Buffer.from(arrayBuffer), contentType } +} + +async function generateWithOpenAI( + apiKey: string, + body: ImageToolBody, + requestId: string, + logger: ReturnType +): Promise { + const model = pickAllowed(body.model, OPENAI_IMAGE_MODELS, 'gpt-image-1.5') + const size = + model === 'gpt-image-2' + ? pickAllowed(body.size, OPENAI_IMAGE_2_SIZES, 'auto') + : pickAllowed(body.size, OPENAI_IMAGE_SIZES, 'auto') + const outputFormat = pickAllowed(body.outputFormat, IMAGE_OUTPUT_FORMATS, 'png') + const requestBody: Record = { + model, + prompt: body.prompt, + size, + n: 1, + } + + if (body.quality) { + requestBody.quality = pickAllowed(body.quality, OPENAI_IMAGE_QUALITIES, 'auto') + } + if (body.background) { + requestBody.background = pickAllowed(body.background, OPENAI_IMAGE_BACKGROUNDS, 'auto') + } + if (body.outputFormat) { + requestBody.output_format = outputFormat + } + if (body.moderation) { + requestBody.moderation = pickAllowed(body.moderation, OPENAI_MODERATION_LEVELS, 'auto') + } + + const openaiResponse = await fetch('https://api.openai.com/v1/images/generations', { + method: 'POST', + headers: { + Authorization: `Bearer ${apiKey}`, + 'Content-Type': 'application/json', + }, + body: JSON.stringify(requestBody), + }) + + if (!openaiResponse.ok) { + const error = await openaiResponse.text() + throw new Error(`OpenAI API error: ${openaiResponse.status} - ${error}`) + } 
+ + const data = (await openaiResponse.json()) as unknown + if (!isRecord(data)) { + throw new Error('Invalid OpenAI image response') + } + + const firstImage = firstRecord(data.data) + const base64Image = getStringProperty(firstImage, 'b64_json') + const imageUrl = getStringProperty(firstImage, 'url') + const revisedPrompt = getStringProperty(firstImage, 'revised_prompt') + let buffer: Buffer + let contentType = getContentTypeForFormat(outputFormat) + + if (base64Image) { + buffer = Buffer.from(base64Image, 'base64') + } else if (imageUrl) { + const downloaded = await bufferFromImageUrl(imageUrl) + buffer = downloaded.buffer + contentType = downloaded.contentType + } else { + logger.error(`[${requestId}] OpenAI response missing image payload`) + throw new Error('No image data found in OpenAI response') + } + + return { + buffer, + contentType, + fileName: `openai-${model}.${extensionFromContentType(contentType)}`, + provider: 'openai', + model, + sourceUrl: imageUrl, + revisedPrompt, + } +} + +async function generateWithGemini( + apiKey: string, + body: ImageToolBody, + requestId: string, + logger: ReturnType +): Promise { + const model = pickAllowed(body.model, GEMINI_IMAGE_MODELS, 'gemini-3.1-flash-image-preview') + const aspectRatios = + model === 'gemini-3.1-flash-image-preview' + ? 
[...GEMINI_BASE_ASPECT_RATIOS, ...GEMINI_EXTREME_ASPECT_RATIOS] + : GEMINI_BASE_ASPECT_RATIOS + const imageConfig: Record = {} + + if (body.aspectRatio) { + imageConfig.aspectRatio = pickAllowed(body.aspectRatio, aspectRatios, '1:1') + } + + if (model === 'gemini-3.1-flash-image-preview' && body.resolution) { + imageConfig.imageSize = pickAllowed(body.resolution, GEMINI_IMAGE_SIZES, '1K') + } else if (model === 'gemini-3-pro-image-preview' && body.resolution) { + imageConfig.imageSize = pickAllowed(body.resolution, GEMINI_PRO_IMAGE_SIZES, '1K') + } + + const requestBody: Record = { + contents: [ + { + parts: [{ text: body.prompt }], + }, + ], + } + + requestBody.generationConfig = { + responseModalities: ['TEXT', 'IMAGE'], + ...(Object.keys(imageConfig).length > 0 && { imageConfig }), + } + + const geminiResponse = await fetch( + `https://generativelanguage.googleapis.com/v1beta/models/${model}:generateContent`, + { + method: 'POST', + headers: { + 'x-goog-api-key': apiKey, + 'Content-Type': 'application/json', + }, + body: JSON.stringify(requestBody), + } + ) + + if (!geminiResponse.ok) { + const error = await geminiResponse.text() + throw new Error(`Gemini API error: ${geminiResponse.status} - ${error}`) + } + + const data = (await geminiResponse.json()) as unknown + if (!isRecord(data)) { + throw new Error('Invalid Gemini image response') + } + + const candidate = firstRecord(data.candidates) + const content = isRecord(candidate?.content) ? candidate.content : undefined + const parts = Array.isArray(content?.parts) ? 
content.parts : [] + const textPart = parts.find((part) => isRecord(part) && typeof part.text === 'string') + const imagePart = parts.find((part) => { + if (!isRecord(part)) return false + return isRecord(part.inlineData) || isRecord(part.inline_data) + }) + + if (!isRecord(imagePart)) { + logger.error(`[${requestId}] Gemini response missing image part`) + throw new Error('No image data found in Gemini response') + } + + const inlineData = isRecord(imagePart.inlineData) + ? imagePart.inlineData + : isRecord(imagePart.inline_data) + ? imagePart.inline_data + : undefined + const base64Image = getStringProperty(inlineData, 'data') + const contentType = + getStringProperty(inlineData, 'mimeType') || + getStringProperty(inlineData, 'mime_type') || + 'image/png' + + if (!base64Image) { + throw new Error('Gemini image response missing inline image data') + } + + return { + buffer: Buffer.from(base64Image, 'base64'), + contentType, + fileName: `gemini-${model}.${extensionFromContentType(contentType)}`, + provider: 'gemini', + model, + description: isRecord(textPart) ? 
getStringProperty(textPart, 'text') : undefined, + } +} + +function buildFalAIQueueUrl(endpoint: string, requestId: string, path: 'status' | 'response') { + return `https://queue.fal.run/${endpoint}/requests/${requestId}/${path}` +} + +function getFalAIErrorMessage(error: unknown): string { + if (typeof error === 'string') return error + if (isRecord(error)) { + return ( + getStringProperty(error, 'message') || + getStringProperty(error, 'detail') || + JSON.stringify(error) + ) + } + return 'Unknown Fal.ai error' +} + +async function generateWithFalAI( + apiKey: string, + body: ImageToolBody, + requestId: string, + logger: ReturnType +): Promise { + const model = body.model || 'nano-banana-2' + const modelConfig = FALAI_IMAGE_MODEL_CONFIGS[model] + if (!modelConfig) { + throw new Error(`Unknown Fal.ai image model: ${model}`) + } + + const requestBody: Record = { + prompt: body.prompt, + sync_mode: false, + } + + if (modelConfig.maxNumImages) { + requestBody.num_images = clampInteger(body.numImages, 1, modelConfig.maxNumImages, 1) + } + if (modelConfig.supportsSeed && body.seed !== undefined) { + requestBody.seed = body.seed + } + if (modelConfig.sizeOptions && modelConfig.defaultSize) { + requestBody.image_size = pickAllowed( + body.size, + modelConfig.sizeOptions, + modelConfig.defaultSize + ) + } + if (modelConfig.aspectRatios && modelConfig.defaultAspectRatio) { + requestBody.aspect_ratio = pickAllowed( + body.aspectRatio, + modelConfig.aspectRatios, + modelConfig.defaultAspectRatio + ) + } + if (modelConfig.resolutionOptions && modelConfig.defaultResolution) { + requestBody.resolution = pickAllowed( + body.resolution, + modelConfig.resolutionOptions, + modelConfig.defaultResolution + ) + } + if (modelConfig.outputFormats && modelConfig.defaultOutputFormat) { + requestBody.output_format = pickAllowed( + body.outputFormat, + modelConfig.outputFormats, + modelConfig.defaultOutputFormat + ) + } + if (modelConfig.qualityOptions && modelConfig.defaultQuality) { + 
requestBody.quality = pickAllowed( + body.quality, + modelConfig.qualityOptions, + modelConfig.defaultQuality + ) + } + if (modelConfig.backgroundOptions && modelConfig.defaultBackground) { + requestBody.background = pickAllowed( + body.background, + modelConfig.backgroundOptions, + modelConfig.defaultBackground + ) + } + if (modelConfig.safetyToleranceOptions && modelConfig.defaultSafetyTolerance) { + requestBody.safety_tolerance = pickAllowed( + body.safetyTolerance, + modelConfig.safetyToleranceOptions, + modelConfig.defaultSafetyTolerance + ) + } + if (modelConfig.supportsEnableSafetyChecker && body.enableSafetyChecker !== undefined) { + requestBody.enable_safety_checker = body.enableSafetyChecker + } + if (modelConfig.supportsEnableWebSearch && body.enableWebSearch !== undefined) { + requestBody.enable_web_search = body.enableWebSearch + } + if (modelConfig.supportsThinkingLevel && body.thinkingLevel) { + requestBody.thinking_level = pickAllowed(body.thinkingLevel, ['minimal', 'high'], 'minimal') + } + + const createResponse = await fetch(`https://queue.fal.run/${modelConfig.endpoint}`, { + method: 'POST', + headers: { + Authorization: `Key ${apiKey}`, + 'Content-Type': 'application/json', + }, + body: JSON.stringify(requestBody), + }) + + if (!createResponse.ok) { + const error = await createResponse.text() + throw new Error(`Fal.ai API error: ${createResponse.status} - ${error}`) + } + + const createData = (await createResponse.json()) as unknown + if (!isRecord(createData)) { + throw new Error('Invalid Fal.ai queue response') + } + + const falRequestId = getStringProperty(createData, 'request_id') + if (!falRequestId) { + throw new Error('Fal.ai queue response missing request_id') + } + + const statusUrl = + getStringProperty(createData, 'status_url') || + buildFalAIQueueUrl(modelConfig.endpoint, falRequestId, 'status') + const responseUrl = + getStringProperty(createData, 'response_url') || + buildFalAIQueueUrl(modelConfig.endpoint, falRequestId, 
'response') + + logger.info(`[${requestId}] Fal.ai image request created: ${falRequestId}`) + + const pollIntervalMs = 3000 + const maxAttempts = Math.ceil(getMaxExecutionTimeout() / pollIntervalMs) + let attempts = 0 + + while (attempts < maxAttempts) { + await sleep(pollIntervalMs) + + const statusResponse = await fetch(statusUrl, { + headers: { + Authorization: `Key ${apiKey}`, + }, + }) + + if (!statusResponse.ok) { + await statusResponse.text().catch(() => {}) + throw new Error(`Fal.ai status check failed: ${statusResponse.status}`) + } + + const statusData = (await statusResponse.json()) as unknown + if (!isRecord(statusData)) { + throw new Error('Invalid Fal.ai status response') + } + + const status = getStringProperty(statusData, 'status') + if (status === 'COMPLETED') { + const statusError = statusData.error + if (statusError) { + throw new Error(`Fal.ai generation failed: ${getFalAIErrorMessage(statusError)}`) + } + + const resultResponse = await fetch( + getStringProperty(statusData, 'response_url') || responseUrl, + { + headers: { + Authorization: `Key ${apiKey}`, + }, + } + ) + + if (!resultResponse.ok) { + await resultResponse.text().catch(() => {}) + throw new Error(`Failed to fetch Fal.ai result: ${resultResponse.status}`) + } + + const resultData = (await resultResponse.json()) as unknown + if (!isRecord(resultData)) { + throw new Error('Invalid Fal.ai result response') + } + + const firstImage = firstRecord(resultData.images) + const imageUrl = + getStringProperty(firstImage, 'url') || + getStringProperty(firstImage, 'data') || + getStringProperty(firstImage, 'content') + if (!imageUrl) { + throw new Error('No image URL in Fal.ai response') + } + + const downloaded = await bufferFromImageUrl(imageUrl) + const contentType = + getStringProperty(firstImage, 'content_type') || + getStringProperty(firstImage, 'contentType') || + downloaded.contentType + const fileName = + getStringProperty(firstImage, 'file_name') || + getStringProperty(firstImage, 
'fileName') || + `falai-${model}.${extensionFromContentType(contentType)}` + + return { + buffer: downloaded.buffer, + contentType, + fileName, + provider: 'falai', + model, + sourceUrl: imageUrl.startsWith('data:') ? undefined : imageUrl, + description: getStringProperty(resultData, 'description'), + revisedPrompt: getStringProperty(resultData, 'revised_prompt'), + seed: getNumberProperty(resultData, 'seed'), + jobId: falRequestId, + } + } + + if (['ERROR', 'FAILED', 'CANCELLED'].includes(status || '')) { + throw new Error(`Fal.ai generation failed: ${getFalAIErrorMessage(statusData.error)}`) + } + + attempts += 1 + } + + throw new Error('Fal.ai image generation timed out') +} + +async function storeGeneratedImage( + imageResult: GeneratedImageResult, + body: ImageToolBody, + userId: string, + requestId: string +): Promise { + const timestamp = Date.now() + const safeFileName = imageResult.fileName || `image-${imageResult.provider}-${timestamp}.png` + const executionContext = + body.workspaceId && body.workflowId && body.executionId + ? 
{ + workspaceId: body.workspaceId, + workflowId: body.workflowId, + executionId: body.executionId, + } + : null + + if (executionContext) { + const { uploadExecutionFile } = await import('@/lib/uploads/contexts/execution') + const imageFile = await uploadExecutionFile( + executionContext, + imageResult.buffer, + safeFileName, + imageResult.contentType, + userId + ) + + return { + content: imageFile.url, + imageUrl: imageFile.url, + imageFile, + fileName: safeFileName, + contentType: imageResult.contentType, + provider: imageResult.provider, + model: imageResult.model, + metadata: { + provider: imageResult.provider, + model: imageResult.model, + description: imageResult.description, + revisedPrompt: imageResult.revisedPrompt, + seed: imageResult.seed, + jobId: imageResult.jobId, + contentType: imageResult.contentType, + }, + } + } + + const { StorageService } = await import('@/lib/uploads') + const fileInfo = await StorageService.uploadFile({ + file: imageResult.buffer, + fileName: safeFileName, + contentType: imageResult.contentType, + context: 'copilot', + }) + const imageUrl = `${getBaseUrl()}${fileInfo.path}` + logger.info(`[${requestId}] Stored generated image fallback`, { + fileName: safeFileName, + size: imageResult.buffer.length, + }) + + return { + content: imageUrl, + imageUrl, + fileName: safeFileName, + contentType: imageResult.contentType, + provider: imageResult.provider, + model: imageResult.model, + metadata: { + provider: imageResult.provider, + model: imageResult.model, + description: imageResult.description, + revisedPrompt: imageResult.revisedPrompt, + seed: imageResult.seed, + jobId: imageResult.jobId, + contentType: imageResult.contentType, + }, + } +} diff --git a/apps/sim/app/api/tools/video/route.ts b/apps/sim/app/api/tools/video/route.ts index 84d930d9c7b..693a6e192c2 100644 --- a/apps/sim/app/api/tools/video/route.ts +++ b/apps/sim/app/api/tools/video/route.ts @@ -84,13 +84,14 @@ export const POST = withRouteHandler(async (request: 
NextRequest) => { ) } - // Validate aspect ratio (Veo only supports 16:9 and 9:16) - const validAspectRatios = provider === 'veo' ? ['16:9', '9:16'] : ['16:9', '9:16', '1:1'] - if (aspectRatio && !validAspectRatios.includes(aspectRatio)) { - return NextResponse.json( - { error: `Aspect ratio must be ${validAspectRatios.join(', ')}` }, - { status: 400 } - ) + if (provider !== 'falai') { + const validAspectRatios = provider === 'veo' ? ['16:9', '9:16'] : ['16:9', '9:16', '1:1'] + if (aspectRatio && !validAspectRatios.includes(aspectRatio)) { + return NextResponse.json( + { error: `Aspect ratio must be ${validAspectRatios.join(', ')}` }, + { status: 400 } + ) + } } logger.info(`[${requestId}] Generating video with ${provider}, model: ${model || 'default'}`) @@ -166,10 +167,11 @@ export const POST = withRouteHandler(async (request: NextRequest) => { } else if (provider === 'minimax') { const result = await generateWithMiniMax( apiKey, - model || 'hailuo-02', + model || 'hailuo-2.3', prompt, duration || 6, - body.promptOptimizer !== false, // Default true + body.promptOptimizer !== false, + body.endpoint, requestId, logger ) @@ -185,6 +187,10 @@ export const POST = withRouteHandler(async (request: NextRequest) => { { status: 400 } ) } + const validationError = getFalAIValidationError(model, duration, aspectRatio, resolution) + if (validationError) { + return NextResponse.json({ error: validationError }, { status: 400 }) + } const result = await generateWithFalAI( apiKey, model, @@ -193,6 +199,7 @@ export const POST = withRouteHandler(async (request: NextRequest) => { aspectRatio, resolution, body.promptOptimizer, + body.generateAudio, requestId, logger ) @@ -635,27 +642,25 @@ async function generateWithMiniMax( prompt: string, duration: number, promptOptimizer: boolean, + endpoint: string | undefined, requestId: string, logger: ReturnType ): Promise<{ buffer: Buffer; width: number; height: number; jobId: string; duration: number }> { logger.info(`[${requestId}] Starting 
MiniMax Hailuo generation via MiniMax Platform API`) logger.info( - `[${requestId}] Request params - model: ${model}, duration: ${duration}, promptOptimizer: ${promptOptimizer}` + `[${requestId}] Request params - model: ${model}, duration: ${duration}, endpoint: ${endpoint || 'standard'}, promptOptimizer: ${promptOptimizer}` ) - // Determine resolution and dimensions based on duration - // MiniMax-Hailuo-02 supports 768P (6s) or 1080P (10s) - const resolution = duration === 10 ? '1080P' : '768P' - const dimensions = duration === 10 ? { width: 1920, height: 1080 } : { width: 1360, height: 768 } + const useProResolution = endpoint === 'pro' && duration === 6 + const resolution = useProResolution ? '1080P' : '768P' + const dimensions = useProResolution ? { width: 1920, height: 1080 } : { width: 1360, height: 768 } logger.info( `[${requestId}] Using resolution: ${resolution}, dimensions: ${dimensions.width}x${dimensions.height}` ) - // Map our model ID to MiniMax model name const minimaxModel = model === 'hailuo-02' ? 
'MiniMax-Hailuo-02' : 'MiniMax-Hailuo-2.3' - // Create video generation request via MiniMax Platform API const createResponse = await fetch('https://api.minimax.io/v1/video_generation', { method: 'POST', headers: { @@ -782,32 +787,290 @@ async function generateWithMiniMax( throw new Error('MiniMax generation timed out') } -// Helper function to strip subpaths from Fal.ai model IDs for status/result endpoints -function getBaseModelId(fullModelId: string): string { - const parts = fullModelId.split('/') - // Keep only the first two parts (e.g., "fal-ai/sora-2" from "fal-ai/sora-2/text-to-video") - if (parts.length > 2) { - return parts.slice(0, 2).join('/') - } - return fullModelId +type FalAIDurationFormat = 'number' | 'seconds' | 'string' + +interface FalAIModelConfig { + endpoint: string + durationFormat?: FalAIDurationFormat + durationOptions?: readonly number[] + supportsAspectRatio?: boolean + aspectRatioOptions?: readonly string[] + supportsResolution?: boolean + resolutionOptions?: readonly string[] + supportsPromptOptimizer?: boolean + supportsGenerateAudio?: boolean +} + +interface FalAIRequestBody { + prompt: string + duration?: number | string + aspect_ratio?: string + resolution?: string + prompt_optimizer?: boolean + generate_audio?: boolean +} + +const FALAI_MODEL_CONFIGS: Record = { + 'veo-3.1': { + endpoint: 'fal-ai/veo3.1', + durationFormat: 'seconds', + durationOptions: [4, 6, 8], + supportsAspectRatio: true, + aspectRatioOptions: ['16:9', '9:16'], + supportsResolution: true, + resolutionOptions: ['720p', '1080p', '4k'], + supportsGenerateAudio: true, + }, + 'veo-3.1-fast': { + endpoint: 'fal-ai/veo3.1/fast', + durationFormat: 'seconds', + durationOptions: [4, 6, 8], + supportsAspectRatio: true, + aspectRatioOptions: ['16:9', '9:16'], + supportsResolution: true, + resolutionOptions: ['720p', '1080p', '4k'], + supportsGenerateAudio: true, + }, + 'sora-2': { + endpoint: 'fal-ai/sora-2/text-to-video', + durationFormat: 'number', + durationOptions: [4, 
8, 12, 16, 20], + supportsAspectRatio: true, + aspectRatioOptions: ['16:9', '9:16'], + supportsResolution: true, + resolutionOptions: ['720p'], + }, + 'sora-2-pro': { + endpoint: 'fal-ai/sora-2/text-to-video/pro', + durationFormat: 'number', + durationOptions: [4, 8, 12, 16, 20], + supportsAspectRatio: true, + aspectRatioOptions: ['16:9', '9:16'], + supportsResolution: true, + resolutionOptions: ['720p', '1080p', 'true_1080p'], + }, + 'seedance-2.0': { + endpoint: 'bytedance/seedance-2.0/text-to-video', + durationFormat: 'string', + durationOptions: [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15], + supportsAspectRatio: true, + aspectRatioOptions: ['auto', '21:9', '16:9', '4:3', '1:1', '3:4', '9:16'], + supportsResolution: true, + resolutionOptions: ['480p', '720p', '1080p'], + supportsGenerateAudio: true, + }, + 'seedance-2.0-fast': { + endpoint: 'bytedance/seedance-2.0/fast/text-to-video', + durationFormat: 'string', + durationOptions: [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15], + supportsAspectRatio: true, + aspectRatioOptions: ['auto', '21:9', '16:9', '4:3', '1:1', '3:4', '9:16'], + supportsResolution: true, + resolutionOptions: ['480p', '720p'], + supportsGenerateAudio: true, + }, + 'kling-v3-pro': { + endpoint: 'fal-ai/kling-video/v3/pro/text-to-video', + durationFormat: 'string', + durationOptions: [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15], + supportsAspectRatio: true, + aspectRatioOptions: ['16:9', '9:16', '1:1'], + supportsGenerateAudio: true, + }, + 'kling-v3-4k': { + endpoint: 'fal-ai/kling-video/v3/4k/text-to-video', + durationFormat: 'string', + durationOptions: [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15], + supportsAspectRatio: true, + aspectRatioOptions: ['16:9', '9:16', '1:1'], + supportsGenerateAudio: true, + }, + 'kling-o3-pro': { + endpoint: 'fal-ai/kling-video/o3/pro/text-to-video', + durationFormat: 'string', + durationOptions: [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15], + supportsAspectRatio: true, + aspectRatioOptions: ['16:9', '9:16', 
'1:1'], + supportsGenerateAudio: true, + }, + 'kling-o3-4k': { + endpoint: 'fal-ai/kling-video/o3/4k/text-to-video', + durationFormat: 'string', + durationOptions: [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15], + supportsAspectRatio: true, + aspectRatioOptions: ['16:9', '9:16', '1:1'], + supportsGenerateAudio: true, + }, + 'kling-2.5-turbo-pro': { + endpoint: 'fal-ai/kling-video/v2.5-turbo/pro/text-to-video', + durationFormat: 'string', + supportsAspectRatio: true, + supportsResolution: true, + }, + 'kling-2.1-pro': { + endpoint: 'fal-ai/kling-video/v2.1/master/text-to-video', + durationFormat: 'string', + supportsAspectRatio: true, + supportsResolution: true, + }, + 'minimax-hailuo-2.3-pro': { + endpoint: 'fal-ai/minimax/hailuo-2.3/pro/text-to-video', + supportsPromptOptimizer: true, + }, + 'minimax-hailuo-2.3-standard': { + endpoint: 'fal-ai/minimax/hailuo-2.3/standard/text-to-video', + durationFormat: 'string', + durationOptions: [6, 10], + supportsPromptOptimizer: true, + }, + 'minimax-hailuo-02-pro': { + endpoint: 'fal-ai/minimax/hailuo-02/pro/text-to-video', + durationFormat: 'string', + supportsAspectRatio: true, + supportsResolution: true, + supportsPromptOptimizer: true, + }, + 'minimax-hailuo-02-standard': { + endpoint: 'fal-ai/minimax/hailuo-02/standard/text-to-video', + durationFormat: 'string', + supportsAspectRatio: true, + supportsResolution: true, + supportsPromptOptimizer: true, + }, + 'wan-2.2-a14b-turbo': { + endpoint: 'fal-ai/wan/v2.2-a14b/text-to-video/turbo', + supportsAspectRatio: true, + aspectRatioOptions: ['16:9', '9:16', '1:1'], + supportsResolution: true, + resolutionOptions: ['480p', '580p', '720p'], + }, + 'wan-2.1': { + endpoint: 'fal-ai/wan-t2v', + }, + 'ltx-2.3': { + endpoint: 'fal-ai/ltx-2.3/text-to-video', + durationFormat: 'number', + durationOptions: [6, 8, 10], + supportsAspectRatio: true, + aspectRatioOptions: ['16:9', '9:16'], + supportsResolution: true, + resolutionOptions: ['1080p', '1440p', '2160p'], + 
supportsGenerateAudio: true, + }, + 'ltx-2.3-fast': { + endpoint: 'fal-ai/ltx-2.3/text-to-video/fast', + durationFormat: 'number', + durationOptions: [6, 8, 10, 12, 14, 16, 18, 20], + supportsAspectRatio: true, + aspectRatioOptions: ['16:9', '9:16'], + supportsResolution: true, + resolutionOptions: ['1080p', '1440p', '2160p'], + supportsGenerateAudio: true, + }, + 'ltxv-0.9.8': { + endpoint: 'fal-ai/ltxv-13b-098-distilled', + }, +} + +function formatFalAIDuration( + format: FalAIDurationFormat | undefined, + duration: number | undefined +): string | number | undefined { + if (!format || duration === undefined) return undefined + + if (format === 'number') return duration + if (format === 'seconds') return `${duration}s` + return String(duration) +} + +function isRecord(value: unknown): value is Record { + return typeof value === 'object' && value !== null && !Array.isArray(value) +} + +function getStringProperty( + record: Record | undefined, + key: string +): string | undefined { + const value = record?.[key] + return typeof value === 'string' ? value : undefined } -// Helper function to format duration based on model requirements -function formatDuration(model: string, duration: number | undefined): string | number | undefined { - if (duration === undefined) return undefined +function getNumberProperty( + record: Record | undefined, + key: string +): number | undefined { + const value = record?.[key] + return typeof value === 'number' ? 
value : undefined +} - // Veo 3.1 requires duration with "s" suffix (e.g., "8s") - if (model === 'veo-3.1') { - return `${duration}s` +function formatAllowedValues(allowed: readonly (number | string)[]): string { + return allowed.map(String).join(', ') +} + +function getFalAIValidationError( + model: string, + duration: number | undefined, + aspectRatio: string | undefined, + resolution: string | undefined +): string | undefined { + const modelConfig = FALAI_MODEL_CONFIGS[model] + if (!modelConfig) { + return `Unknown Fal.ai model: ${model}` } - // Sora 2 requires numeric duration - if (model === 'sora-2') { - return duration + if ( + duration !== undefined && + modelConfig.durationOptions && + !modelConfig.durationOptions.includes(duration) + ) { + return `Invalid duration for Fal.ai model ${model}. Supported durations: ${formatAllowedValues(modelConfig.durationOptions)}` } - // Other models use string format - return String(duration) + if (aspectRatio) { + if (!modelConfig.supportsAspectRatio) { + return `Fal.ai model ${model} does not support aspect ratio` + } + + if (modelConfig.aspectRatioOptions && !modelConfig.aspectRatioOptions.includes(aspectRatio)) { + return `Invalid aspect ratio for Fal.ai model ${model}. Supported aspect ratios: ${formatAllowedValues(modelConfig.aspectRatioOptions)}` + } + } + + if (resolution) { + if (!modelConfig.supportsResolution) { + return `Fal.ai model ${model} does not support resolution` + } + + if (modelConfig.resolutionOptions && !modelConfig.resolutionOptions.includes(resolution)) { + return `Invalid resolution for Fal.ai model ${model}. 
Supported resolutions: ${formatAllowedValues(modelConfig.resolutionOptions)}` + } + } + + if ( + model === 'ltx-2.3-fast' && + duration !== undefined && + duration > 10 && + resolution && + resolution !== '1080p' + ) { + return 'Fal.ai model ltx-2.3-fast only supports durations over 10 seconds with 1080p resolution' + } + + return undefined +} + +function getFalAIErrorMessage(error: unknown): string { + if (typeof error === 'string') return error + if (isRecord(error)) return getStringProperty(error, 'message') || JSON.stringify(error) + return 'Unknown error' +} + +function buildFalAIQueueUrl( + endpoint: string, + requestId: string, + path: 'response' | 'status' +): string { + return `https://queue.fal.run/${endpoint}/requests/${requestId}/${path}` } async function generateWithFalAI( @@ -818,64 +1081,41 @@ async function generateWithFalAI( aspectRatio: string | undefined, resolution: string | undefined, promptOptimizer: boolean | undefined, + generateAudio: boolean | undefined, requestId: string, logger: ReturnType ): Promise<{ buffer: Buffer; width: number; height: number; jobId: string; duration: number }> { logger.info(`[${requestId}] Starting Fal.ai generation with model: ${model}`) - // Map our model IDs to Fal.ai model paths - const modelMap: { [key: string]: string } = { - 'veo-3.1': 'fal-ai/veo3.1', - 'sora-2': 'fal-ai/sora-2/text-to-video', - 'kling-2.5-turbo-pro': 'fal-ai/kling-video/v2.5-turbo/pro/text-to-video', - 'kling-2.1-pro': 'fal-ai/kling-video/v2.1/master/text-to-video', - 'minimax-hailuo-2.3-pro': 'fal-ai/minimax/hailuo-02/pro/text-to-video', - 'minimax-hailuo-2.3-standard': 'fal-ai/minimax/hailuo-02/standard/text-to-video', - 'wan-2.1': 'fal-ai/wan-t2v', - 'ltxv-0.9.8': 'fal-ai/ltxv-13b-098-distilled', - } - - const falModelId = modelMap[model] - if (!falModelId) { + const modelConfig = FALAI_MODEL_CONFIGS[model] + if (!modelConfig) { throw new Error(`Unknown Fal.ai model: ${model}`) } - // Build request body based on model requirements - 
const requestBody: any = { prompt } - - // Models that support duration and aspect_ratio parameters - const supportsStandardParams = [ - 'kling-2.5-turbo-pro', - 'kling-2.1-pro', - 'minimax-hailuo-2.3-pro', - 'minimax-hailuo-2.3-standard', - ] - - // Models that only need prompt (minimal params) - const minimalParamModels = ['ltxv-0.9.8', 'wan-2.1', 'veo-3.1', 'sora-2'] + const requestBody: FalAIRequestBody = { prompt } + const formattedDuration = formatFalAIDuration(modelConfig.durationFormat, duration) - if (supportsStandardParams.includes(model)) { - // Kling and MiniMax models support duration and aspect_ratio - const formattedDuration = formatDuration(model, duration) - if (formattedDuration !== undefined) { - requestBody.duration = formattedDuration - } + if (formattedDuration !== undefined) { + requestBody.duration = formattedDuration + } - if (aspectRatio) { - requestBody.aspect_ratio = aspectRatio - } + if (modelConfig.supportsAspectRatio && aspectRatio) { + requestBody.aspect_ratio = aspectRatio + } - if (resolution) { - requestBody.resolution = resolution - } + if (modelConfig.supportsResolution && resolution) { + requestBody.resolution = resolution } - // MiniMax models support prompt optimizer - if (model.startsWith('minimax-hailuo') && promptOptimizer !== undefined) { + if (modelConfig.supportsPromptOptimizer && promptOptimizer !== undefined) { requestBody.prompt_optimizer = promptOptimizer } - const createResponse = await fetch(`https://queue.fal.run/${falModelId}`, { + if (modelConfig.supportsGenerateAudio && generateAudio !== undefined) { + requestBody.generate_audio = generateAudio + } + + const createResponse = await fetch(`https://queue.fal.run/${modelConfig.endpoint}`, { method: 'POST', headers: { Authorization: `Key ${apiKey}`, @@ -889,13 +1129,24 @@ async function generateWithFalAI( throw new Error(`Fal.ai API error: ${createResponse.status} - ${error}`) } - const createData = await createResponse.json() - const requestIdFal = 
createData.request_id + const createData = (await createResponse.json()) as unknown + if (!isRecord(createData)) { + throw new Error('Invalid Fal.ai queue response') + } - logger.info(`[${requestId}] Fal.ai request created: ${requestIdFal}`) + const requestIdFal = getStringProperty(createData, 'request_id') + if (!requestIdFal) { + throw new Error('Fal.ai queue response missing request_id') + } + + const statusUrl = + getStringProperty(createData, 'status_url') || + buildFalAIQueueUrl(modelConfig.endpoint, requestIdFal, 'status') + const responseUrl = + getStringProperty(createData, 'response_url') || + buildFalAIQueueUrl(modelConfig.endpoint, requestIdFal, 'response') - // Get base model ID (without subpath) for status and result endpoints - const baseModelId = getBaseModelId(falModelId) + logger.info(`[${requestId}] Fal.ai request created: ${requestIdFal}`) const pollIntervalMs = 5000 const maxAttempts = Math.ceil(getMaxExecutionTimeout() / pollIntervalMs) @@ -904,27 +1155,32 @@ async function generateWithFalAI( while (attempts < maxAttempts) { await sleep(pollIntervalMs) - const statusResponse = await fetch( - `https://queue.fal.run/${baseModelId}/requests/${requestIdFal}/status`, - { - headers: { - Authorization: `Key ${apiKey}`, - }, - } - ) + const statusResponse = await fetch(statusUrl, { + headers: { + Authorization: `Key ${apiKey}`, + }, + }) if (!statusResponse.ok) { await statusResponse.text().catch(() => {}) throw new Error(`Fal.ai status check failed: ${statusResponse.status}`) } - const statusData = await statusResponse.json() + const statusData = (await statusResponse.json()) as unknown + if (!isRecord(statusData)) { + throw new Error('Invalid Fal.ai status response') + } + + if (getStringProperty(statusData, 'status') === 'COMPLETED') { + const statusError = statusData.error + if (statusError) { + throw new Error(`Fal.ai generation failed: ${getFalAIErrorMessage(statusError)}`) + } - if (statusData.status === 'COMPLETED') { 
logger.info(`[${requestId}] Fal.ai generation completed after ${attempts * 5}s`) const resultResponse = await fetch( - `https://queue.fal.run/${baseModelId}/requests/${requestIdFal}`, + getStringProperty(statusData, 'response_url') || responseUrl, { headers: { Authorization: `Key ${apiKey}`, @@ -937,9 +1193,15 @@ async function generateWithFalAI( throw new Error(`Failed to fetch result: ${resultResponse.status}`) } - const resultData = await resultResponse.json() + const resultData = (await resultResponse.json()) as unknown + if (!isRecord(resultData)) { + throw new Error('Invalid Fal.ai result response') + } - const videoUrl = resultData.video?.url || resultData.output?.url + const videoOutput = isRecord(resultData.video) ? resultData.video : undefined + const fallbackOutput = isRecord(resultData.output) ? resultData.output : undefined + const videoUrl = + getStringProperty(videoOutput, 'url') || getStringProperty(fallbackOutput, 'url') if (!videoUrl) { throw new Error('No video URL in response') } @@ -952,11 +1214,10 @@ async function generateWithFalAI( const arrayBuffer = await videoResponse.arrayBuffer() - // Try to get dimensions from response, or calculate from aspect ratio - let width = resultData.video?.width || 1920 - let height = resultData.video?.height || 1080 + let width = getNumberProperty(videoOutput, 'width') || 1920 + let height = getNumberProperty(videoOutput, 'height') || 1080 - if (!resultData.video?.width && aspectRatio) { + if (!getNumberProperty(videoOutput, 'width') && aspectRatio?.includes(':')) { const dims = getVideoDimensions(aspectRatio, resolution || '1080p') width = dims.width height = dims.height @@ -967,12 +1228,12 @@ async function generateWithFalAI( width, height, jobId: requestIdFal, - duration: duration || 5, + duration: getNumberProperty(videoOutput, 'duration') || duration || 5, } } - if (statusData.status === 'FAILED') { - throw new Error(`Fal.ai generation failed: ${statusData.error || 'Unknown error'}`) + if (['ERROR', 
'FAILED', 'CANCELLED'].includes(getStringProperty(statusData, 'status') || '')) { + throw new Error(`Fal.ai generation failed: ${getFalAIErrorMessage(statusData.error)}`) } attempts++ @@ -986,13 +1247,20 @@ function getVideoDimensions( resolution: string ): { width: number; height: number } { let height: number - if (resolution === '4k') { + if (resolution === '4k' || resolution === '2160p') { height = 2160 + } else if (resolution === 'true_1080p') { + height = 1080 } else { - height = Number.parseInt(resolution.replace('p', '')) + const parsedHeight = Number.parseInt(resolution.replace('p', '')) + height = Number.isFinite(parsedHeight) ? parsedHeight : 1080 } const [ratioW, ratioH] = aspectRatio.split(':').map(Number) + if (!Number.isFinite(ratioW) || !Number.isFinite(ratioH) || ratioH === 0) { + return { width: Math.round((height * 16) / 9), height } + } + const width = Math.round((height * ratioW) / ratioH) return { width, height } diff --git a/apps/sim/blocks/blocks/image_generator.ts b/apps/sim/blocks/blocks/image_generator.ts index 69f94cb49e2..be543494f53 100644 --- a/apps/sim/blocks/blocks/image_generator.ts +++ b/apps/sim/blocks/blocks/image_generator.ts @@ -1,14 +1,65 @@ import { ImageIcon } from '@/components/icons' import { AuthMode, type BlockConfig, IntegrationType } from '@/blocks/types' +import { parseOptionalBooleanInput } from '@/blocks/utils' +import type { ImageGenerationResponse } from '@/tools/image/types' import type { DalleResponse } from '@/tools/openai/types' +const OPENAI_GPT_IMAGE_MODELS = [ + { label: 'GPT Image 1.5', id: 'gpt-image-1.5' }, + { label: 'GPT Image 1', id: 'gpt-image-1' }, + { label: 'GPT Image 1 Mini', id: 'gpt-image-1-mini' }, +] + +const GEMINI_IMAGE_MODELS = [ + { label: 'Nano Banana 2', id: 'gemini-3.1-flash-image-preview' }, + { label: 'Nano Banana Pro', id: 'gemini-3-pro-image-preview' }, + { label: 'Nano Banana', id: 'gemini-2.5-flash-image' }, +] + +const FALAI_IMAGE_MODELS = [ + { label: 'Nano Banana 2', id: 
'nano-banana-2' }, + { label: 'Nano Banana Pro', id: 'nano-banana-pro' }, + { label: 'GPT Image 1.5', id: 'gpt-image-1.5' }, + { label: 'Seedream 4.5', id: 'seedream-v4.5' }, + { label: 'FLUX 2 Pro', id: 'flux-2-pro' }, + { label: 'Grok Imagine Image', id: 'grok-imagine-image' }, + { label: 'Nano Banana', id: 'nano-banana' }, +] + +const BASE_ASPECT_RATIO_OPTIONS = [ + { label: '1:1', id: '1:1' }, + { label: '16:9', id: '16:9' }, + { label: '9:16', id: '9:16' }, + { label: '3:2', id: '3:2' }, + { label: '2:3', id: '2:3' }, + { label: '4:3', id: '4:3' }, + { label: '3:4', id: '3:4' }, + { label: '5:4', id: '5:4' }, + { label: '4:5', id: '4:5' }, + { label: '21:9', id: '21:9' }, +] + +const EXTREME_ASPECT_RATIO_OPTIONS = [ + { label: '4:1', id: '4:1' }, + { label: '1:4', id: '1:4' }, + { label: '8:1', id: '8:1' }, + { label: '1:8', id: '1:8' }, +] + +const OUTPUT_FORMAT_OPTIONS = [ + { label: 'PNG', id: 'png' }, + { label: 'JPEG', id: 'jpeg' }, + { label: 'WebP', id: 'webp' }, +] + export const ImageGeneratorBlock: BlockConfig = { type: 'image_generator', name: 'Image Generator', description: 'Generate images', + hideFromToolbar: true, authMode: AuthMode.ApiKey, longDescription: - 'Integrate Image Generator into the workflow. Can generate images using DALL-E 3, GPT Image 1, or GPT Image 2.', + 'Integrate Image Generator into the workflow. 
Can generate images using DALL-E 3 and GPT Image models.', docsLink: 'https://docs.sim.ai/tools/image_generator', category: 'tools', integrationType: IntegrationType.AI, @@ -258,3 +309,578 @@ export const ImageGeneratorBlock: BlockConfig = { metadata: { type: 'json', description: 'Generation metadata' }, }, } + +export const ImageGeneratorV2Block: BlockConfig = { + type: 'image_generator_v2', + name: 'Image Generator', + description: 'Generate images', + authMode: AuthMode.ApiKey, + longDescription: + 'Generate images using OpenAI GPT Image, Google Nano Banana, or Fal.ai image models.', + docsLink: 'https://docs.sim.ai/tools/image_generator', + category: 'tools', + integrationType: IntegrationType.AI, + tags: ['image-generation', 'llm'], + bgColor: '#4D5FFF', + icon: ImageIcon, + subBlocks: [ + { + id: 'provider', + title: 'Provider', + type: 'dropdown', + options: [ + { label: 'OpenAI', id: 'openai' }, + { label: 'Google Gemini', id: 'gemini' }, + { label: 'Fal.ai (Multi-Model)', id: 'falai' }, + ], + value: () => 'openai', + }, + { + id: 'model', + title: 'Model', + type: 'dropdown', + options: OPENAI_GPT_IMAGE_MODELS, + value: () => 'gpt-image-1.5', + condition: { field: 'provider', value: 'openai' }, + dependsOn: ['provider'], + }, + { + id: 'model', + title: 'Model', + type: 'dropdown', + options: GEMINI_IMAGE_MODELS, + value: () => 'gemini-3.1-flash-image-preview', + condition: { field: 'provider', value: 'gemini' }, + dependsOn: ['provider'], + }, + { + id: 'model', + title: 'Model', + type: 'dropdown', + options: FALAI_IMAGE_MODELS, + value: () => 'nano-banana-2', + condition: { field: 'provider', value: 'falai' }, + dependsOn: ['provider'], + }, + { + id: 'prompt', + title: 'Prompt', + type: 'long-input', + required: true, + placeholder: 'Describe the image you want to generate...', + }, + { + id: 'size', + title: 'Size', + type: 'dropdown', + options: [ + { label: 'Auto', id: 'auto' }, + { label: 'Square (1024x1024)', id: '1024x1024' }, + { label: 
'Landscape (1536x1024)', id: '1536x1024' }, + { label: 'Portrait (1024x1536)', id: '1024x1536' }, + ], + value: () => 'auto', + condition: { field: 'provider', value: 'openai' }, + dependsOn: ['provider', 'model'], + }, + { + id: 'size', + title: 'Size', + type: 'dropdown', + options: [ + { label: 'Square (1024x1024)', id: '1024x1024' }, + { label: 'Landscape (1536x1024)', id: '1536x1024' }, + { label: 'Portrait (1024x1536)', id: '1024x1536' }, + ], + value: () => '1024x1024', + condition: { + field: 'provider', + value: 'falai', + and: { field: 'model', value: 'gpt-image-1.5' }, + }, + dependsOn: ['provider', 'model'], + }, + { + id: 'size', + title: 'Size', + type: 'dropdown', + options: [ + { label: 'Auto 2K', id: 'auto_2K' }, + { label: 'Auto 4K', id: 'auto_4K' }, + { label: 'Square HD', id: 'square_hd' }, + { label: 'Square', id: 'square' }, + { label: 'Portrait 4:3', id: 'portrait_4_3' }, + { label: 'Portrait 16:9', id: 'portrait_16_9' }, + { label: 'Landscape 4:3', id: 'landscape_4_3' }, + { label: 'Landscape 16:9', id: 'landscape_16_9' }, + ], + value: () => 'auto_2K', + condition: { + field: 'provider', + value: 'falai', + and: { field: 'model', value: 'seedream-v4.5' }, + }, + dependsOn: ['provider', 'model'], + }, + { + id: 'size', + title: 'Size', + type: 'dropdown', + options: [ + { label: 'Landscape 4:3', id: 'landscape_4_3' }, + { label: 'Landscape 16:9', id: 'landscape_16_9' }, + { label: 'Square HD', id: 'square_hd' }, + { label: 'Square', id: 'square' }, + { label: 'Portrait 4:3', id: 'portrait_4_3' }, + { label: 'Portrait 16:9', id: 'portrait_16_9' }, + ], + value: () => 'landscape_4_3', + condition: { + field: 'provider', + value: 'falai', + and: { field: 'model', value: 'flux-2-pro' }, + }, + dependsOn: ['provider', 'model'], + }, + { + id: 'aspectRatio', + title: 'Aspect Ratio', + type: 'dropdown', + options: [...BASE_ASPECT_RATIO_OPTIONS, ...EXTREME_ASPECT_RATIO_OPTIONS], + value: () => '1:1', + condition: { + field: 'provider', + value: 
'gemini', + and: { field: 'model', value: 'gemini-3.1-flash-image-preview' }, + }, + dependsOn: ['provider', 'model'], + }, + { + id: 'aspectRatio', + title: 'Aspect Ratio', + type: 'dropdown', + options: BASE_ASPECT_RATIO_OPTIONS, + value: () => '1:1', + condition: { + field: 'provider', + value: 'gemini', + and: { + field: 'model', + value: ['gemini-3-pro-image-preview', 'gemini-2.5-flash-image'], + }, + }, + dependsOn: ['provider', 'model'], + }, + { + id: 'aspectRatio', + title: 'Aspect Ratio', + type: 'dropdown', + options: [ + { label: 'Auto', id: 'auto' }, + ...BASE_ASPECT_RATIO_OPTIONS, + ...EXTREME_ASPECT_RATIO_OPTIONS, + ], + value: () => 'auto', + condition: { + field: 'provider', + value: 'falai', + and: { field: 'model', value: 'nano-banana-2' }, + }, + dependsOn: ['provider', 'model'], + }, + { + id: 'aspectRatio', + title: 'Aspect Ratio', + type: 'dropdown', + options: [{ label: 'Auto', id: 'auto' }, ...BASE_ASPECT_RATIO_OPTIONS], + value: () => '1:1', + condition: { + field: 'provider', + value: 'falai', + and: { field: 'model', value: 'nano-banana-pro' }, + }, + dependsOn: ['provider', 'model'], + }, + { + id: 'aspectRatio', + title: 'Aspect Ratio', + type: 'dropdown', + options: BASE_ASPECT_RATIO_OPTIONS, + value: () => '1:1', + condition: { + field: 'provider', + value: 'falai', + and: { field: 'model', value: 'nano-banana' }, + }, + dependsOn: ['provider', 'model'], + }, + { + id: 'aspectRatio', + title: 'Aspect Ratio', + type: 'dropdown', + options: [ + { label: '1:1', id: '1:1' }, + { label: '16:9', id: '16:9' }, + { label: '9:16', id: '9:16' }, + { label: '4:3', id: '4:3' }, + { label: '3:2', id: '3:2' }, + { label: '2:3', id: '2:3' }, + { label: '3:4', id: '3:4' }, + { label: '2:1', id: '2:1' }, + { label: '1:2', id: '1:2' }, + { label: '20:9', id: '20:9' }, + { label: '9:20', id: '9:20' }, + { label: '19.5:9', id: '19.5:9' }, + { label: '9:19.5', id: '9:19.5' }, + ], + value: () => '1:1', + condition: { + field: 'provider', + value: 
'falai', + and: { field: 'model', value: 'grok-imagine-image' }, + }, + dependsOn: ['provider', 'model'], + }, + { + id: 'resolution', + title: 'Resolution', + type: 'dropdown', + options: [ + { label: '512', id: '512' }, + { label: '1K', id: '1K' }, + { label: '2K', id: '2K' }, + { label: '4K', id: '4K' }, + ], + value: () => '1K', + condition: { + field: 'provider', + value: 'gemini', + and: { field: 'model', value: 'gemini-3.1-flash-image-preview' }, + }, + dependsOn: ['provider', 'model'], + }, + { + id: 'resolution', + title: 'Resolution', + type: 'dropdown', + options: [ + { label: '1K', id: '1K' }, + { label: '2K', id: '2K' }, + { label: '4K', id: '4K' }, + ], + value: () => '1K', + condition: { + field: 'provider', + value: 'gemini', + and: { field: 'model', value: 'gemini-3-pro-image-preview' }, + }, + dependsOn: ['provider', 'model'], + }, + { + id: 'resolution', + title: 'Resolution', + type: 'dropdown', + options: [ + { label: '0.5K', id: '0.5K' }, + { label: '1K', id: '1K' }, + { label: '2K', id: '2K' }, + { label: '4K', id: '4K' }, + ], + value: () => '1K', + condition: { + field: 'provider', + value: 'falai', + and: { field: 'model', value: 'nano-banana-2' }, + }, + dependsOn: ['provider', 'model'], + }, + { + id: 'resolution', + title: 'Resolution', + type: 'dropdown', + options: [ + { label: '1K', id: '1K' }, + { label: '2K', id: '2K' }, + { label: '4K', id: '4K' }, + ], + value: () => '1K', + condition: { + field: 'provider', + value: 'falai', + and: { field: 'model', value: 'nano-banana-pro' }, + }, + dependsOn: ['provider', 'model'], + }, + { + id: 'resolution', + title: 'Resolution', + type: 'dropdown', + options: [ + { label: '1k', id: '1k' }, + { label: '2k', id: '2k' }, + ], + value: () => '1k', + condition: { + field: 'provider', + value: 'falai', + and: { field: 'model', value: 'grok-imagine-image' }, + }, + dependsOn: ['provider', 'model'], + }, + { + id: 'quality', + title: 'Quality', + type: 'dropdown', + options: [ + { label: 'Auto', 
id: 'auto' }, + { label: 'Low', id: 'low' }, + { label: 'Medium', id: 'medium' }, + { label: 'High', id: 'high' }, + ], + value: () => 'auto', + condition: { field: 'provider', value: 'openai' }, + dependsOn: ['provider', 'model'], + }, + { + id: 'quality', + title: 'Quality', + type: 'dropdown', + options: [ + { label: 'High', id: 'high' }, + { label: 'Medium', id: 'medium' }, + { label: 'Low', id: 'low' }, + ], + value: () => 'high', + condition: { + field: 'provider', + value: 'falai', + and: { field: 'model', value: 'gpt-image-1.5' }, + }, + dependsOn: ['provider', 'model'], + }, + { + id: 'background', + title: 'Background', + type: 'dropdown', + options: [ + { label: 'Auto', id: 'auto' }, + { label: 'Transparent', id: 'transparent' }, + { label: 'Opaque', id: 'opaque' }, + ], + value: () => 'auto', + condition: { + field: 'provider', + value: ['openai', 'falai'], + and: { field: 'model', value: ['gpt-image-1.5', 'gpt-image-1', 'gpt-image-1-mini'] }, + }, + dependsOn: ['provider', 'model'], + }, + { + id: 'outputFormat', + title: 'Output Format', + type: 'dropdown', + options: OUTPUT_FORMAT_OPTIONS, + value: () => 'png', + condition: { + field: 'provider', + value: ['openai', 'falai'], + and: { + field: 'model', + value: [ + 'gpt-image-1.5', + 'gpt-image-1', + 'gpt-image-1-mini', + 'nano-banana-2', + 'nano-banana-pro', + 'nano-banana', + 'grok-imagine-image', + ], + }, + }, + dependsOn: ['provider', 'model'], + }, + { + id: 'outputFormat', + title: 'Output Format', + type: 'dropdown', + options: [ + { label: 'JPEG', id: 'jpeg' }, + { label: 'PNG', id: 'png' }, + ], + value: () => 'jpeg', + condition: { + field: 'provider', + value: 'falai', + and: { field: 'model', value: 'flux-2-pro' }, + }, + dependsOn: ['provider', 'model'], + }, + { + id: 'moderation', + title: 'Moderation', + type: 'dropdown', + options: [ + { label: 'Auto', id: 'auto' }, + { label: 'Low', id: 'low' }, + ], + value: () => 'auto', + condition: { field: 'provider', value: 'openai' }, + 
dependsOn: ['provider', 'model'], + }, + { + id: 'safetyTolerance', + title: 'Safety Tolerance', + type: 'dropdown', + options: [ + { label: '1', id: '1' }, + { label: '2', id: '2' }, + { label: '3', id: '3' }, + { label: '4', id: '4' }, + { label: '5', id: '5' }, + { label: '6', id: '6' }, + ], + value: () => '4', + condition: { + field: 'provider', + value: 'falai', + and: { field: 'model', value: ['nano-banana-2', 'nano-banana-pro', 'nano-banana'] }, + }, + dependsOn: ['provider', 'model'], + }, + { + id: 'safetyTolerance', + title: 'Safety Tolerance', + type: 'dropdown', + options: [ + { label: '1', id: '1' }, + { label: '2', id: '2' }, + { label: '3', id: '3' }, + { label: '4', id: '4' }, + { label: '5', id: '5' }, + ], + value: () => '2', + condition: { + field: 'provider', + value: 'falai', + and: { field: 'model', value: 'flux-2-pro' }, + }, + dependsOn: ['provider', 'model'], + }, + { + id: 'thinkingLevel', + title: 'Thinking Level', + type: 'dropdown', + options: [ + { label: 'Minimal', id: 'minimal' }, + { label: 'High', id: 'high' }, + ], + condition: { + field: 'provider', + value: 'falai', + and: { field: 'model', value: 'nano-banana-2' }, + }, + dependsOn: ['provider', 'model'], + }, + { + id: 'enableWebSearch', + title: 'Web Search', + type: 'switch', + condition: { + field: 'provider', + value: 'falai', + and: { field: 'model', value: ['nano-banana-2', 'nano-banana-pro'] }, + }, + dependsOn: ['provider', 'model'], + }, + { + id: 'enableSafetyChecker', + title: 'Safety Checker', + type: 'switch', + defaultValue: true, + condition: { + field: 'provider', + value: 'falai', + and: { field: 'model', value: ['seedream-v4.5', 'flux-2-pro'] }, + }, + dependsOn: ['provider', 'model'], + }, + { + id: 'apiKey', + title: 'API Key', + type: 'short-input', + required: true, + placeholder: 'Enter your provider API key', + password: true, + connectionDroppable: false, + }, + ], + tools: { + access: ['image_generate'], + config: { + tool: () => 'image_generate', + 
params: (params) => { + if (!params.apiKey) { + throw new Error('API key is required') + } + if (!params.prompt) { + throw new Error('Prompt is required') + } + + const provider = params.provider || 'openai' + const defaultModel = + provider === 'gemini' + ? 'gemini-3.1-flash-image-preview' + : provider === 'falai' + ? 'nano-banana-2' + : 'gpt-image-1.5' + + return { + provider, + model: params.model || defaultModel, + prompt: params.prompt, + apiKey: params.apiKey, + ...(params.size && { size: params.size }), + ...(params.aspectRatio && { aspectRatio: params.aspectRatio }), + ...(params.resolution && { resolution: params.resolution }), + ...(params.quality && { quality: params.quality }), + ...(params.background && { background: params.background }), + ...(params.outputFormat && { outputFormat: params.outputFormat }), + ...(params.moderation && { moderation: params.moderation }), + ...(params.safetyTolerance && { safetyTolerance: params.safetyTolerance }), + ...(params.thinkingLevel && { thinkingLevel: params.thinkingLevel }), + ...(params.enableWebSearch !== undefined && { + enableWebSearch: parseOptionalBooleanInput(params.enableWebSearch), + }), + ...(params.enableSafetyChecker !== undefined && { + enableSafetyChecker: parseOptionalBooleanInput(params.enableSafetyChecker), + }), + } + }, + }, + }, + inputs: { + provider: { type: 'string', description: 'Image generation provider' }, + prompt: { type: 'string', description: 'Image description prompt' }, + model: { type: 'string', description: 'Image generation model' }, + size: { type: 'string', description: 'Image size' }, + aspectRatio: { type: 'string', description: 'Image aspect ratio' }, + resolution: { type: 'string', description: 'Image resolution' }, + quality: { type: 'string', description: 'Image quality level' }, + background: { type: 'string', description: 'Background type' }, + outputFormat: { type: 'string', description: 'Output image format' }, + moderation: { type: 'string', description: 
'Moderation level' }, + safetyTolerance: { type: 'string', description: 'Fal.ai safety tolerance' }, + thinkingLevel: { type: 'string', description: 'Fal.ai thinking level' }, + enableWebSearch: { type: 'boolean', description: 'Enable Fal.ai web search grounding' }, + enableSafetyChecker: { type: 'boolean', description: 'Enable Fal.ai safety checker' }, + apiKey: { type: 'string', description: 'Provider API key' }, + }, + outputs: { + content: { type: 'string', description: 'Generated image URL or identifier' }, + image: { type: 'file', description: 'Generated image file' }, + imageUrl: { type: 'string', description: 'Generated image URL' }, + provider: { type: 'string', description: 'Provider used' }, + model: { type: 'string', description: 'Model used' }, + metadata: { type: 'json', description: 'Generation metadata' }, + }, +} diff --git a/apps/sim/blocks/blocks/video_generator.ts b/apps/sim/blocks/blocks/video_generator.ts index eaccc6cde1f..4476891c297 100644 --- a/apps/sim/blocks/blocks/video_generator.ts +++ b/apps/sim/blocks/blocks/video_generator.ts @@ -1,8 +1,75 @@ import { VideoIcon } from '@/components/icons' -import { AuthMode, type BlockConfig, IntegrationType } from '@/blocks/types' -import { normalizeFileInput } from '@/blocks/utils' +import { AuthMode, type BlockConfig, IntegrationType, type SubBlockConfig } from '@/blocks/types' +import { normalizeFileInput, parseOptionalBooleanInput } from '@/blocks/utils' import type { VideoBlockResponse } from '@/tools/video/types' +const FALAI_PREVIOUS_MODEL_OPTIONS = [ + { label: 'Google Veo 3.1', id: 'veo-3.1' }, + { label: 'OpenAI Sora 2', id: 'sora-2' }, + { label: 'Kling 2.5 Turbo Pro', id: 'kling-2.5-turbo-pro' }, + { label: 'Kling 2.1 Pro', id: 'kling-2.1-pro' }, + { label: 'MiniMax Hailuo 2.3 Pro', id: 'minimax-hailuo-2.3-pro' }, + { label: 'MiniMax Hailuo 2.3 Standard', id: 'minimax-hailuo-2.3-standard' }, + { label: 'WAN 2.1', id: 'wan-2.1' }, + { label: 'LTXV 0.9.8', id: 'ltxv-0.9.8' }, +] + +const 
FALAI_LATEST_MODEL_OPTIONS = [ + { label: 'Google Veo 3.1', id: 'veo-3.1' }, + { label: 'Google Veo 3.1 Fast', id: 'veo-3.1-fast' }, + { label: 'OpenAI Sora 2', id: 'sora-2' }, + { label: 'OpenAI Sora 2 Pro', id: 'sora-2-pro' }, + { label: 'ByteDance Seedance 2.0', id: 'seedance-2.0' }, + { label: 'ByteDance Seedance 2.0 Fast', id: 'seedance-2.0-fast' }, + { label: 'Kling 3.0 Pro', id: 'kling-v3-pro' }, + { label: 'Kling 3.0 4K', id: 'kling-v3-4k' }, + { label: 'Kling O3 Pro', id: 'kling-o3-pro' }, + { label: 'Kling O3 4K', id: 'kling-o3-4k' }, + { label: 'MiniMax Hailuo 2.3 Pro', id: 'minimax-hailuo-2.3-pro' }, + { label: 'MiniMax Hailuo 2.3 Standard', id: 'minimax-hailuo-2.3-standard' }, + { label: 'WAN 2.2 A14B Turbo', id: 'wan-2.2-a14b-turbo' }, + { label: 'LTX 2.3', id: 'ltx-2.3' }, + { label: 'LTX 2.3 Fast', id: 'ltx-2.3-fast' }, +] + +const FALAI_VEO_MODELS = ['veo-3.1', 'veo-3.1-fast'] +const FALAI_SORA_MODELS = ['sora-2', 'sora-2-pro'] +const FALAI_SEEDANCE_STANDARD_MODELS = ['seedance-2.0'] +const FALAI_SEEDANCE_FAST_MODELS = ['seedance-2.0-fast'] +const FALAI_SEEDANCE_MODELS = [...FALAI_SEEDANCE_STANDARD_MODELS, ...FALAI_SEEDANCE_FAST_MODELS] +const FALAI_KLING_LATEST_MODELS = ['kling-v3-pro', 'kling-v3-4k', 'kling-o3-pro', 'kling-o3-4k'] +const FALAI_KLING_LEGACY_MODELS = ['kling-2.5-turbo-pro', 'kling-2.1-pro'] +const FALAI_MINIMAX_STANDARD_MODELS = ['minimax-hailuo-2.3-standard', 'minimax-hailuo-02-standard'] +const FALAI_MINIMAX_PRO_MODELS = ['minimax-hailuo-2.3-pro', 'minimax-hailuo-02-pro'] +const FALAI_WAN_MODELS = ['wan-2.2-a14b-turbo'] +const FALAI_LTX_MODELS = ['ltx-2.3', 'ltx-2.3-fast'] +const FALAI_AUDIO_DEFAULT_ON_MODELS = [ + ...FALAI_VEO_MODELS, + ...FALAI_SEEDANCE_MODELS, + 'kling-v3-pro', + 'kling-v3-4k', + ...FALAI_LTX_MODELS, +] +const FALAI_AUDIO_DEFAULT_OFF_MODELS = ['kling-o3-pro', 'kling-o3-4k'] + +const withFalAIModelOptions = ( + subBlocks: SubBlockConfig[], + options: SubBlockConfig['options'] +): SubBlockConfig[] => + 
subBlocks.map((subBlock) => { + const condition = subBlock.condition + if ( + subBlock.id === 'model' && + typeof condition === 'object' && + condition?.field === 'provider' && + condition.value === 'falai' + ) { + return { ...subBlock, options } + } + + return subBlock + }) + export const VideoGeneratorBlock: BlockConfig = { type: 'video_generator', name: 'Video Generator (Legacy)', @@ -71,8 +138,11 @@ export const VideoGeneratorBlock: BlockConfig = { title: 'Model', type: 'dropdown', condition: { field: 'provider', value: 'minimax' }, - options: [{ label: 'Hailuo 2.3', id: 'hailuo-02' }], - value: () => 'hailuo-02', + options: [ + { label: 'Hailuo 2.3', id: 'hailuo-2.3' }, + { label: 'Hailuo-02', id: 'hailuo-02' }, + ], + value: () => 'hailuo-2.3', dependsOn: ['provider'], required: false, }, @@ -97,16 +167,7 @@ export const VideoGeneratorBlock: BlockConfig = { title: 'Model', type: 'dropdown', condition: { field: 'provider', value: 'falai' }, - options: [ - { label: 'Google Veo 3.1', id: 'veo-3.1' }, - { label: 'OpenAI Sora 2', id: 'sora-2' }, - { label: 'Kling 2.5 Turbo Pro', id: 'kling-2.5-turbo-pro' }, - { label: 'Kling 2.1 Pro', id: 'kling-2.1-pro' }, - { label: 'MiniMax Hailuo 2.3 Pro', id: 'minimax-hailuo-2.3-pro' }, - { label: 'MiniMax Hailuo 2.3 Standard', id: 'minimax-hailuo-2.3-standard' }, - { label: 'WAN 2.1', id: 'wan-2.1' }, - { label: 'LTXV 0.9.8', id: 'ltxv-0.9.8' }, - ], + options: FALAI_PREVIOUS_MODEL_OPTIONS, value: () => 'veo-3.1', dependsOn: ['provider'], required: true, @@ -182,19 +243,107 @@ export const VideoGeneratorBlock: BlockConfig = { required: false, }, - // Duration selection - Fal.ai (only for Kling and MiniMax models) { id: 'duration', title: 'Duration (seconds)', type: 'dropdown', condition: { - field: 'model', - value: [ - 'kling-2.5-turbo-pro', - 'kling-2.1-pro', - 'minimax-hailuo-2.3-pro', - 'minimax-hailuo-2.3-standard', - ], + field: 'provider', + value: 'falai', + and: { field: 'model', value: FALAI_VEO_MODELS }, + }, + 
options: [ + { label: '4', id: '4' }, + { label: '6', id: '6' }, + { label: '8', id: '8' }, + ], + value: () => '8', + dependsOn: ['model'], + required: false, + }, + { + id: 'duration', + title: 'Duration (seconds)', + type: 'dropdown', + condition: { + field: 'provider', + value: 'falai', + and: { field: 'model', value: FALAI_SORA_MODELS }, + }, + options: [ + { label: '4', id: '4' }, + { label: '8', id: '8' }, + { label: '12', id: '12' }, + { label: '16', id: '16' }, + { label: '20', id: '20' }, + ], + value: () => '4', + dependsOn: ['model'], + required: false, + }, + { + id: 'duration', + title: 'Duration (seconds)', + type: 'dropdown', + condition: { + field: 'provider', + value: 'falai', + and: { field: 'model', value: FALAI_SEEDANCE_MODELS }, + }, + options: [ + { label: '4', id: '4' }, + { label: '5', id: '5' }, + { label: '6', id: '6' }, + { label: '7', id: '7' }, + { label: '8', id: '8' }, + { label: '9', id: '9' }, + { label: '10', id: '10' }, + { label: '11', id: '11' }, + { label: '12', id: '12' }, + { label: '13', id: '13' }, + { label: '14', id: '14' }, + { label: '15', id: '15' }, + ], + value: () => '5', + dependsOn: ['model'], + required: false, + }, + { + id: 'duration', + title: 'Duration (seconds)', + type: 'dropdown', + condition: { + field: 'provider', + value: 'falai', + and: { field: 'model', value: FALAI_KLING_LATEST_MODELS }, + }, + options: [ + { label: '3', id: '3' }, + { label: '4', id: '4' }, + { label: '5', id: '5' }, + { label: '6', id: '6' }, + { label: '7', id: '7' }, + { label: '8', id: '8' }, + { label: '9', id: '9' }, + { label: '10', id: '10' }, + { label: '11', id: '11' }, + { label: '12', id: '12' }, + { label: '13', id: '13' }, + { label: '14', id: '14' }, + { label: '15', id: '15' }, + ], + value: () => '5', + dependsOn: ['model'], + required: false, + }, + { + id: 'duration', + title: 'Duration (seconds)', + type: 'dropdown', + condition: { + field: 'provider', + value: 'falai', + and: { field: 'model', value: 
FALAI_KLING_LEGACY_MODELS }, }, options: [ { label: '5', id: '5' }, @@ -205,6 +354,64 @@ export const VideoGeneratorBlock: BlockConfig = { dependsOn: ['model'], required: false, }, + { + id: 'duration', + title: 'Duration (seconds)', + type: 'dropdown', + condition: { + field: 'provider', + value: 'falai', + and: { field: 'model', value: FALAI_MINIMAX_STANDARD_MODELS }, + }, + options: [ + { label: '6', id: '6' }, + { label: '10', id: '10' }, + ], + value: () => '6', + dependsOn: ['model'], + required: false, + }, + { + id: 'duration', + title: 'Duration (seconds)', + type: 'dropdown', + condition: { + field: 'provider', + value: 'falai', + and: { field: 'model', value: 'ltx-2.3' }, + }, + options: [ + { label: '6', id: '6' }, + { label: '8', id: '8' }, + { label: '10', id: '10' }, + ], + value: () => '6', + dependsOn: ['model'], + required: false, + }, + { + id: 'duration', + title: 'Duration (seconds)', + type: 'dropdown', + condition: { + field: 'provider', + value: 'falai', + and: { field: 'model', value: 'ltx-2.3-fast' }, + }, + options: [ + { label: '6', id: '6' }, + { label: '8', id: '8' }, + { label: '10', id: '10' }, + { label: '12', id: '12' }, + { label: '14', id: '14' }, + { label: '16', id: '16' }, + { label: '18', id: '18' }, + { label: '20', id: '20' }, + ], + value: () => '6', + dependsOn: ['model'], + required: false, + }, // Aspect ratio selection - Veo (only 16:9 and 9:16) { @@ -253,19 +460,74 @@ export const VideoGeneratorBlock: BlockConfig = { required: false, }, - // Aspect ratio selection - Fal.ai (only for Kling and MiniMax models) { id: 'aspectRatio', title: 'Aspect Ratio', type: 'dropdown', condition: { - field: 'model', - value: [ - 'kling-2.5-turbo-pro', - 'kling-2.1-pro', - 'minimax-hailuo-2.3-pro', - 'minimax-hailuo-2.3-standard', - ], + field: 'provider', + value: 'falai', + and: { + field: 'model', + value: [...FALAI_VEO_MODELS, ...FALAI_SORA_MODELS, ...FALAI_LTX_MODELS], + }, + }, + options: [ + { label: '16:9', id: '16:9' }, + { 
label: '9:16', id: '9:16' }, + ], + value: () => '16:9', + dependsOn: ['model'], + required: false, + }, + { + id: 'aspectRatio', + title: 'Aspect Ratio', + type: 'dropdown', + condition: { + field: 'provider', + value: 'falai', + and: { field: 'model', value: FALAI_SEEDANCE_MODELS }, + }, + options: [ + { label: 'Auto', id: 'auto' }, + { label: '21:9', id: '21:9' }, + { label: '16:9', id: '16:9' }, + { label: '4:3', id: '4:3' }, + { label: '1:1', id: '1:1' }, + { label: '3:4', id: '3:4' }, + { label: '9:16', id: '9:16' }, + ], + value: () => 'auto', + dependsOn: ['model'], + required: false, + }, + { + id: 'aspectRatio', + title: 'Aspect Ratio', + type: 'dropdown', + condition: { + field: 'provider', + value: 'falai', + and: { field: 'model', value: [...FALAI_KLING_LATEST_MODELS, ...FALAI_WAN_MODELS] }, + }, + options: [ + { label: '16:9', id: '16:9' }, + { label: '9:16', id: '9:16' }, + { label: '1:1', id: '1:1' }, + ], + value: () => '16:9', + dependsOn: ['model'], + required: false, + }, + { + id: 'aspectRatio', + title: 'Aspect Ratio', + type: 'dropdown', + condition: { + field: 'provider', + value: 'falai', + and: { field: 'model', value: FALAI_KLING_LEGACY_MODELS }, }, options: [ { label: '16:9', id: '16:9' }, @@ -310,8 +572,129 @@ export const VideoGeneratorBlock: BlockConfig = { dependsOn: ['provider'], required: false, }, + { + id: 'resolution', + title: 'Resolution', + type: 'dropdown', + condition: { + field: 'provider', + value: 'falai', + and: { field: 'model', value: FALAI_VEO_MODELS }, + }, + options: [ + { label: '720p', id: '720p' }, + { label: '1080p', id: '1080p' }, + { label: '4K', id: '4k' }, + ], + value: () => '1080p', + dependsOn: ['model'], + required: false, + }, + { + id: 'resolution', + title: 'Resolution', + type: 'dropdown', + condition: { + field: 'provider', + value: 'falai', + and: { field: 'model', value: 'sora-2' }, + }, + options: [{ label: '720p', id: '720p' }], + value: () => '720p', + dependsOn: ['model'], + required: false, 
+ }, + { + id: 'resolution', + title: 'Resolution', + type: 'dropdown', + condition: { + field: 'provider', + value: 'falai', + and: { field: 'model', value: 'sora-2-pro' }, + }, + options: [ + { label: '720p', id: '720p' }, + { label: '1080p', id: '1080p' }, + { label: 'True 1080p', id: 'true_1080p' }, + ], + value: () => '1080p', + dependsOn: ['model'], + required: false, + }, + { + id: 'resolution', + title: 'Resolution', + type: 'dropdown', + condition: { + field: 'provider', + value: 'falai', + and: { field: 'model', value: FALAI_SEEDANCE_STANDARD_MODELS }, + }, + options: [ + { label: '480p', id: '480p' }, + { label: '720p', id: '720p' }, + { label: '1080p', id: '1080p' }, + ], + value: () => '720p', + dependsOn: ['model'], + required: false, + }, + { + id: 'resolution', + title: 'Resolution', + type: 'dropdown', + condition: { + field: 'provider', + value: 'falai', + and: { field: 'model', value: FALAI_SEEDANCE_FAST_MODELS }, + }, + options: [ + { label: '480p', id: '480p' }, + { label: '720p', id: '720p' }, + ], + value: () => '720p', + dependsOn: ['model'], + required: false, + }, + { + id: 'resolution', + title: 'Resolution', + type: 'dropdown', + condition: { + field: 'provider', + value: 'falai', + and: { field: 'model', value: FALAI_WAN_MODELS }, + }, + options: [ + { label: '480p', id: '480p' }, + { label: '580p', id: '580p' }, + { label: '720p', id: '720p' }, + ], + value: () => '720p', + dependsOn: ['model'], + required: false, + }, + { + id: 'resolution', + title: 'Resolution', + type: 'dropdown', + condition: { + field: 'provider', + value: 'falai', + and: { field: 'model', value: FALAI_LTX_MODELS }, + }, + options: [ + { label: '1080p', id: '1080p' }, + { label: '1440p', id: '1440p' }, + { label: '2160p', id: '2160p' }, + ], + value: () => '1080p', + dependsOn: ['model'], + required: false, + }, - // Note: MiniMax resolution is fixed per endpoint (Pro=1080p, Standard=768p) + // Note: MiniMax resolution is fixed per endpoint (Pro=1080p for 6s, 
Standard=768p) // Runway-specific: Visual reference (REQUIRED for Gen-4) { @@ -346,16 +729,54 @@ export const VideoGeneratorBlock: BlockConfig = { condition: { field: 'provider', value: 'minimax' }, dependsOn: ['provider'], }, - - // API Key { - id: 'apiKey', - title: 'API Key', - type: 'short-input', - placeholder: 'Enter your provider API key', - password: true, - required: true, - }, + id: 'promptOptimizer', + title: 'Prompt Optimizer', + type: 'switch', + defaultValue: true, + condition: { + field: 'provider', + value: 'falai', + and: { + field: 'model', + value: [...FALAI_MINIMAX_PRO_MODELS, ...FALAI_MINIMAX_STANDARD_MODELS], + }, + }, + dependsOn: ['model'], + }, + { + id: 'generateAudio', + title: 'Generate Audio', + type: 'switch', + defaultValue: true, + condition: { + field: 'provider', + value: 'falai', + and: { field: 'model', value: FALAI_AUDIO_DEFAULT_ON_MODELS }, + }, + dependsOn: ['model'], + }, + { + id: 'generateAudio', + title: 'Generate Audio', + type: 'switch', + condition: { + field: 'provider', + value: 'falai', + and: { field: 'model', value: FALAI_AUDIO_DEFAULT_OFF_MODELS }, + }, + dependsOn: ['model'], + }, + + // API Key + { + id: 'apiKey', + title: 'API Key', + type: 'short-input', + placeholder: 'Enter your provider API key', + password: true, + required: true, + }, ], tools: { @@ -390,7 +811,8 @@ export const VideoGeneratorBlock: BlockConfig = { visualReference: params.visualReference, consistencyMode: params.consistencyMode, stylePreset: params.stylePreset, - promptOptimizer: params.promptOptimizer, + promptOptimizer: parseOptionalBooleanInput(params.promptOptimizer), + generateAudio: parseOptionalBooleanInput(params.generateAudio), cameraControl: params.cameraControl ? typeof params.cameraControl === 'string' ? 
JSON.parse(params.cameraControl) @@ -403,7 +825,7 @@ export const VideoGeneratorBlock: BlockConfig = { inputs: { provider: { type: 'string', - description: 'Video generation provider (runway, veo, luma, minimax)', + description: 'Video generation provider (runway, veo, luma, minimax, falai)', }, apiKey: { type: 'string', description: 'Provider API key' }, model: { @@ -418,11 +840,11 @@ export const VideoGeneratorBlock: BlockConfig = { duration: { type: 'number', description: 'Video duration in seconds' }, aspectRatio: { type: 'string', - description: 'Aspect ratio (16:9, 9:16, 1:1) - not available for MiniMax', + description: 'Aspect ratio for supported providers and models', }, resolution: { type: 'string', - description: 'Video resolution - not available for MiniMax (fixed per endpoint)', + description: 'Video resolution for supported providers and models', }, visualReference: { type: 'json', description: 'Reference image for Runway (UserFile)' }, consistencyMode: { @@ -434,6 +856,10 @@ export const VideoGeneratorBlock: BlockConfig = { type: 'boolean', description: 'Enable prompt optimization for MiniMax (default: true)', }, + generateAudio: { + type: 'boolean', + description: 'Generate native audio when supported by the selected model', + }, cameraControl: { type: 'json', description: 'Camera controls for Luma (pan, zoom, tilt, truck, tracking)', @@ -455,7 +881,7 @@ export const VideoGeneratorV2Block: BlockConfig = { ...VideoGeneratorBlock, type: 'video_generator_v2', name: 'Video Generator', - hideFromToolbar: false, + hideFromToolbar: true, subBlocks: [ { id: 'provider', @@ -500,8 +926,11 @@ export const VideoGeneratorV2Block: BlockConfig = { title: 'Model', type: 'dropdown', condition: { field: 'provider', value: 'minimax' }, - options: [{ label: 'Hailuo 2.3', id: 'hailuo-02' }], - value: () => 'hailuo-02', + options: [ + { label: 'Hailuo 2.3', id: 'hailuo-2.3' }, + { label: 'Hailuo-02', id: 'hailuo-02' }, + ], + value: () => 'hailuo-2.3', dependsOn: 
['provider'], required: false, }, @@ -523,16 +952,7 @@ export const VideoGeneratorV2Block: BlockConfig = { title: 'Model', type: 'dropdown', condition: { field: 'provider', value: 'falai' }, - options: [ - { label: 'Google Veo 3.1', id: 'veo-3.1' }, - { label: 'OpenAI Sora 2', id: 'sora-2' }, - { label: 'Kling 2.5 Turbo Pro', id: 'kling-2.5-turbo-pro' }, - { label: 'Kling 2.1 Pro', id: 'kling-2.1-pro' }, - { label: 'MiniMax Hailuo 2.3 Pro', id: 'minimax-hailuo-2.3-pro' }, - { label: 'MiniMax Hailuo 2.3 Standard', id: 'minimax-hailuo-2.3-standard' }, - { label: 'WAN 2.1', id: 'wan-2.1' }, - { label: 'LTXV 0.9.8', id: 'ltxv-0.9.8' }, - ], + options: FALAI_PREVIOUS_MODEL_OPTIONS, value: () => 'veo-3.1', dependsOn: ['provider'], required: true, @@ -602,23 +1022,170 @@ export const VideoGeneratorV2Block: BlockConfig = { title: 'Duration (seconds)', type: 'dropdown', condition: { - field: 'model', - value: [ - 'kling-2.5-turbo-pro', - 'kling-2.1-pro', - 'minimax-hailuo-2.3-pro', - 'minimax-hailuo-2.3-standard', - ], + field: 'provider', + value: 'falai', + and: { field: 'model', value: FALAI_VEO_MODELS }, + }, + options: [ + { label: '4', id: '4' }, + { label: '6', id: '6' }, + { label: '8', id: '8' }, + ], + value: () => '8', + dependsOn: ['model'], + required: false, + }, + { + id: 'duration', + title: 'Duration (seconds)', + type: 'dropdown', + condition: { + field: 'provider', + value: 'falai', + and: { field: 'model', value: FALAI_SORA_MODELS }, + }, + options: [ + { label: '4', id: '4' }, + { label: '8', id: '8' }, + { label: '12', id: '12' }, + { label: '16', id: '16' }, + { label: '20', id: '20' }, + ], + value: () => '4', + dependsOn: ['model'], + required: false, + }, + { + id: 'duration', + title: 'Duration (seconds)', + type: 'dropdown', + condition: { + field: 'provider', + value: 'falai', + and: { field: 'model', value: FALAI_SEEDANCE_MODELS }, + }, + options: [ + { label: '4', id: '4' }, + { label: '5', id: '5' }, + { label: '6', id: '6' }, + { label: '7', 
id: '7' }, + { label: '8', id: '8' }, + { label: '9', id: '9' }, + { label: '10', id: '10' }, + { label: '11', id: '11' }, + { label: '12', id: '12' }, + { label: '13', id: '13' }, + { label: '14', id: '14' }, + { label: '15', id: '15' }, + ], + value: () => '5', + dependsOn: ['model'], + required: false, + }, + { + id: 'duration', + title: 'Duration (seconds)', + type: 'dropdown', + condition: { + field: 'provider', + value: 'falai', + and: { field: 'model', value: FALAI_KLING_LATEST_MODELS }, }, options: [ + { label: '3', id: '3' }, + { label: '4', id: '4' }, { label: '5', id: '5' }, + { label: '6', id: '6' }, + { label: '7', id: '7' }, { label: '8', id: '8' }, + { label: '9', id: '9' }, { label: '10', id: '10' }, + { label: '11', id: '11' }, + { label: '12', id: '12' }, + { label: '13', id: '13' }, + { label: '14', id: '14' }, + { label: '15', id: '15' }, ], value: () => '5', dependsOn: ['model'], required: false, }, + { + id: 'duration', + title: 'Duration (seconds)', + type: 'dropdown', + condition: { + field: 'provider', + value: 'falai', + and: { field: 'model', value: FALAI_KLING_LEGACY_MODELS }, + }, + options: [ + { label: '5', id: '5' }, + { label: '8', id: '8' }, + { label: '10', id: '10' }, + ], + value: () => '5', + dependsOn: ['model'], + required: false, + }, + { + id: 'duration', + title: 'Duration (seconds)', + type: 'dropdown', + condition: { + field: 'provider', + value: 'falai', + and: { field: 'model', value: FALAI_MINIMAX_STANDARD_MODELS }, + }, + options: [ + { label: '6', id: '6' }, + { label: '10', id: '10' }, + ], + value: () => '6', + dependsOn: ['model'], + required: false, + }, + { + id: 'duration', + title: 'Duration (seconds)', + type: 'dropdown', + condition: { + field: 'provider', + value: 'falai', + and: { field: 'model', value: 'ltx-2.3' }, + }, + options: [ + { label: '6', id: '6' }, + { label: '8', id: '8' }, + { label: '10', id: '10' }, + ], + value: () => '6', + dependsOn: ['model'], + required: false, + }, + { + id: 
'duration', + title: 'Duration (seconds)', + type: 'dropdown', + condition: { + field: 'provider', + value: 'falai', + and: { field: 'model', value: 'ltx-2.3-fast' }, + }, + options: [ + { label: '6', id: '6' }, + { label: '8', id: '8' }, + { label: '10', id: '10' }, + { label: '12', id: '12' }, + { label: '14', id: '14' }, + { label: '16', id: '16' }, + { label: '18', id: '18' }, + { label: '20', id: '20' }, + ], + value: () => '6', + dependsOn: ['model'], + required: false, + }, { id: 'aspectRatio', title: 'Aspect Ratio', @@ -665,13 +1232,69 @@ export const VideoGeneratorV2Block: BlockConfig = { title: 'Aspect Ratio', type: 'dropdown', condition: { - field: 'model', - value: [ - 'kling-2.5-turbo-pro', - 'kling-2.1-pro', - 'minimax-hailuo-2.3-pro', - 'minimax-hailuo-2.3-standard', - ], + field: 'provider', + value: 'falai', + and: { + field: 'model', + value: [...FALAI_VEO_MODELS, ...FALAI_SORA_MODELS, ...FALAI_LTX_MODELS], + }, + }, + options: [ + { label: '16:9', id: '16:9' }, + { label: '9:16', id: '9:16' }, + ], + value: () => '16:9', + dependsOn: ['model'], + required: false, + }, + { + id: 'aspectRatio', + title: 'Aspect Ratio', + type: 'dropdown', + condition: { + field: 'provider', + value: 'falai', + and: { field: 'model', value: FALAI_SEEDANCE_MODELS }, + }, + options: [ + { label: 'Auto', id: 'auto' }, + { label: '21:9', id: '21:9' }, + { label: '16:9', id: '16:9' }, + { label: '4:3', id: '4:3' }, + { label: '1:1', id: '1:1' }, + { label: '3:4', id: '3:4' }, + { label: '9:16', id: '9:16' }, + ], + value: () => 'auto', + dependsOn: ['model'], + required: false, + }, + { + id: 'aspectRatio', + title: 'Aspect Ratio', + type: 'dropdown', + condition: { + field: 'provider', + value: 'falai', + and: { field: 'model', value: [...FALAI_KLING_LATEST_MODELS, ...FALAI_WAN_MODELS] }, + }, + options: [ + { label: '16:9', id: '16:9' }, + { label: '9:16', id: '9:16' }, + { label: '1:1', id: '1:1' }, + ], + value: () => '16:9', + dependsOn: ['model'], + required: 
false, + }, + { + id: 'aspectRatio', + title: 'Aspect Ratio', + type: 'dropdown', + condition: { + field: 'provider', + value: 'falai', + and: { field: 'model', value: FALAI_KLING_LEGACY_MODELS }, }, options: [ { label: '16:9', id: '16:9' }, @@ -708,6 +1331,127 @@ export const VideoGeneratorV2Block: BlockConfig = { dependsOn: ['provider'], required: false, }, + { + id: 'resolution', + title: 'Resolution', + type: 'dropdown', + condition: { + field: 'provider', + value: 'falai', + and: { field: 'model', value: FALAI_VEO_MODELS }, + }, + options: [ + { label: '720p', id: '720p' }, + { label: '1080p', id: '1080p' }, + { label: '4K', id: '4k' }, + ], + value: () => '1080p', + dependsOn: ['model'], + required: false, + }, + { + id: 'resolution', + title: 'Resolution', + type: 'dropdown', + condition: { + field: 'provider', + value: 'falai', + and: { field: 'model', value: 'sora-2' }, + }, + options: [{ label: '720p', id: '720p' }], + value: () => '720p', + dependsOn: ['model'], + required: false, + }, + { + id: 'resolution', + title: 'Resolution', + type: 'dropdown', + condition: { + field: 'provider', + value: 'falai', + and: { field: 'model', value: 'sora-2-pro' }, + }, + options: [ + { label: '720p', id: '720p' }, + { label: '1080p', id: '1080p' }, + { label: 'True 1080p', id: 'true_1080p' }, + ], + value: () => '1080p', + dependsOn: ['model'], + required: false, + }, + { + id: 'resolution', + title: 'Resolution', + type: 'dropdown', + condition: { + field: 'provider', + value: 'falai', + and: { field: 'model', value: FALAI_SEEDANCE_STANDARD_MODELS }, + }, + options: [ + { label: '480p', id: '480p' }, + { label: '720p', id: '720p' }, + { label: '1080p', id: '1080p' }, + ], + value: () => '720p', + dependsOn: ['model'], + required: false, + }, + { + id: 'resolution', + title: 'Resolution', + type: 'dropdown', + condition: { + field: 'provider', + value: 'falai', + and: { field: 'model', value: FALAI_SEEDANCE_FAST_MODELS }, + }, + options: [ + { label: '480p', id: 
'480p' }, + { label: '720p', id: '720p' }, + ], + value: () => '720p', + dependsOn: ['model'], + required: false, + }, + { + id: 'resolution', + title: 'Resolution', + type: 'dropdown', + condition: { + field: 'provider', + value: 'falai', + and: { field: 'model', value: FALAI_WAN_MODELS }, + }, + options: [ + { label: '480p', id: '480p' }, + { label: '580p', id: '580p' }, + { label: '720p', id: '720p' }, + ], + value: () => '720p', + dependsOn: ['model'], + required: false, + }, + { + id: 'resolution', + title: 'Resolution', + type: 'dropdown', + condition: { + field: 'provider', + value: 'falai', + and: { field: 'model', value: FALAI_LTX_MODELS }, + }, + options: [ + { label: '1080p', id: '1080p' }, + { label: '1440p', id: '1440p' }, + { label: '2160p', id: '2160p' }, + ], + value: () => '1080p', + dependsOn: ['model'], + required: false, + }, { id: 'visualReferenceUpload', title: 'Reference Image', @@ -748,6 +1492,44 @@ export const VideoGeneratorV2Block: BlockConfig = { condition: { field: 'provider', value: 'minimax' }, dependsOn: ['provider'], }, + { + id: 'promptOptimizer', + title: 'Prompt Optimizer', + type: 'switch', + defaultValue: true, + condition: { + field: 'provider', + value: 'falai', + and: { + field: 'model', + value: [...FALAI_MINIMAX_PRO_MODELS, ...FALAI_MINIMAX_STANDARD_MODELS], + }, + }, + dependsOn: ['model'], + }, + { + id: 'generateAudio', + title: 'Generate Audio', + type: 'switch', + defaultValue: true, + condition: { + field: 'provider', + value: 'falai', + and: { field: 'model', value: FALAI_AUDIO_DEFAULT_ON_MODELS }, + }, + dependsOn: ['model'], + }, + { + id: 'generateAudio', + title: 'Generate Audio', + type: 'switch', + condition: { + field: 'provider', + value: 'falai', + and: { field: 'model', value: FALAI_AUDIO_DEFAULT_OFF_MODELS }, + }, + dependsOn: ['model'], + }, { id: 'apiKey', title: 'API Key', @@ -788,7 +1570,8 @@ export const VideoGeneratorV2Block: BlockConfig = { visualReference: 
/** Marketing-style summary shown for the v3 Video Generator block. */
const VIDEO_GENERATOR_V3_LONG_DESCRIPTION =
  'Generate high-quality videos from text prompts using leading AI providers. Supports Runway, Google Veo, Luma, MiniMax, and Fal.ai multi-model generation with provider-specific durations, aspect ratios, resolutions, prompt optimization, and native audio controls.'

/**
 * Sub-blocks for v3, derived from the v2 sub-blocks via `withFalAIModelOptions`
 * with `FALAI_LATEST_MODEL_OPTIONS`.
 * NOTE(review): presumably this only swaps the Fal.ai model dropdown options —
 * confirm against `withFalAIModelOptions`.
 */
const VIDEO_GENERATOR_V3_SUB_BLOCKS = withFalAIModelOptions(
  VideoGeneratorV2Block.subBlocks,
  FALAI_LATEST_MODEL_OPTIONS
)

/**
 * Video Generator v3 block.
 *
 * Inherits everything from `VideoGeneratorV2Block` (the spread must stay first so the
 * properties below override it), then re-declares the identity/presentation fields,
 * makes the block visible in the toolbar, and replaces the sub-blocks with the
 * v3 variant computed above.
 */
export const VideoGeneratorV3Block: BlockConfig = {
  ...VideoGeneratorV2Block,
  type: 'video_generator_v3',
  name: 'Video Generator',
  description: 'Generate videos from text using AI',
  longDescription: VIDEO_GENERATOR_V3_LONG_DESCRIPTION,
  docsLink: 'https://docs.sim.ai/tools/video_generator',
  category: 'tools',
  integrationType: IntegrationType.AI,
  tags: ['video-generation', 'llm'],
  bgColor: '#181C1E',
  icon: VideoIcon,
  hideFromToolbar: false,
  subBlocks: VIDEO_GENERATOR_V3_SUB_BLOCKS,
}
/** Providers accepted by `POST /api/tools/image`. */
export const imageProviders = ['openai', 'gemini', 'falai'] as const

/** Single message reused for every missing/empty required field so clients see one consistent error. */
const MISSING_IMAGE_FIELDS_ERROR = 'Missing required fields: provider, apiKey, and prompt'

/** Query string for the image proxy (`GET /api/tools/image`): a non-empty `url`. */
export const imageProxyQuerySchema = z.object({
  url: z.string({ error: 'Missing URL parameter' }).min(1, 'Missing URL parameter'),
})

/**
 * Request body for image generation (`POST /api/tools/image`).
 *
 * - `provider`, `apiKey` and `prompt` are required, non-empty strings.
 * - `provider` is additionally checked against {@link imageProviders}.
 * - `numImages` / `seed` are coerced to integers, so numeric strings are accepted.
 * - Boolean flags go through `toolBooleanSchema`.
 * - `.passthrough()` keeps keys not listed here instead of stripping them.
 */
export const imageToolBodySchema = z
  .object({
    provider: z
      .string({ error: MISSING_IMAGE_FIELDS_ERROR })
      .min(1, MISSING_IMAGE_FIELDS_ERROR)
      .refine((provider) => imageProviders.includes(provider as (typeof imageProviders)[number]), {
        message: `Invalid provider. Must be one of: ${imageProviders.join(', ')}`,
      }),
    apiKey: z.string({ error: MISSING_IMAGE_FIELDS_ERROR }).min(1, MISSING_IMAGE_FIELDS_ERROR),
    model: z.string().optional(),
    prompt: z.string({ error: MISSING_IMAGE_FIELDS_ERROR }).min(1, MISSING_IMAGE_FIELDS_ERROR),
    size: z.string().optional(),
    aspectRatio: z.string().optional(),
    resolution: z.string().optional(),
    quality: z.string().optional(),
    background: z.string().optional(),
    outputFormat: z.string().optional(),
    moderation: z.string().optional(),
    safetyTolerance: z.string().optional(),
    numImages: z.coerce.number().int().optional(),
    seed: z.coerce.number().int().optional(),
    enableSafetyChecker: toolBooleanSchema.optional(),
    enableWebSearch: toolBooleanSchema.optional(),
    thinkingLevel: z.string().optional(),
    workspaceId: z.string().optional(),
    workflowId: z.string().optional(),
    executionId: z.string().optional(),
    userId: z.string().optional(),
  })
  .passthrough()

/** Parsed (output) type of {@link imageToolBodySchema}. `z.infer` requires the schema type argument. */
export type ImageToolBody = z.infer<typeof imageToolBodySchema>

/** Contract for the binary image proxy endpoint. */
export const imageProxyContract = defineRouteContract({
  method: 'GET',
  path: '/api/tools/image',
  query: imageProxyQuerySchema,
  response: { mode: 'binary' },
})

/** Contract for the JSON image generation endpoint. */
export const imageToolContract = defineRouteContract({
  method: 'POST',
  path: '/api/tools/image',
  body: imageToolBodySchema,
  response: { mode: 'json', schema: toolJsonResponseSchema },
})
/** Loose shape of the JSON body returned by `POST /api/tools/image`. */
interface ImageRouteResponse {
  error?: string
  content?: string
  image?: string
  imageUrl?: string
  imageFile?: unknown
  fileName?: string
  contentType?: string
  provider?: string
  model?: string
  metadata?: ImageGenerationResponse['output']['metadata']
}

/**
 * Reads the route response as JSON without throwing.
 *
 * @returns the parsed object, or `null` when the body is not valid JSON
 *   (e.g. an HTML error page from a gateway) or is not an object.
 */
async function readImageRouteResponse(response: Response): Promise<ImageRouteResponse | null> {
  try {
    const parsed: unknown = await response.json()
    return parsed !== null && typeof parsed === 'object' ? (parsed as ImageRouteResponse) : null
  } catch {
    return null
  }
}

/** Builds the structured failure result, echoing provider/model when the route reported them. */
function buildImageFailure(data: ImageRouteResponse | null) {
  const provider = data?.provider || ''
  const model = data?.model || ''
  return {
    success: false,
    error: data?.error || 'Image generation failed',
    output: {
      content: '',
      image: '',
      imageUrl: '',
      provider,
      model,
      metadata: { provider, model },
    },
  }
}

/**
 * Provider-agnostic image generation tool.
 *
 * Forwards the user's parameters to the internal `/api/tools/image` route and
 * normalizes the route's JSON reply into `{ content, image, imageUrl, provider, model, metadata }`.
 */
export const imageGenerateTool: ToolConfig = {
  id: 'image_generate',
  name: 'Image Generator',
  description: 'Generate images with OpenAI GPT Image, Google Nano Banana, or Fal.ai image models',
  version: '1.0.0',

  params: {
    provider: {
      type: 'string',
      required: true,
      visibility: 'user-only',
      description: 'Image generation provider: openai, gemini, or falai',
    },
    apiKey: {
      type: 'string',
      required: true,
      visibility: 'user-only',
      description: 'Provider API key',
    },
    // NOTE(review): required here, but `ImageGenerationParams.model` and the route body
    // schema both declare `model` optional — confirm which side is intended.
    model: {
      type: 'string',
      required: true,
      visibility: 'user-or-llm',
      description:
        'Provider model ID, such as gpt-image-1.5, gemini-3.1-flash-image-preview, or nano-banana-2',
    },
    prompt: {
      type: 'string',
      required: true,
      visibility: 'user-or-llm',
      description: 'Text prompt describing the image to generate',
    },
    size: {
      type: 'string',
      required: false,
      visibility: 'user-or-llm',
      description: 'Provider-specific image size',
    },
    aspectRatio: {
      type: 'string',
      required: false,
      visibility: 'user-or-llm',
      description: 'Aspect ratio, such as auto, 1:1, 16:9, or 9:16',
    },
    resolution: {
      type: 'string',
      required: false,
      visibility: 'user-or-llm',
      description: 'Provider-specific image resolution, such as 1K, 2K, 4K, 1k, or 2k',
    },
    quality: {
      type: 'string',
      required: false,
      visibility: 'user-or-llm',
      description: 'Provider-specific image quality',
    },
    background: {
      type: 'string',
      required: false,
      visibility: 'user-or-llm',
      description: 'Background setting when supported',
    },
    outputFormat: {
      type: 'string',
      required: false,
      visibility: 'user-or-llm',
      description: 'Output image format: png, jpeg, or webp where supported',
    },
    moderation: {
      type: 'string',
      required: false,
      visibility: 'user-or-llm',
      description: 'OpenAI moderation level: auto or low',
    },
    safetyTolerance: {
      type: 'string',
      required: false,
      visibility: 'user-or-llm',
      description: 'Fal.ai safety tolerance when supported',
    },
    numImages: {
      type: 'number',
      required: false,
      visibility: 'user-or-llm',
      description: 'Number of images to generate, subject to provider limits',
    },
    seed: {
      type: 'number',
      required: false,
      visibility: 'user-or-llm',
      description: 'Random seed when supported',
    },
    enableSafetyChecker: {
      type: 'boolean',
      required: false,
      visibility: 'user-or-llm',
      description: 'Enable the Fal.ai safety checker when supported',
    },
    enableWebSearch: {
      type: 'boolean',
      required: false,
      visibility: 'user-or-llm',
      description: 'Enable web search grounding when supported by the selected Fal.ai model',
    },
    thinkingLevel: {
      type: 'string',
      required: false,
      visibility: 'user-or-llm',
      description: 'Fal.ai thinking level when supported: minimal or high',
    },
  },

  request: {
    url: '/api/tools/image',
    method: 'POST',
    headers: () => ({
      'Content-Type': 'application/json',
    }),
    // Copies every generation option verbatim and lifts the execution context ids
    // out of `_context` onto the top level of the request body.
    body: (
      params: ImageGenerationParams & {
        _context?: { workspaceId?: string; workflowId?: string; executionId?: string }
      }
    ) => ({
      provider: params.provider,
      apiKey: params.apiKey,
      model: params.model,
      prompt: params.prompt,
      size: params.size,
      aspectRatio: params.aspectRatio,
      resolution: params.resolution,
      quality: params.quality,
      background: params.background,
      outputFormat: params.outputFormat,
      moderation: params.moderation,
      safetyTolerance: params.safetyTolerance,
      numImages: params.numImages,
      seed: params.seed,
      enableSafetyChecker: params.enableSafetyChecker,
      enableWebSearch: params.enableWebSearch,
      thinkingLevel: params.thinkingLevel,
      workspaceId: params._context?.workspaceId,
      workflowId: params._context?.workflowId,
      executionId: params._context?.executionId,
    }),
  },

  /**
   * Normalizes the route reply.
   *
   * Returns a `success: false` result (never throws) when the HTTP status is not OK,
   * the body carries an `error`, or the body cannot be read as a JSON object.
   */
  transformResponse: async (response: Response) => {
    const data = await readImageRouteResponse(response)

    if (!response.ok || data === null || data.error) {
      return buildImageFailure(data)
    }

    // Prefer a stored file, then an inline image, then a file-like descriptor built from the URL.
    const image =
      data.imageFile ||
      data.image ||
      (data.imageUrl
        ? {
            name: data.fileName || 'generated-image.png',
            url: data.imageUrl,
            mimeType: data.contentType || 'image/png',
          }
        : '')

    const provider = data.provider || data.metadata?.provider || ''
    const model = data.model || data.metadata?.model || ''

    return {
      success: true,
      output: {
        content: data.content || data.imageUrl || 'direct-image',
        image,
        imageUrl: data.imageUrl || '',
        provider,
        model,
        metadata: {
          provider,
          model,
          // Spread last: values reported in the route's own metadata win over the fallbacks above.
          ...data.metadata,
        },
      },
    }
  },

  outputs: {
    content: { type: 'string', description: 'Generated image URL or identifier' },
    image: { type: 'file', description: 'Generated image file' },
    imageUrl: { type: 'string', description: 'Generated image URL' },
    provider: { type: 'string', description: 'Provider used' },
    model: { type: 'string', description: 'Model used' },
    metadata: {
      type: 'json',
      description: 'Generation metadata',
      properties: {
        provider: { type: 'string', description: 'Provider used' },
        model: { type: 'string', description: 'Model used' },
        description: { type: 'string', description: 'Provider description', optional: true },
        revisedPrompt: { type: 'string', description: 'Revised prompt', optional: true },
        seed: { type: 'number', description: 'Seed used for generation', optional: true },
        jobId: { type: 'string', description: 'Provider job ID', optional: true },
        contentType: { type: 'string', description: 'Image MIME type', optional: true },
      },
    },
  },
}
a/apps/sim/tools/image/types.ts b/apps/sim/tools/image/types.ts new file mode 100644 index 00000000000..cb34c9adb42 --- /dev/null +++ b/apps/sim/tools/image/types.ts @@ -0,0 +1,40 @@ +import type { ToolResponse } from '@/tools/types' + +export interface ImageGenerationParams { + provider: 'openai' | 'gemini' | 'falai' + apiKey: string + model?: string + prompt: string + size?: string + aspectRatio?: string + resolution?: string + quality?: string + background?: string + outputFormat?: string + moderation?: string + safetyTolerance?: string + numImages?: number + seed?: number + enableSafetyChecker?: boolean + enableWebSearch?: boolean + thinkingLevel?: string +} + +export interface ImageGenerationResponse extends ToolResponse { + output: { + content: string + image: unknown + imageUrl: string + provider: string + model: string + metadata: { + provider: string + model: string + description?: string + revisedPrompt?: string + seed?: number + jobId?: string + contentType?: string + } + } +} diff --git a/apps/sim/tools/openai/image.ts b/apps/sim/tools/openai/image.ts index 2d5ee547873..0874a327b84 100644 --- a/apps/sim/tools/openai/image.ts +++ b/apps/sim/tools/openai/image.ts @@ -5,6 +5,15 @@ import type { ToolConfig } from '@/tools/types' const logger = createLogger('ImageTool') +const GPT_IMAGE_SIZES = ['auto', '1024x1024', '1536x1024', '1024x1536'] as const +const GPT_IMAGE_2_SIZES = [...GPT_IMAGE_SIZES, '2560x1440', '3840x2160'] as const +const GPT_IMAGE_MODELS = [ + 'gpt-image-2', + 'gpt-image-1.5', + 'gpt-image-1', + 'gpt-image-1-mini', +] as const + export const imageTool: ToolConfig = { id: 'openai_image', name: 'Image Generator', @@ -16,7 +25,8 @@ export const imageTool: ToolConfig = { type: 'string', required: true, visibility: 'user-only', - description: 'The model to use (dall-e-3, gpt-image-1, or gpt-image-2)', + description: + 'The model to use. 
Supports dall-e-3, gpt-image-2, gpt-image-1.5, gpt-image-1, and gpt-image-1-mini.', }, prompt: { type: 'string', @@ -29,13 +39,13 @@ export const imageTool: ToolConfig = { required: true, visibility: 'user-or-llm', description: - 'Image size. dall-e-3: 1024x1024, 1024x1792, or 1792x1024. gpt-image-1: auto, 1024x1024, 1536x1024, or 1024x1536. gpt-image-2: auto or any size with edges ≤3840px and multiples of 16 (e.g. 1024x1024, 1536x1024, 1024x1536, 2560x1440, 3840x2160).', + 'Image size. dall-e-3: 1024x1024, 1024x1792, or 1792x1024. GPT Image models: auto, 1024x1024, 1536x1024, or 1024x1536. gpt-image-2 also supports 2560x1440 and 3840x2160.', }, quality: { type: 'string', required: false, visibility: 'user-or-llm', - description: 'Quality. dall-e-3: standard|hd. gpt-image-1/gpt-image-2: auto|low|medium|high', + description: 'Quality. dall-e-3: standard|hd. GPT Image models: auto|low|medium|high', }, style: { type: 'string', @@ -47,26 +57,25 @@ export const imageTool: ToolConfig = { type: 'string', required: false, visibility: 'user-or-llm', - description: - 'Background. gpt-image-1: auto|transparent|opaque. gpt-image-2: auto|opaque (transparent not supported)', + description: 'Background for GPT Image models: auto|transparent|opaque', }, outputFormat: { type: 'string', required: false, visibility: 'user-or-llm', - description: 'Output image format (png, jpeg, webp), only for gpt-image-1 and gpt-image-2', + description: 'Output image format (png, jpeg, webp), only for GPT Image models', }, moderation: { type: 'string', required: false, visibility: 'user-or-llm', - description: 'Moderation level (auto or low), only for gpt-image-1 and gpt-image-2', + description: 'Moderation level (auto or low), only for GPT Image models', }, n: { type: 'number', required: false, visibility: 'hidden', - description: 'The number of images to generate (1-10)', + description: 'Reserved for legacy callers. 
This tool returns a single generated image.', }, apiKey: { type: 'string', @@ -84,17 +93,31 @@ export const imageTool: ToolConfig = { Authorization: `Bearer ${params.apiKey}`, }), body: (params) => { + const requestedModel = String(params.model || 'dall-e-3') + const requestedSize = String(params.size || '') + const size = + requestedModel === 'dall-e-3' + ? ['1024x1024', '1024x1792', '1792x1024'].includes(requestedSize) + ? requestedSize + : '1024x1024' + : requestedModel === 'gpt-image-2' && + GPT_IMAGE_2_SIZES.includes(requestedSize as (typeof GPT_IMAGE_2_SIZES)[number]) + ? requestedSize + : GPT_IMAGE_MODELS.includes(requestedModel as (typeof GPT_IMAGE_MODELS)[number]) && + GPT_IMAGE_SIZES.includes(requestedSize as (typeof GPT_IMAGE_SIZES)[number]) + ? requestedSize + : 'auto' const body: BaseImageRequestBody = { - model: params.model, + model: requestedModel, prompt: params.prompt, - size: params.size || (params.model === 'dall-e-3' ? '1024x1024' : 'auto'), - n: params.n ? Number(params.n) : 1, + size, + n: 1, } - if (params.model === 'dall-e-3') { + if (requestedModel === 'dall-e-3') { if (params.quality) body.quality = params.quality if (params.style) body.style = params.style - } else if (params.model === 'gpt-image-1' || params.model === 'gpt-image-2') { + } else if (GPT_IMAGE_MODELS.includes(requestedModel as (typeof GPT_IMAGE_MODELS)[number])) { if (params.quality) body.quality = params.quality if (params.background) body.background = params.background if (params.outputFormat) body.output_format = params.outputFormat @@ -118,7 +141,7 @@ export const imageTool: ToolConfig = { }) } - const modelName = params?.model || 'dall-e-3' + const modelName = String(params?.model || 'dall-e-3') let imageUrl = null let base64Image = null diff --git a/apps/sim/tools/openai/types.ts b/apps/sim/tools/openai/types.ts index 532666e20cd..568a27cd7f4 100644 --- a/apps/sim/tools/openai/types.ts +++ b/apps/sim/tools/openai/types.ts @@ -5,7 +5,7 @@ export interface 
BaseImageRequestBody { prompt: string size: string n: number - [key: string]: any // Allow for additional properties + [key: string]: unknown } export interface DalleResponse extends ToolResponse { diff --git a/apps/sim/tools/registry.ts b/apps/sim/tools/registry.ts index b65d6bda699..c402d54e374 100644 --- a/apps/sim/tools/registry.ts +++ b/apps/sim/tools/registry.ts @@ -1277,6 +1277,7 @@ import { identityCenterListInstancesTool, identityCenterListPermissionSetsTool, } from '@/tools/identity_center' +import { imageGenerateTool } from '@/tools/image' import { incidentioActionsListTool, incidentioActionsShowTool, @@ -4982,6 +4983,7 @@ export const tools: Record = { datadog_create_downtime: datadogCreateDowntimeTool, datadog_list_downtimes: datadogListDowntimesTool, datadog_cancel_downtime: datadogCancelDowntimeTool, + image_generate: imageGenerateTool, openai_image: openAIImageTool, microsoft_ad_list_users: microsoftAdListUsersTool, microsoft_ad_get_user: microsoftAdGetUserTool, diff --git a/apps/sim/tools/video/falai.ts b/apps/sim/tools/video/falai.ts index 82cb89e0184..77999d87820 100644 --- a/apps/sim/tools/video/falai.ts +++ b/apps/sim/tools/video/falai.ts @@ -1,11 +1,12 @@ import type { ToolConfig } from '@/tools/types' import type { VideoParams, VideoResponse } from '@/tools/video/types' +import { parseBooleanParam, parseBooleanParamWithDefault } from '@/tools/video/utils' export const falaiVideoTool: ToolConfig = { id: 'video_falai', name: 'Fal.ai Video Generation', description: - 'Generate videos using Fal.ai platform with access to multiple models including Veo 3.1, Sora 2, Kling 2.5, MiniMax Hailuo, and more', + 'Generate videos using Fal.ai with access to Veo 3.1, Sora 2, Seedance 2.0, Kling 3.0, MiniMax Hailuo 2.3, WAN 2.2, LTX 2.3, and previously supported models', version: '1.0.0', params: { @@ -26,7 +27,7 @@ export const falaiVideoTool: ToolConfig = { required: true, visibility: 'user-or-llm', description: - 'Fal.ai model: veo-3.1 (Google Veo 3.1), 
sora-2 (OpenAI Sora 2), kling-2.5-turbo-pro (Kling 2.5 Turbo Pro), kling-2.1-pro (Kling 2.1 Master), minimax-hailuo-2.3-pro (MiniMax Hailuo Pro), minimax-hailuo-2.3-standard (MiniMax Hailuo Standard), wan-2.1 (WAN T2V), ltxv-0.9.8 (LTXV 13B)', + 'Fal.ai model: veo-3.1, veo-3.1-fast, sora-2, sora-2-pro, seedance-2.0, seedance-2.0-fast, kling-v3-pro, kling-v3-4k, kling-o3-pro, kling-o3-4k, minimax-hailuo-2.3-pro, minimax-hailuo-2.3-standard, wan-2.2-a14b-turbo, ltx-2.3, ltx-2.3-fast, plus previously supported model IDs', }, prompt: { type: 'string', @@ -50,7 +51,8 @@ export const falaiVideoTool: ToolConfig = { type: 'string', required: false, visibility: 'user-or-llm', - description: 'Video resolution (varies by model): 540p, 720p, 1080p', + description: + 'Video resolution (varies by model): 480p, 580p, 720p, 1080p, true_1080p, 1440p, 2160p, 4k', }, promptOptimizer: { type: 'boolean', @@ -58,6 +60,12 @@ export const falaiVideoTool: ToolConfig = { visibility: 'user-or-llm', description: 'Enable prompt optimization for MiniMax models (default: true)', }, + generateAudio: { + type: 'boolean', + required: false, + visibility: 'user-or-llm', + description: 'Generate native audio when supported by the selected Fal.ai model', + }, }, request: { @@ -78,7 +86,8 @@ export const falaiVideoTool: ToolConfig = { duration: params.duration, aspectRatio: params.aspectRatio, resolution: params.resolution, - promptOptimizer: params.promptOptimizer !== false, // Default true for MiniMax + promptOptimizer: parseBooleanParamWithDefault(params.promptOptimizer, true), + generateAudio: parseBooleanParam(params.generateAudio), workspaceId: params._context?.workspaceId, workflowId: params._context?.workflowId, executionId: params._context?.executionId, diff --git a/apps/sim/tools/video/minimax.ts b/apps/sim/tools/video/minimax.ts index 941c0d04b8e..369b3953475 100644 --- a/apps/sim/tools/video/minimax.ts +++ b/apps/sim/tools/video/minimax.ts @@ -1,5 +1,6 @@ import type { ToolConfig } from 
'@/tools/types' import type { VideoParams, VideoResponse } from '@/tools/video/types' +import { parseBooleanParamWithDefault } from '@/tools/video/utils' export const minimaxVideoTool: ToolConfig = { id: 'video_minimax', @@ -25,7 +26,7 @@ export const minimaxVideoTool: ToolConfig = { type: 'string', required: false, visibility: 'user-or-llm', - description: 'MiniMax model: hailuo-02 (default)', + description: 'MiniMax model: hailuo-2.3 (default) or hailuo-02', }, prompt: { type: 'string', @@ -39,6 +40,12 @@ export const minimaxVideoTool: ToolConfig = { visibility: 'user-or-llm', description: 'Video duration in seconds (6 or 10, default: 6)', }, + endpoint: { + type: 'string', + required: false, + visibility: 'user-or-llm', + description: 'Quality endpoint: standard (768P) or pro (1080P for 6s videos)', + }, promptOptimizer: { type: 'boolean', required: false, @@ -60,10 +67,11 @@ export const minimaxVideoTool: ToolConfig = { ) => ({ provider: 'minimax', apiKey: params.apiKey, - model: params.model || 'hailuo-02', + model: params.model || 'hailuo-2.3', prompt: params.prompt, duration: params.duration || 6, - promptOptimizer: params.promptOptimizer !== false, // Default true + endpoint: params.endpoint || 'standard', + promptOptimizer: parseBooleanParamWithDefault(params.promptOptimizer, true), workspaceId: params._context?.workspaceId, workflowId: params._context?.workflowId, executionId: params._context?.executionId, diff --git a/apps/sim/tools/video/types.ts b/apps/sim/tools/video/types.ts index 6b1ae5768bf..5d31e29e2c7 100644 --- a/apps/sim/tools/video/types.ts +++ b/apps/sim/tools/video/types.ts @@ -9,18 +9,18 @@ export interface VideoParams { duration?: number aspectRatio?: string resolution?: string - // Provider-specific features - visualReference?: UserFile // Runway only (required for Runway) + /** Runway only, required for Runway generation */ + visualReference?: UserFile cameraControl?: { - // Luma only pan?: number zoom?: number tilt?: number truck?: 
number tracking?: boolean } - endpoint?: string // MiniMax: 'pro' | 'standard' - promptOptimizer?: boolean // MiniMax and Fal.ai MiniMax models + endpoint?: string + promptOptimizer?: boolean + generateAudio?: boolean } export interface VideoResponse extends ToolResponse { @@ -49,9 +49,12 @@ export interface VideoBlockResponse extends ToolResponse { } interface RunwayParams extends Omit { - model?: 'gen-4-turbo' // Only gen4_turbo supports image-to-video - visualReference: UserFile // REQUIRED for Gen-4 - resolution?: '720p' // Gen-4 Turbo outputs at 720p + /** Only gen4_turbo supports image-to-video */ + model?: 'gen-4-turbo' + /** Required for Gen-4 */ + visualReference: UserFile + /** Gen-4 Turbo outputs at 720p */ + resolution?: '720p' duration?: 5 | 10 } @@ -77,7 +80,7 @@ interface LumaParams extends Omit { } interface MinimaxParams extends Omit { - model?: 'hailuo-02' + model?: 'hailuo-2.3' | 'hailuo-02' endpoint?: 'pro' | 'standard' promptOptimizer?: boolean duration?: 6 | 10 diff --git a/apps/sim/tools/video/utils.ts b/apps/sim/tools/video/utils.ts new file mode 100644 index 00000000000..10c8ac20454 --- /dev/null +++ b/apps/sim/tools/video/utils.ts @@ -0,0 +1,13 @@ +export function parseBooleanParam(value: unknown): boolean | undefined { + if (typeof value === 'boolean') return value + if (typeof value !== 'string') return undefined + + const normalized = value.trim().toLowerCase() + if (normalized === 'true' || normalized === '1') return true + if (normalized === 'false' || normalized === '0' || normalized === '') return false + return undefined +} + +export function parseBooleanParamWithDefault(value: unknown, defaultValue: boolean): boolean { + return parseBooleanParam(value) ?? 
defaultValue +} diff --git a/scripts/generate-docs.ts b/scripts/generate-docs.ts index 65d93e3e393..c2c50cbbc6a 100755 --- a/scripts/generate-docs.ts +++ b/scripts/generate-docs.ts @@ -208,11 +208,13 @@ function copyIconsFile(): void { } /** - * Generate icon mapping from all block definitions - * Maps block types to their icon component names - * Skips blocks that don't have documentation generated (same logic as generateBlockDoc) + * Generate icon mapping from block definitions. + * Docs need hidden historical version keys so old BlockInfoCard references and + * versioned docs links still render icons, while landing only needs visible blocks. */ -async function generateIconMapping(): Promise> { +async function generateIconMapping(options: { + includeHidden: boolean +}): Promise> { try { console.log('Generating icon mapping from block definitions...') @@ -280,8 +282,8 @@ async function generateIconMapping(): Promise> { continue } - // Only add non-hidden blocks to icon mapping (docs won't be generated for hidden) - if (!hideFromToolbar) { + const isVersionedBlockType = /_v\d+$/.test(blockType) + if (!hideFromToolbar || (options.includeHidden && isVersionedBlockType)) { iconMapping[blockType] = iconName } } @@ -3708,13 +3710,14 @@ async function generateAllBlockDocs() { // Copy icons from sim app to docs app copyIconsFile() - // Generate icon mapping from block definitions - const iconMapping = await generateIconMapping() - writeIconMapping(iconMapping) + // Generate icon mappings from block definitions + const docsIconMapping = await generateIconMapping({ includeHidden: true }) + const visibleIconMapping = await generateIconMapping({ includeHidden: false }) + writeIconMapping(docsIconMapping) // Generate landing integrations page data (JSON + icon mapping) - await writeIntegrationsJson(iconMapping) - writeIntegrationsIconMapping(iconMapping) + await writeIntegrationsJson(visibleIconMapping) + writeIntegrationsIconMapping(visibleIconMapping) // Get hidden and 
visible block types before generating docs const { hiddenTypes, visibleDisplayNames } = await getHiddenAndVisibleBlockTypes()