{
  "name": "* 🗄️W57th Vector 2.0",
  "nodes": [
    {
      "parameters": {
        "assignments": {
          "assignments": [
            {
              "id": "m1",
              "name": "content",
              "value": "={{ [\n  'public_url: '       + $json.public_url,\n  'album: '            + $json.album,\n  'file_date: '        + $json.file_date,\n  'mime_type: '        + $json.mime_type,\n  'summary: '          + $json.summary,\n  'detail_description: ' + $json.detail_description,\n  'tags: '             + [].concat($json.tags ?? []).join(', ')\n].join('\\n\\n') }}",
              "type": "string"
            },
            {
              "id": "m2",
              "name": "metadata",
              "value": "={{ {\n  name:          $json.name,\n  public_url:    $json.public_url,\n  album:         $json.album,\n  file_date:     $json.file_date ?? null,\n  mime_type:     $json.mime_type,\n  score_social:  Number.isNaN(parseInt($json.score_social, 10)) ? null : parseInt($json.score_social, 10),\n  score_quality: Number.isNaN(parseInt($json.score_quality, 10)) ? null : parseInt($json.score_quality, 10),\n  chunk_index:   $json.chunk_index ?? null,\n  chunk_total:   $json.chunk_total ?? null\n} }}",
              "type": "object"
            }
          ]
        },
        "options": {}
      },
      "id": "592243ca-8bd1-49f6-abcd-6ed8b122e8ec",
      "name": "Build Record",
      "type": "n8n-nodes-base.set",
      "typeVersion": 3.4,
      "position": [
        1376,
        288
      ]
    },
    {
      "parameters": {},
      "id": "0a454a14-620e-4acb-9490-59ae1e81595b",
      "name": "Embeddings Gemini1",
      "type": "@n8n/n8n-nodes-langchain.embeddingsGoogleGemini",
      "typeVersion": 1,
      "position": [
        1568,
        432
      ],
      "credentials": {
        "googlePalmApi": {
          "id": "2qUzA3DAEFRc7DI1",
          "name": "Gemini FRAINK key"
        }
      }
    },
    {
      "parameters": {
        "jsonMode": "expressionData",
        "jsonData": "={{ $json.content }}",
        "textSplittingMode": "custom",
        "options": {
          "metadata": {
            "metadataValues": [
              {
                "name": "metadata",
                "value": "={{ $json.metadata }}"
              }
            ]
          }
        }
      },
      "type": "@n8n/n8n-nodes-langchain.documentDefaultDataLoader",
      "typeVersion": 1.1,
      "position": [
        1712,
        432
      ],
      "id": "62881126-de15-42a1-904f-30aa6605a645",
      "name": "Default Data Loader"
    },
    {
      "parameters": {
        "separator": "NEVER_SPLIT_XYZ",
        "chunkSize": 50000
      },
      "type": "@n8n/n8n-nodes-langchain.textSplitterCharacterTextSplitter",
      "typeVersion": 1,
      "position": [
        1728,
        528
      ],
      "id": "6db24c96-9f71-4b43-a6ed-dd8ec5633c6b",
      "name": "Character Text Splitter"
    },
    {
      "parameters": {
        "mode": "insert",
        "tableName": "vec10",
        "options": {}
      },
      "id": "32ea4f84-550c-48cf-ba9d-59f914afe258",
      "name": "Vector Ingest",
      "type": "@n8n/n8n-nodes-langchain.vectorStoreSupabase",
      "typeVersion": 1,
      "position": [
        1568,
        288
      ],
      "credentials": {
        "supabaseApi": {
          "id": "fDZJQQDFT5tFE3WD",
          "name": "Vector Supabase"
        }
      }
    },
    {
      "parameters": {
        "method": "POST",
        "url": "https://n8n.airpg.ai/webhook/pdf",
        "sendBody": true,
        "bodyParameters": {
          "parameters": [
            {
              "name": "mimeType",
              "value": "={{ $json.mimeType }}"
            },
            {
              "name": "public_url",
              "value": "={{ $json.public_url }}"
            },
            {
              "name": "album",
              "value": "={{ $('Set Input').first().json.prefix }}"
            },
            {
              "name": "file_date",
              "value": "={{ $json.file_date }}"
            }
          ]
        },
        "options": {}
      },
      "type": "n8n-nodes-base.httpRequest",
      "typeVersion": 4.4,
      "position": [
        -816,
        288
      ],
      "id": "61967eb2-cec5-43fc-9d88-36632c2780b5",
      "name": "To PDF"
    },
    {
      "parameters": {
        "method": "POST",
        "url": "https://n8n.airpg.ai/webhook/image",
        "sendBody": true,
        "bodyParameters": {
          "parameters": [
            {
              "name": "mimeType",
              "value": "={{ $json.mimeType }}"
            },
            {
              "name": "public_url",
              "value": "={{ $json.public_url }}"
            },
            {
              "name": "album",
              "value": "={{ $('Set Input').first().json.prefix }}"
            },
            {
              "name": "file_date",
              "value": "={{ $json.file_date }}"
            }
          ]
        },
        "options": {}
      },
      "type": "n8n-nodes-base.httpRequest",
      "typeVersion": 4.4,
      "position": [
        -816,
        16
      ],
      "id": "d695d3eb-332f-4c82-b906-b2b602f693e3",
      "name": "To Image"
    },
    {
      "parameters": {
        "httpMethod": "POST",
        "path": "image",
        "options": {}
      },
      "type": "n8n-nodes-base.webhook",
      "typeVersion": 2.1,
      "position": [
        -560,
        16
      ],
      "id": "9f945a72-9cc9-4a37-b0e0-96d1edd07346",
      "name": "Image",
      "webhookId": "0287891b-7da8-4b25-b5b0-68688c5b5870"
    },
    {
      "parameters": {
        "httpMethod": "POST",
        "path": "pdf",
        "options": {}
      },
      "type": "n8n-nodes-base.webhook",
      "typeVersion": 2.1,
      "position": [
        -560,
        288
      ],
      "id": "6d94e686-e5e1-4579-9d9a-6ba1c583abb3",
      "name": "PDF",
      "webhookId": "0287891b-7da8-4b25-b5b0-68688c5b5870"
    },
    {
      "parameters": {
        "method": "POST",
        "url": "https://n8n.airpg.ai/webhook/imageF",
        "sendBody": true,
        "bodyParameters": {
          "parameters": [
            {
              "name": "output",
              "value": "={{ $json.output }}"
            }
          ]
        },
        "options": {}
      },
      "type": "n8n-nodes-base.httpRequest",
      "typeVersion": 4.4,
      "position": [
        288,
        16
      ],
      "id": "d11c1be6-c881-4956-93c4-6eae3bdef437",
      "name": "To Image F"
    },
    {
      "parameters": {
        "method": "POST",
        "url": "https://n8n.airpg.ai/webhook/pdfF",
        "sendBody": true,
        "bodyParameters": {
          "parameters": [
            {
              "name": "output",
              "value": "={{ $json.output }}"
            }
          ]
        },
        "options": {}
      },
      "type": "n8n-nodes-base.httpRequest",
      "typeVersion": 4.4,
      "position": [
        288,
        288
      ],
      "id": "3786e8f6-9f2b-43a6-8381-287406c726be",
      "name": "To PDF F"
    },
    {
      "parameters": {
        "jsCode": "// ═══════════════════════════════════════════════════════════════════\n// ver 3.5 Groq Parse: Sentence-aware transcript chunker & auto-tagger\n// ~900 char target, 1 sentence overlap, outputs flat Build Record shape\n// CHANGE 3.4: Parallel tag extraction via Promise.all (was sequential await)\n// CHANGE 3.5: Keep trailing unpunctuated text, guard response parsing,\n//             prefer the item's own public_url over index matching\n// ═══════════════════════════════════════════════════════════════════\nconst CHUNK_TARGET = 900;\nconst https = require('https');\n\n// ── SECTION 1: Chunker ──────────────────────────────────────────\n// Splits transcript into sentence-aware chunks with 1-sentence overlap.\n// The second regex alternative keeps a final fragment that has no closing\n// punctuation (it used to be dropped silently).\n// Falls back to treating full text as one chunk if the regex finds nothing.\nfunction chunkTranscript(text) {\n    const sentences = text.match(/[^.!?]+[.!?]+|[^.!?]+$/g) || [text];\n    const chunks = [];\n    let current = '';\n    let lastSentence = '';\n    let chunkIndex = 0;\n\n    for (const sentence of sentences) {\n        const trimmed = sentence.trim();\n        if (!trimmed) continue;\n\n        if ((current + ' ' + trimmed).length > CHUNK_TARGET && current.length > 0) {\n            chunks.push({ chunk_index: chunkIndex++, chunk_text: current.trim() });\n            current = lastSentence + ' ' + trimmed; // 1-sentence overlap\n        } else {\n            current = current ? current + ' ' + trimmed : trimmed;\n        }\n        lastSentence = trimmed;\n    }\n\n    if (current.trim()) {\n        chunks.push({ chunk_index: chunkIndex, chunk_text: current.trim() });\n    }\n\n    return chunks;\n}\n\n// ── SECTION 2: Tag Extractor ─────────────────────────────────────\n// Single Groq API call for one chunk. Returns parsed string array.\n// Regex-extracts JSON array to handle Groq wrapping response in prose/markdown.\n// Any unusable reply (non-JSON body, error payload, no array) yields [].\n// A network-level error still rejects and fails the node.\nasync function extractTags(text) {\n    const response = await new Promise((resolve, reject) => {\n        const body = JSON.stringify({\n            model: 'llama-3.1-8b-instant',\n            max_tokens: 30,\n            messages: [{\n                role: 'user',\n                content: `Return 3 to 5 specific topical keywords from this text as a JSON array. No filler words. Text: \"${text}\"`\n            }]\n        });\n\n        const req = https.request({\n            hostname: 'api.groq.com',\n            path: '/openai/v1/chat/completions',\n            method: 'POST',\n            headers: {\n                'Authorization': `Bearer ${$env.GROQ_API_KEY}`,\n                'Content-Type': 'application/json'\n            }\n        }, res => {\n            let data = '';\n            res.on('data', chunk => data += chunk);\n            res.on('end', () => {\n                // A throw inside this callback would escape the Promise, so parse defensively.\n                try {\n                    resolve(JSON.parse(data));\n                } catch (e) {\n                    resolve(null); // non-JSON reply: treated as no tags by the try/catch below\n                }\n            });\n        });\n\n        req.on('error', reject);\n        req.write(body);\n        req.end();\n    });\n\n    try {\n        const raw = response.choices[0].message.content;\n        const match = raw.match(/\\[[\\s\\S]*?\\]/);\n        return match ? JSON.parse(match[0]) : [];\n    } catch(e) {\n        return [];\n    }\n}\n\n// ── SECTION 3: Main Loop ─────────────────────────────────────────\n// Iterates all transcript inputs, chunks each, then fires ALL tag\n// extraction calls in parallel per transcript via Promise.all.\n// Upstream refs: FFmpeg Audio (public_url fallback), Set Input (album/prefix).\nconst results = [];\nconst allInputs = $input.all();\nconst ffmpegItems = $('FFmpeg Audio').all();\nconst setInputItem = $('Set Input').first();\nconst album = setInputItem?.json?.prefix || null;\n\nfor (let i = 0; i < allInputs.length; i++) {\n    const item = allInputs[i];\n    const transcript = item.json.transcript;\n    if (!transcript) continue;\n\n    // Prefer the URL carried on the item itself; index matching against\n    // FFmpeg Audio is only a fallback because it breaks if items were dropped upstream.\n    const matchedFFmpeg = ffmpegItems[i] || ffmpegItems[0] || {};\n    const public_url = item.json.public_url || matchedFFmpeg.json?.public_url || null;\n\n    const chunks = chunkTranscript(transcript.trim());\n\n    // Fire all tag calls for this transcript's chunks simultaneously\n    const allTags = await Promise.all(chunks.map(chunk => extractTags(chunk.chunk_text)));\n\n    // Build result records now that all tags are resolved\n    chunks.forEach((chunk, idx) => {\n        results.push({\n            json: {\n                public_url,\n                album,\n                file_date:          item.json.file_date,\n                mime_type:          'video_transcript',\n                chunk_index:        chunk.chunk_index,\n                chunk_total:        chunks.length,\n                summary:            `Transcript chunk ${chunk.chunk_index + 1} of ${chunks.length}`,\n                detail_description: `[Transcript Chunk]: ${chunk.chunk_text}`,\n                tags:               allTags[idx],\n                score_social:       null,\n                score_quality:      null\n            }\n        });\n    });\n}\n\nreturn results;"
      },
      "type": "n8n-nodes-base.code",
      "typeVersion": 2,
      "position": [
        16,
        608
      ],
      "id": "8c4c8ce3-4594-4fc4-a3a8-424857a60aaf",
      "name": "Groq Parse"
    },
    {
      "parameters": {},
      "type": "n8n-nodes-base.merge",
      "typeVersion": 3.2,
      "position": [
        528,
        784
      ],
      "id": "ea48c97b-de65-4258-8f3f-6698d14d842c",
      "name": "Merge1"
    },
    {
      "parameters": {
        "conditions": {
          "options": {
            "caseSensitive": true,
            "leftValue": "",
            "typeValidation": "strict",
            "version": 2
          },
          "conditions": [
            {
              "leftValue": "={{ ['.jpg','.jpeg','.png','.webp','.mp4','.mov','.avi','.mkv','.pdf'].some(ext => $json.key.toLowerCase().endsWith(ext)) }}",
              "rightValue": "={{ ['.jpg','.jpeg','.png','.webp','.mp4','.mov','.avi','.mkv','.pdf'].some(ext => $json.key.toLowerCase().endsWith(ext)) }}",
              "operator": {
                "type": "boolean",
                "operation": "true",
                "singleValue": true
              },
              "id": "13d34ed6-213f-40c1-9352-0443bd0320ca"
            }
          ],
          "combinator": "and"
        },
        "options": {
          "ignoreCase": false
        }
      },
      "id": "36e8cbc5-24e5-417e-a746-01e5eb83b5fe",
      "name": "Skip Unknown Types",
      "type": "n8n-nodes-base.filter",
      "typeVersion": 2.2,
      "position": [
        -128,
        -304
      ]
    },
    {
      "parameters": {
        "assignments": {
          "assignments": [
            {
              "id": "mt",
              "name": "mimeType",
              "value": "={{ $json.key.toLowerCase().match(/\\.(mp4|mov|avi|mkv)$/) ? 'video' : $json.key.toLowerCase().match(/\\.pdf$/) ? 'pdf' : 'image' }}",
              "type": "string"
            },
            {
              "id": "fn",
              "name": "public_url",
              "value": "={{ $('Set Input').item.json.bucket_url + '/' + $json.key }}",
              "type": "string"
            },
            {
              "id": "e0ddfafc-9e2b-4954-8b84-4d3b05dad705",
              "name": "file_date",
              "value": "={{ $json.last_modified }}",
              "type": "string"
            }
          ]
        },
        "options": {}
      },
      "id": "adcc3000-6660-46a0-9657-0a2d9b923045",
      "name": "Set Media Type",
      "type": "n8n-nodes-base.set",
      "typeVersion": 3.4,
      "position": [
        96,
        -304
      ]
    },
    {
      "parameters": {
        "fieldToSplitOut": "objects",
        "options": {}
      },
      "id": "9b4491cc-ca79-45f9-9dc1-aa068fe953fb",
      "name": "Split Out",
      "type": "n8n-nodes-base.splitOut",
      "typeVersion": 1,
      "position": [
        -336,
        -304
      ]
    },
    {
      "parameters": {
        "assignments": {
          "assignments": [
            {
              "id": "1",
              "name": "bucket",
              "value": "={{ $json.body.bucket }}",
              "type": "string"
            },
            {
              "id": "2",
              "name": "prefix",
              "value": "={{ $json.body.folder_url.split('/').slice(3).filter(s => s.length > 0 && !s.includes('.')).join('/') }}",
              "type": "string"
            },
            {
              "id": "4476cf58-f93e-495e-a296-94caf529a2db",
              "name": "bucket_url",
              "value": "={{ $json.body.folder_url.split('/').slice(0, 3).join('/') }}",
              "type": "string"
            },
            {
              "id": "4",
              "name": "temp_root",
              "value": "C:\\Users\\USERFOLDER\\vector",
              "type": "string"
            },
            {
              "id": "5",
              "name": "frames_dir",
              "value": "C:\\Users\\USERFOLDER\\vector\\frames",
              "type": "string"
            },
            {
              "id": "6",
              "name": "video_path",
              "value": "C:\\Users\\USERFOLDER\\vector\\video",
              "type": "string"
            }
          ]
        },
        "options": {}
      },
      "id": "28184ffc-68cd-40c8-9351-1a0353acb341",
      "name": "Set Input",
      "type": "n8n-nodes-base.set",
      "typeVersion": 3.4,
      "position": [
        -784,
        -304
      ]
    },
    {
      "parameters": {
        "assignments": {
          "assignments": [
            {
              "id": "gd1",
              "name": "name",
              "value": "={{ $json.name }}",
              "type": "string"
            },
            {
              "id": "gd2",
              "name": "public_url",
              "value": "=https://drive.google.com/thumbnail?sz=w1920&id={{ $json.id }}",
              "type": "string"
            },
            {
              "id": "gd3",
              "name": "view_url",
              "value": "=https://drive.google.com/file/d/{{ $json.id }}/preview",
              "type": "string"
            },
            {
              "id": "83850e76-24ef-45f4-909d-8bd0e74e6c17",
              "name": "mimeType",
              "value": "={{ $json.name.toLowerCase().match(/\\.(mp4|mov|avi|mkv)$/) ? 'video' : ($json.name.toLowerCase().match(/\\.pdf$/) ? 'pdf' : 'image') }}",
              "type": "string"
            },
            {
              "id": "0f4f74b3-a8f8-49b1-a2ab-2d2d3422bb96",
              "name": "download_url",
              "value": "=https://drive.google.com/uc?id={{ $json.id }}&export=download",
              "type": "string"
            }
          ]
        },
        "options": {}
      },
      "id": "1a3db4a9-7844-410c-995f-57ea9f364f2b",
      "name": "Normalise GDrive Item",
      "type": "n8n-nodes-base.set",
      "typeVersion": 3.4,
      "position": [
        -1856,
        624
      ],
      "disabled": true
    },
    {
      "parameters": {
        "resource": "fileFolder",
        "queryString": "=",
        "returnAll": true,
        "filter": {
          "folderId": {
            "__rl": true,
            "value": "={{ $json.album_name }}",
            "mode": "id"
          }
        },
        "options": {}
      },
      "id": "c5a16eb6-5187-46ab-babd-361aadf0d91d",
      "name": "List Google Drive1",
      "type": "n8n-nodes-base.googleDrive",
      "typeVersion": 3,
      "position": [
        -2096,
        624
      ],
      "credentials": {
        "googleDriveOAuth2Api": {
          "id": "x3ZSEBzUKx57u1qp",
          "name": "Dave Ggl Main"
        }
      },
      "disabled": true
    },
    {
      "parameters": {
        "jsCode": "// ═══════════════════════════════════════════════════════════════════\n// ver 3.0 Node 2: Audio Extractor\n// video_id is now a public URL - extract base name for file ops\n// line 43 -q:a 8 -ac 1 -ar 16000   **lower to 9 and 8000 **\n// ═══════════════════════════════════════════════════════════════════\nconst fs   = require('fs');\nconst path = require('path');\nconst { execSync } = require('child_process');\n\nconst toFF  = p => p.replace(/\\\\/g, '/');\nconst toWin = p => p.replace(/\\//g, '\\\\');\n\nconst incomingItems = $input.all();\nconst videosDir     = toWin($('Set Input').first().json.video_path);\n\nconst results = [];\nconst processedVideos = new Set();\n\nfor (const item of incomingItems) {\n    const publicUrl  = item.json.public_url;\n    const videoFile  = item.json.source_video_path;\n\n    if (!publicUrl || !videoFile || processedVideos.has(publicUrl)) continue;\n    processedVideos.add(publicUrl);\n\n    if (!fs.existsSync(toWin(videoFile))) continue;\n\n    // Extract safe filename from source_video_path, not public URL\n    const baseName = path.basename(videoFile, path.extname(videoFile));\n    const audioFile = toFF(path.join(videosDir, baseName + '.mp3'));\n\n    let hasAudio = false;\n    try {\n        const audioProbe = execSync(\n            `ffprobe -v error -select_streams a -show_entries stream=codec_type -of csv=p=0 \"${toFF(videoFile)}\"`,\n            { windowsHide: true }\n        ).toString().trim();\n        hasAudio = audioProbe.includes('audio');\n    } catch(e) {}\n\n    if (hasAudio) {\n        try {\n            execSync(\n                `ffmpeg -y -i \"${toFF(videoFile)}\" -vn -acodec libmp3lame -q:a 9 -ac 1 -ar 8000 \"${audioFile}\"`,\n                { windowsHide: true }\n            );\n\n            if (fs.existsSync(toWin(audioFile))) {\n                const audioBuffer = fs.readFileSync(toWin(audioFile));\n                results.push({\n                    json: {\n                        
mimeType:   'audio/mpeg',\n                        video_id:   publicUrl,\n                        public_url: publicUrl,\n                        album:      item.json.album || null,\n                        file_date:  item.json.file_date,\n                        filename:   baseName + '.mp3',\n                        filepath:   audioFile,\n                        has_audio:  true\n                    },\n                    binary: {\n                        file: {\n                            data:     audioBuffer.toString('base64'),\n                            mimeType: 'audio/mpeg',\n                            fileName: baseName + '.mp3'\n                        }\n                    }\n                });\n            }\n        } catch(e) {}\n    }\n}\n\nreturn results;"
      },
      "type": "n8n-nodes-base.code",
      "typeVersion": 2,
      "position": [
        -464,
        608
      ],
      "id": "78df4a30-bdd8-42f1-99bb-bb73e00368fc",
      "name": "FFmpeg Audio"
    },
    {
      "parameters": {
        "content": "## VIDEO SECTION: High-Speed FFmpeg Media Processing Pipeline (n8n Single-Process)\n\n### Context & Environment\n\n* **Platform:** n8n (Self-Hosted v2.13.4+, running locally on Windows 11)\n* **Configuration:** `N8N_RUNNERS_ENABLED=false` (Runs directly in the main server thread; Task Runners are disabled).\n* **Architecture Constraints:** Standard modern n8n binary helper functions (`this.helpers.returnBinaryAsBuffer`) will drop out or cause serialization failures down the line when Task Runners are off.\n* **The Solution:** The pipeline utilizes an optimized, classic **Base64 serialization design pattern** to safely store binary data directly within the item stream.\n\n---\n\n### Pipeline Architecture\n\nThe workflow passes data sequentially across the following nodes:\n\n```\n[ Set Input ] ➔ [ Node 1: FFmpeg ] ➔ [ Node 2: FFmpeg Audio ] ➔ [ Filter ] ➔ [ Groq STT ]\n\n```\n\n#### 1. Node 1: High-Speed Visual Frame Cutter\n\n* **Input:** Receives an array of raw video URLs.\n* **Execution:** Clears the scratch directories once. Iterates linearly over the videos, downloads them via `curl`, extracts structural duration properties using `ffprobe`, and cuts JPEG frames.\n* **Output Payload:** Emits an array of frame items. To maintain the pipeline speed and prevent Node 2 from having to blindly search the filesystem or guess video extensions, Node 1 explicitly attaches the absolute path of the downloaded source video to **every single frame item** as `source_video_path`.\n* **Binary Handling:**\n\n```javascript\nbinary: { data: { data: buffer.toString('base64'), mimeType: 'image/jpeg', fileName: file } }\n```\n\n#### 2. Node 2: Linear Pass-Through + Audio Extractor\n\n* **Input:** Receives the array of frame items from Node 1.\n* **Execution:**\n   1. Immediately clones all incoming frame objects into the main results array to guarantee seamless pass-through down the canvas.\n   2. Tracks processed video IDs using a high-speed JavaScript `Set()` to deduplicate operations.\n   3. Uses the explicit `source_video_path` passed from Node 1 to check the source file directly.\n   4. Runs an ultra-fast `ffprobe` query to detect if a valid audio stream channel exists.\n   5. If an audio stream is present, it instantly executes an optimized FFmpeg extraction rip directly to `.mp3`.\n* **Output Payload:** Appends the newly generated `.mp3` object entries to the passed-through frames array.\n* **Binary Handling:** Uses the matching classic serialization layer:\n\n```javascript\nbinary: { file: { data: audioBuffer.toString('base64'), mimeType: 'audio/mpeg', fileName: baseName + '.mp3' } }\n```\n\n#### 3. Downstream Processing: The HTTP Node Trap\n\n* **The Error:** Modern n8n HTTP Request nodes (v4.4+) configured for `multipart-form-data` with a binary parameter field do not look for inline base64 string properties. Instead, they try to parse the input field via n8n's internal binary disk data service. When they encounter raw file metadata fields from this setup, they attempt to map the Windows drive letter identifier as an internal storage tracker, resulting in the fatal crash: `No binary data manager found for: C`.\n* **The Safe Fix:** To send the generated audio payloads to the **Groq STT** (Whisper API) node without using a translation node, use a custom Code node that builds the call parameters and outputs itself.",
        "height": 128,
        "width": 384,
        "color": "#2E2E2E"
      },
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -2176,
        448
      ],
      "typeVersion": 1,
      "id": "50bee182-1ac2-4164-b272-9ede69bc4894",
      "name": "Sticky Note"
    },
    {
      "parameters": {
        "jsCode": "// Ver 1.1 calls to Groq Stt to transcribe. API token from $env at n8n startup\n// For every incoming item that carries binary.file, uploads the audio file found at\n// json.filepath to the Groq transcription endpoint and emits the item with a\n// `transcript` field added. Items without binary.file are skipped.\n// CHANGE 1.1: guard JSON.parse of the API reply so a non-JSON body cannot throw\n//             outside the Promise (which left it unsettled).\n\nconst fs = require('fs');\nconst FormData = require('form-data');\nconst https = require('https');\n\nconst results = [];\n\nfor (const item of $input.all()) {\n    if (!item.binary?.file) continue;\n    \n    const audioPath = item.json.filepath;\n    const audioBuffer = fs.readFileSync(audioPath);\n    \n    const form = new FormData();\n    form.append('file', audioBuffer, {\n        filename: item.json.filename,\n        contentType: 'audio/mpeg'\n    });\n    form.append('model', 'whisper-large-v3-turbo');\n    \n    const transcript = await new Promise((resolve, reject) => {\n        const req = https.request({\n            hostname: 'api.groq.com',\n            path: '/openai/v1/audio/transcriptions',\n            method: 'POST',\n            headers: {\n                ...form.getHeaders(),\n                'Authorization': `Bearer ${$env.GROQ_API_KEY}`\n            }\n        }, res => {\n            let data = '';\n            res.on('data', chunk => data += chunk);\n            res.on('end', () => {\n                // A throw inside this callback would escape the Promise, so parse defensively.\n                // An unparsable reply is treated like an API error payload: no transcript text.\n                try {\n                    resolve(JSON.parse(data));\n                } catch (e) {\n                    resolve({});\n                }\n            });\n        });\n        req.on('error', reject);\n        form.pipe(req);\n    });\n    \n    results.push({\n        json: {\n            ...item.json,\n            transcript: transcript.text\n        }\n    });\n}\n\nreturn results;"
      },
      "type": "n8n-nodes-base.code",
      "typeVersion": 2,
      "position": [
        -224,
        608
      ],
      "id": "508ba05d-666e-44cf-8de2-8e1ae56307c9",
      "name": "HTTP Groq STT"
    },
    {
      "parameters": {
        "httpMethod": "POST",
        "path": "imageF",
        "options": {}
      },
      "type": "n8n-nodes-base.webhook",
      "typeVersion": 2.1,
      "position": [
        544,
        16
      ],
      "id": "f3377257-ecc0-4ac6-bdbd-9aea0208bb27",
      "name": "Image F",
      "webhookId": "0287891b-7da8-4b25-b5b0-68688c5b5870"
    },
    {
      "parameters": {
        "httpMethod": "POST",
        "path": "pdfF",
        "options": {}
      },
      "type": "n8n-nodes-base.webhook",
      "typeVersion": 2.1,
      "position": [
        544,
        288
      ],
      "id": "502ac100-85f9-4802-b317-cf9a86e14b53",
      "name": "PDF F",
      "webhookId": "0287891b-7da8-4b25-b5b0-68688c5b5870"
    },
    {
      "parameters": {
        "content": "## IMAGE ANALYZE - Separate Executions",
        "height": 256,
        "width": 1108,
        "color": "#333333"
      },
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -624,
        -64
      ],
      "typeVersion": 1,
      "id": "4c74681f-f278-476a-b6fe-7371324bcd33",
      "name": "Sticky Note1"
    },
    {
      "parameters": {
        "assignments": {
          "assignments": [
            {
              "id": "p4",
              "name": "album",
              "value": "={{ JSON.parse($json['body.output'].replace(/```json|```/g,'').trim()).album }}",
              "type": "string"
            },
            {
              "id": "73c9a5c1-dab3-4a1a-b4fd-2deebf6bb170",
              "name": "public_url",
              "value": "={{ JSON.parse($json[\"body.output\"].replace(/```json|```/g,'').trim()).public_url }}",
              "type": "string"
            },
            {
              "id": "78cc1d51-ee49-4eea-905c-69889822bc42",
              "name": "file_date",
              "value": "={{ JSON.parse($json[\"body.output\"].replace(/```json|```/g,'').trim()).file_date }}",
              "type": "string"
            },
            {
              "id": "fe31547c-d07b-4596-b70a-4bae6b875116",
              "name": "mime_type",
              "value": "={{ JSON.parse($json[\"body.output\"].replace(/```json|```/g,'').trim()).mime_type }}",
              "type": "string"
            },
            {
              "id": "23c86fea-bade-4f87-83af-91fee6909f17",
              "name": "category",
              "value": "={{ JSON.parse($json[\"body.output\"].replace(/```json|```/g,'').trim()).category }}",
              "type": "string"
            },
            {
              "id": "379776fc-9d1d-462b-8096-207995ef5bef",
              "name": "summary",
              "value": "={{ JSON.parse($json[\"body.output\"].replace(/```json|```/g,'').trim()).summary }}",
              "type": "string"
            },
            {
              "id": "3958886b-243d-4aa3-92d4-56330a8d1de6",
              "name": "detail_description",
              "value": "={{ JSON.parse($json[\"body.output\"].replace(/```json|```/g,'').trim()).detail_description }}",
              "type": "string"
            },
            {
              "id": "15bb6849-3df7-4111-93b5-e19c96d5055c",
              "name": "tags",
              "value": "={{ JSON.parse($json[\"body.output\"].replace(/```json|```/g,'').trim()).tags }}",
              "type": "array"
            },
            {
              "id": "e1dd3b41-a164-428d-9fb6-b1d60b7efe57",
              "name": "score_social",
              "value": "={{ JSON.parse($json[\"body.output\"].replace(/```json|```/g,'').trim()).score_social }}",
              "type": "string"
            },
            {
              "id": "62d0c89c-3a7d-4378-923e-fd1e48b3dfbd",
              "name": "score_quality",
              "value": "={{ JSON.parse($json[\"body.output\"].replace(/```json|```/g,'').trim()).score_quality }}",
              "type": "string"
            }
          ]
        },
        "options": {}
      },
      "id": "5062c4e1-1d4b-498e-8467-6f3644d1fdd0",
      "name": "Parse1",
      "type": "n8n-nodes-base.set",
      "typeVersion": 3.4,
      "position": [
        896,
        16
      ]
    },
    {
      "parameters": {
        "jsCode": "// ver 1.1 Build Name code\n// Adds a human-readable `name` to every incoming item, derived from the file root\n// of public_url and a type prefix chosen from mime_type.\n// CHANGE 1.1: tolerate malformed percent-sequences in the URL and empty slugs.\n\n// Helper function to slugify text roots cleanly\nfunction slugify(text) {\n  return text\n    .toLowerCase()\n    .replace(/[\\s_]+/g, '-')     // Spaces/underscores to hyphens\n    .replace(/[^a-z0-9-]/g, ''); // Strip special characters\n}\n\n// Track item counts per unique file root in this specific execution batch\nconst typeCounters = {};\n\n// Process all items passing through the flow\nfor (const item of $input.all()) {\n  const mimeType = item.json.mime_type || '';\n  const url = item.json.public_url || '';\n  \n  // 1. Extract file title root from URL safely.\n  // decodeURIComponent throws URIError on a stray '%' (e.g. an unencoded key such as '50% off.jpg'),\n  // so fall back to the undecoded segment instead of failing the whole batch.\n  const rawSegment = url.split('/').pop().split('.').shift() || 'unknown';\n  let rawFileName = rawSegment;\n  try {\n    rawFileName = decodeURIComponent(rawSegment);\n  } catch (e) {\n    rawFileName = rawSegment;\n  }\n  // A root made only of stripped characters would leave an empty slug; keep the name meaningful.\n  const baseSlug = slugify(rawFileName) || 'unknown';\n  \n  // 2. Determine type prefix\n  let prefix = 'img';\n  if (mimeType === 'video_frame') prefix = 'vidframe';\n  if (mimeType === 'video_transcript')  prefix = 'transcript';\n  if (mimeType === 'pdf')         prefix = 'pdf';\n  \n  // 3. For types that split/chunk, generate an incrementing counter suffix\n  if (prefix === 'vidframe' || prefix === 'transcript') {\n    const groupKey = `${prefix}_${baseSlug}`;\n    \n    // Initialize or increment sequence counter\n    typeCounters[groupKey] = (typeCounters[groupKey] || 0) + 1;\n    \n    // Pad sequence number to 2 digits (e.g., 1 -> \"01\", 12 -> \"12\")\n    const sequenceString = String(typeCounters[groupKey]).padStart(2, '0');\n    \n    // Inject the final human-readable name sequence\n    item.json.name = `${groupKey}-${sequenceString}`;\n  } else {\n    // Single-item types (images, pdfs) don't need a counter suffix\n    item.json.name = `${prefix}_${baseSlug}`;\n  }\n}\n\nreturn $input.all();"
      },
      "type": "n8n-nodes-base.code",
      "typeVersion": 2,
      "position": [
        1200,
        288
      ],
      "id": "30163af6-9ace-43d1-8ef8-6d4bea2791e2",
      "name": "Build Name"
    },
    {
      "parameters": {
        "content": "# 📥 Vector App 2.0 Ingest Flow Summary\n\n## ⚡ The Architectural Vision\n\n### Point to a clean Cloudflare R2 (or other public CDN) bucket. Avoid Gdrive, Iphotos types. This flow processes incoming media through blistering-fast parallel HTTP requests and local FFmpeg scripts, then bundles it all into a single structured payload ready for the vector database.\n\n## 🛠️ The Core Engine\n\n### **Trigger & Payload Flattening**\n\nThe flow wakes up whenever a webhook delivers a bucket name and its URL (often fired by an upstream chat AI or file monitor). Instead of stepping through records one by one, the `Split Out` node instantly shatters the list into single parallel streams.\n\n### **The Speed Hack: Direct HTTP vs Native Nodes**\n\nNative Gemini nodes in n8n are great until you try to process a batch and watch them run in a slow, polite, sequential line. To destroy that bottleneck, this workflow drops raw `HTTP Request` nodes to fire parallel webhook hits simultaneously into Gemini. It turns a potential multi-minute waiting room into a high-speed blast of parallel extractions.\n\n### **Groq & Local FFmpeg Magic**\n\nFor video inputs, a heavy-lifting local `Groq Parse` script steps in. It handles a sentence-aware transcript chunker set to a ~900 character target with a 1-sentence overlap. To supercharge keyword creation, it fires *all* tag extraction API queries simultaneously via `Promise.all()` instead of waiting for each chunk to finish.\n\n## ⚙️ Initial Setup & Secrets\n\n### **The 3 Mandatory Folders**\n\nThe workflow executes local file operations on the host machine. 
Inside the `Set Input` node, you **must** manually create and configure three static absolute directory path variables exactly matching your environment:\n\n* `temp_root`: The base staging area (e.g., `C:\\Users\\...`)\n* `frames_dir`: Where extracted image frames live.\n* `video_path`: Where source video downloads and audio `.mp3` rips are processed.\n\n### **The Credential Stack**\n\nBefore hitting play, ensure your environment has the following standard authentications wired up:\n\n* **GROQ_API_KEY**: Injected at n8n startup into your environment flags (`$env.GROQ_API_KEY`) to handle the Whisper transcription and text chunk tagging.\n* **Gemini API Provider Key**: Named correctly to anchor the vector embedding nodes.\n* **Vector Supabase Database Credentials**: Set up to write straight into the target `vec10` table.",
        "height": 560,
        "width": 384,
        "color": 7
      },
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -2176,
        -128
      ],
      "typeVersion": 1,
      "id": "7c5d7fa2-fb82-4587-9b09-b8ea9f23be00",
      "name": "Sticky Note2"
    },
    {
      "parameters": {},
      "id": "95d29185-a495-4516-92d9-ec883541321d",
      "name": "Embeddings Gemini2",
      "type": "@n8n/n8n-nodes-langchain.embeddingsGoogleGemini",
      "typeVersion": 1,
      "position": [
        1472,
        -176
      ],
      "credentials": {
        "googlePalmApi": {
          "id": "2qUzA3DAEFRc7DI1",
          "name": "Gemini FRAINK key"
        }
      }
    },
    {
      "parameters": {
        "assignments": {
          "assignments": [
            {
              "id": "r1",
              "name": "public_url",
              "value": "={{ $json.document.metadata.metadata.public_url }}",
              "type": "string"
            },
            {
              "id": "70759740-6741-407d-9eb2-b80c7557740a",
              "name": "mime_type",
              "value": "={{ $json.document.metadata.metadata.mime_type }}",
              "type": "string"
            },
            {
              "id": "ad30ab1a-1bd4-4db6-b034-fa65b3a8aa31",
              "name": "summary",
              "value": "={{ $json.document.pageContent.match(/summary:\\s*(.+?)(?=\\n\\n|\\n[a-z_]+:)/s)?.[1]?.trim() }}",
              "type": "string"
            },
            {
              "id": "88cc6719-633f-4ab8-a5d1-6ea4f071b34f",
              "name": "score",
              "value": "={{ $json.score }}",
              "type": "number"
            }
          ]
        },
        "options": {}
      },
      "id": "178b81a9-f599-4b07-88cd-dbd8e76cd424",
      "name": "Ranked Results",
      "type": "n8n-nodes-base.set",
      "typeVersion": 3.4,
      "position": [
        1648,
        -64
      ]
    },
    {
      "parameters": {
        "content": "# Vector DB Query\n\n\n\n\n\n\n\n\n\n\n",
        "height": 512,
        "width": 934,
        "color": "#1D3F1C"
      },
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        1120,
        -384
      ],
      "typeVersion": 1,
      "id": "53e856b8-c312-4be7-83d3-053ea493ebe9",
      "name": "Sticky Note3"
    },
    {
      "parameters": {
        "content": "## 📌 Notes: Vector DB; Tldr\n\n## 🗄️ Old school: what it is\n### A personal filing cabinet for your media — but instead of \nfolders, you describe what you want and it finds it. Like \nGoogle Photos search, but for your own private cloud, with \nno Big Tech seeing your files.\n\n## 🤖 New school: what it actually is\n### Multimodal personal RAG pipeline — ingests UGC (photos, \nvideos, PDFs) from Cloudflare R2, runs Gemini Vision + AI \nanalysis per asset, stores semantic embeddings + scored \nmetadata in Supabase pgvector, and exposes a voice AI agent \n(STT → LLM → TTS) with Postgres chat memory, web search, \nand a live media renderer in the UI.\n\n🔑\nmultimodal RAG  ·  vector embeddings  ·  semantic search\nUGC ingest  ·  pgvector / Supabase  ·  n8n orchestration\nGemini Vision  ·  Groq Whisper STT  ·  Cloudflare R2 CDN\n\n### 👤 Built for\nSolo professional prepping social posts  ·  personal life \narchive  ·  travel album retrieval  ·  receipt + doc lookup \nby voice  ·  private alternative to Google Photos / Notion AI\n\n⚙️ Setup\n1. Cloudflare R2 bucket with public CDN domain\n2. Supabase project — enable pgvector extension, \n   create vec10 table (id, content, metadata, embedding)\n3. n8n instance (self-hosted or cloud)\n4. API keys: Gemini, Groq, Supabase service role\n5. Set album name in Vector Ingest1 webhook payload\n   — everything else is automatic from there",
        "height": 704,
        "width": 464,
        "color": "#F9FBA7"
      },
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -1792,
        -128
      ],
      "typeVersion": 1,
      "id": "3a6116aa-e8be-4391-af2b-cb3576aa0b97",
      "name": "Sticky Note6"
    },
    {
      "parameters": {
        "content": "## Test Gdrive",
        "height": 192,
        "width": 480,
        "color": 7
      },
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -2160,
        576
      ],
      "typeVersion": 1,
      "id": "7a77fc71-0259-4a89-863b-8588d5a3f4eb",
      "name": "Sticky Note5"
    },
    {
      "parameters": {
        "mode": "load",
        "tableName": {
          "__rl": true,
          "value": "vec10",
          "mode": "list",
          "cachedResultName": "vec10"
        },
        "prompt": "={{ $json.body.query }}",
        "options": {}
      },
      "id": "98b0fd43-5ac6-4f78-9a39-b3d896b25dfe",
      "name": "Vector Query",
      "type": "@n8n/n8n-nodes-langchain.vectorStoreSupabase",
      "typeVersion": 1,
      "position": [
        1472,
        -288
      ],
      "credentials": {
        "supabaseApi": {
          "id": "fDZJQQDFT5tFE3WD",
          "name": "Vector Supabase"
        }
      }
    },
    {
      "parameters": {
        "httpMethod": "POST",
        "path": "vector-query",
        "responseMode": "responseNode",
        "options": {}
      },
      "type": "n8n-nodes-base.webhook",
      "typeVersion": 2.1,
      "position": [
        1248,
        -288
      ],
      "id": "76d6292f-792a-4d13-aa52-759ef683f341",
      "name": "Vector Query1",
      "webhookId": "a2f0df0f-f0f3-4a60-ba93-dbfa3ec927dc"
    },
    {
      "parameters": {
        "respondWith": "allIncomingItems",
        "options": {}
      },
      "type": "n8n-nodes-base.respondToWebhook",
      "typeVersion": 1.5,
      "position": [
        1840,
        -64
      ],
      "id": "a8a1d4b9-27fc-4fa3-930b-6e08a321ccf7",
      "name": "Respond to Webhook"
    },
    {
      "parameters": {
        "content": "# Vector DB Ingest",
        "height": 560,
        "width": 928,
        "color": 4
      },
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        1120,
        128
      ],
      "typeVersion": 1,
      "id": "dd945249-ee1d-4081-bb41-55bfe022278a",
      "name": "Sticky Note4"
    },
    {
      "parameters": {
        "fieldsToAggregate": {
          "fieldToAggregate": [
            {
              "fieldToAggregate": "mimeType"
            },
            {
              "fieldToAggregate": "public_url"
            },
            {
              "fieldToAggregate": "file_date"
            }
          ]
        },
        "options": {}
      },
      "type": "n8n-nodes-base.aggregate",
      "typeVersion": 1,
      "position": [
        -992,
        16
      ],
      "id": "8c5380f3-31f7-45ba-a2cf-5a649df219aa",
      "name": "Aggregate"
    },
    {
      "parameters": {
        "fieldToSplitOut": "body.mimeType, body.public_url, body.file_date",
        "options": {}
      },
      "type": "n8n-nodes-base.splitOut",
      "typeVersion": 1,
      "position": [
        -336,
        16
      ],
      "id": "b64bc2ea-a04a-4d94-b5a1-faf0a6e581f4",
      "name": "Split Out1",
      "executeOnce": false
    },
    {
      "parameters": {
        "fieldsToAggregate": {
          "fieldToAggregate": [
            {
              "fieldToAggregate": "candidates[0].content.parts[0].text",
              "renameField": true,
              "outputFieldName": "output"
            }
          ]
        },
        "options": {}
      },
      "type": "n8n-nodes-base.aggregate",
      "typeVersion": 1,
      "position": [
        80,
        16
      ],
      "id": "7f1e9930-fbf4-4508-b36b-4bcc346935d0",
      "name": "Aggregate1"
    },
    {
      "parameters": {
        "fieldToSplitOut": "body.output",
        "options": {}
      },
      "type": "n8n-nodes-base.splitOut",
      "typeVersion": 1,
      "position": [
        720,
        16
      ],
      "id": "0c74c0ce-ebb2-400c-9e6f-d77f1768f1ee",
      "name": "Split Out2"
    },
    {
      "parameters": {
        "fieldsToAggregate": {
          "fieldToAggregate": [
            {
              "fieldToAggregate": "mimeType"
            },
            {
              "fieldToAggregate": "public_url"
            },
            {
              "fieldToAggregate": "file_date"
            }
          ]
        },
        "options": {}
      },
      "type": "n8n-nodes-base.aggregate",
      "typeVersion": 1,
      "position": [
        -992,
        288
      ],
      "id": "dce99e78-9097-4825-91de-2b63368e5f54",
      "name": "Aggregate2"
    },
    {
      "parameters": {
        "fieldToSplitOut": "body.mimeType, body.public_url, body.file_date",
        "options": {}
      },
      "type": "n8n-nodes-base.splitOut",
      "typeVersion": 1,
      "position": [
        -336,
        288
      ],
      "id": "4447d368-1844-4779-941c-252b10602dd1",
      "name": "Split Out3",
      "executeOnce": false
    },
    {
      "parameters": {
        "fieldsToAggregate": {
          "fieldToAggregate": [
            {
              "fieldToAggregate": "candidates[0].content.parts[0].text",
              "renameField": true,
              "outputFieldName": "output"
            }
          ]
        },
        "options": {}
      },
      "type": "n8n-nodes-base.aggregate",
      "typeVersion": 1,
      "position": [
        80,
        288
      ],
      "id": "5da43361-fc78-4ebb-a134-ae93d656555c",
      "name": "Aggregate3"
    },
    {
      "parameters": {
        "fieldToSplitOut": "body.output",
        "options": {}
      },
      "type": "n8n-nodes-base.splitOut",
      "typeVersion": 1,
      "position": [
        720,
        288
      ],
      "id": "e087c21b-34fb-440d-91a6-9d5dd06279f7",
      "name": "Split Out4"
    },
    {
      "parameters": {
        "resource": "document",
        "modelId": {
          "__rl": true,
          "value": "models/gemini-2.5-flash-lite",
          "mode": "list",
          "cachedResultName": "models/gemini-2.5-flash-lite"
        },
        "text": "=TASK:\nAnalyze the document in the Document URL(s) visually for semantic retrieval, embeddings, and social/cinematic understanding.\n\n## Input Data for Pass-Through:\npublic_url: {{ $json[\"body.public_url\"] }}\nalbum: {{ $('PDF').item.json.body.album }}\nfile_date: {{ $json[\"body.file_date\"] }}\n\n# EVALUATE\n1. Category - you must select exactly ONE value for the \"category\" field based on these definitions:\n- \"personal_document\": receipt, list, plan, travel, private.\n- \"social_content\": Vlogs, online datasheet, podcast reference, script.\n- \"professional_presentation\": tutorials, online walkthoughs, portfolio renders, pitch slides, corporate presentation.\n- \"professional_documents\": document, other company content.\n\n2. Summary - a single, short, dense sentence capturing the core action or subject. Perfect for keyword search.\n\n3. Detail Description - Rich semantic description optimized for vector search. Include ALL of: document type, purpose, key entities (people/companies/places), dates, amounts, actions, outcomes, every major section or topic covered. If it is a contract cover parties, terms, payment, dates. If technical cover every system or spec mentioned.\n\n4. Tags - a flat string array of 5 to 8 low-level keywords (lowercase, alphanumeric, hyphenated if multi-word) capturing intent, objects, styles, materials, people, items.\n\n5. Scores - evaluate 'score_social': Rate viral/engagement potential. Is it catchy, scroll-stopping, or highly shareable? Integer 1-10.\n- evaluate 'score_quality': Rate technical execution. Is it stable, clear, well-framed, and is the subject context obvious? 
Integer 1-10.\n\n# REQUIRED SCHEMA\nReturn a clean, raw JSON object matching this structure identically:\n{\n  \"public_url\": {{ $('PDF').item.json.body.public_url }},\n  \"mime_type\": \"pdf\",\n  \"album\": {{ $('PDF').item.json.body.album }},\n  \"file_date\": {{ $json[\"body.file_date\"] }},\n  \"category\": \"Choose one from rules\",\n  \"summary\": \"Short searchable sentence\",\n  \"detail_description\": \"Dense physical description of layout, materials, colors, and items\",\n  \"tags\": [],\n  \"score_social\": 1,\n  \"score_quality\": 1\n}",
        "documentUrls": "={{ $json[\"body.public_url\"] }}",
        "simplify": false,
        "options": {}
      },
      "id": "9c2aec33-a5d7-4ba5-a3ad-3a1814f1ce4c",
      "name": "Gemini PDF",
      "type": "@n8n/n8n-nodes-langchain.googleGemini",
      "typeVersion": 1.1,
      "position": [
        -128,
        288
      ],
      "retryOnFail": true,
      "credentials": {
        "googlePalmApi": {
          "id": "2qUzA3DAEFRc7DI1",
          "name": "Gemini FRAINK key"
        }
      }
    },
    {
      "parameters": {
        "assignments": {
          "assignments": [
            {
              "id": "p4",
              "name": "album",
              "value": "={{ JSON.parse($json['body.output'].replace(/```json|```/g,'').trim()).album }}",
              "type": "string"
            },
            {
              "id": "73c9a5c1-dab3-4a1a-b4fd-2deebf6bb170",
              "name": "public_url",
              "value": "={{ JSON.parse($json[\"body.output\"].replace(/```json|```/g,'').trim()).public_url }}",
              "type": "string"
            },
            {
              "id": "7daab872-1a88-439e-a035-b32e020a4bbb",
              "name": "file_date",
              "value": "={{ JSON.parse($json[\"body.output\"].replace(/```json|```/g,'').trim()).file_date }}",
              "type": "string"
            },
            {
              "id": "fe31547c-d07b-4596-b70a-4bae6b875116",
              "name": "mime_type",
              "value": "={{ JSON.parse($json[\"body.output\"].replace(/```json|```/g,'').trim()).mime_type }}",
              "type": "string"
            },
            {
              "id": "23c86fea-bade-4f87-83af-91fee6909f17",
              "name": "category",
              "value": "={{ JSON.parse($json[\"body.output\"].replace(/```json|```/g,'').trim()).category }}",
              "type": "string"
            },
            {
              "id": "379776fc-9d1d-462b-8096-207995ef5bef",
              "name": "summary",
              "value": "={{ JSON.parse($json[\"body.output\"].replace(/```json|```/g,'').trim()).summary }}",
              "type": "string"
            },
            {
              "id": "3958886b-243d-4aa3-92d4-56330a8d1de6",
              "name": "detail_description",
              "value": "={{ JSON.parse($json[\"body.output\"].replace(/```json|```/g,'').trim()).detail_description }}",
              "type": "string"
            },
            {
              "id": "15bb6849-3df7-4111-93b5-e19c96d5055c",
              "name": "tags",
              "value": "={{ JSON.parse($json[\"body.output\"].replace(/```json|```/g,'').trim()).tags }}",
              "type": "array"
            },
            {
              "id": "e1dd3b41-a164-428d-9fb6-b1d60b7efe57",
              "name": "score_social",
              "value": "={{ JSON.parse($json[\"body.output\"].replace(/```json|```/g,'').trim()).score_social }}",
              "type": "string"
            },
            {
              "id": "62d0c89c-3a7d-4378-923e-fd1e48b3dfbd",
              "name": "score_quality",
              "value": "={{ JSON.parse($json[\"body.output\"].replace(/```json|```/g,'').trim()).score_quality }}",
              "type": "string"
            }
          ]
        },
        "options": {}
      },
      "id": "9573dd04-00c9-46c4-9c7a-c4ef67d34732",
      "name": "Parse3",
      "type": "n8n-nodes-base.set",
      "typeVersion": 3.4,
      "position": [
        896,
        288
      ]
    },
    {
      "parameters": {
        "content": "## PDF ANALYZE - Separate Executions",
        "height": 320,
        "width": 1108,
        "color": "#3B3B3B"
      },
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -624,
        192
      ],
      "typeVersion": 1,
      "id": "58af132e-a9c9-4e31-afab-96fa037caa84",
      "name": "Sticky Note9"
    },
    {
      "parameters": {
        "content": "## VIDEO FRAMES ANALYZE",
        "height": 304,
        "width": 1108,
        "color": "#3B3B3B"
      },
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -624,
        832
      ],
      "typeVersion": 1,
      "id": "c847697c-e80a-49f3-9555-f5b094b0eb37",
      "name": "Sticky Note10"
    },
    {
      "parameters": {
        "operation": "list",
        "bucketName": "={{ $json.bucket }}",
        "listOptions": {
          "prefix": "={{ $json.prefix }}"
        }
      },
      "type": "n8n-nodes-cloudflare-r2-storage.cloudflareR2Storage",
      "typeVersion": 1,
      "position": [
        -560,
        -304
      ],
      "id": "9a3cd1fe-0b64-4521-9489-6fd0b9e0f993",
      "name": "List R2 Folder",
      "credentials": {
        "cloudflareR2StorageApi": {
          "id": "IYgY6Cdy8RwrUhy4",
          "name": "Cloudflare R2 vector"
        }
      }
    },
    {
      "parameters": {
        "rules": {
          "values": [
            {
              "conditions": {
                "options": {
                  "caseSensitive": true,
                  "leftValue": "",
                  "typeValidation": "strict",
                  "version": 2
                },
                "conditions": [
                  {
                    "leftValue": "={{ $json.mimeType }}",
                    "rightValue": "image",
                    "operator": {
                      "type": "string",
                      "operation": "equals"
                    },
                    "id": "f8e63397-79eb-4bd0-b3cd-e1fe84954832"
                  }
                ],
                "combinator": "and"
              },
              "renameOutput": true,
              "outputKey": "image"
            },
            {
              "conditions": {
                "options": {
                  "caseSensitive": true,
                  "leftValue": "",
                  "typeValidation": "strict",
                  "version": 2
                },
                "conditions": [
                  {
                    "id": "a3cb0f5f-96db-4276-8fae-8e235ba215e5",
                    "leftValue": "={{ $json.mimeType }}",
                    "rightValue": "pdf",
                    "operator": {
                      "type": "string",
                      "operation": "equals",
                      "name": "filter.operator.equals"
                    }
                  }
                ],
                "combinator": "and"
              },
              "renameOutput": true,
              "outputKey": "pdf"
            },
            {
              "conditions": {
                "options": {
                  "caseSensitive": true,
                  "leftValue": "",
                  "typeValidation": "strict",
                  "version": 2
                },
                "conditions": [
                  {
                    "leftValue": "={{ $json.mimeType }}",
                    "rightValue": "video",
                    "operator": {
                      "type": "string",
                      "operation": "equals"
                    },
                    "id": "2db50870-96ad-40cd-8ebe-55df51b7a99f"
                  }
                ],
                "combinator": "and"
              },
              "renameOutput": true,
              "outputKey": "video"
            }
          ]
        },
        "options": {}
      },
      "id": "15dc4b08-0e9f-4557-b381-6a132caba4ba",
      "name": "Route By Type",
      "type": "n8n-nodes-base.switch",
      "typeVersion": 3.2,
      "position": [
        -1248,
        272
      ]
    },
    {
      "parameters": {
        "content": "## VIDEO TRANSCRIPTION ANALYZE",
        "height": 320,
        "width": 1108,
        "color": "#303030"
      },
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -624,
        512
      ],
      "typeVersion": 1,
      "id": "f67a6147-1ba6-40b6-b5ac-1c981e42b0b7",
      "name": "Sticky Note11"
    },
    {
      "parameters": {
        "content": "### 📦 BUILD RECORD — Content & Metadata Design\n\n### content (text, embedded by vector DB):\n  Combines public_url, album, file_date, mime_type,\n  summary, detail_description, and tags into a single\n  human-readable string. This is what gets embedded —\n  rich prose descriptions drive semantic retrieval quality.\n  file_date is formatted human-readable here so date-based\n  queries (\"files from May\") match naturally.\n\n### metadata (jsonb, for filtering only):\n  Stores name, public_url, album, mime_type, file_date\n  (ISO 8601 for SQL range queries), score_social,\n  score_quality (integers), chunk_index, chunk_total.\n  Never embedded — used for post-retrieval filtering only.\n  scores stored as integers for WHERE comparisons.",
        "height": 128,
        "width": 400,
        "color": 4
      },
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        1440,
        144
      ],
      "typeVersion": 1,
      "id": "23998327-af6b-48e8-9110-89b0ae616bc6",
      "name": "Sticky Note12"
    },
    {
      "parameters": {
        "content": "### ⚡ PARALLEL FLOWS — Latency Strategy\n\n### Images, PDFs, and Videos are routed and executed\nin separate webhook-triggered flows running simultaneously.\nThis avoids sequential bottlenecks — a 2min video\ntranscode does not block 5 image embeds.\n\n### Each execution carries exactly one file's data,\nso upstream node references like .first() are safe\nand always point to the correct item.",
        "height": 96,
        "width": 436,
        "color": 7
      },
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        672,
        416
      ],
      "typeVersion": 1,
      "id": "5739b73d-1809-4490-b274-c2033ed3f543",
      "name": "Sticky Note13"
    },
    {
      "parameters": {
        "content": "### 🔁 AGGREGATE → WEBHOOK → SPLIT OUT\n\n### Each branch aggregates items into a single array payload\nbefore sending to the sub-execution webhook. This means:\n  - One clean HTTP call per file batch\n  - Easier debugging — inspect one payload, not N items\n  - Sub-flow receives predictable structure every time\n\n### Split Out on the other side restores individual items\nfor per-item processing (Gemini, Parse, Build Record).",
        "height": 96,
        "width": 436,
        "color": "#404040"
      },
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        48,
        192
      ],
      "typeVersion": 1,
      "id": "cfffe92d-66ff-4a63-a12d-5afe55d93ecb",
      "name": "Sticky Note14"
    },
    {
      "parameters": {
        "content": "### 🎙️ VIDEO TRANSCRIPT — Custom Code Pipeline\n\n### FFmpeg → FFmpeg Audio → HTTP Groq STT → Groq Parse\n\nFFmpeg extracts frames (scene-detect or interval).\nFFmpeg Audio strips audio to low-bitrate mono mp3\n  (8kHz, q:a 9) — smallest file Whisper accepts cleanly.\n### HTTP Groq STT calls Whisper-large-v3-turbo via Groq API.\nGroq Parse chunks transcript sentence-aware at ~700 chars\n  with 1 sentence overlap for context continuity.\n\n### Each chunk calls Llama-3.1-8b-instant via Groq API\nto generate 3-5 meaningful semantic tags — replacing\nthe previous frequency-based tagger which produced\nstopword noise like \"all, right, can\".\n\n### mime_type: video_transcript distinguishes these from\nvideo_frame records from the same source file.\nfile_date passes through all nodes from Set Media Type.",
        "height": 112,
        "width": 420,
        "color": "#303030"
      },
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        64,
        720
      ],
      "typeVersion": 1,
      "id": "9af86847-647f-4197-8e2b-85868c5561d0",
      "name": "Sticky Note15"
    },
    {
      "parameters": {
        "content": "### 🎞️ FRAME EXTRACTION — Smart Scene Detection\n\n### FFmpeg splits videos into JPEG frames using either:\n  - Scene change detection (short videos < 3min)\n  - Fixed interval fallback (longer videos)\n\n### Frames stored as individual items with binary data,\neach embedded separately by Gemini Vision. This means:\n  - Each meaningful visual moment gets its own vector\n  - Retrieval finds the exact frame, not just the video\n  - Storage is simple flat files, no complex processing",
        "height": 112,
        "width": 436,
        "color": "#3B3B3B"
      },
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        48,
        1008
      ],
      "typeVersion": 1,
      "id": "d2015621-300c-42cd-8e91-293a8da22870",
      "name": "Sticky Note16"
    },
    {
      "parameters": {
        "content": "### ☁️ CLOUDFLARE R2 — CDN-First Storage Strategy\n\n### All media lives in R2 with clean public CDN URLs:\n  https://vector.airpg.ai/album/filename.ext\n\nWhy this matters:\n  - URLs are permanent, simple, and directly embeddable\n  - No OAuth, no expiring tokens, no Drive API complexity\n  - Gemini and other models fetch directly via URL\n  - last_modified from R2 API gives reliable file_date\n  - Scales to any client without GDrive folder sharing,\n    permissions, or convoluted URL structures\n\nRecommended to all clients over GDrive/Dropbox for\nany pipeline where media URLs need to be stable and\npublicly accessible for AI model consumption.",
        "height": 80,
        "width": 432,
        "color": "#171717"
      },
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -560,
        -464
      ],
      "typeVersion": 1,
      "id": "646158dd-da06-47a2-8eea-1c4492c70b9f",
      "name": "Sticky Note17"
    },
    {
      "parameters": {
        "httpMethod": "POST",
        "path": "vector-ingest",
        "options": {
          "rawBody": false
        }
      },
      "id": "d99fde5b-1a24-40c7-bcc7-e2cbd4a40874",
      "name": "Vector Ingest2",
      "type": "n8n-nodes-base.webhook",
      "typeVersion": 2.1,
      "position": [
        -992,
        -304
      ],
      "webhookId": "e7f0b960-a7e9-4c24-8fed-c431dcf7fba4"
    },
    {
      "parameters": {
        "content": "## INTAKE -  Webhook from Web Voice AI - public_url",
        "height": 320,
        "width": 1556,
        "color": "#3B3B3B"
      },
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -1072,
        -384
      ],
      "typeVersion": 1,
      "id": "e7506f0a-e3d4-4ed7-88c5-a95476afe3a4",
      "name": "Sticky Note18"
    },
    {
      "parameters": {
        "content": "## Webhook System",
        "height": 576,
        "width": 640,
        "color": 7
      },
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        480,
        -64
      ],
      "typeVersion": 1,
      "id": "914c6489-ce22-47a9-90c6-465da05d5c01",
      "name": "Sticky Note8"
    },
    {
      "parameters": {
        "content": "## Webhook System",
        "height": 560,
        "width": 452,
        "color": 7
      },
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -1072,
        -64
      ],
      "typeVersion": 1,
      "id": "a2fc8192-e2b0-42b6-8cd5-493e3e02e4a4",
      "name": "Sticky Note19"
    },
    {
      "parameters": {
        "content": "## **Vector DB** Upload:\n## image | pdf | video via single CDN folder",
        "height": 144,
        "width": 384,
        "color": "#F9FBA7"
      },
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -2176,
        -272
      ],
      "typeVersion": 1,
      "id": "713d1ad8-b981-4f7a-8348-7f5b0f509ed6",
      "name": "Sticky Note20"
    },
    {
      "parameters": {
        "content": "## Stores:\n### \"name\": unique name for each file, e.g. \"vidframe-alphie-01\"\n### \"album\": public CDN folder\n### \"file_date\": ISO date\n### \"mime_type\": \"video_frame\" | \"video_transcript\" | \"image\" | \"pdf\"\"pdf\n### \"public_url\": pubic CDN url\n### \"chunk_index\": \"1 of 5\"\n### \"chunk_total\": \"5\"\n### \"score_social\": \"7\"\n### \"score_quality\": \"3\"\n### \"summary\": concise summary of object\n### \"detail description\": full description, or transcription.\n### \"tags\": []\n### ** embeddings **",
        "height": 240,
        "width": 272,
        "color": 4
      },
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        2048,
        448
      ],
      "typeVersion": 1,
      "id": "25af22b8-12ea-47ef-b521-754a58e8b30b",
      "name": "Sticky Note21"
    },
    {
      "parameters": {
        "jsCode": "// ═══════════════════════════════════════════════════════════════════\n// Node: High-Speed Visual Frame Cutter + Gemini Payload Builder (Async Parallel)\n// Version: 4.1.3 (Original Base + Regex Character space, #, ?, &, - Sanitization Patch)\n// Upgrade: Thread-safe dynamic isolation + Smart bounding-box aspect scale.\n// ═══════════════════════════════════════════════════════════════════\n\n// ── DEV DEBUG TUNING CONSTANTS ──────────────────────────────────────\nconst LONG_VIDEO_THRESHOLD_MIN = 3;      // Videos >= this use True Scene Detection\nconst MAX_FRAMES_PER_VIDEO     = 15;     // Absolute ceiling for total frames captured per video\nconst SCENE_DETECT_THRESHOLD   = 0.4;    // Sensitivity: 0.4 = 40% visual shift required to trigger a frame capture\n\n// Logarithmic Math Settings for short videos (< 3 mins), lower is less frames\nconst LOG_MATH_MULTIPLIER    = 2.0;      // 1.5-3.0 Controls scale curve steepness (Lowered per design testing)\nconst LOG_MATH_OFFSET        = 1.0;      // Shifts baseline up/down\nconst MIN_FRAMES_SHORT       = 2;        // Absolute fallback floor so short videos never result in 0 or 1 frames\n\nconst JPEG_QUALITY            = 6;       // ffmpeg -q:v (1=best quality, 31=worst)\nconst DOWNLOAD_TIMEOUT_S      = 120;     // curl max-time in seconds\nconst FFMPEG_TIMEOUT_S        = 240;     // ffmpeg execution timeout in seconds\n\n// ── HELPERS ──────────────────────────────────────────────────────────\nconst fs              = require('fs');\nconst path            = require('path');\nconst { exec }        = require('child_process');\nconst util            = require('util');\nconst execAsync       = util.promisify(exec); // Non-blocking wrapper for terminal execution\n\nconst toFF            = p => p.replace(/\\\\/g, '/');\nconst toWin           = p => p.replace(/\\//g, '\\\\');\n\n// ── DIRS ─────────────────────────────────────────────────────────────\nconst items     = $input.all();\nconst framesDir = 
toWin($('Set Input').first().json.frames_dir);\nconst videosDir = toWin($('Set Input').first().json.video_path);\n\n// ── INITIAL CLEANUP (Fires once immediately before loop starts) ──────\nfor (const dir of [framesDir, videosDir]) {\n    if (!fs.existsSync(dir)) {\n        fs.mkdirSync(dir, { recursive: true });\n    } else {\n        for (const f of fs.readdirSync(dir)) {\n            try { fs.unlinkSync(path.join(dir, f)); } catch {}\n        }\n    }\n}\n\n// ── MAIN PARALLEL LOOP ───────────────────────────────────────────────\nconst processingPromises = items.map(async (item) => {\n    const videoUrl   = item.json.public_url;\n    const fileDate   = item.json.file_date;\n    \n    // PATCH: Expand your original space replacement to natively encode hashes, question marks, and ampersands\n    const encodedUrl = videoUrl\n        .replace(/ /g, '%20')\n        .replace(/#/g, '%23')\n        .replace(/\\?/g, '%3F')\n        .replace(/&/g, '%26');\n\n    // Derive safe file name from incoming URL\n    const rawName   = path.basename(decodeURIComponent(videoUrl));\n    const ext       = path.extname(rawName);\n    const baseName  = path.basename(rawName, ext).replace(/\\s+/g, '_');\n    const videoFile = toFF(path.join(videosDir, baseName + ext));\n\n    // Isolated tracking array so parallel tasks don't corrupt each other's data\n    const createdFramePaths = [];\n\n    // ── PARALLEL DOWNLOAD ───────────────────────────────────────────\n    try {\n        await execAsync(`curl -L --max-time ${DOWNLOAD_TIMEOUT_S} --retry 1 -o \"${videoFile}\" \"${encodedUrl}\"`);\n    } catch(e) {\n        return []; // skip this video if download fails without crashing entire stream\n    }\n\n    const dlSize = fs.existsSync(toWin(videoFile)) ? 
fs.statSync(toWin(videoFile)).size : 0;\n    if (dlSize === 0) return [];\n\n    // ── PROBE DURATION ─────────────────────────────────────────────\n    let durationSecs = 0;\n    try {\n        const { stdout } = await execAsync(`ffprobe -v error -show_entries format=duration -of csv=p=0 \"${videoFile}\"`);\n        durationSecs = parseFloat(stdout.trim()) || 0;\n    } catch(e) {}\n\n    const durationMins = durationSecs / 60;\n    const isLongVideo  = durationMins >= LONG_VIDEO_THRESHOLD_MIN;\n    const outputPattern = toFF(path.join(framesDir, `${baseName}-%02d.jpg`));\n\n    // ── DYNAMIC FRAME EXTRACTION STRATEGY ───────────────────────────\n    if (!isLongVideo) {\n        // Method A: Logarithmic scaling math for short videos (< 3 mins)\n        let targetFrames = Math.round((Math.log(durationSecs) * LOG_MATH_MULTIPLIER) + LOG_MATH_OFFSET);\n        \n        targetFrames = Math.max(MIN_FRAMES_SHORT, targetFrames);\n        targetFrames = Math.min(MAX_FRAMES_PER_VIDEO, targetFrames);\n\n        const calculatedInterval = (durationSecs / targetFrames).toFixed(2);\n\n        try {\n            // Uses smart bounding-box scaling to lock the longest side to 960px (Supports portrait/shorts natively)\n            await execAsync(`ffmpeg -y -i \"${videoFile}\" -vf \"fps=1/${calculatedInterval},scale='if(gt(iw,ih),960,-2)':'if(gt(iw,ih),-2,960)'\" -q:v ${JPEG_QUALITY} \"${outputPattern}\"`);\n            \n            // Explicitly verify and collect files that match this video's signature\n            for (let i = 1; i <= (targetFrames + 2); i++) {\n                const frameNum = String(i).padStart(2, '0');\n                const expectedFile = path.join(framesDir, `${baseName}-${frameNum}.jpg`);\n                if (fs.existsSync(expectedFile)) createdFramePaths.push(expectedFile);\n            }\n        } catch(e) {}\n\n    } else {\n        // Method B: True Scene Change Detection for long videos (3 to 10+ mins)\n        try {\n            await 
execAsync(`ffmpeg -y -i \"${videoFile}\" -vf \"select='gt(scene,${SCENE_DETECT_THRESHOLD})',scale='if(gt(iw,ih),960,-2)':'if(gt(iw,ih),-2,960)'\" -vsync vfr -q:v ${JPEG_QUALITY} \"${outputPattern}\"`);\n            \n            // FIXED: Isolated direct lookup array build instead of live global directory parsing\n            const filesInDir = fs.readdirSync(framesDir);\n            const discoveredFrames = [];\n            \n            for (const f of filesInDir) {\n                if (f.startsWith(baseName) && f.endsWith('.jpg')) {\n                    discoveredFrames.push(path.join(framesDir, f));\n                }\n            }\n\n            discoveredFrames.sort(); // Natural alphabetical sorting verification\n            const cappedFrames = discoveredFrames.slice(0, MAX_FRAMES_PER_VIDEO);\n            createdFramePaths.push(...cappedFrames);\n        } catch(e) {}\n    }\n\n    // ── BUILD LOCAL BATCH ───────────────────────────────────────────\n    const localResults = [];\n    createdFramePaths.sort(); // Maintain absolute visual sequence order\n\n    for (const fullPath of createdFramePaths) {\n        if (!fs.existsSync(fullPath)) continue;\n        \n        const file = path.basename(fullPath);\n        const buffer = fs.readFileSync(fullPath);\n\n        localResults.push({\n            json: {\n                filename:          file,\n                filepath:          fullPath,\n                public_url:        videoUrl,\n                file_date:         fileDate,\n                source_video_path: videoFile,\n                mimeType:          \"video\",\n                duration_min:      durationMins.toFixed(1),\n                base64_data:       buffer.toString('base64')\n            }\n        });\n    }\n\n    return localResults;\n});\n\n// Await completion of all concurrent OS CPU/Network operations\nconst allVideoBatches = await Promise.all(processingPromises);\n\n// Flatten the multi-video array matrices down to a single item 
stream for n8n\nreturn allVideoBatches.flat();"
      },
      "type": "n8n-nodes-base.code",
      "typeVersion": 2,
      "position": [
        -752,
        784
      ],
      "id": "db4e79d1-c09e-495b-8885-d02a48d4f266",
      "name": "FFmpeg"
    },
    {
      "parameters": {
        "assignments": {
          "assignments": [
            {
              "id": "0951eee4-0394-4efe-90ac-cb65234fd8ff",
              "name": "output",
              "value": "={{ $json.candidates[0].content.parts[0].text }}",
              "type": "string"
            },
            {
              "id": "762a3a15-45f5-4c22-8c33-f7302980cbce",
              "name": "",
              "value": "",
              "type": "string"
            }
          ]
        },
        "options": {}
      },
      "type": "n8n-nodes-base.set",
      "typeVersion": 3.4,
      "position": [
        16,
        880
      ],
      "id": "6f5a7a44-e465-49ff-b949-d7ba6863863f",
      "name": "Output"
    },
    {
      "parameters": {
        "assignments": {
          "assignments": [
            {
              "id": "23c86fea-bade-4f87-83af-91fee6909f17",
              "name": "category",
              "value": "={{ JSON.parse($json.output.slice(0, $json.output.indexOf('}') + 1)).category }}",
              "type": "string"
            },
            {
              "id": "379776fc-9d1d-462b-8096-207995ef5bef",
              "name": "summary",
              "value": "={{ JSON.parse($json.output.slice(0, $json.output.indexOf('}') + 1)).summary }}",
              "type": "string"
            },
            {
              "id": "3958886b-243d-4aa3-92d4-56330a8d1de6",
              "name": "detail_description",
              "value": "={{ JSON.parse($json.output.slice(0, $json.output.indexOf('}') + 1)).detail_description}}",
              "type": "string"
            },
            {
              "id": "15bb6849-3df7-4111-93b5-e19c96d5055c",
              "name": "tags",
              "value": "={{ JSON.parse($json.output.slice(0, $json.output.indexOf('}') + 1)).tags }}",
              "type": "array"
            },
            {
              "id": "e1dd3b41-a164-428d-9fb6-b1d60b7efe57",
              "name": "score_social",
              "value": "={{ JSON.parse($json.output.slice(0, $json.output.indexOf('}') + 1)).score_social }}",
              "type": "string"
            },
            {
              "id": "62d0c89c-3a7d-4378-923e-fd1e48b3dfbd",
              "name": "score_quality",
              "value": "={{ JSON.parse($json.output.slice(0, $json.output.indexOf('}') + 1)).score_quality }}",
              "type": "string"
            },
            {
              "id": "05a7c465-eb4c-4a61-8c2d-78eed47ed6f9",
              "name": "public_url",
              "value": "={{ $('FFmpeg').item.json.public_url }}",
              "type": "string"
            },
            {
              "id": "529b3075-3e51-4b87-aac3-fc07fae691aa",
              "name": "album",
              "value": "={{ $('Set Input').first().json.prefix }}",
              "type": "string"
            },
            {
              "id": "b2e2f8e6-0a5d-4f68-bda7-7d4df6cad888",
              "name": "file_date",
              "value": "={{ $('FFmpeg').item.json.file_date }}",
              "type": "string"
            },
            {
              "id": "8ce3af81-b4d1-4941-89d6-d0a89767b5c5",
              "name": "mime_type",
              "value": "video_frame",
              "type": "string"
            },
            {
              "id": "f2046201-6504-49f1-9fc9-ec8854aa1435",
              "name": "name",
              "value": "={{ $('FFmpeg').item.json.filename }}",
              "type": "string"
            }
          ]
        },
        "options": {}
      },
      "id": "86a87ee6-eef5-437f-9bdc-dc3d209c2d17",
      "name": "Parse Output",
      "type": "n8n-nodes-base.set",
      "typeVersion": 3.4,
      "position": [
        240,
        880
      ]
    },
    {
      "parameters": {
        "jsCode": "// Loop through every item coming from the previous FFmpeg node\nreturn $input.all().map(item => {\n  return {\n    json: {\n      ...item.json, // Automatically passes through filename, base64_data, public_url, file_date, etc.\n      prompt: `\n# TASK\nAnalyze the attached visual asset and output a structured metadata object mapping its physical properties and content domain.\n\n# EVALUATE\n1. Category - you must select exactly ONE value for the \"category\" field based on these definitions:\n- \"personal_life\": Family, friends, vacations, private moments.\n- \"curated_inspiration\": Saved clips, interviews, podcasts, news.\n- \"social_content\": Vlogs, talking to camera, trendy content explicitly shot for public growth.\n- \"professional_work\": Real-world site walks, materials, company meeting.\n- \"professional_presentation\": tutorials, online walkthroughs, portfolio renders, pitch slides, corporate materials.\n- \"professional_documents\": document, paperwork content and review.\n\n2. Summary - a single, short, dense sentence capturing the core action or subject. Perfect for keyword search.\n\n3. Detail Description - a comprehensive, objective visual semantic description optimized for vector search embeddings. 3 sentences.\n\n4. Tags - a flat string array of 5 to 8 low-level keywords (lowercase, alphanumeric, hyphenated if multi-word) capturing visible objects, styles, materials, people, items.\n\n5. Scores - evaluate 'score_social': Rate viral/engagement potential. Is it catchy, scroll-stopping, or highly shareable?\n- evaluate 'score_quality': Rate technical execution. 
Is it stable, clear, well-framed, and is the subject context obvious?\n\n# REQUIRED SCHEMA\nReturn a clean, raw JSON object matching this structure identically:\n{\n  \"mime_type\": \"video_frame\",\n  \"category\": \"Choose one from rules\",\n  \"summary\": \"Short searchable sentence\",\n  \"detail_description\": \"Dense physical description of layout, materials, colors, and items\",\n  \"tags\": [],\n  \"score_social\": 1,\n  \"score_quality\": 1\n} \n`\n    }\n  };\n});"
      },
      "type": "n8n-nodes-base.code",
      "typeVersion": 2,
      "position": [
        -464,
        880
      ],
      "id": "dd55c56e-7fad-4f81-9961-1469fb1e7635",
      "name": "Prompt Code"
    },
    {
      "parameters": {
        "method": "POST",
        "url": "=https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash-lite:generateContent",
        "authentication": "genericCredentialType",
        "genericAuthType": "httpHeaderAuth",
        "sendBody": true,
        "specifyBody": "json",
        "jsonBody": "={\n  \"contents\": [\n    {\n      \"role\": \"user\",\n      \"parts\": [\n        {\n          \"inline_data\": {\n            \"mime_type\": \"image/jpeg\",\n            \"data\": \"{{ $json.base64_data }}\"\n          }\n        },\n        {\n          \"text\": {{ JSON.stringify($json.prompt) }}\n        }\n      ]\n    }\n  ],\n  \"generationConfig\": {\n    \"responseMimeType\": \"application/json\",\n    \"temperature\": 0.2\n  }\n}",
        "options": {
          "response": {}
        }
      },
      "type": "n8n-nodes-base.httpRequest",
      "typeVersion": 4.4,
      "position": [
        -224,
        880
      ],
      "id": "77931c45-0bba-4d6d-a375-074d4f1755b0",
      "name": "GEMINI video_frame",
      "credentials": {
        "httpHeaderAuth": {
          "id": "ZnyE0VrTqcsrXHyx",
          "name": "Gemini"
        }
      }
    },
    {
      "parameters": {
        "method": "POST",
        "url": "=https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash-lite:generateContent",
        "authentication": "genericCredentialType",
        "genericAuthType": "httpHeaderAuth",
        "sendBody": true,
        "specifyBody": "json",
        "jsonBody": "={\n  \"contents\": [\n    {\n      \"role\": \"user\",\n      \"parts\": [\n        {\n          \"file_data\": {\n            \"mime_type\": \"image/jpeg\",\n            \"file_uri\": {{ JSON.stringify($json[\"body.public_url\"]) }}\n          }\n        },\n        {\n          \"text\": {{ JSON.stringify(\"TASK:\\nAnalyze the image in the input URL visually for semantic retrieval, embeddings, and social/cinematic understanding.\\n\\n## Input Data for Pass-Through:\\n- public_url: \" + $json[\"body.public_url\"] + \"\\n- album: \" + $('Image').item.json.body.album + \"\\n- file_date: \" + $json[\"body.file_date\"] + \"\\n\\n# EVALUATE\\n1. Category - you must select exactly ONE value for the \\\"category\\\" field based on these definitions:\\n- \\\"personal_life\\\": Family, friends, vacations, private moments.\\n- \\\"curated_inspiration\\\": Saved clips, interviews, podcasts, news.\\n- \\\"social_content\\\": Vlogs, talking to camera, trendy content explicitly shot for public growth.\\n- \\\"professional_work\\\": Real-world site walks, materials, company meeting.\\n- \\\"professional_presentation\\\": tutorials, online walkthroughs, portfolio renders, pitch slides, corporate materials.\\n- \\\"professional_documents\\\": document, paperwork content and review.\\n\\n2. Summary - a single, short, dense sentence capturing the core action or subject. Perfect for keyword search.\\n\\n3. Detail Description - an objective visual semantic description optimized for vector search embeddings. 2 to 4 sentences.\\n\\n4. Tags - a flat string array of 5 to 8 low-level keywords (lowercase, alphanumeric, hyphenated if multi-word) capturing visible objects, styles, materials, people, items.\\n\\n5. Scores - evaluate 'score_social': Rate viral/engagement potential. Is it catchy, scroll-stopping, or highly shareable?\\n- evaluate 'score_quality': Rate technical execution. 
Is it stable, clear, well-framed, and is the subject context obvious?\\n\\n# REQUIRED SCHEMA\\nReturn a clean, raw JSON object matching this structure identically:\\n{\\n  \\\"public_url\\\": \\\"\" + $json[\"body.public_url\"] + \"\\\",\\n  \\\"mime_type\\\": \\\"image\\\",\\n  \\\"album\\\": \\\"\" + $('Image').item.json.body.album + \"\\\",\\n  \\\"file_date\\\": \\\"\" + $json[\"body.file_date\"] + \"\\\",\\n  \\\"category\\\": \\\"Choose one from rules\\\",\\n  \\\"summary\\\": \\\"Short searchable sentence\\\",\\n  \\\"detail_description\\\": \\\"Dense physical description of layout, materials, colors, and items\\\",\\n  \\\"tags\\\": [],\\n  \\\"score_social\\\": 1,\\n  \\\"score_quality\\\": 1\\n}\") }}\n        }\n      ]\n    }\n  ],\n  \"generationConfig\": {\n    \"responseMimeType\": \"application/json\",\n    \"temperature\": 0.2\n  }\n}",
        "options": {
          "response": {}
        }
      },
      "type": "n8n-nodes-base.httpRequest",
      "typeVersion": 4.4,
      "position": [
        -128,
        16
      ],
      "id": "33bb8ccb-5e6e-4d53-aef4-eb73364fccc2",
      "name": "GEMINI image",
      "credentials": {
        "httpHeaderAuth": {
          "id": "ZnyE0VrTqcsrXHyx",
          "name": "Gemini"
        }
      }
    },
    {
      "parameters": {
        "content": "### 🎞️ Parallel Video Frame Cutter (v4.1.3)\n**Purpose:** Processes an array of video URLs concurrently using native OS tools (`curl` + `ffmpeg`) instead of blocking the Node.js event loop. Outputs Base64 frame strings optimized for Gemini Vision analysis.\n\n---\n\n#### ⚙️ Developer Configurations (Top of Node)\n* **`LONG_VIDEO_THRESHOLD_MIN` (Default: `3`):** Videos shorter than this many minutes use logarithmic frame-count scaling; videos at or above it trigger visual scene change detection.\n* **`MAX_FRAMES_PER_VIDEO` (Default: `15`):** Hard ceiling cap to prevent payload bloating on complex videos.\n* **`SCENE_DETECT_THRESHOLD` (Default: `0.4`):** Sensitivity metric. Lower = more frames captured during visual transitions.\n* **`LOG_MATH_MULTIPLIER` (Default: `2.0`, Range: `1.5 - 3.0`):** Tunes the frame-capture curve for short clips. Lower = fewer frames.\n\n---\n\n#### 📐 Smart Aspect-Ratio Scaling\nTo keep portrait videos (Shorts/Reels) from being scaled by width and bloating payload sizes, the filter checks the orientation dynamically:\n`scale='if(gt(iw,ih),960,-2)':'if(gt(iw,ih),-2,960)'`\n* **Landscape:** Locks the width to 960px (auto-height).\n* **Portrait:** Locks the height to 960px (auto-width).\n* *Tip: Lower `960` to `854` to shrink payload storage sizes further, or raise it to `1280` for more detail.*\n\n---\n\n#### ⚠️ Runtime Requirements\n1.  **Node Settings:** Ensure **Mode** is set to `Run Once For All Items` (do not run per-item, or parallelism is disabled).\n2.  **Environment:** Requires globally accessible `curl`, `ffmpeg`, and `ffprobe` binaries in the host system PATH.\n3.  **Directory Mapping:** Expects `frames_dir` and `video_path` string values from the `Set Input` node. Wipes old files in both directories *once* immediately at startup.",
        "color": "#171717"
      },
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -864,
        976
      ],
      "typeVersion": 1,
      "id": "c76db73a-a87f-4f81-b87a-af1a98c373f3",
      "name": "Sticky Note22"
    }
  ],
  "pinData": {},
  "connections": {
    "Build Record": {
      "main": [
        [
          {
            "node": "Vector Ingest",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Embeddings Gemini1": {
      "ai_embedding": [
        [
          {
            "node": "Vector Ingest",
            "type": "ai_embedding",
            "index": 0
          }
        ]
      ]
    },
    "Default Data Loader": {
      "ai_document": [
        [
          {
            "node": "Vector Ingest",
            "type": "ai_document",
            "index": 0
          }
        ]
      ]
    },
    "Character Text Splitter": {
      "ai_textSplitter": [
        [
          {
            "node": "Default Data Loader",
            "type": "ai_textSplitter",
            "index": 0
          }
        ]
      ]
    },
    "Image": {
      "main": [
        [
          {
            "node": "Split Out1",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "PDF": {
      "main": [
        [
          {
            "node": "Split Out3",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Groq Parse": {
      "main": [
        [
          {
            "node": "Merge1",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Merge1": {
      "main": [
        [
          {
            "node": "Build Name",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Skip Unknown Types": {
      "main": [
        [
          {
            "node": "Set Media Type",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Split Out": {
      "main": [
        [
          {
            "node": "Skip Unknown Types",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Set Media Type": {
      "main": [
        [
          {
            "node": "Route By Type",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Set Input": {
      "main": [
        [
          {
            "node": "List R2 Folder",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "List Google Drive1": {
      "main": [
        [
          {
            "node": "Normalise GDrive Item",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "FFmpeg Audio": {
      "main": [
        [
          {
            "node": "HTTP Groq STT",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "HTTP Groq STT": {
      "main": [
        [
          {
            "node": "Groq Parse",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Image F": {
      "main": [
        [
          {
            "node": "Split Out2",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "PDF F": {
      "main": [
        [
          {
            "node": "Split Out4",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Parse1": {
      "main": [
        [
          {
            "node": "Build Name",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Build Name": {
      "main": [
        [
          {
            "node": "Build Record",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Embeddings Gemini2": {
      "ai_embedding": [
        [
          {
            "node": "Vector Query",
            "type": "ai_embedding",
            "index": 0
          }
        ]
      ]
    },
    "Vector Ingest": {
      "main": [
        []
      ]
    },
    "Ranked Results": {
      "main": [
        [
          {
            "node": "Respond to Webhook",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Vector Query": {
      "main": [
        [
          {
            "node": "Ranked Results",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Vector Query1": {
      "main": [
        [
          {
            "node": "Vector Query",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Respond to Webhook": {
      "main": [
        []
      ]
    },
    "Aggregate": {
      "main": [
        [
          {
            "node": "To Image",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Split Out1": {
      "main": [
        [
          {
            "node": "GEMINI image",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Aggregate1": {
      "main": [
        [
          {
            "node": "To Image F",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Split Out2": {
      "main": [
        [
          {
            "node": "Parse1",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Aggregate2": {
      "main": [
        [
          {
            "node": "To PDF",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Split Out3": {
      "main": [
        [
          {
            "node": "Gemini PDF",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Aggregate3": {
      "main": [
        [
          {
            "node": "To PDF F",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Split Out4": {
      "main": [
        [
          {
            "node": "Parse3",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Gemini PDF": {
      "main": [
        [
          {
            "node": "Aggregate3",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Parse3": {
      "main": [
        [
          {
            "node": "Build Name",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "List R2 Folder": {
      "main": [
        [
          {
            "node": "Split Out",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Route By Type": {
      "main": [
        [
          {
            "node": "Aggregate",
            "type": "main",
            "index": 0
          }
        ],
        [
          {
            "node": "Aggregate2",
            "type": "main",
            "index": 0
          }
        ],
        [
          {
            "node": "FFmpeg",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Vector Ingest2": {
      "main": [
        [
          {
            "node": "Set Input",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "FFmpeg": {
      "main": [
        [
          {
            "node": "Prompt Code",
            "type": "main",
            "index": 0
          },
          {
            "node": "FFmpeg Audio",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Output": {
      "main": [
        [
          {
            "node": "Parse Output",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Parse Output": {
      "main": [
        [
          {
            "node": "Merge1",
            "type": "main",
            "index": 1
          }
        ]
      ]
    },
    "Prompt Code": {
      "main": [
        [
          {
            "node": "GEMINI video_frame",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "GEMINI video_frame": {
      "main": [
        [
          {
            "node": "Output",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "GEMINI image": {
      "main": [
        [
          {
            "node": "Aggregate1",
            "type": "main",
            "index": 0
          }
        ]
      ]
    }
  },
  "active": true,
  "settings": {
    "executionOrder": "v1",
    "binaryMode": "separate",
    "timeSavedMode": "fixed",
    "callerPolicy": "workflowsFromSameOwner",
    "availableInMCP": true
  },
  "versionId": "dc260af1-7bf8-4aa0-9c23-cdb74ba5dda2",
  "meta": {
    "templateCredsSetupCompleted": true,
    "aiBuilderAssisted": true,
    "builderVariant": "mcp",
    "instanceId": "f65635fe42aeebebf44eaa521e38cac56bb6b5ccb60db8ca4a763a8fbba06ee8"
  },
  "nodeGroups": [],
  "id": "igxJpGVratDu4fLH",
  "tags": []
}