Spaces:

jbilcke-hf
/

ai-comic-factory

Running on CPU Upgrade

App Files Files Community

982

jbilcke-hf HF staff committed on Jul 10

Commit

95a4e14

•

1 Parent(s): b194c9d

working on an experimental speech bubble display

Browse files

Files changed (17) hide show

package-lock.json +6 -0
package.json +1 -0
src/app/interface/panel/bubble/index.tsx +1 -0
src/app/interface/panel/index.tsx +33 -8
src/app/interface/top-menu/index.tsx +13 -0
src/app/main.tsx +8 -1
src/app/queries/getStoryContinuation.ts +2 -0
src/app/queries/getSystemPrompt.ts +3 -3
src/app/queries/mockLLMResponse.ts +11 -3
src/app/queries/predictNextPanels.ts +5 -4
src/app/store/index.ts +47 -4
src/lib/bubble/injectSpeechBubbleInTheBackground.ts +419 -0
src/lib/createLlamaPrompt.ts +1 -1
src/lib/dirtyGeneratedPanelCleaner.ts +3 -0
src/lib/dirtyGeneratedPanelsParser.ts +5 -2
src/lib/parseBadJSON.ts +3 -2
src/types.ts +1 -0

package-lock.json CHANGED Viewed

@@ -12,6 +12,7 @@
         "@anthropic-ai/sdk": "^0.19.1",
         "@huggingface/hub": "^0.15.1",
         "@huggingface/inference": "^2.6.1",
         "@radix-ui/react-accordion": "^1.1.2",
         "@radix-ui/react-avatar": "^1.0.3",
         "@radix-ui/react-checkbox": "^1.0.4",
@@ -828,6 +829,11 @@
         "@jridgewell/sourcemap-codec": "^1.4.14"
       }
     },
     "node_modules/@next/env": {
       "version": "14.2.3",
       "resolved": "https://registry.npmjs.org/@next/env/-/env-14.2.3.tgz",

         "@anthropic-ai/sdk": "^0.19.1",
         "@huggingface/hub": "^0.15.1",
         "@huggingface/inference": "^2.6.1",
+        "@mediapipe/tasks-vision": "^0.10.14",
         "@radix-ui/react-accordion": "^1.1.2",
         "@radix-ui/react-avatar": "^1.0.3",
         "@radix-ui/react-checkbox": "^1.0.4",
         "@jridgewell/sourcemap-codec": "^1.4.14"
       }
     },
+    "node_modules/@mediapipe/tasks-vision": {
+      "version": "0.10.14",
+      "resolved": "https://registry.npmjs.org/@mediapipe/tasks-vision/-/tasks-vision-0.10.14.tgz",
+      "integrity": "sha512-vOifgZhkndgybdvoRITzRkIueWWSiCKuEUXXK6Q4FaJsFvRJuwgg++vqFUMlL0Uox62U5aEXFhHxlhV7Ja5e3Q=="
+    },
     "node_modules/@next/env": {
       "version": "14.2.3",
       "resolved": "https://registry.npmjs.org/@next/env/-/env-14.2.3.tgz",

package.json CHANGED Viewed

@@ -13,6 +13,7 @@
     "@anthropic-ai/sdk": "^0.19.1",
     "@huggingface/hub": "^0.15.1",
     "@huggingface/inference": "^2.6.1",
     "@radix-ui/react-accordion": "^1.1.2",
     "@radix-ui/react-avatar": "^1.0.3",
     "@radix-ui/react-checkbox": "^1.0.4",

     "@anthropic-ai/sdk": "^0.19.1",
     "@huggingface/hub": "^0.15.1",
     "@huggingface/inference": "^2.6.1",
+    "@mediapipe/tasks-vision": "^0.10.14",
     "@radix-ui/react-accordion": "^1.1.2",
     "@radix-ui/react-avatar": "^1.0.3",
     "@radix-ui/react-checkbox": "^1.0.4",

src/app/interface/panel/bubble/index.tsx CHANGED Viewed

@@ -15,6 +15,7 @@ export function Bubble({ children, onChange }: {
   const ref = useRef<HTMLDivElement>(null)
   const zoomLevel = useStore(s => s.zoomLevel)
   const showCaptions = useStore(s => s.showCaptions)
   const text = useRef(`${children || ''}`)

   const ref = useRef<HTMLDivElement>(null)
   const zoomLevel = useStore(s => s.zoomLevel)
+  const showSpeeches = useStore(s => s.showSpeeches)
   const showCaptions = useStore(s => s.showCaptions)
   const text = useRef(`${children || ''}`)

src/app/interface/panel/index.tsx CHANGED Viewed

@@ -2,22 +2,23 @@
 import { useEffect, useRef, useState, useTransition } from "react"
 import { RxReload, RxPencil2 } from "react-icons/rx"
 import { RenderedScene, RenderingModelVendor } from "@/types"
 import { getRender, newRender } from "@/app/engine/render"
 import { useStore } from "@/app/store"
 import { cn } from "@/lib/utils"
 import { getInitialRenderedScene } from "@/lib/getInitialRenderedScene"
 import { Progress } from "@/app/interface/progress"
 import { EditModal } from "../edit-modal"
-import { Bubble } from "./bubble"
 import { getSettings } from "../settings-dialog/getSettings"
-import { useLocalStorage } from "usehooks-ts"
 import { localStorageKeys } from "../settings-dialog/localStorageKeys"
 import { defaultSettings } from "../settings-dialog/defaultSettings"
 export function Panel({
   page,
   nbPanels,
@@ -35,22 +36,18 @@ export function Panel({
   // panel id, between 0 and (nbPanels - 1)
   panel: number
   className?: string
   width?: number
   height?: number
  }) {
   // index of the panel in the whole app
   const panelIndex = page * nbPanels + panel
   // the panel Id must be unique across all pages
   const panelId = `${panelIndex}`
   // console.log(`panel/index.tsx: <Panel panelId=${panelId}> rendered again!`)
   const [mouseOver, setMouseOver] = useState(false)
   const ref = useRef<HTMLImageElement>(null)
   const font = useStore(s => s.font)
@@ -63,6 +60,10 @@ export function Panel({
   const setPanelPrompt = useStore(s => s.setPanelPrompt)
   const captions = useStore(s => s.captions)
   const caption = captions[panelIndex] || ""
   const setPanelCaption = useStore(s => s.setPanelCaption)
@@ -95,6 +96,28 @@ export function Panel({
   let delay = enableRateLimiter ? (1000 + (500 * panelIndex)) : 1000
   /*
   console.log("panel/index.tsx: DEBUG: " + JSON.stringify({
     page,
@@ -204,6 +227,7 @@ export function Panel({
           if (newRendered.status === "completed") {
             setGeneratingImages(panelId, false)
             addToUpscaleQueue(panelId, newRendered)
           } else if (!newRendered.status || newRendered.status === "error") {
             setGeneratingImages(panelId, false)
           } else {
@@ -274,6 +298,7 @@ export function Panel({
           console.log("panel finished!")
           setGeneratingImages(panelId, false)
           addToUpscaleQueue(panelId, newRendered)
         }
       } catch (err) {

 import { useEffect, useRef, useState, useTransition } from "react"
 import { RxReload, RxPencil2 } from "react-icons/rx"
+import { useLocalStorage } from "usehooks-ts"
 import { RenderedScene, RenderingModelVendor } from "@/types"
 import { getRender, newRender } from "@/app/engine/render"
 import { useStore } from "@/app/store"
+import { injectSpeechBubbleInTheBackground } from "@/lib/bubble/injectSpeechBubbleInTheBackground"
 import { cn } from "@/lib/utils"
 import { getInitialRenderedScene } from "@/lib/getInitialRenderedScene"
 import { Progress } from "@/app/interface/progress"
 import { EditModal } from "../edit-modal"
 import { getSettings } from "../settings-dialog/getSettings"
 import { localStorageKeys } from "../settings-dialog/localStorageKeys"
 import { defaultSettings } from "../settings-dialog/defaultSettings"
+import { Bubble } from "./bubble"
 export function Panel({
   page,
   nbPanels,
   // panel id, between 0 and (nbPanels - 1)
   panel: number
   className?: string
   width?: number
   height?: number
  }) {
   // index of the panel in the whole app
   const panelIndex = page * nbPanels + panel
   // the panel Id must be unique across all pages
   const panelId = `${panelIndex}`
   // console.log(`panel/index.tsx: <Panel panelId=${panelId}> rendered again!`)
   const [mouseOver, setMouseOver] = useState(false)
   const ref = useRef<HTMLImageElement>(null)
   const font = useStore(s => s.font)
   const setPanelPrompt = useStore(s => s.setPanelPrompt)
+  const speeches = useStore(s => s.speeches)
+  const speech = speeches[panelIndex] || ""
+  const setPanelSpeech = useStore(s => s.setPanelSpeech)
   const captions = useStore(s => s.captions)
   const caption = captions[panelIndex] || ""
   const setPanelCaption = useStore(s => s.setPanelCaption)
   let delay = enableRateLimiter ? (1000 + (500 * panelIndex)) : 1000
+  const addSpeechBubble = async () => {
+    if (!renderedRef.current) { return }
+    // story generation failed
+    if (speech.trim() === "...") { return }
+    console.log('Generating speech bubble...')
+    try {
+      const result = await injectSpeechBubbleInTheBackground({
+        inputImageInBase64: renderedRef.current.assetUrl,
+        text: speech,
+        shape: "oval",
+        line: "straight", // "straight", "bubble", "chaotic"
+        //  font?: string;
+        // debug: true,
+      })
+      renderedRef.current.assetUrl = result
+      setRendered(panelId, renderedRef.current)
+    } catch (err) {
+      console.log(`error: failed to inject the speech bubble: ${err}`)
+    }
+  }
   /*
   console.log("panel/index.tsx: DEBUG: " + JSON.stringify({
     page,
           if (newRendered.status === "completed") {
             setGeneratingImages(panelId, false)
             addToUpscaleQueue(panelId, newRendered)
+            addSpeechBubble()
           } else if (!newRendered.status || newRendered.status === "error") {
             setGeneratingImages(panelId, false)
           } else {
           console.log("panel finished!")
           setGeneratingImages(panelId, false)
           addToUpscaleQueue(panelId, newRendered)
+          addSpeechBubble()
         }
       } catch (err) {

src/app/interface/top-menu/index.tsx CHANGED Viewed

@@ -45,6 +45,9 @@ export function TopMenu() {
   const layout = useStore(s => s.layout)
   const setLayout = useStore(s => s.setLayout)
   const setShowCaptions = useStore(s => s.setShowCaptions)
   const showCaptions = useStore(s => s.showCaptions)
@@ -170,6 +173,16 @@ export function TopMenu() {
           <span className="inline md:hidden">Cap.</span>
         </Label>
         </div>
         {/*
         <div className={cn(
           `transition-all duration-200 ease-in-out`,

   const layout = useStore(s => s.layout)
   const setLayout = useStore(s => s.setLayout)
+  const setShowSpeeches = useStore(s => s.setShowSpeeches)
+  const showSpeeches = useStore(s => s.showSpeeches)
   const setShowCaptions = useStore(s => s.setShowCaptions)
   const showCaptions = useStore(s => s.showCaptions)
           <span className="inline md:hidden">Cap.</span>
         </Label>
         </div>
+        <div className="flex flex-row items-center space-x-3">
+        <Switch
+          checked={showSpeeches}
+          onCheckedChange={setShowSpeeches}
+        />
+        <Label className="text-gray-200 dark:text-gray-200">
+          <span className="hidden md:inline">Bubbles</span>
+          <span className="inline md:hidden">Bub.</span>
+        </Label>
+        </div>
         {/*
         <div className={cn(
           `transition-all duration-200 ease-in-out`,

src/app/main.tsx CHANGED Viewed

@@ -49,8 +49,11 @@ export default function Main() {
   // do we need those?
   const renderedScenes = useStore(s => s.renderedScenes)
-  const captions = useStore(s => s.captions)
   const setCaptions = useStore(s => s.setCaptions)
   const zoomLevel = useStore(s => s.zoomLevel)
@@ -101,6 +104,7 @@ export default function Main() {
   const ref = useRef({
     existingPanels: [] as GeneratedPanel[],
     newPanelsPrompts: [] as string[],
     newCaptions: [] as string[],
     prompt: "",
     preset: "",
@@ -142,6 +146,7 @@ export default function Main() {
       ref.current = {
         existingPanels: [],
         newPanelsPrompts: [],
         newCaptions: [],
         prompt,
         preset: preset?.label || "",
@@ -214,6 +219,7 @@ export default function Main() {
           const endAt = currentPanel + nbPanelsToGenerate
           for (let p = startAt; p < endAt; p++) {
             ref.current.newCaptions.push(ref.current.existingPanels[p]?.caption.trim() || "...")
             const newPanel = joinWords([
               // what we do here is that ideally we give full control to the LLM for prompting,
@@ -231,6 +237,7 @@ export default function Main() {
           // update the frontend
           // console.log("updating the frontend..")
           setCaptions(ref.current.newCaptions)
           setPanels(ref.current.newPanelsPrompts)
           setGeneratingStory(false)

   // do we need those?
   const renderedScenes = useStore(s => s.renderedScenes)
+  const speeches = useStore(s => s.speeches)
+  const setSpeeches = useStore(s => s.setSpeeches)
+  const captions = useStore(s => s.captions)
   const setCaptions = useStore(s => s.setCaptions)
   const zoomLevel = useStore(s => s.zoomLevel)
   const ref = useRef({
     existingPanels: [] as GeneratedPanel[],
     newPanelsPrompts: [] as string[],
+    newSpeeches: [] as string[],
     newCaptions: [] as string[],
     prompt: "",
     preset: "",
       ref.current = {
         existingPanels: [],
         newPanelsPrompts: [],
+        newSpeeches: [],
         newCaptions: [],
         prompt,
         preset: preset?.label || "",
           const endAt = currentPanel + nbPanelsToGenerate
           for (let p = startAt; p < endAt; p++) {
             ref.current.newCaptions.push(ref.current.existingPanels[p]?.caption.trim() || "...")
+            ref.current.newSpeeches.push(ref.current.existingPanels[p]?.speech.trim() || "...")
             const newPanel = joinWords([
               // what we do here is that ideally we give full control to the LLM for prompting,
           // update the frontend
           // console.log("updating the frontend..")
+          setSpeeches(ref.current.newSpeeches)
           setCaptions(ref.current.newCaptions)
           setPanels(ref.current.newPanelsPrompts)
           setGeneratingStory(false)

src/app/queries/getStoryContinuation.ts CHANGED Viewed

@@ -48,6 +48,7 @@ export const getStoryContinuation = async ({
       panels.push({
         panel: startAt + i,
         instructions: `${panelCandidates[i]?.instructions || ""}`,
         caption: `${panelCandidates[i]?.caption || ""}`,
       })
     }
@@ -64,6 +65,7 @@ export const getStoryContinuation = async ({
           userStoryPrompt,
           `${".".repeat(p)}`,
         ]),
         caption: "(Sorry, LLM generation failed: using degraded mode)"
       })
     }

       panels.push({
         panel: startAt + i,
         instructions: `${panelCandidates[i]?.instructions || ""}`,
+        speech: `${panelCandidates[i]?.speech || ""}`,
         caption: `${panelCandidates[i]?.caption || ""}`,
       })
     }
           userStoryPrompt,
           `${".".repeat(p)}`,
         ]),
+        speech: "...",
         caption: "(Sorry, LLM generation failed: using degraded mode)"
       })
     }

src/app/queries/getSystemPrompt.ts CHANGED Viewed

@@ -19,9 +19,9 @@ export function getSystemPrompt({
 }) {
   return [
     `You are a writer specialized in ${preset.llmPrompt}`,
-    `Please write detailed drawing instructions and short (2-3 sentences long) speech captions for the ${firstNextOrLast} ${nbPanelsToGenerate} panels (out of ${maxNbPanels} in total) of a new story, but keep it open-ended (it will be continued and expanded later). Please make sure each of those ${nbPanelsToGenerate} panels include info about character gender, age, origin, clothes, colors, location, lights, etc. Only generate those ${nbPanelsToGenerate} panels, but take into account the fact the panels are part of a longer story (${maxNbPanels} panels long).`,
-    `Give your response as a VALID JSON array like this: \`Array<{ panel: number; instructions: string; caption: string; }>\`.`,
     // `Give your response as Markdown bullet points.`,
-    `Be brief in the instructions and narrative captions of those ${nbPanelsToGenerate} panels, don't add your own comments. The captions must be captivating, smart, entertaining. Be straight to the point, and never reply things like "Sure, I can.." etc. Reply using valid JSON!! Important: Write valid JSON!`
   ].filter(item => item).join("\n")
 }

 }) {
   return [
     `You are a writer specialized in ${preset.llmPrompt}`,
+    `Please write detailed drawing instructions and short (2-3 sentences long) speeches and narrator captions for the ${firstNextOrLast} ${nbPanelsToGenerate} panels (out of ${maxNbPanels} in total) of a new story, but keep it open-ended (it will be continued and expanded later). Please make sure each of those ${nbPanelsToGenerate} panels include info about character gender, age, origin, clothes, colors, location, lights, etc. Only generate those ${nbPanelsToGenerate} panels, but take into account the fact the panels are part of a longer story (${maxNbPanels} panels long).`,
+    `Give your response as a VALID JSON array like this: \`Array<{ panel: number; instructions: string; speech: string; caption: string; }>\`.`,
     // `Give your response as Markdown bullet points.`,
+    `Be brief in the instructions, the speeches and the narrative captions of those ${nbPanelsToGenerate} panels, don't add your own comments. The speech must be captivating, smart, entertaining, usually a sentence or two. Be straight to the point, return JSON and never reply things like "Sure, I can.." etc. Reply using valid JSON!! Important: Write valid JSON!`
   ].filter(item => item).join("\n")
 }

src/app/queries/mockLLMResponse.ts CHANGED Viewed

@@ -3,41 +3,49 @@ import { GeneratedPanels } from "@/types"
 export const mockGeneratedPanels: GeneratedPanels = [{
   "panel": 1,
   "instructions": "wide shot of detective walking towards a UFO crash site",
   "caption": "Detective Jameson investigates a UFO crash in the desert"
 },
 {
   "panel": 2,
   "instructions": "close-up of detective's face, determined expression",
   "caption": "He's been tracking this case for weeks"
 },
 {
   "panel": 3,
   "instructions": "medium shot of detective examining UFO debris",
   "caption": "The evidence is scattered all over the desert"
 },
 {
   "panel": 4,
   "instructions": "close-up of strange symbol on UFO debris",
-  "caption": "But what does this symbol mean?"
 },
 {
   "panel": 5,
   "instructions": "wide shot of detective walking towards a strange rock formation",
   "caption": "Jameson follows a trail that leads him deeper into the desert"
 },
 {
   "panel": 6,
   "instructions": "medium shot of detective discovering an alien body",
-  "caption": "He's not alone in the desert"
 },
 {
   "panel": 7,
   "instructions": "close-up of alien's face, eyes closed, peaceful expression",
   "caption": "An alien life form, deceased"
 },
 {
   "panel": 8,
   "instructions": "wide shot of detective standing over the alien body, looking up at the sky",
-  "caption": "Jameson wonders, what other secrets lie beyond the stars?"
 }
 ]

 export const mockGeneratedPanels: GeneratedPanels = [{
   "panel": 1,
   "instructions": "wide shot of detective walking towards a UFO crash site",
+  "speech": "Hmm.. interesting.",
   "caption": "Detective Jameson investigates a UFO crash in the desert"
 },
 {
   "panel": 2,
   "instructions": "close-up of detective's face, determined expression",
+  "speech": "I've been tracking this case for weeks",
   "caption": "He's been tracking this case for weeks"
 },
 {
   "panel": 3,
   "instructions": "medium shot of detective examining UFO debris",
+  "speech": "...",
   "caption": "The evidence is scattered all over the desert"
 },
 {
   "panel": 4,
   "instructions": "close-up of strange symbol on UFO debris",
+  "speech": " what does this symbol mean?",
+  "caption": "strange symbols"
 },
 {
   "panel": 5,
   "instructions": "wide shot of detective walking towards a strange rock formation",
+  "speech": "I've been tracking this case for weeks",
   "caption": "Jameson follows a trail that leads him deeper into the desert"
 },
 {
   "panel": 6,
   "instructions": "medium shot of detective discovering an alien body",
+  "speech": "I'm not alone in the desert",
+  "caption": "He's not alone"
 },
 {
   "panel": 7,
   "instructions": "close-up of alien's face, eyes closed, peaceful expression",
+  "speech": "...?",
   "caption": "An alien life form, deceased"
 },
 {
   "panel": 8,
   "instructions": "wide shot of detective standing over the alien body, looking up at the sky",
+  "speech": "what other secrets lie beyond the stars?",
+  "caption": "Jameson wonders"
 }
 ]

src/app/queries/predictNextPanels.ts CHANGED Viewed

@@ -31,7 +31,7 @@ export const predictNextPanels = async ({
   // return mockGeneratedPanels
   const existingPanelsTemplate = existingPanels.length
-    ? ` To help you, here are the previous panels and their captions (note: if you see an anomaly here eg. no caption or the same description repeated multiple times, do not hesitate to fix the story): ${JSON.stringify(existingPanels, null, 2)}`
     : ''
   const firstNextOrLast =
@@ -55,9 +55,9 @@ export const predictNextPanels = async ({
   let result = ""
-  // we don't require a lot of token for our task
-  // but to be safe, let's count ~130 tokens per panel
-  const nbTokensPerPanel = 130
   const nbMaxNewTokens = nbPanelsToGenerate * nbTokensPerPanel
@@ -115,6 +115,7 @@ export const predictNextPanels = async ({
       .map((cap, i) => ({
         panel: i,
         caption: cap,
         instructions: cap,
       }))
     )

   // return mockGeneratedPanels
   const existingPanelsTemplate = existingPanels.length
+    ? ` To help you, here are the previous panels, their speeches and captions (note: if you see an anomaly here eg. no speech, no caption or the same description repeated multiple times, do not hesitate to fix the story): ${JSON.stringify(existingPanels, null, 2)}`
     : ''
   const firstNextOrLast =
   let result = ""
+  // we don't require a lot of tokens for our task,
+  // but to be safe, let's count ~200 tokens per panel
+  const nbTokensPerPanel = 200
   const nbMaxNewTokens = nbPanelsToGenerate * nbTokensPerPanel
       .map((cap, i) => ({
         panel: i,
         caption: cap,
+        speech: cap,
         instructions: cap,
       }))
     )

src/app/store/index.ts CHANGED Viewed

@@ -26,8 +26,10 @@ export const useStore = create<{
   currentNbPanels: number
   maxNbPanels: number
   panels: string[]
   captions: string[]
   upscaleQueue: Record<string, RenderedScene>
   showCaptions: boolean
   renderedScenes: Record<string, RenderedScene>
   layout: LayoutName
@@ -55,9 +57,12 @@ export const useStore = create<{
   setPreset: (preset: Preset) => void
   setPanels: (panels: string[]) => void
   setPanelPrompt: (newPrompt: string, index: number) => void
-  setShowCaptions: (showCaptions: boolean) => void
   setLayout: (layout: LayoutName, index?: number) => void
   setLayouts: (layouts: LayoutName[]) => void
   setCaptions: (captions: string[]) => void
   setPanelCaption: (newCaption: string, index: number) => void
   setZoomLevel: (zoomLevel: number) => void
@@ -85,6 +90,7 @@ export const useStore = create<{
     stylePrompt: string
     panels: string[]
     renderedScenes: Record<string, RenderedScene>
     captions: string[]
   }>
   loadClap: (blob: Blob) => Promise<void>
@@ -107,9 +113,11 @@ export const useStore = create<{
   maxNbPanels: 4,
   panels: [],
   captions: [],
   upscaleQueue: {} as Record<string, RenderedScene>,
   renderedScenes: {} as Record<string, RenderedScene>,
   showCaptions: getParam("showCaptions", false),
   // deprecated?
@@ -284,6 +292,24 @@ export const useStore = create<{
       ))
     })
   },
   setCaptions: (captions: string[]) => {
     set({
       captions,
@@ -324,6 +350,7 @@ export const useStore = create<{
       currentNbPages: 1,
       currentNbPanels: currentNbPanelsPerPage,
       panels: [],
       captions: [],
       upscaleQueue: {},
       renderedScenes: {},
@@ -408,6 +435,7 @@ export const useStore = create<{
       currentNbPages: 1,
       currentNbPanels: currentNbPanelsPerPage,
       panels: [],
       captions: [],
       upscaleQueue: {},
       renderedScenes: {},
@@ -431,6 +459,7 @@ export const useStore = create<{
       prompt,
       panels,
       renderedScenes,
       captions
     } = get()
@@ -459,7 +488,7 @@ export const useStore = create<{
     for (let i = 0; i < panels.length; i++) {
       const panel = panels[i]
       const caption = captions[i]
       const renderedScene = renderedScenes[`${i}`]
@@ -492,7 +521,7 @@ export const useStore = create<{
         startTimeInMs: currentElapsedTimeInMs,
         assetDurationInMs: defaultSegmentDurationInMs,
         category: ClapSegmentCategory.DIALOGUE,
-        prompt: caption,
         outputType: ClapOutputType.AUDIO,
         status: ClapSegmentStatus.TO_GENERATE,
       }))
@@ -525,6 +554,7 @@ export const useStore = create<{
     stylePrompt: string
     panels: string[]
     renderedScenes: Record<string, RenderedScene>
     captions: string[]
   }> => {
@@ -534,6 +564,7 @@ export const useStore = create<{
     const panels: string[] = []
     const renderedScenes: Record<string, RenderedScene> = {}
     const captions: string[] = []
     const panelGenerationStatus: Record<number, boolean> = {}
@@ -552,14 +583,21 @@ export const useStore = create<{
         cameraShot,
         clap.segments,
         ClapSegmentCategory.INTERFACE,
       ).at(0) as (ClapSegment | undefined)
     })).filter(item => item.storyboard && item.ui) as {
       camera: ClapSegment
       storyboard: ClapSegment
       ui: ClapSegment
     }[]
-    shots.forEach(({ camera, storyboard, ui }, id) => {
       panels.push(storyboard.prompt)
@@ -582,6 +620,8 @@ export const useStore = create<{
       panelGenerationStatus[id] = false
       captions.push(ui?.prompt || "")
     })
@@ -595,6 +635,7 @@ export const useStore = create<{
       stylePrompt,
       panels,
       renderedScenes,
       captions,
     }
@@ -614,6 +655,7 @@ export const useStore = create<{
       stylePrompt,
       panels,
       renderedScenes,
       captions,
     } = await convertClapToComic(currentClap)
@@ -629,6 +671,7 @@ export const useStore = create<{
       // layout,
       panels,
       renderedScenes,
       captions,
       currentNbPages: Math.round(currentNbPanels / currentNbPanelsPerPage),
       upscaleQueue: {},

   currentNbPanels: number
   maxNbPanels: number
   panels: string[]
+  speeches: string[]
   captions: string[]
   upscaleQueue: Record<string, RenderedScene>
+  showSpeeches: boolean
   showCaptions: boolean
   renderedScenes: Record<string, RenderedScene>
   layout: LayoutName
   setPreset: (preset: Preset) => void
   setPanels: (panels: string[]) => void
   setPanelPrompt: (newPrompt: string, index: number) => void
   setLayout: (layout: LayoutName, index?: number) => void
   setLayouts: (layouts: LayoutName[]) => void
+  setShowSpeeches: (showSpeeches: boolean) => void
+  setSpeeches: (speeches: string[]) => void
+  setPanelSpeech: (newSpeech: string, index: number) => void
+  setShowCaptions: (showCaptions: boolean) => void
   setCaptions: (captions: string[]) => void
   setPanelCaption: (newCaption: string, index: number) => void
   setZoomLevel: (zoomLevel: number) => void
     stylePrompt: string
     panels: string[]
     renderedScenes: Record<string, RenderedScene>
+    speeches: string[]
     captions: string[]
   }>
   loadClap: (blob: Blob) => Promise<void>
   maxNbPanels: 4,
   panels: [],
+  speeches: [],
   captions: [],
   upscaleQueue: {} as Record<string, RenderedScene>,
   renderedScenes: {} as Record<string, RenderedScene>,
+  showSpeeches: getParam("showSpeeches", false),
   showCaptions: getParam("showCaptions", false),
   // deprecated?
       ))
     })
   },
+  setSpeeches: (speeches: string[]) => {
+    set({
+      speeches,
+    })
+  },
+  setShowSpeeches: (showSpeeches: boolean) => {
+    set({
+      showSpeeches,
+    })
+  },
+  setPanelSpeech: (newSpeech, index) => {
+    const { speeches } = get()
+    set({
+      speeches: speeches.map((c, i) => (
+        index === i ? newSpeech : c
+      ))
+    })
+  },
   setCaptions: (captions: string[]) => {
     set({
       captions,
       currentNbPages: 1,
       currentNbPanels: currentNbPanelsPerPage,
       panels: [],
+      speeches: [],
       captions: [],
       upscaleQueue: {},
       renderedScenes: {},
       currentNbPages: 1,
       currentNbPanels: currentNbPanelsPerPage,
       panels: [],
+      speeches: [],
       captions: [],
       upscaleQueue: {},
       renderedScenes: {},
       prompt,
       panels,
       renderedScenes,
+      speeches,
       captions
     } = get()
     for (let i = 0; i < panels.length; i++) {
       const panel = panels[i]
+      const speech = speeches[i]
       const caption = captions[i]
       const renderedScene = renderedScenes[`${i}`]
         startTimeInMs: currentElapsedTimeInMs,
         assetDurationInMs: defaultSegmentDurationInMs,
         category: ClapSegmentCategory.DIALOGUE,
+        prompt: speech,
         outputType: ClapOutputType.AUDIO,
         status: ClapSegmentStatus.TO_GENERATE,
       }))
     stylePrompt: string
     panels: string[]
     renderedScenes: Record<string, RenderedScene>
+    speeches: string[]
     captions: string[]
   }> => {
     const panels: string[] = []
     const renderedScenes: Record<string, RenderedScene> = {}
     const captions: string[] = []
+    const speeches: string[] = []
     const panelGenerationStatus: Record<number, boolean> = {}
         cameraShot,
         clap.segments,
         ClapSegmentCategory.INTERFACE,
+      ).at(0) as (ClapSegment | undefined),
+      dialogue: filterSegments(
+        ClapSegmentFilteringMode.START,
+        cameraShot,
+        clap.segments,
+        ClapSegmentCategory.DIALOGUE,
       ).at(0) as (ClapSegment | undefined)
     })).filter(item => item.storyboard && item.ui) as {
       camera: ClapSegment
       storyboard: ClapSegment
       ui: ClapSegment
+      dialogue: ClapSegment
     }[]
+    shots.forEach(({ camera, storyboard, ui, dialogue }, id) => {
       panels.push(storyboard.prompt)
       panelGenerationStatus[id] = false
+      speeches.push(dialogue?.prompt || "")
       captions.push(ui?.prompt || "")
     })
       stylePrompt,
       panels,
       renderedScenes,
+      speeches,
       captions,
     }
       stylePrompt,
       panels,
       renderedScenes,
+      speeches,
       captions,
     } = await convertClapToComic(currentClap)
       // layout,
       panels,
       renderedScenes,
+      speeches,
       captions,
       currentNbPages: Math.round(currentNbPanels / currentNbPanelsPerPage),
       upscaleQueue: {},

src/lib/bubble/injectSpeechBubbleInTheBackground.ts ADDED Viewed

	@@ -0,0 +1,419 @@

+import { ImageSegmenter, FilesetResolver } from "@mediapipe/tasks-vision"
+/**
+ * Draws a speech bubble containing `text` on top of an image and returns the
+ * composited picture.
+ *
+ * The image is segmented with MediaPipe's `ImageSegmenter` (DeepLab v3 model).
+ * When the category mask contains at least one non-zero category, the bubble
+ * is anchored at the top-center of the box returned by
+ * `findCharacterBoundingBox`; otherwise it is anchored at the image center.
+ *
+ * Browser-only: relies on `document`, `Image` and a 2D canvas.
+ *
+ * @param params.inputImageInBase64 - Image source assigned to `Image.src`.
+ * @param params.text - Bubble text; when missing or blank the input is returned untouched.
+ * @param params.shape - Bubble shape forwarded to `drawSpeechBubble` (default `"oval"`).
+ * @param params.line - Line style forwarded to `drawSpeechBubble` (default `"handdrawn"`).
+ * @param params.font - Font forwarded to `drawSpeechBubble` (default `"Arial"`).
+ * @param params.debug - When true, logs the detected categories and paints the mask on the canvas.
+ * @returns A PNG data URL of the canvas, or the unchanged input when there is no text.
+ * @throws Error when a 2D canvas context cannot be obtained.
+ */
+export async function injectSpeechBubbleInTheBackground(params: {
+  inputImageInBase64: string;
+  text?: string;
+  shape?: "oval" | "rectangular" |  "cloud" | "thought";
+  line?: "handdrawn" | "straight" | "bubble" | "chaotic";
+  font?: string;
+  debug?: boolean;
+}): Promise<string> {
+  const {
+    inputImageInBase64,
+    text,
+    shape = "oval",
+    line = "handdrawn",
+    font = "Arial",
+    debug = false,
+  } = params;
+  // Nothing to draw: skip the (expensive) model download and segmentation
+  if (!text || !text.trim()) {
+    return inputImageInBase64;
+  }
+  // Load the image
+  const image = await loadImage(inputImageInBase64);
+  // Set up canvas
+  const canvas = document.createElement('canvas');
+  canvas.width = image.width;
+  canvas.height = image.height;
+  const ctx = canvas.getContext('2d');
+  if (!ctx) {
+    throw new Error("could not obtain a 2D canvas context");
+  }
+  ctx.drawImage(image, 0, 0);
+  // Set up MediaPipe Image Segmenter.
+  // The WASM runtime is pinned to the version declared in package.json so that
+  // it cannot drift away from the JS API bundled with the app.
+  const vision = await FilesetResolver.forVisionTasks(
+    "https://cdn.jsdelivr.net/npm/@mediapipe/tasks-vision@0.10.14/wasm"
+  );
+  const imageSegmenter = await ImageSegmenter.createFromOptions(vision, {
+    baseOptions: {
+      modelAssetPath: "https://storage.googleapis.com/mediapipe-models/image_segmenter/deeplab_v3/float32/1/deeplab_v3.tflite",
+      delegate: "GPU"
+    },
+    outputCategoryMask: true,
+    outputConfidenceMasks: false
+  });
+  let characterBoundingBox: { top: number, left: number, width: number, height: number } | null = null;
+  try {
+    const segmentationResult = imageSegmenter.segment(image);
+    try {
+      if (segmentationResult.categoryMask) {
+        const mask = segmentationResult.categoryMask.getAsUint8Array();
+        const detectedItems = analyzeSegmentationMask(mask, image.width, image.height);
+        if (debug) {
+          console.log("Detected items:", detectedItems);
+        }
+        if (detectedItems.length > 0) {
+          characterBoundingBox = findCharacterBoundingBox(mask, image.width, image.height);
+        }
+        if (debug) {
+          drawSegmentationMask(ctx, mask, image.width, image.height);
+        }
+      }
+    } finally {
+      // masks returned by the synchronous segment() call are owned by the caller
+      segmentationResult.close();
+    }
+  } finally {
+    // a segmenter is created on every call, so it must be released every time
+    imageSegmenter.close();
+  }
+  const bubbleLocation = characterBoundingBox
+    ? { x: characterBoundingBox.left + characterBoundingBox.width / 2, y: characterBoundingBox.top }
+    : { x: image.width / 2, y: image.height / 2 };
+  drawSpeechBubble(ctx, bubbleLocation, text, shape, line, font, !!characterBoundingBox, image.width, image.height, characterBoundingBox);
+  return canvas.toDataURL('image/png');
+}
+/**
+ * Creates an `Image` element from the given source string.
+ *
+ * @param base64 - Value assigned to the element's `src`.
+ * @returns A promise resolved with the element once its `load` handler fires,
+ *          or rejected with whatever the `error` handler receives.
+ */
+function loadImage(base64: string): Promise<HTMLImageElement> {
+  return new Promise<HTMLImageElement>((resolve, reject) => {
+    const element = new Image();
+    element.onerror = reject;
+    element.onload = () => {
+      resolve(element);
+    };
+    // Assigning the source last guarantees both handlers are in place before loading starts
+    element.src = base64;
+  });
+}
+/**
+ * Lists the distinct non-zero category values present in a segmentation mask.
+ *
+ * @param mask - One category value per pixel.
+ * @param width - Unused by this function.
+ * @param height - Unused by this function.
+ * @returns One `unknown-<value>` label per distinct non-zero value, in order of first appearance.
+ */
+function analyzeSegmentationMask(mask: Uint8Array, width: number, height: number): string[] {
+  const seen = new Set<number>();
+  mask.forEach((value) => {
+    if (value > 0) {
+      seen.add(value);
+    }
+  });
+  return [...seen].map((id) => `unknown-${id}`);
+}
+/**
+ * Computes the centroid of all non-zero pixels of a row-major mask.
+ *
+ * @param mask - One category value per pixel, indexed as `y * width + x`.
+ * @param width - Number of columns scanned.
+ * @param height - Number of rows scanned.
+ * @returns The average x/y of the non-zero pixels, or the center of the
+ *          `width` x `height` area when no pixel is set.
+ */
+function findMainCharacterLocation(mask: Uint8Array, width: number, height: number): { x: number, y: number } {
+  let hits = 0;
+  let totalX = 0;
+  let totalY = 0;
+  for (let row = 0; row < height; row++) {
+    const rowOffset = row * width;
+    for (let col = 0; col < width; col++) {
+      if (!(mask[rowOffset + col] > 0)) {
+        continue;
+      }
+      totalX += col;
+      totalY += row;
+      hits++;
+    }
+  }
+  if (hits === 0) {
+    return { x: width / 2, y: height / 2 };
+  }
+  return { x: totalX / hits, y: totalY / hits };
+}
+/**
+ * Paints every non-zero mask pixel onto the canvas with its category color.
+ *
+ * The pixel's RGB is overwritten (not blended) and its alpha is set to 128,
+ * then the whole image data is written back at the origin.
+ *
+ * @param ctx - Canvas context whose pixels are read and replaced.
+ * @param mask - One category value per pixel, in the same order as the image data.
+ * @param width - Width of the region read from the canvas.
+ * @param height - Height of the region read from the canvas.
+ */
+function drawSegmentationMask(ctx: CanvasRenderingContext2D, mask: Uint8Array, width: number, height: number) {
+  const frame = ctx.getImageData(0, 0, width, height);
+  const pixels = frame.data;
+  mask.forEach((category, pixelIndex) => {
+    if (category <= 0) {
+      return;
+    }
+    // Each category gets its own color
+    const [red, green, blue] = getCategoryColor(category);
+    const offset = pixelIndex * 4; // RGBA: four bytes per pixel
+    pixels[offset] = red;
+    pixels[offset + 1] = green;
+    pixels[offset + 2] = blue;
+    pixels[offset + 3] = 128; // 50% opacity
+  });
+  ctx.putImageData(frame, 0, 0);
+}
+/**
+ * Maps a category number to a deterministic, fully saturated RGB color.
+ *
+ * @param category - Category value taken from the segmentation mask.
+ * @returns The `[r, g, b]` triple produced by `hslToRgb` for the derived hue.
+ */
+function getCategoryColor(category: number): [number, number, number] {
+  // Stepping the hue by 137 degrees per category spreads the colors around the wheel
+  const hueInDegrees = (category * 137) % 360;
+  const hueFraction = hueInDegrees / 360;
+  return hslToRgb(hueFraction, 1, 0.5);
+}
+/**
+ * Converts an HSL color to RGB.
+ *
+ * @param h - Hue as a fraction of the color wheel (the code treats 0..1 as one full turn).
+ * @param s - Saturation; `0` yields a gray whose level is `l`.
+ * @param l - Lightness.
+ * @returns `[r, g, b]`, each channel scaled by 255 and rounded.
+ */
+function hslToRgb(h: number, s: number, l: number): [number, number, number] {
+  const toByte = (level: number): number => Math.round(level * 255);
+  // Achromatic: all three channels equal the lightness
+  if (s === 0) {
+    const gray = toByte(l);
+    return [gray, gray, gray];
+  }
+  const q = l < 0.5 ? l * (1 + s) : l + s - l * s;
+  const p = 2 * l - q;
+  const channel = (shiftedHue: number): number => {
+    // Wrap the shifted hue back into the 0..1 range
+    const t = shiftedHue < 0 ? shiftedHue + 1 : shiftedHue > 1 ? shiftedHue - 1 : shiftedHue;
+    if (t < 1/6) {
+      return p + (q - p) * 6 * t;
+    }
+    if (t < 1/2) {
+      return q;
+    }
+    if (t < 2/3) {
+      return p + (q - p) * (2/3 - t) * 6;
+    }
+    return p;
+  };
+  const red = channel(h + 1/3);
+  const green = channel(h);
+  const blue = channel(h - 1/3);
+  return [toByte(red), toByte(green), toByte(blue)];
+}
+/**
+ * Draws a complete speech bubble (outline, tail and wrapped text) on the canvas.
+ *
+ * The bubble is at least 40% of the image width (capped at 300px) by 30% of the
+ * image height (capped at 150px) and grows to fit the wrapped text plus padding.
+ * Its center is placed one bubble-height above `location`, clamped so the bubble
+ * stays inside the image.
+ *
+ * The line style is applied before the outline is stroked so that `line`
+ * affects both the bubble and its tail, and the canvas state is saved and
+ * restored so no style leaks to later drawing.
+ *
+ * @param ctx - Canvas context to draw on.
+ * @param location - Anchor point the bubble is placed above.
+ * @param text - Text rendered inside the bubble.
+ * @param shape - Outline shape of the bubble.
+ * @param line - Dash style used for the strokes.
+ * @param font - Font family for the text.
+ * @param characterDetected - Currently unused; kept so existing callers keep working.
+ * @param imageWidth - Width of the image, used for sizing and clamping.
+ * @param imageHeight - Height of the image, used for sizing and clamping.
+ * @param characterBoundingBox - When set, the tail points at the horizontal center of the
+ *        box, 20% below its top edge; otherwise the tail points at `location`.
+ */
+function drawSpeechBubble(
+  ctx: CanvasRenderingContext2D,
+  location: { x: number, y: number },
+  text: string,
+  shape: "oval" | "rectangular" | "cloud" | "thought",
+  line: "handdrawn" | "straight" | "bubble" | "chaotic",
+  font: string,
+  characterDetected: boolean,
+  imageWidth: number,
+  imageHeight: number,
+  characterBoundingBox: { top: number, left: number, width: number, height: number } | null
+) {
+  const bubbleWidth = Math.min(300, imageWidth * 0.4);
+  const bubbleHeight = Math.min(150, imageHeight * 0.3);
+  const padding = 20;
+  // Longer texts get a smaller font, clamped to the 15px..30px range
+  const fontSize = Math.max(15, Math.min(30, 500 / text.length));
+  ctx.save();
+  // The font must be set before measuring, as wrapping depends on text metrics
+  ctx.font = `${fontSize}px ${font}`;
+  const wrappedText = wrapText(ctx, text, bubbleWidth - padding * 2);
+  const textDimensions = measureTextDimensions(ctx, wrappedText);
+  const finalWidth = Math.max(bubbleWidth, textDimensions.width + padding * 2);
+  const finalHeight = Math.max(bubbleHeight, textDimensions.height + padding * 2);
+  const bubbleLocation = {
+    x: Math.max(finalWidth / 2, Math.min(imageWidth - finalWidth / 2, location.x)),
+    y: Math.max(finalHeight / 2, Math.min(imageHeight - finalHeight / 2, location.y - finalHeight))
+  };
+  ctx.fillStyle = 'white';
+  ctx.strokeStyle = 'black';
+  ctx.lineWidth = 2;
+  // Must happen before stroke(), otherwise the outline ignores the requested style
+  applyLineStyle(ctx, line);
+  ctx.beginPath();
+  drawBubbleShape(ctx, shape, bubbleLocation, finalWidth, finalHeight, location);
+  ctx.fill();
+  ctx.stroke();
+  const tailTarget = characterBoundingBox
+    ? { x: characterBoundingBox.left + characterBoundingBox.width / 2, y: characterBoundingBox.top + characterBoundingBox.height * 0.2 }
+    : location;
+  drawTail(ctx, bubbleLocation, finalWidth, finalHeight, tailTarget, shape);
+  ctx.fillStyle = 'black';
+  ctx.textAlign = 'center';
+  ctx.textBaseline = 'middle';
+  drawFormattedText(ctx, wrappedText, bubbleLocation.x, bubbleLocation.y, finalWidth - padding * 2, fontSize);
+  ctx.restore();
+}
+/**
+ * Traces the outline path for the requested bubble shape.
+ *
+ * Only the path is built here; filling and stroking are left to the caller.
+ * An unrecognized shape traces nothing.
+ *
+ * @param ctx - Canvas context receiving the path.
+ * @param shape - Which outline to trace.
+ * @param bubbleLocation - Center of the bubble.
+ * @param width - Bubble width.
+ * @param height - Bubble height.
+ * @param tailTarget - Unused by this function.
+ */
+function drawBubbleShape(
+  ctx: CanvasRenderingContext2D,
+  shape: "oval" | "rectangular" | "cloud" | "thought",
+  bubbleLocation: { x: number, y: number },
+  width: number,
+  height: number,
+  tailTarget: { x: number, y: number }
+) {
+  if (shape === "oval") {
+    drawOvalBubble(ctx, bubbleLocation, width, height);
+    return;
+  }
+  if (shape === "rectangular") {
+    drawRectangularBubble(ctx, bubbleLocation, width, height);
+    return;
+  }
+  if (shape === "cloud") {
+    drawCloudBubble(ctx, bubbleLocation, width, height);
+    return;
+  }
+  if (shape === "thought") {
+    drawThoughtBubble(ctx, bubbleLocation, width, height);
+  }
+}
+/**
+ * Traces an axis-aligned ellipse centered on `location`.
+ *
+ * @param ctx - Canvas context receiving the path.
+ * @param location - Center of the ellipse.
+ * @param width - Full horizontal extent (the x radius is half of it).
+ * @param height - Full vertical extent (the y radius is half of it).
+ */
+function drawOvalBubble(ctx: CanvasRenderingContext2D, location: { x: number, y: number }, width: number, height: number) {
+  const { x: centerX, y: centerY } = location;
+  const radiusX = width / 2;
+  const radiusY = height / 2;
+  ctx.beginPath();
+  // No rotation, full sweep from 0 to 2π
+  ctx.ellipse(centerX, centerY, radiusX, radiusY, 0, 0, 2 * Math.PI);
+  ctx.closePath();
+}
+/**
+ * Traces a rectangle with rounded corners (fixed 20px radius) centered on `location`.
+ *
+ * The path runs clockwise from the start of the top edge, using a quadratic
+ * curve through each corner point.
+ *
+ * @param ctx - Canvas context receiving the path.
+ * @param location - Center of the rectangle.
+ * @param width - Full width of the rectangle.
+ * @param height - Full height of the rectangle.
+ */
+function drawRectangularBubble(ctx: CanvasRenderingContext2D, location: { x: number, y: number }, width: number, height: number) {
+  const cornerRadius = 20;
+  const left = location.x - width / 2;
+  const right = location.x + width / 2;
+  const top = location.y - height / 2;
+  const bottom = location.y + height / 2;
+  ctx.beginPath();
+  // Top edge, then top-right corner
+  ctx.moveTo(left + cornerRadius, top);
+  ctx.lineTo(right - cornerRadius, top);
+  ctx.quadraticCurveTo(right, top, right, top + cornerRadius);
+  // Right edge, then bottom-right corner
+  ctx.lineTo(right, bottom - cornerRadius);
+  ctx.quadraticCurveTo(right, bottom, right - cornerRadius, bottom);
+  // Bottom edge, then bottom-left corner
+  ctx.lineTo(left + cornerRadius, bottom);
+  ctx.quadraticCurveTo(left, bottom, left, bottom - cornerRadius);
+  // Left edge, then top-left corner
+  ctx.lineTo(left, top + cornerRadius);
+  ctx.quadraticCurveTo(left, top, left + cornerRadius, top);
+  ctx.closePath();
+}
+/**
+ * Traces a cloud-like outline: a rectangle centered on `location` whose four
+ * edges are replaced by rows of outward bumps.
+ *
+ * Each edge is split into bumps of roughly 40px (always at least one, so
+ * narrow or short bubbles never divide by zero), and the bumps of every edge
+ * span exactly that edge, so the outline stays around the requested box.
+ *
+ * @param ctx - Canvas context receiving the path.
+ * @param location - Center of the bubble.
+ * @param width - Full width of the box the bumps are laid around.
+ * @param height - Full height of the box the bumps are laid around.
+ */
+function drawCloudBubble(ctx: CanvasRenderingContext2D, location: { x: number, y: number }, width: number, height: number) {
+  const targetBumpSize = 40;
+  const left = location.x - width / 2;
+  const right = location.x + width / 2;
+  const top = location.y - height / 2;
+  const bottom = location.y + height / 2;
+  // At least one bump per edge keeps the step sizes finite
+  const horizontalBumps = Math.max(1, Math.floor(width / targetBumpSize));
+  const verticalBumps = Math.max(1, Math.floor(height / targetBumpSize));
+  const stepX = width / horizontalBumps;
+  const stepY = height / verticalBumps;
+  // How far each bump's control point sticks out of the box
+  const bulge = Math.min(stepX, stepY) / 2;
+  ctx.beginPath();
+  ctx.moveTo(left, top);
+  // Top edge, left to right
+  for (let i = 0; i < horizontalBumps; i++) {
+    ctx.quadraticCurveTo(left + (i + 0.5) * stepX, top - bulge, left + (i + 1) * stepX, top);
+  }
+  // Right edge, top to bottom
+  for (let i = 0; i < verticalBumps; i++) {
+    ctx.quadraticCurveTo(right + bulge, top + (i + 0.5) * stepY, right, top + (i + 1) * stepY);
+  }
+  // Bottom edge, right to left
+  for (let i = 0; i < horizontalBumps; i++) {
+    ctx.quadraticCurveTo(right - (i + 0.5) * stepX, bottom + bulge, right - (i + 1) * stepX, bottom);
+  }
+  // Left edge, bottom to top
+  for (let i = 0; i < verticalBumps; i++) {
+    ctx.quadraticCurveTo(left - bulge, bottom - (i + 0.5) * stepY, left, bottom - (i + 1) * stepY);
+  }
+  ctx.closePath();
+}
+/**
+ * Traces the outline of a thought bubble, which shares the cloud outline.
+ *
+ * @param ctx - Canvas context receiving the path.
+ * @param location - Center of the bubble.
+ * @param width - Bubble width.
+ * @param height - Bubble height.
+ */
+function drawThoughtBubble(ctx: CanvasRenderingContext2D, location: { x: number, y: number }, width: number, height: number) {
+  // Only the body is traced here; drawTail renders the trail of small circles
+  const center = location;
+  drawCloudBubble(ctx, center, width, height);
+}
+/**
+ * Draws the tail linking the bubble to `tailTarget`, then fills and strokes it
+ * with the context's current styles.
+ *
+ * The tail starts on the bubble's bottom edge, a quarter-width left or right of
+ * the center depending on which side the target lies. For "thought" bubbles it
+ * is a trail of three shrinking circles, each drawn as its own path so they are
+ * not joined by straight segments; for every other shape it is a curved wedge
+ * whose tip is the target.
+ *
+ * @param ctx - Canvas context to draw on.
+ * @param bubbleLocation - Center of the bubble.
+ * @param width - Bubble width.
+ * @param height - Bubble height.
+ * @param tailTarget - Point the tail leads to.
+ * @param shape - Bubble shape; only "thought" changes the rendering.
+ */
+function drawTail(
+  ctx: CanvasRenderingContext2D,
+  bubbleLocation: { x: number, y: number },
+  width: number,
+  height: number,
+  tailTarget: { x: number, y: number },
+  shape: string
+) {
+  const targetIsOnTheRight = tailTarget.x > bubbleLocation.x;
+  const startX = bubbleLocation.x + (targetIsOnTheRight ? width / 4 : -width / 4);
+  const startY = bubbleLocation.y + height / 2;
+  if (shape === "thought") {
+    const bubbleCount = 3;
+    for (let i = 0; i < bubbleCount; i++) {
+      // Evenly spaced between the bubble and the target, shrinking towards the target
+      const t = (i + 1) / (bubbleCount + 1);
+      const x = startX + (tailTarget.x - startX) * t;
+      const y = startY + (tailTarget.y - startY) * t;
+      const radius = 5 * (1 - t);
+      ctx.beginPath();
+      ctx.arc(x, y, radius, 0, Math.PI * 2);
+      ctx.closePath();
+      ctx.fill();
+      ctx.stroke();
+    }
+    return;
+  }
+  // Both sides of the wedge bend through the same control point, slightly below the midpoint
+  const controlX = (startX + tailTarget.x) / 2;
+  const controlY = (startY + tailTarget.y + 20) / 2;
+  ctx.beginPath();
+  ctx.moveTo(startX, startY);
+  ctx.quadraticCurveTo(controlX, controlY, tailTarget.x, tailTarget.y);
+  // Come back to the bubble edge 10px closer to the bubble's center to give the tail a base
+  ctx.quadraticCurveTo(controlX, controlY, startX + (targetIsOnTheRight ? -10 : 10), startY);
+  ctx.closePath();
+  ctx.fill();
+  ctx.stroke();
+}
+/**
+ * Computes the box spanning every non-zero pixel of a row-major mask.
+ *
+ * `width` and `height` of the result are the differences between the extreme
+ * pixel coordinates. When the mask has no non-zero pixel the result is
+ * degenerate: `left`/`top` equal the image size and the extents are negative.
+ *
+ * @param mask - One category value per pixel, indexed as `y * width + x`.
+ * @param width - Number of columns scanned.
+ * @param height - Number of rows scanned.
+ * @returns The bounding box of all non-zero pixels.
+ */
+function findCharacterBoundingBox(mask: Uint8Array, width: number, height: number): { top: number, left: number, width: number, height: number } {
+  let left = width;
+  let top = height;
+  let right = 0;
+  let bottom = 0;
+  for (let row = 0; row < height; row++) {
+    const rowOffset = row * width;
+    for (let col = 0; col < width; col++) {
+      if (!(mask[rowOffset + col] > 0)) {
+        continue;
+      }
+      if (col < left) left = col;
+      if (row < top) top = row;
+      if (col > right) right = col;
+      if (row > bottom) bottom = row;
+    }
+  }
+  return { top, left, width: right - left, height: bottom - top };
+}
+/**
+ * Configures the stroke dash pattern of the context for a named line style.
+ *
+ * Unknown style names leave the context untouched.
+ *
+ * @param ctx - Canvas context to configure.
+ * @param style - One of "handdrawn", "straight", "bubble" or "chaotic".
+ */
+function applyLineStyle(ctx: CanvasRenderingContext2D, style: string) {
+  const dashPatterns = new Map<string, number[]>([
+    ["handdrawn", [5, 5]],
+    ["straight", []],
+    ["bubble", [0, 10]],
+    ["chaotic", [10, 5, 2, 5]],
+  ]);
+  const pattern = dashPatterns.get(style);
+  if (pattern === undefined) {
+    return;
+  }
+  ctx.setLineDash(pattern);
+  if (style === "bubble") {
+    // Zero-length dashes only become visible dots with round caps
+    ctx.lineCap = "round";
+  }
+}
+/**
+ * Splits `text` into lines that fit within `maxWidth` when measured with the
+ * context's current font.
+ *
+ * Words are separated on whitespace. A line is broken before a word that would
+ * make it too wide, and after any word ending with "." or ",". A single word
+ * wider than `maxWidth` is kept whole on its own line. No empty line is ever
+ * produced.
+ *
+ * @param ctx - Canvas context used to measure text.
+ * @param text - Text to wrap.
+ * @param maxWidth - Maximum line width, in the units returned by `measureText`.
+ * @returns The wrapped lines, in reading order (empty when `text` has no words).
+ */
+function wrapText(ctx: CanvasRenderingContext2D, text: string, maxWidth: number): string[] {
+  // Dropping empty fragments avoids stray spaces when the text has repeated whitespace
+  const words = text.split(/\s+/).filter(Boolean);
+  const lines: string[] = [];
+  let currentLine = '';
+  for (const word of words) {
+    const testLine = currentLine ? `${currentLine} ${word}` : word;
+    // Only break when there is something to flush, so an oversized first word never yields an empty line
+    if (currentLine && ctx.measureText(testLine).width > maxWidth) {
+      lines.push(currentLine);
+      currentLine = word;
+    } else {
+      currentLine = testLine;
+    }
+    // Punctuation closes the line it belongs to: break after the word, not before it
+    if (word.endsWith('.') || word.endsWith(',')) {
+      lines.push(currentLine);
+      currentLine = '';
+    }
+  }
+  if (currentLine) {
+    lines.push(currentLine);
+  }
+  return lines;
+}
+/**
+ * Measures the block formed by the given lines with the context's current font.
+ *
+ * @param ctx - Canvas context used to measure text.
+ * @param lines - Lines of text, already wrapped.
+ * @returns `width`: the widest measured line (0 when there are no lines);
+ *          `height`: the number of lines times 1.2x the measured width of "M".
+ */
+function measureTextDimensions(ctx: CanvasRenderingContext2D, lines: string[]): { width: number, height: number } {
+  // The width of a capital "M" stands in for the font's line height
+  const lineHeight = ctx.measureText('M').width * 1.2;
+  const widest = lines.reduce(
+    (best, content) => Math.max(best, ctx.measureText(content).width),
+    0
+  );
+  return { width: widest, height: lineHeight * lines.length };
+}
+/**
+ * Draws the lines stacked vertically and centered as a block on `y`.
+ *
+ * Lines are spaced 1.2x `fontSize` apart. Each line's allowed width shrinks
+ * the further the line is from the middle of the block, and is passed to
+ * `fillText` as its maximum width.
+ *
+ * @param ctx - Canvas context to draw on.
+ * @param lines - Lines of text to draw, top to bottom.
+ * @param x - Horizontal position passed to `fillText` for every line.
+ * @param y - Vertical center of the whole block.
+ * @param maxWidth - Widest allowed line.
+ * @param fontSize - Font size used to derive the line spacing.
+ */
+function drawFormattedText(ctx: CanvasRenderingContext2D, lines: string[], x: number, y: number, maxWidth: number, fontSize: number) {
+  const lineHeight = fontSize * 1.2;
+  const blockHeight = lineHeight * lines.length;
+  const firstLineY = y - blockHeight / 2 + lineHeight / 2;
+  const middleIndex = (lines.length - 1) / 2;
+  lines.forEach((content, row) => {
+    // 1 for the middle line, decreasing towards the first and last lines
+    const taper = 1 - Math.abs(row - middleIndex) / lines.length;
+    const allowedWidth = Math.min(maxWidth, maxWidth * taper);
+    ctx.fillText(content, x, firstLineY + row * lineHeight, allowedWidth);
+  });
+}

src/lib/createLlamaPrompt.ts CHANGED Viewed

@@ -3,7 +3,7 @@ export function createLlamaPrompt(messages: Array<{ role: string, content: strin
   const B_INST = "[INST]", E_INST = "[/INST]";
   const B_SYS = "<<SYS>>\n", E_SYS = "\n<</SYS>>\n\n";
   const BOS = "<s>", EOS = "</s>";
-  const DEFAULT_SYSTEM_PROMPT = "You are a helpful, respectful and honest storywriting assistant. Always answer in a creative and entertaining way, while being safe. Please ensure that your stories and captions are socially unbiased and positive in nature. If a request does not make any sense, go on anyway, as we are writing a fantasy story.";
   if (messages[0].role != "system"){
       messages = [

   const B_INST = "[INST]", E_INST = "[/INST]";
   const B_SYS = "<<SYS>>\n", E_SYS = "\n<</SYS>>\n\n";
   const BOS = "<s>", EOS = "</s>";
+  const DEFAULT_SYSTEM_PROMPT = "You are a helpful, respectful and honest storywriting assistant. Always answer in a creative and entertaining way, while being safe. Please ensure that your stories, speeches and captions are socially unbiased and positive in nature. If a request does not make any sense, go on anyway, as we are writing a fantasy story.";
   if (messages[0].role != "system"){
       messages = [

src/lib/dirtyGeneratedPanelCleaner.ts CHANGED Viewed

@@ -3,8 +3,10 @@ import { GeneratedPanel } from "@/types"
 export function dirtyGeneratedPanelCleaner({
   panel,
   instructions,
   caption
 }: GeneratedPanel): GeneratedPanel {
   let newCaption = `${caption || ""}`.split(":").pop()?.trim() || ""
   let newInstructions = (
     // need to remove from LLM garbage here, too
@@ -34,6 +36,7 @@ export function dirtyGeneratedPanelCleaner({
   return {
     panel,
     instructions: newInstructions,
     caption: newCaption,
   }
 }

 export function dirtyGeneratedPanelCleaner({
   panel,
   instructions,
+  speech,
   caption
 }: GeneratedPanel): GeneratedPanel {
+  let newSpeech = `${speech || ""}`.split(":").pop()?.trim() || ""
   let newCaption = `${caption || ""}`.split(":").pop()?.trim() || ""
   let newInstructions = (
     // need to remove from LLM garbage here, too
   return {
     panel,
     instructions: newInstructions,
+    speech: newSpeech,
     caption: newCaption,
   }
 }

src/lib/dirtyGeneratedPanelsParser.ts CHANGED Viewed

@@ -14,15 +14,18 @@ export function dirtyGeneratedPanelsParser(input: string): GeneratedPanel[] {
   const results = jsonData.map((item, i) => {
     let panel = i
     let caption = item.caption ? item.caption.trim() : ''
     let instructions = item.instructions ? item.instructions.trim() : ''
-    if (!instructions && caption) {
       instructions = caption
     }
     if (!caption && instructions) {
       caption = instructions
     }
-    return { panel, caption, instructions }
   })
   return results

   const results = jsonData.map((item, i) => {
     let panel = i
+    let speech = item.speech ? item.speech.trim() : ''
     let caption = item.caption ? item.caption.trim() : ''
     let instructions = item.instructions ? item.instructions.trim() : ''
+    if (!instructions && !caption && speech) {
+      instructions = speech
+    } else if (!instructions && caption) {
       instructions = caption
     }
     if (!caption && instructions) {
       caption = instructions
     }
+    return { panel, speech, caption, instructions }
   })
   return results

src/lib/parseBadJSON.ts CHANGED Viewed

@@ -5,7 +5,7 @@ export function parseBadJSON(jsonLikeString: string): GeneratedPanels {
   try {
     return JSON.parse(jsonLikeString) as GeneratedPanels
   } catch (err) {
-    var regex = /\{\s*"panel":\s*(\d+),\s*"instructions"\s*:\s*"([^"]+)",\s*"caption":\s*"([^"]*)"\s*\}/gs;
     let results = [];
     let match;
@@ -14,7 +14,8 @@ export function parseBadJSON(jsonLikeString: string): GeneratedPanels {
       let json = {
         panel: Number(match[1]),
         instructions: match[2],
-        caption: match[3]
       };
       results.push(json);
     }

   try {
     return JSON.parse(jsonLikeString) as GeneratedPanels
   } catch (err) {
+    var regex = /\{\s*"panel":\s*(\d+),\s*"instructions"\s*:\s*"([^"]+)",\s*"speech"\s*:\s*"([^"]+)",\s*"caption":\s*"([^"]*)"\s*\}/gs;
     let results = [];
     let match;
       let json = {
         panel: Number(match[1]),
         instructions: match[2],
+        speech: match[3],
+        caption: match[4]
       };
       results.push(json);
     }

src/types.ts CHANGED Viewed

@@ -89,6 +89,7 @@ export interface ImageAnalysisResponse {
 export type GeneratedPanel = {
   panel: number
   instructions: string
   caption: string
 }

 export type GeneratedPanel = {
   panel: number
   instructions: string
+  speech: string
   caption: string
 }