Commit 81eb27e
Parent(s): 7967a54

fixes for the Inference API

Files changed:
- src/app/engine/presets.ts +17 -12
- src/app/engine/render.ts +28 -22
- src/app/interface/panel/index.tsx +40 -5
- src/app/main.tsx +45 -29
- src/app/queries/getStory.ts +2 -2
- src/app/queries/predictWithHuggingFace.ts +5 -0
src/app/engine/presets.ts
CHANGED

@@ -44,11 +44,12 @@ export const presets: Record<string, Preset> = {
     font: "actionman",
     llmPrompt: "japanese manga",
     imagePrompt: (prompt: string) => [
+      `grayscale`,
+      `intricate details`,
       `japanese manga about ${prompt}`,
       "single panel",
       "manga",
       "japanese",
-      "grayscale",
       "intricate",
       "detailed",
       // "drawing"
@@ -98,9 +99,9 @@ export const presets: Record<string, Preset> = {
     font: "actionman",
     llmPrompt: "Franco-Belgian comic (a \"bande dessinée\"), in the style of Franquin, Moebius etc",
     imagePrompt: (prompt: string) => [
-      `franco-belgian color comic about ${prompt}`,
       "bande dessinée",
       "franco-belgian comic",
+      `franco-belgian color comic about ${prompt}`,
       "comic album",
       // "color drawing"
     ],
@@ -123,9 +124,9 @@ export const presets: Record<string, Preset> = {
     font: "actionman",
     llmPrompt: "american comic",
     imagePrompt: (prompt: string) => [
+      "digital color comicbook style",
       `modern american comic about ${prompt}`,
       //"single panel",
-      "digital color comicbook style",
       // "2010s",
       // "digital print",
       // "color comicbook",
@@ -182,11 +183,11 @@ export const presets: Record<string, Preset> = {
     font: "actionman",
     llmPrompt: "american comic",
     imagePrompt: (prompt: string) => [
+      "1950",
+      "50s",
       `vintage american color comic about ${prompt}`,
       // "single panel",
       // "comicbook style",
-      "1950",
-      "50s",
       // "color comicbook",
       // "color drawing"
     ],
@@ -243,10 +244,10 @@ export const presets: Record<string, Preset> = {
     llmPrompt: "new pulp science fiction",
     imagePrompt: (prompt: string) => [
       `vintage color pulp comic panel`,
-      `${prompt}`,
       "40s",
       "1940",
       "vintage science fiction",
+      `${prompt}`,
       // "single panel",
       // "comic album"
     ],
@@ -271,13 +272,14 @@ export const presets: Record<string, Preset> = {
     llmPrompt: "comic books by Moebius",
     imagePrompt: (prompt: string) => [
       `color comic panel`,
-      `${prompt}`,
       "style of Moebius",
+      `${prompt}`,
       "by Moebius",
       "french comic panel",
       "franco-belgian style",
       "bande dessinée",
       "single panel",
+      "intricate"
       // "comic album"
     ],
     negativePrompt: () => [
@@ -300,10 +302,10 @@ export const presets: Record<string, Preset> = {
     llmPrompt: "writing Tintin comic books",
     imagePrompt: (prompt: string) => [
       `color comic panel`,
-      `${prompt}`,
       "style of Hergé",
-      "by Hergé",
       "tintin style",
+      `${prompt}`,
+      "by Hergé",
       "french comic panel",
       "franco-belgian style",
       // "color panel",
@@ -355,11 +357,11 @@ export const presets: Record<string, Preset> = {
     llmPrompt: "french style comic books set in ancient Rome and Gaul",
     imagePrompt: (prompt: string) => [
       `color comic panel`,
-      `about ${prompt}`,
       "romans",
       "gauls",
       "french comic panel",
       "franco-belgian style",
+      `about ${prompt}`,
       "bande dessinée",
       "single panel",
       // "comical",
@@ -412,6 +414,7 @@ export const presets: Record<string, Preset> = {
       `patchwork`,
       `style of Gustav Klimt`,
       `Gustav Klimt painting`,
+      `intricate details`,
       `${prompt}`,
     ],
     negativePrompt: () => [
@@ -433,9 +436,11 @@ export const presets: Record<string, Preset> = {
     imagePrompt: (prompt: string) => [
       `medieval illuminated manuscript`,
       `illuminated manuscript of`,
+      `medieval`,
+      `intricate details`,
       // `medieval color engraving`,
       `${prompt}`,
-      `medieval`
+      // `medieval`
     ],
     negativePrompt: () => [
       "manga",
@@ -506,9 +511,9 @@ export const presets: Record<string, Preset> = {
     llmPrompt: "ancient egyptian stories.",
     imagePrompt: (prompt: string) => [
       `ancient egyptian wall painting`,
+      `ancient egypt`,
       // `medieval color engraving`,
       `${prompt}`,
-      `ancient egypt`,
     ],
     negativePrompt: () => [
       "manga",
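
Note (not part of the commit): each preset's imagePrompt() returns an array of style keywords that main.tsx later joins with ", " into a single SDXL prompt, so the reordering above mainly controls how early the user's subject appears among the style keywords. Below is a minimal sketch of that composition, using a simplified and hypothetical "MiniPreset" shape rather than the repository's actual Preset type (which also carries font, negativePrompt, etc.):

// Illustrative sketch only: how imagePrompt() keywords become one SDXL prompt string.
interface MiniPreset {
  llmPrompt: string
  imagePrompt: (prompt: string) => string[]
}

const francoBelgian: MiniPreset = {
  llmPrompt: "Franco-Belgian comic (a \"bande dessinée\"), in the style of Franquin, Moebius etc",
  imagePrompt: (prompt: string) => [
    "bande dessinée",
    "franco-belgian comic",
    `franco-belgian color comic about ${prompt}`,
    "comic album",
  ],
}

// -> "bande dessinée, franco-belgian comic, franco-belgian color comic about a cat on the moon, comic album"
console.log(francoBelgian.imagePrompt("a cat on the moon").join(", "))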
src/app/engine/render.ts
CHANGED

@@ -26,12 +26,14 @@ export async function newRender({
   prompt,
   // negativePrompt,
   width,
-  height
+  height,
+  withCache
 }: {
   prompt: string
   // negativePrompt: string[]
   width: number
   height: number
+  withCache: boolean
 }) {
   if (!prompt) {
     const error = `cannot call the rendering API without a prompt, aborting..`
@@ -49,6 +51,8 @@ export async function newRender({
     segments: []
   }

+  const nbInferenceSteps = 30
+  const guidanceScale = 9

   try {
     if (renderingEngine === "REPLICATE") {
@@ -69,7 +73,7 @@ export async function newRender({
         input: {
           prompt: [
             "beautiful",
-            "intricate details",
+            // "intricate details",
             prompt,
             "award winning",
             "high resolution"
@@ -111,10 +115,9 @@ export async function newRender({
        ? huggingFaceInferenceEndpointUrl
        : `https://api-inference.huggingface.co/models/${huggingFaceInferenceApiBaseModel}`

-
      const positivePrompt = [
        "beautiful",
-       "intricate details",
+       // "intricate details",
        prompt,
        "award winning",
        "high resolution"
@@ -129,12 +132,14 @@ export async function newRender({
        body: JSON.stringify({
          inputs: positivePrompt,
          parameters: {
-           num_inference_steps:
-           guidance_scale:
+           num_inference_steps: nbInferenceSteps,
+           guidance_scale: guidanceScale,
            width,
            height,
          },
-
+
+          // this doesn't do what you think it does
+          use_cache: false, // withCache,
        }),
        cache: "no-store",
        // we can also use this (see https://vercel.com/blog/vercel-cache-api-nextjs-cache)
@@ -159,13 +164,12 @@ export async function newRender({
      // note: there is no "refiner" step yet for custom inference endpoint
      // you probably don't need it anyway, as you probably want to deploy an all-in-one model instead for perf reasons

-     // update: right now it is not possible to use it from the Inference API either:
-     // "Model type not found or pipeline not implemented"
-     /*
      if (renderingEngine === "INFERENCE_API") {
        try {
          const refinerModelUrl = `https://api-inference.huggingface.co/models/${huggingFaceInferenceApiRefinerModel}`

+
+
          const res = await fetch(refinerModelUrl, {
            method: "POST",
            headers: {
@@ -173,15 +177,17 @@ export async function newRender({
              Authorization: `Bearer ${huggingFaceToken}`,
            },
            body: JSON.stringify({
-
+             inputs: Buffer.from(blob).toString('base64'),
              parameters: {
                prompt: positivePrompt,
-               num_inference_steps:
-               guidance_scale:
+               num_inference_steps: nbInferenceSteps,
+               guidance_scale: guidanceScale,
                width,
                height,
              },
-
+
+             // this doesn't do what you think it does
+             use_cache: false, // withCache,
            }),
            cache: "no-store",
            // we can also use this (see https://vercel.com/blog/vercel-cache-api-nextjs-cache)
@@ -191,22 +197,22 @@ export async function newRender({

          // Recommendation: handle errors
          if (res.status !== 200) {
-           const content = await res.
-
-           //
-           throw new Error('Failed to fetch data')
+           const content = await res.json()
+           // if (content.error.include("currently loading")) {
+           //   console.log("refiner isn't ready yet")
+           throw new Error(content?.error || 'Failed to fetch data')
          }

-         const
+         const refinedBlob = await res.arrayBuffer()

          const contentType = res.headers.get('content-type')

-         assetUrl = `data:${contentType};base64,${Buffer.from(
+         assetUrl = `data:${contentType};base64,${Buffer.from(refinedBlob).toString('base64')}`

        } catch (err) {
          console.log(`Refiner step failed, but this is not a blocker. Error details: ${err}`)
        }
-     }
+     }


      return {
@@ -230,7 +236,7 @@ export async function newRender({
        prompt,
        // negativePrompt, unused for now
        nbFrames: 1,
-       nbSteps:
+       nbSteps: nbInferenceSteps, // 20 = fast, 30 = better, 50 = best
        actionnables: [], // ["text block"],
        segmentation: "disabled", // "firstframe", // one day we will remove this param, to make it automatic
        width,
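
In short, render.ts now hard-codes nbInferenceSteps = 30 and guidanceScale = 9, forwards them to the Inference API as num_inference_steps and guidance_scale, and re-enables the previously commented-out refiner pass (posting the base image back to a refiner model and rebuilding the data URL from the refined bytes). A standalone sketch of the base text-to-image call is shown below; it is not the app's code: the model id, token variable, and image size are assumptions, while the parameter names and the data-URL construction mirror the diff.

// Minimal sketch (assumptions: Node 18+, HF_API_TOKEN set, and an SDXL-style
// model that accepts num_inference_steps / guidance_scale / width / height).
async function renderOnce(prompt: string): Promise<string> {
  const model = "stabilityai/stable-diffusion-xl-base-1.0" // assumed model id
  const res = await fetch(`https://api-inference.huggingface.co/models/${model}`, {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
      Authorization: `Bearer ${process.env.HF_API_TOKEN}`,
    },
    body: JSON.stringify({
      inputs: prompt,
      parameters: {
        num_inference_steps: 30, // the commit settles on 30 ("20 = fast, 30 = better, 50 = best")
        guidance_scale: 9,
        width: 1024,
        height: 512,
      },
      use_cache: false, // the commit notes this "doesn't do what you think it does"
    }),
  })

  if (res.status !== 200) {
    const content = await res.json()
    throw new Error(content?.error || "Failed to fetch data")
  }

  // the API answers with raw image bytes; render.ts turns them into a data URL
  const bytes = await res.arrayBuffer()
  const contentType = res.headers.get("content-type")
  return `data:${contentType};base64,${Buffer.from(bytes).toString("base64")}`
}

The cache: "no-store" option passed to fetch in render.ts is a separate concern (it opts out of Next.js request caching); the use_cache flag above targets the Inference API's own result cache, which the panel component still has to work around (see the next file).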
src/app/interface/panel/index.tsx
CHANGED

@@ -63,7 +63,12 @@ export function Panel({
   const delay = enableRateLimiter ? (1000 + (500 * panel)) : 1000


-  const startImageGeneration = ({ prompt, width, height
+  const startImageGeneration = ({ prompt, width, height, revision }: {
+    prompt: string
+    width: number
+    height: number
+    revision: number
+  }) => {
     if (!prompt?.length) { return }

     // important: update the status, and clear the scene
@@ -75,12 +80,37 @@ export function Panel({
     setTimeout(() => {
       startTransition(async () => {

+        const withCache = revision === 0
+
+        // atrocious and very, very, very, very, very, very, very ugly hack for the Inference API
+        // as apparently "use_cache: false" doesn't work, or doesn't do what we want it to do
+        let cacheInvalidationHack = ""
+        const nbMaxRevisions = 6
+        for (let i = 0; i < revision && revision < nbMaxRevisions; i++) {
+          const j = Math.random()
+          cacheInvalidationHack += j < 0.3 ? "_" : j < 0.6 ? "," : "-"
+        }
+
         let newRendered: RenderedScene
         try {
-
+
+          newRendered = await newRender({
+            prompt: cacheInvalidationHack + " " + prompt,
+            width,
+            height,
+
+            // TODO: here we never reset the revision, so only the first user
+            // comic will be cached (we should fix that later)
+            withCache: revision === 0
+          })
         } catch (err) {
           // "Failed to load the panel! Don't worry, we are retrying..")
-          newRendered = await newRender({
+          newRendered = await newRender({
+            prompt: cacheInvalidationHack + " " + prompt,
+            width,
+            height,
+            withCache,
+          })
         }

         if (newRendered) {
@@ -133,7 +163,12 @@ export function Panel({
         } else if (newRendered.status === "error" ||
           (newRendered.status === "completed" && !newRendered.assetUrl?.length)) {
           try {
-            const newAttempt = await newRender({
+            const newAttempt = await newRender({
+              prompt,
+              width,
+              height,
+              withCache: false,
+            })
             setRendered(panelId, newAttempt)
           } catch (err) {
             console.error("yeah sorry, something is wrong.. aborting", err)
@@ -154,7 +189,7 @@ export function Panel({
   useEffect(() => {
     if (!prompt.length) { return }

-    startImageGeneration({ prompt, width, height })
+    startImageGeneration({ prompt, width, height, revision })

     clearTimeout(timeoutRef.current)

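
The heart of this change is the cache-busting workaround: since use_cache: false doesn't reliably give a fresh image for an identical prompt, each retry ("revision") prepends a short random string of punctuation to the prompt. The same idea, isolated as a pure function (a sketch, not the component code; the component concatenates with a space even when the hack string is empty):

// Sketch: revision 0 leaves the prompt untouched (cacheable), later revisions
// prepend a few random punctuation characters so the Inference API sees a
// "new" prompt and renders again, without meaningfully changing the image.
function withCacheInvalidation(prompt: string, revision: number, nbMaxRevisions = 6): string {
  let hack = ""
  for (let i = 0; i < revision && revision < nbMaxRevisions; i++) {
    const j = Math.random()
    hack += j < 0.3 ? "_" : j < 0.6 ? "," : "-"
  }
  return hack ? `${hack} ${prompt}` : prompt
}

console.log(withCacheInvalidation("a cat on the moon", 0)) // "a cat on the moon"
console.log(withCacheInvalidation("a cat on the moon", 2)) // e.g. "_- a cat on the moon"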
src/app/main.tsx
CHANGED

@@ -10,6 +10,7 @@ import { Zoom } from "./interface/zoom"
 import { getStory } from "./queries/getStory"
 import { BottomBar } from "./interface/bottom-bar"
 import { Page } from "./interface/page"
+import { LLMResponse } from "@/types"

 export default function Main() {
   const [_isPending, startTransition] = useTransition()
@@ -41,42 +42,57 @@ export default function Main() {
      // I don't think we are going to need a rate limiter on the LLM part anymore
      const enableRateLimiter = false // `${process.env.NEXT_PUBLIC_ENABLE_RATE_LIMITER}` === "true"

-
-
-      const llmResponse = await getStory({ preset, prompt })
-      console.log("LLM responded:", llmResponse)
-
-      // we have to limit the size of the prompt, otherwise the rest of the style won't be followed
+      const nbPanels = 4

-
-      if (limitedPrompt.length !== prompt.length) {
-        console.log("Sorry folks, the prompt was cut to:", limitedPrompt)
-      }
+      let llmResponse: LLMResponse = []

-
+      try {
+        llmResponse = await getStory({ preset, prompt })
+        console.log("LLM responded:", llmResponse)

-
-
-
-
-
+      } catch (err) {
+        console.log("LLM step failed due to:", err)
+        console.log("we are now switching to a degraded mode, using 4 similar panels")
+
+        llmResponse = []
        for (let p = 0; p < nbPanels; p++) {
-
-
-
-
+          llmResponse.push({
+            panel: p,
+            instructions: `${prompt} ${".".repeat(p)}`,
+            caption: "(Sorry, LLM generation failed: using degraded mode)"
+          })
        }
-
-      setCaptions(newCaptions)
-      setPanels(newPanels)
-      } catch (err) {
        console.error(err)
-      } finally {
-        setTimeout(() => {
-          setGeneratingStory(false)
-          setWaitABitMore(false)
-        }, enableRateLimiter ? 12000 : 0)
      }
+
+      // we have to limit the size of the prompt, otherwise the rest of the style won't be followed
+
+      let limitedPrompt = prompt.slice(0, 77)
+      if (limitedPrompt.length !== prompt.length) {
+        console.log("Sorry folks, the prompt was cut to:", limitedPrompt)
+      }
+
+      const panelPromptPrefix = preset.imagePrompt(limitedPrompt).join(", ")
+
+      const newPanels: string[] = []
+      const newCaptions: string[] = []
+      setWaitABitMore(true)
+      console.log("Panel prompts for SDXL:")
+      for (let p = 0; p < nbPanels; p++) {
+        newCaptions.push(llmResponse[p]?.caption || "...")
+        const newPanel = [panelPromptPrefix, llmResponse[p]?.instructions || ""].map(chunk => chunk).join(", ")
+        newPanels.push(newPanel)
+        console.log(newPanel)
+      }
+
+      setCaptions(newCaptions)
+      setPanels(newPanels)
+
+      setTimeout(() => {
+        setGeneratingStory(false)
+        setWaitABitMore(false)
+      }, enableRateLimiter ? 12000 : 0)
+
    })
  }, [prompt, preset?.label]) // important: we need to react to preset changes too

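
The main.tsx rewrite wraps the LLM call in a try/catch and, on failure, falls back to a "degraded mode" that fabricates four near-identical panels from the raw user prompt (the growing run of dots keeps each panel prompt slightly different). Here is that fallback extracted as a standalone helper for clarity; it is a sketch, the element shape matches the diff but the helper itself does not exist in the repo:

// Sketch of the degraded-mode fallback introduced in main.tsx.
type PanelSpec = { panel: number; instructions: string; caption: string }

function degradedStory(prompt: string, nbPanels = 4): PanelSpec[] {
  const panels: PanelSpec[] = []
  for (let p = 0; p < nbPanels; p++) {
    panels.push({
      panel: p,
      // the trailing dots make each panel prompt unique
      instructions: `${prompt} ${".".repeat(p)}`,
      caption: "(Sorry, LLM generation failed: using degraded mode)",
    })
  }
  return panels
}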
src/app/queries/getStory.ts
CHANGED

@@ -25,9 +25,9 @@ export const getStory = async ({
       content: [
         `You are a comic book author specialized in ${preset.llmPrompt}`,
         `Please write detailed drawing instructions and a one-sentence short caption for the 4 panels of a new silent comic book page.`,
-        `Give your response as a JSON array like this: \`Array<{ panel: number; instructions: string; caption: string}>\`.`,
+        `Give your response as a VALID JSON array like this: \`Array<{ panel: number; instructions: string; caption: string}>\`.`,
         // `Give your response as Markdown bullet points.`,
-        `Be brief in your 4 instructions and captions, don't add your own comments. Be straight to the point, and never reply things like "Sure, I can.." etc.`
+        `Be brief in your 4 instructions and captions, don't add your own comments. Be straight to the point, and never reply things like "Sure, I can.." etc. Reply using valid JSON.`
       ].filter(item => item).join("\n")
     },
     {
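
Asking twice for "valid JSON" raises the odds of a parseable reply, but the caller still has to cope with prose or code fences around the array. One defensive way to extract it, shown as a sketch (the repository's own cleanup lives in predictWithHuggingFace.ts and may differ):

// Sketch: pull the first [...] block out of an LLM reply and parse it,
// returning an empty array (degraded mode upstream) when parsing fails.
function parseLLMPanels(raw: string): Array<{ panel: number; instructions: string; caption: string }> {
  const start = raw.indexOf("[")
  const end = raw.lastIndexOf("]")
  if (start === -1 || end <= start) { return [] }
  try {
    const parsed = JSON.parse(raw.slice(start, end + 1))
    return Array.isArray(parsed) ? parsed : []
  } catch {
    return []
  }
}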
src/app/queries/predictWithHuggingFace.ts
CHANGED

@@ -72,6 +72,11 @@ export async function predict(inputs: string): Promise<string> {
     }
   } catch (err) {
     console.error(`error during generation: ${err}`)
+
+    // a common issue with Llama-2 might be that the model receives too many requests
+    if (`${err}` === "Error: Model is overloaded") {
+      instructions = ``
+    }
   }

   // need to do some cleanup of the garbage the LLM might have gave us
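
Blanking instructions on "Model is overloaded" lets the caller distinguish an overloaded model from a partial answer. One way a caller could exploit that empty-string signal with a simple backoff (purely a usage sketch, not something this commit adds):

// Sketch: retry the prediction a few times when it comes back empty,
// waiting a bit longer between attempts.
async function predictWithRetry(
  predict: (inputs: string) => Promise<string>,
  inputs: string,
  maxAttempts = 3
): Promise<string> {
  for (let attempt = 0; attempt < maxAttempts; attempt++) {
    const result = await predict(inputs)
    if (result.length > 0) { return result }
    await new Promise(resolve => setTimeout(resolve, 2000 * (attempt + 1)))
  }
  return ""
}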