Spaces:
Running
Running
Commit
•
1cef24b
1
Parent(s):
0176e5b
clusterize musicgen for today's release
Browse files- package-lock.json +17 -17
- package.json +3 -3
- src/app/api/generators/music/generateMusicAsBase64.ts +0 -72
- src/app/api/v1/create/index.ts +25 -0
- src/app/api/v1/edit/entities/generateEntityPrompts.ts +1 -1
- src/app/api/v1/edit/music/cluster.ts +44 -0
- src/app/api/v1/edit/music/generateMusic.ts +33 -6
- src/app/api/v1/edit/music/generateMusicPrompt.ts +1 -1
- src/app/api/{generators → v1/edit}/music/generateMusicWithMusicgen.ts +17 -31
- src/app/api/v1/edit/music/systemPrompt.ts +5 -0
- src/app/api/{generators → v1/edit}/music/types.ts +0 -0
- src/app/api/v1/export/route.ts +1 -0
- src/app/api/v1/render/cluster.ts +1 -5
- src/app/api/v1/types.ts +6 -0
package-lock.json
CHANGED
@@ -8,9 +8,9 @@
|
|
8 |
"name": "@aitube/website",
|
9 |
"version": "0.0.0",
|
10 |
"dependencies": {
|
11 |
-
"@aitube/clap": "0.0.
|
12 |
-
"@aitube/client": "0.0.
|
13 |
-
"@aitube/engine": "0.0.
|
14 |
"@huggingface/hub": "0.12.3-oauth",
|
15 |
"@huggingface/inference": "^2.6.7",
|
16 |
"@jcoreio/async-throttle": "^1.6.0",
|
@@ -118,9 +118,9 @@
|
|
118 |
}
|
119 |
},
|
120 |
"node_modules/@aitube/clap": {
|
121 |
-
"version": "0.0.
|
122 |
-
"resolved": "https://registry.npmjs.org/@aitube/clap/-/clap-0.0.
|
123 |
-
"integrity": "sha512-
|
124 |
"dependencies": {
|
125 |
"pure-uuid": "^1.8.1",
|
126 |
"yaml": "^2.4.1"
|
@@ -130,22 +130,22 @@
|
|
130 |
}
|
131 |
},
|
132 |
"node_modules/@aitube/client": {
|
133 |
-
"version": "0.0.
|
134 |
-
"resolved": "https://registry.npmjs.org/@aitube/client/-/client-0.0.
|
135 |
-
"integrity": "sha512-
|
136 |
"dependencies": {
|
137 |
"query-string": "^9.0.0"
|
138 |
},
|
139 |
"peerDependencies": {
|
140 |
-
"@aitube/clap": "0.0.
|
141 |
}
|
142 |
},
|
143 |
"node_modules/@aitube/engine": {
|
144 |
-
"version": "0.0.
|
145 |
-
"resolved": "https://registry.npmjs.org/@aitube/engine/-/engine-0.0.
|
146 |
-
"integrity": "sha512-
|
147 |
"peerDependencies": {
|
148 |
-
"@aitube/clap": "0.0.
|
149 |
}
|
150 |
},
|
151 |
"node_modules/@alloc/quick-lru": {
|
@@ -6081,9 +6081,9 @@
|
|
6081 |
}
|
6082 |
},
|
6083 |
"node_modules/jose": {
|
6084 |
-
"version": "5.
|
6085 |
-
"resolved": "https://registry.npmjs.org/jose/-/jose-5.
|
6086 |
-
"integrity": "sha512-
|
6087 |
"funding": {
|
6088 |
"url": "https://github.com/sponsors/panva"
|
6089 |
}
|
|
|
8 |
"name": "@aitube/website",
|
9 |
"version": "0.0.0",
|
10 |
"dependencies": {
|
11 |
+
"@aitube/clap": "0.0.17",
|
12 |
+
"@aitube/client": "0.0.25",
|
13 |
+
"@aitube/engine": "0.0.7",
|
14 |
"@huggingface/hub": "0.12.3-oauth",
|
15 |
"@huggingface/inference": "^2.6.7",
|
16 |
"@jcoreio/async-throttle": "^1.6.0",
|
|
|
118 |
}
|
119 |
},
|
120 |
"node_modules/@aitube/clap": {
|
121 |
+
"version": "0.0.17",
|
122 |
+
"resolved": "https://registry.npmjs.org/@aitube/clap/-/clap-0.0.17.tgz",
|
123 |
+
"integrity": "sha512-g/jjePX2Hz9Eo4hk+rxd6FRwoy2Hx9sadGLgN9yWSm7dGHhr9B/DVv8eLjFabu7jgW0zvZZ1FHvlsNAby4Pr/Q==",
|
124 |
"dependencies": {
|
125 |
"pure-uuid": "^1.8.1",
|
126 |
"yaml": "^2.4.1"
|
|
|
130 |
}
|
131 |
},
|
132 |
"node_modules/@aitube/client": {
|
133 |
+
"version": "0.0.25",
|
134 |
+
"resolved": "https://registry.npmjs.org/@aitube/client/-/client-0.0.25.tgz",
|
135 |
+
"integrity": "sha512-gX5eJOKiigVY3xK1NcsStruUuWMQbj4o4XHTceZhUyKCgHDldC0Y15mvIWVabCtEW5FFebdmhH0EFeg+PBMCsg==",
|
136 |
"dependencies": {
|
137 |
"query-string": "^9.0.0"
|
138 |
},
|
139 |
"peerDependencies": {
|
140 |
+
"@aitube/clap": "0.0.17"
|
141 |
}
|
142 |
},
|
143 |
"node_modules/@aitube/engine": {
|
144 |
+
"version": "0.0.7",
|
145 |
+
"resolved": "https://registry.npmjs.org/@aitube/engine/-/engine-0.0.7.tgz",
|
146 |
+
"integrity": "sha512-Bu3MhQ7DelO+K/lU82MGSu1ksf3IUi3L9q1E2WfdBh4bkI0Kq8hH+OgUFKp38e5t1zhwjY2lJPNCbAJau5RcBA==",
|
147 |
"peerDependencies": {
|
148 |
+
"@aitube/clap": "0.0.17"
|
149 |
}
|
150 |
},
|
151 |
"node_modules/@alloc/quick-lru": {
|
|
|
6081 |
}
|
6082 |
},
|
6083 |
"node_modules/jose": {
|
6084 |
+
"version": "5.3.0",
|
6085 |
+
"resolved": "https://registry.npmjs.org/jose/-/jose-5.3.0.tgz",
|
6086 |
+
"integrity": "sha512-IChe9AtAE79ru084ow8jzkN2lNrG3Ntfiv65Cvj9uOCE2m5LNsdHG+9EbxWxAoWRF9TgDOqLN5jm08++owDVRg==",
|
6087 |
"funding": {
|
6088 |
"url": "https://github.com/sponsors/panva"
|
6089 |
}
|
package.json
CHANGED
@@ -10,9 +10,9 @@
|
|
10 |
"lint": "next lint"
|
11 |
},
|
12 |
"dependencies": {
|
13 |
-
"@aitube/clap": "0.0.
|
14 |
-
"@aitube/client": "0.0.
|
15 |
-
"@aitube/engine": "0.0.
|
16 |
"@huggingface/hub": "0.12.3-oauth",
|
17 |
"@huggingface/inference": "^2.6.7",
|
18 |
"@jcoreio/async-throttle": "^1.6.0",
|
|
|
10 |
"lint": "next lint"
|
11 |
},
|
12 |
"dependencies": {
|
13 |
+
"@aitube/clap": "0.0.17",
|
14 |
+
"@aitube/client": "0.0.25",
|
15 |
+
"@aitube/engine": "0.0.7",
|
16 |
"@huggingface/hub": "0.12.3-oauth",
|
17 |
"@huggingface/inference": "^2.6.7",
|
18 |
"@jcoreio/async-throttle": "^1.6.0",
|
src/app/api/generators/music/generateMusicAsBase64.ts
DELETED
@@ -1,72 +0,0 @@
|
|
1 |
-
import { sleep } from "@/lib/utils/sleep"
|
2 |
-
import { generateMusicWithMusicgen } from "./generateMusicWithMusicgen"
|
3 |
-
|
4 |
-
// apparently if we ask to generate like 4 minutes of audio, it crashes
|
5 |
-
const maxAudioDurationInSec = 120
|
6 |
-
|
7 |
-
// generate music
|
8 |
-
// this may generate multiple tracks (one after another)
|
9 |
-
// if the durationInSec parameter is larger than the max audio duration
|
10 |
-
export async function generateMusicAsBase64({
|
11 |
-
prompt,
|
12 |
-
durationInSec,
|
13 |
-
hd = false,
|
14 |
-
}: {
|
15 |
-
prompt: string
|
16 |
-
durationInSec: number
|
17 |
-
|
18 |
-
// use diffusion (increases quality, but requires more RAM)
|
19 |
-
hd?: boolean
|
20 |
-
}): Promise<string[]> {
|
21 |
-
|
22 |
-
const musicPrompt = prompt || ""
|
23 |
-
|
24 |
-
if (durationInSec < 1 || !musicPrompt) { return [] }
|
25 |
-
|
26 |
-
if (durationInSec > maxAudioDurationInSec) {
|
27 |
-
const halfTheDuration = Math.round(durationInSec / 2)
|
28 |
-
|
29 |
-
// no! we shouldn't generate them in parallel
|
30 |
-
// or at least, no now, because we only have ONE music server!
|
31 |
-
// const chunks = await Promise.all([
|
32 |
-
// generateMusic({ video, durationInSec: halfTheDuration })
|
33 |
-
//])
|
34 |
-
// return chunks.reduce((acc, tracks) => ([...acc, ...tracks]), [])
|
35 |
-
|
36 |
-
// instead, let's play it safe and generate them one after another
|
37 |
-
let chunks: string[] = []
|
38 |
-
const partA = await generateMusicAsBase64({ prompt, hd, durationInSec: halfTheDuration })
|
39 |
-
if (partA) { chunks = chunks.concat(partA) }
|
40 |
-
|
41 |
-
const partB = await generateMusicAsBase64({ prompt, hd, durationInSec: halfTheDuration })
|
42 |
-
if (partB) { chunks = chunks.concat(partB) }
|
43 |
-
|
44 |
-
return [...partA, ...partB]
|
45 |
-
}
|
46 |
-
|
47 |
-
let musicTracks: string[] = []
|
48 |
-
|
49 |
-
const musicParams = {
|
50 |
-
prompt: musicPrompt,
|
51 |
-
durationInSec,
|
52 |
-
hd,
|
53 |
-
}
|
54 |
-
try {
|
55 |
-
console.log(` |- generating ${durationInSec} seconds of music..`)
|
56 |
-
const musicTrack = await generateMusicWithMusicgen(musicParams)
|
57 |
-
if (!musicTrack?.length) { throw new Error("audio is too short to be valid!")}
|
58 |
-
musicTracks.push(musicTrack)
|
59 |
-
} catch (err) {
|
60 |
-
try {
|
61 |
-
await sleep(4000)
|
62 |
-
const musicTrack = await generateMusicWithMusicgen(musicParams)
|
63 |
-
if (!musicTrack?.length) { throw new Error("audio is too short to be valid!")}
|
64 |
-
musicTracks.push(musicTrack)
|
65 |
-
} catch (err2) {
|
66 |
-
console.error(` |- failed to generate the music (yes, we retried after a delay)`)
|
67 |
-
}
|
68 |
-
}
|
69 |
-
|
70 |
-
|
71 |
-
return musicTracks
|
72 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
src/app/api/v1/create/index.ts
CHANGED
@@ -131,49 +131,59 @@ Output: `
|
|
131 |
clap.segments.push(newSegment({
|
132 |
track: 0,
|
133 |
startTimeInMs: currentElapsedTimeInMs,
|
|
|
134 |
assetDurationInMs: defaultSegmentDurationInMs,
|
135 |
category: ClapSegmentCategory.VIDEO,
|
136 |
prompt: image,
|
137 |
outputType: ClapOutputType.VIDEO,
|
|
|
138 |
}))
|
139 |
|
140 |
clap.segments.push(newSegment({
|
141 |
track: 1,
|
142 |
startTimeInMs: currentElapsedTimeInMs,
|
|
|
143 |
assetDurationInMs: defaultSegmentDurationInMs,
|
144 |
category: ClapSegmentCategory.STORYBOARD,
|
145 |
prompt: image,
|
146 |
outputType: ClapOutputType.IMAGE,
|
|
|
147 |
}))
|
148 |
|
149 |
clap.segments.push(newSegment({
|
150 |
track: 2,
|
151 |
startTimeInMs: currentElapsedTimeInMs,
|
|
|
152 |
assetDurationInMs: defaultSegmentDurationInMs,
|
153 |
category: ClapSegmentCategory.INTERFACE,
|
154 |
prompt: comment,
|
155 |
// assetUrl: `data:text/plain;base64,${btoa(comment)}`,
|
156 |
assetUrl: comment,
|
157 |
outputType: ClapOutputType.TEXT,
|
|
|
158 |
}))
|
159 |
|
160 |
clap.segments.push(newSegment({
|
161 |
track: 3,
|
162 |
startTimeInMs: currentElapsedTimeInMs,
|
|
|
163 |
assetDurationInMs: defaultSegmentDurationInMs,
|
164 |
category: ClapSegmentCategory.DIALOGUE,
|
165 |
prompt: voice,
|
166 |
outputType: ClapOutputType.AUDIO,
|
|
|
167 |
}))
|
168 |
|
169 |
// the presence of a camera is mandatory
|
170 |
clap.segments.push(newSegment({
|
171 |
track: 4,
|
172 |
startTimeInMs: currentElapsedTimeInMs,
|
|
|
173 |
assetDurationInMs: defaultSegmentDurationInMs,
|
174 |
category: ClapSegmentCategory.CAMERA,
|
175 |
prompt: "video",
|
176 |
outputType: ClapOutputType.TEXT,
|
|
|
177 |
}))
|
178 |
|
179 |
currentElapsedTimeInMs += defaultSegmentDurationInMs
|
@@ -187,6 +197,21 @@ Output: `
|
|
187 |
prompt,
|
188 |
latentStory: await clapToLatentStory(clap)
|
189 |
})
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
190 |
} catch (err) {
|
191 |
console.error(`[api/v1/create] failed to generate music prompts`)
|
192 |
musicPrompts.push("lofi hiphop loop")
|
|
|
131 |
clap.segments.push(newSegment({
|
132 |
track: 0,
|
133 |
startTimeInMs: currentElapsedTimeInMs,
|
134 |
+
endTimeInMs: currentElapsedTimeInMs + defaultSegmentDurationInMs,
|
135 |
assetDurationInMs: defaultSegmentDurationInMs,
|
136 |
category: ClapSegmentCategory.VIDEO,
|
137 |
prompt: image,
|
138 |
outputType: ClapOutputType.VIDEO,
|
139 |
+
status: "to_generate",
|
140 |
}))
|
141 |
|
142 |
clap.segments.push(newSegment({
|
143 |
track: 1,
|
144 |
startTimeInMs: currentElapsedTimeInMs,
|
145 |
+
endTimeInMs: currentElapsedTimeInMs + defaultSegmentDurationInMs,
|
146 |
assetDurationInMs: defaultSegmentDurationInMs,
|
147 |
category: ClapSegmentCategory.STORYBOARD,
|
148 |
prompt: image,
|
149 |
outputType: ClapOutputType.IMAGE,
|
150 |
+
status: "to_generate",
|
151 |
}))
|
152 |
|
153 |
clap.segments.push(newSegment({
|
154 |
track: 2,
|
155 |
startTimeInMs: currentElapsedTimeInMs,
|
156 |
+
endTimeInMs: currentElapsedTimeInMs + defaultSegmentDurationInMs,
|
157 |
assetDurationInMs: defaultSegmentDurationInMs,
|
158 |
category: ClapSegmentCategory.INTERFACE,
|
159 |
prompt: comment,
|
160 |
// assetUrl: `data:text/plain;base64,${btoa(comment)}`,
|
161 |
assetUrl: comment,
|
162 |
outputType: ClapOutputType.TEXT,
|
163 |
+
status: "to_generate",
|
164 |
}))
|
165 |
|
166 |
clap.segments.push(newSegment({
|
167 |
track: 3,
|
168 |
startTimeInMs: currentElapsedTimeInMs,
|
169 |
+
endTimeInMs: currentElapsedTimeInMs + defaultSegmentDurationInMs,
|
170 |
assetDurationInMs: defaultSegmentDurationInMs,
|
171 |
category: ClapSegmentCategory.DIALOGUE,
|
172 |
prompt: voice,
|
173 |
outputType: ClapOutputType.AUDIO,
|
174 |
+
status: "to_generate",
|
175 |
}))
|
176 |
|
177 |
// the presence of a camera is mandatory
|
178 |
clap.segments.push(newSegment({
|
179 |
track: 4,
|
180 |
startTimeInMs: currentElapsedTimeInMs,
|
181 |
+
endTimeInMs: currentElapsedTimeInMs + defaultSegmentDurationInMs,
|
182 |
assetDurationInMs: defaultSegmentDurationInMs,
|
183 |
category: ClapSegmentCategory.CAMERA,
|
184 |
prompt: "video",
|
185 |
outputType: ClapOutputType.TEXT,
|
186 |
+
status: "to_generate",
|
187 |
}))
|
188 |
|
189 |
currentElapsedTimeInMs += defaultSegmentDurationInMs
|
|
|
197 |
prompt,
|
198 |
latentStory: await clapToLatentStory(clap)
|
199 |
})
|
200 |
+
const musicPrompt = musicPrompts.at(0)
|
201 |
+
if (!musicPrompt) { throw new Error(`not enough music prompts`) }
|
202 |
+
|
203 |
+
console.log("musicPrompt:", musicPrompt)
|
204 |
+
|
205 |
+
clap.segments.push(newSegment({
|
206 |
+
track: 5,
|
207 |
+
startTimeInMs: 0,
|
208 |
+
endTimeInMs: currentElapsedTimeInMs,
|
209 |
+
assetDurationInMs: currentElapsedTimeInMs,
|
210 |
+
category: ClapSegmentCategory.MUSIC,
|
211 |
+
prompt: musicPrompt,
|
212 |
+
outputType: ClapOutputType.AUDIO,
|
213 |
+
status: "to_generate",
|
214 |
+
}))
|
215 |
} catch (err) {
|
216 |
console.error(`[api/v1/create] failed to generate music prompts`)
|
217 |
musicPrompts.push("lofi hiphop loop")
|
src/app/api/v1/edit/entities/generateEntityPrompts.ts
CHANGED
@@ -37,7 +37,7 @@ export async function generateEntityPrompts({
|
|
37 |
|
38 |
if (!latentStory.length) { throw new Error(`please provide a story`) }
|
39 |
|
40 |
-
console.log("generateEntityPrompts(): latentStory:", latentStory)
|
41 |
|
42 |
const userPrompt = `The input story is about: ${prompt}.
|
43 |
|
|
|
37 |
|
38 |
if (!latentStory.length) { throw new Error(`please provide a story`) }
|
39 |
|
40 |
+
// console.log("generateEntityPrompts(): latentStory:", latentStory)
|
41 |
|
42 |
const userPrompt = `The input story is about: ${prompt}.
|
43 |
|
src/app/api/v1/edit/music/cluster.ts
ADDED
@@ -0,0 +1,44 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import { sleep } from "@/lib/utils/sleep"
|
2 |
+
import { ClusterMachine } from "../../types"
|
3 |
+
|
4 |
+
/** Number of musicgen machines that make up the cluster. */
export const nbClusterMachines = 3

// The machines must be up and running for the cluster to work! Settings pages:
//
// https://huggingface.co/spaces/jbilcke-hf/ai-tube-model-musicgen-1/settings
// https://huggingface.co/spaces/jbilcke-hf/ai-tube-model-musicgen-2/settings
// https://huggingface.co/spaces/jbilcke-hf/ai-tube-model-musicgen-3/settings

/**
 * Global (module-level, mutable) cluster state.
 *
 * Holds one entry per machine, each of them starting out idle.
 * Note that the machine `id` is zero-based, while the number used
 * in the machine hostname is one-based.
 */
export const clusterMachines: ClusterMachine[] = Array.from(
  { length: nbClusterMachines },
  (_, index): ClusterMachine => ({
    id: index,
    url: `https://jbilcke-hf-ai-tube-model-musicgen-${index + 1}.hf.space`,
    busy: false
  })
)
|
21 |
+
|
22 |
+
/**
 * Reserve the first idle machine of the cluster, polling until one frees up.
 *
 * The returned machine is flagged as `busy` in the global cluster state before
 * being handed out: it is up to the caller to set `busy` back to `false`
 * once it is done with the machine.
 *
 * @param maxWaitTimeInMs maximum time to spend waiting for an idle machine (defaults to 10 seconds)
 * @returns the machine that has been reserved for the caller
 * @throws Error if all the machines are still busy after `maxWaitTimeInMs`
 */
export async function getClusterMachine(maxWaitTimeInMs: number = 10000): Promise<ClusterMachine> {
  const intervalInMs = 500
  let timeSpentWaitingInMs = 0

  let clusterMachine: ClusterMachine | undefined = clusterMachines.find(m => !m.busy)

  while (!clusterMachine && timeSpentWaitingInMs < maxWaitTimeInMs) {
    await sleep(intervalInMs)

    // important: keep track of the elapsed time, otherwise the timeout
    // never triggers and we poll forever when all the machines are busy
    timeSpentWaitingInMs += intervalInMs

    clusterMachine = clusterMachines.find(m => !m.busy)
  }

  if (!clusterMachine) {
    // the timeout is expressed in milliseconds, but reported in seconds
    throw new Error(`failed to find a cluster machine within ${maxWaitTimeInMs / 1000} seconds`)
  }

  // change the global state
  clusterMachine.busy = true

  return clusterMachine
}
|
43 |
+
|
44 |
+
/**
 * Secret token read from the MICROSERVICE_API_SECRET_TOKEN environment variable.
 * Falls back to an empty string when the variable is unset or empty.
 */
export const token: string = String(process.env.MICROSERVICE_API_SECRET_TOKEN || "")
|
src/app/api/v1/edit/music/generateMusic.ts
CHANGED
@@ -13,7 +13,7 @@ import { getSpeechBackgroundAudioPrompt } from "@aitube/engine"
|
|
13 |
|
14 |
import { generateSpeechWithParlerTTS } from "@/app/api/generators/speech/generateVoiceWithParlerTTS"
|
15 |
import { getMediaInfo } from "@/app/api/utils/getMediaInfo"
|
16 |
-
import { generateMusicWithMusicgen } from "@/app/api/
|
17 |
|
18 |
export async function generateMusic({
|
19 |
musicSegment,
|
@@ -32,6 +32,14 @@ export async function generateMusic({
|
|
32 |
console.log(`generateMusic(): music segment is empty, so skipping music generation.`)
|
33 |
return
|
34 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
35 |
// for now we do something very basic
|
36 |
const prompt = musicSegment.prompt
|
37 |
if (!prompt) {
|
@@ -39,28 +47,47 @@ export async function generateMusic({
|
|
39 |
return
|
40 |
}
|
41 |
|
|
|
|
|
|
|
|
|
42 |
const assetUrl = await generateMusicWithMusicgen({
|
43 |
prompt,
|
44 |
-
durationInSec
|
45 |
hd: false,
|
46 |
debug: true,
|
47 |
neverThrow: true,
|
48 |
})
|
49 |
|
|
|
50 |
if (!assetUrl || assetUrl?.length < 30) {
|
51 |
-
console.log(`generateMusic(): generated assetUrl is empty, so music generation failed.`)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
52 |
return
|
53 |
}
|
54 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
55 |
if (mode !== ClapCompletionMode.FULL) {
|
56 |
console.log(`generateMusic(): adding music to a new clap file`)
|
57 |
newerClap.segments.push(newSegment({
|
58 |
...musicSegment,
|
59 |
-
|
60 |
}))
|
61 |
} else {
|
62 |
console.log(`generateMusic(): overwriting the music inside the existing clap file`)
|
63 |
-
// this will
|
64 |
-
musicSegment
|
65 |
}
|
66 |
}
|
|
|
13 |
|
14 |
import { generateSpeechWithParlerTTS } from "@/app/api/generators/speech/generateVoiceWithParlerTTS"
|
15 |
import { getMediaInfo } from "@/app/api/utils/getMediaInfo"
|
16 |
+
import { generateMusicWithMusicgen } from "@/app/api/v1/edit/music/generateMusicWithMusicgen"
|
17 |
|
18 |
export async function generateMusic({
|
19 |
musicSegment,
|
|
|
32 |
console.log(`generateMusic(): music segment is empty, so skipping music generation.`)
|
33 |
return
|
34 |
}
|
35 |
+
|
36 |
+
// for now we do something very basic
|
37 |
+
|
38 |
+
if (musicSegment.status === "completed") {
|
39 |
+
console.log(`generateMusic(): music segment is already generated, skipping doing it twice.`)
|
40 |
+
return
|
41 |
+
}
|
42 |
+
|
43 |
// for now we do something very basic
|
44 |
const prompt = musicSegment.prompt
|
45 |
if (!prompt) {
|
|
|
47 |
return
|
48 |
}
|
49 |
|
50 |
+
const durationInSec = 10 // musicSegment.assetDurationInMs / 1000
|
51 |
+
|
52 |
+
console.log(`generateMusic(): generating a music with:\n duration: ${durationInSec} sec\n prompt: ${prompt}`)
|
53 |
+
|
54 |
const assetUrl = await generateMusicWithMusicgen({
|
55 |
prompt,
|
56 |
+
durationInSec,
|
57 |
hd: false,
|
58 |
debug: true,
|
59 |
neverThrow: true,
|
60 |
})
|
61 |
|
62 |
+
|
63 |
if (!assetUrl || assetUrl?.length < 30) {
|
64 |
+
console.log(`generateMusic(): the generated assetUrl is empty, so music generation failed.`)
|
65 |
+
return
|
66 |
+
}
|
67 |
+
|
68 |
+
const { durationInMs, hasAudio } = await getMediaInfo(assetUrl)
|
69 |
+
|
70 |
+
if (!hasAudio) {
|
71 |
+
console.log(`generateMusic(): the generated music waveform is silent, so music generation failed.`)
|
72 |
return
|
73 |
}
|
74 |
|
75 |
+
const newProperties: Partial<ClapSegment> = {
|
76 |
+
assetUrl,
|
77 |
+
assetDurationInMs: durationInMs,
|
78 |
+
outputGain: 1.0,
|
79 |
+
status: "completed"
|
80 |
+
}
|
81 |
+
|
82 |
if (mode !== ClapCompletionMode.FULL) {
|
83 |
console.log(`generateMusic(): adding music to a new clap file`)
|
84 |
newerClap.segments.push(newSegment({
|
85 |
...musicSegment,
|
86 |
+
...newProperties,
|
87 |
}))
|
88 |
} else {
|
89 |
console.log(`generateMusic(): overwriting the music inside the existing clap file`)
|
90 |
+
// this will update the existing clap (normally)
|
91 |
+
Object.assign(musicSegment, newProperties)
|
92 |
}
|
93 |
}
|
src/app/api/v1/edit/music/generateMusicPrompt.ts
CHANGED
@@ -26,7 +26,7 @@ export async function generateMusicPrompts({
|
|
26 |
|
27 |
if (!latentStory.length) { throw new Error(`please provide a story`) }
|
28 |
|
29 |
-
console.log("generateMusicPrompts(): latentStory:", latentStory)
|
30 |
|
31 |
const userPrompt = `The input story is about: ${prompt}.
|
32 |
|
|
|
26 |
|
27 |
if (!latentStory.length) { throw new Error(`please provide a story`) }
|
28 |
|
29 |
+
// console.log("generateMusicPrompts(): latentStory:", latentStory)
|
30 |
|
31 |
const userPrompt = `The input story is about: ${prompt}.
|
32 |
|
src/app/api/{generators → v1/edit}/music/generateMusicWithMusicgen.ts
RENAMED
@@ -1,7 +1,6 @@
|
|
1 |
import { addBase64Header } from "@/lib/data/addBase64Header"
|
2 |
-
|
3 |
-
import { tryApiCalls } from "../../utils/tryApiCall"
|
4 |
import { MusicGenerationParams } from "./types"
|
|
|
5 |
|
6 |
const gradioSpaceApiUrl = `https://jbilcke-hf-ai-tube-model-musicgen.hf.space`
|
7 |
const huggingFaceSpace = "jbilcke-hf/ai-tube-model-musicgen"
|
@@ -18,16 +17,21 @@ export async function generateMusicWithMusicgen({
|
|
18 |
neverThrow = false,
|
19 |
}: MusicGenerationParams): Promise<string> {
|
20 |
|
21 |
-
|
|
|
|
|
|
|
|
|
22 |
|
23 |
-
|
|
|
24 |
method: "POST",
|
25 |
headers: {
|
26 |
"Content-Type": "application/json",
|
27 |
// Authorization: `Bearer ${token}`,
|
28 |
},
|
29 |
body: JSON.stringify({
|
30 |
-
fn_index:
|
31 |
data: [
|
32 |
microserviceApiKey, // string in 'Secret Token' Textbox component
|
33 |
"facebook/musicgen-stereo-large", // string in 'Model' Radio component
|
@@ -55,8 +59,7 @@ export async function generateMusicWithMusicgen({
|
|
55 |
if (res.status !== 200) {
|
56 |
throw new Error('Failed to fetch data')
|
57 |
}
|
58 |
-
|
59 |
-
|
60 |
const { data } = await res.json()
|
61 |
|
62 |
// console.log("data:", data)
|
@@ -66,34 +69,17 @@ export async function generateMusicWithMusicgen({
|
|
66 |
throw new Error(`Failed to fetch data (status: ${res.status})`)
|
67 |
}
|
68 |
// console.log("data:", data.slice(0, 50))
|
69 |
-
|
70 |
if (!data[0]) {
|
71 |
throw new Error(`the returned music was empty`)
|
72 |
}
|
73 |
-
|
74 |
-
console.log("data:", data[0].slice(0, 60))
|
75 |
-
return addBase64Header(data[0] as string, "mp3")
|
76 |
-
}
|
77 |
-
|
78 |
-
try {
|
79 |
-
if (!prompt?.length) {
|
80 |
-
throw new Error(`prompt is too short!`)
|
81 |
-
}
|
82 |
|
83 |
-
|
84 |
-
|
85 |
-
huggingFaceSpace,
|
86 |
-
debug,
|
87 |
-
failureMessage: "failed to generate the music"
|
88 |
-
})
|
89 |
-
|
90 |
-
return result
|
91 |
} catch (err) {
|
92 |
-
|
93 |
-
|
94 |
-
|
95 |
-
|
96 |
-
throw err
|
97 |
-
}
|
98 |
}
|
99 |
}
|
|
|
1 |
import { addBase64Header } from "@/lib/data/addBase64Header"
|
|
|
|
|
2 |
import { MusicGenerationParams } from "./types"
|
3 |
+
import { getClusterMachine } from "./cluster"
|
4 |
|
5 |
const gradioSpaceApiUrl = `https://jbilcke-hf-ai-tube-model-musicgen.hf.space`
|
6 |
const huggingFaceSpace = "jbilcke-hf/ai-tube-model-musicgen"
|
|
|
17 |
neverThrow = false,
|
18 |
}: MusicGenerationParams): Promise<string> {
|
19 |
|
20 |
+
if (!prompt?.length) {
|
21 |
+
throw new Error(`prompt is too short!`)
|
22 |
+
}
|
23 |
+
|
24 |
+
const machine = await getClusterMachine()
|
25 |
|
26 |
+
try {
|
27 |
+
const res = await fetch(machine.url + (machine.url.endsWith("/") ? "" : "/") + "api/predict", {
|
28 |
method: "POST",
|
29 |
headers: {
|
30 |
"Content-Type": "application/json",
|
31 |
// Authorization: `Bearer ${token}`,
|
32 |
},
|
33 |
body: JSON.stringify({
|
34 |
+
fn_index: 1, // <- important!
|
35 |
data: [
|
36 |
microserviceApiKey, // string in 'Secret Token' Textbox component
|
37 |
"facebook/musicgen-stereo-large", // string in 'Model' Radio component
|
|
|
59 |
if (res.status !== 200) {
|
60 |
throw new Error('Failed to fetch data')
|
61 |
}
|
62 |
+
|
|
|
63 |
const { data } = await res.json()
|
64 |
|
65 |
// console.log("data:", data)
|
|
|
69 |
throw new Error(`Failed to fetch data (status: ${res.status})`)
|
70 |
}
|
71 |
// console.log("data:", data.slice(0, 50))
|
72 |
+
|
73 |
if (!data[0]) {
|
74 |
throw new Error(`the returned music was empty`)
|
75 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
76 |
|
77 |
+
// console.log("data:", data[0].slice(0, 60))
|
78 |
+
return addBase64Header(data[0] as string, "mp3")
|
|
|
|
|
|
|
|
|
|
|
|
|
79 |
} catch (err) {
|
80 |
+
throw err
|
81 |
+
} finally {
|
82 |
+
// important: we need to free up the machine!
|
83 |
+
machine.busy = false
|
|
|
|
|
84 |
}
|
85 |
}
|
src/app/api/v1/edit/music/systemPrompt.ts
CHANGED
@@ -3,11 +3,16 @@ You are a backend API engine, designed to generate music prompt output from a st
|
|
3 |
|
4 |
## Prompting guidelines
|
5 |
|
|
|
|
|
|
|
6 |
To create a music prompt, you need to combine styles with moods, plus a few other things.
|
|
|
7 |
1. Please choose a base style among those categories: "Hip Hop and Rap track", "Classic track", "Jazz track", "Electronic and dance track", "Rock'n'Roll track", "Funk track", "Dubstep track", "Afrobeats", "Orchestral track", "Pop track", "Reggae track", "Metal track", "Country track", "Blues track", "Soul track", "R'n'B track", "Disco track", "Trap track", "Ambient track", "Lofi track", "Chill track", etc.
|
8 |
2. Then choose a vibe: "with an happy vibe", "with a sad vibe", "with an angry vibe", "with a chill vibe", "with a romantic vibe", "with an epic vibe", "with an energetic vibe", "with a dreamy vibe", "with a mysterious vibe", "with a relaxing vibe", "with a dark vibe", "with an upbeat vibe", "with a motivational vibe", "with an inspiring vibe", "with a nostalgic vibe", "with a groovy vibe", "with a cheerful vibe", "with a melancholic vibe", "with a hopeful vibe", etc.
|
9 |
3. build up a coherent description eg.: "80s pop track with bassy drums and synth", "90s rock song with loud guitars and heavy drums", "a light and cheerly EDM track, with syncopated drums, aery pads, and strong emotions bpm: 130", "A cheerful country song with acoustic guitars", "lofi slow bpm electro chill with organic samples" etc.
|
10 |
|
|
|
11 |
## Example of input/output
|
12 |
|
13 |
Given the following input story, provided as YAML:
|
|
|
3 |
|
4 |
## Prompting guidelines
|
5 |
|
6 |
+
Be concise! don't say things like "The track should have an cheerful vibe.." instead just add "cheerful vibe".
|
7 |
+
Avoid concepts that don't translate well to music (eg use "mysterious" instead of "investigative")
|
8 |
+
|
9 |
To create a music prompt, you need to combine styles with moods, plus a few other things.
|
10 |
+
|
11 |
1. Please choose a base style among those categories: "Hip Hop and Rap track", "Classic track", "Jazz track", "Electronic and dance track", "Rock'n'Roll track", "Funk track", "Dubstep track", "Afrobeats", "Orchestral track", "Pop track", "Reggae track", "Metal track", "Country track", "Blues track", "Soul track", "R'n'B track", "Disco track", "Trap track", "Ambient track", "Lofi track", "Chill track", etc.
|
12 |
2. Then choose a vibe: "with an happy vibe", "with a sad vibe", "with an angry vibe", "with a chill vibe", "with a romantic vibe", "with an epic vibe", "with an energetic vibe", "with a dreamy vibe", "with a mysterious vibe", "with a relaxing vibe", "with a dark vibe", "with an upbeat vibe", "with a motivational vibe", "with an inspiring vibe", "with a nostalgic vibe", "with a groovy vibe", "with a cheerful vibe", "with a melancholic vibe", "with a hopeful vibe", etc.
|
13 |
3. build up a coherent description eg.: "80s pop track with bassy drums and synth", "90s rock song with loud guitars and heavy drums", "a light and cheerly EDM track, with syncopated drums, aery pads, and strong emotions bpm: 130", "A cheerful country song with acoustic guitars", "lofi slow bpm electro chill with organic samples" etc.
|
14 |
|
15 |
+
|
16 |
## Example of input/output
|
17 |
|
18 |
Given the following input story, provided as YAML:
|
src/app/api/{generators → v1/edit}/music/types.ts
RENAMED
File without changes
|
src/app/api/v1/export/route.ts
CHANGED
@@ -23,6 +23,7 @@ export async function POST(req: NextRequest, res: NextResponse) {
|
|
23 |
// console.log("[api/v1/export] sending blob to ai-tube-clap-exporter.hf.space")
|
24 |
|
25 |
const result = await fetch(
|
|
|
26 |
`https://jbilcke-hf-ai-tube-clap-exporter.hf.space?f=${format}`,
|
27 |
{ method: "POST", body: await req.blob() }
|
28 |
)
|
|
|
23 |
// console.log("[api/v1/export] sending blob to ai-tube-clap-exporter.hf.space")
|
24 |
|
25 |
const result = await fetch(
|
26 |
+
// `http://localhost:7860?f=${format}`,
|
27 |
`https://jbilcke-hf-ai-tube-clap-exporter.hf.space?f=${format}`,
|
28 |
{ method: "POST", body: await req.blob() }
|
29 |
)
|
src/app/api/v1/render/cluster.ts
CHANGED
@@ -1,10 +1,6 @@
|
|
1 |
import { sleep } from "@/lib/utils/sleep"
|
|
|
2 |
|
3 |
-
export type ClusterMachine = {
|
4 |
-
id: number
|
5 |
-
url: string
|
6 |
-
busy: boolean
|
7 |
-
}
|
8 |
|
9 |
export const nbClusterMachines = 3
|
10 |
// make sure the machines are running!!
|
|
|
1 |
import { sleep } from "@/lib/utils/sleep"
|
2 |
+
import { ClusterMachine } from "../types"
|
3 |
|
|
|
|
|
|
|
|
|
|
|
4 |
|
5 |
export const nbClusterMachines = 3
|
6 |
// make sure the machines are running!!
|
src/app/api/v1/types.ts
CHANGED
@@ -1,5 +1,11 @@
|
|
1 |
import { ClapSegmentCategory } from "@aitube/clap"
|
2 |
|
|
|
|
|
|
|
|
|
|
|
|
|
3 |
export type LatentEntity = {
|
4 |
name: string
|
5 |
category: ClapSegmentCategory
|
|
|
1 |
import { ClapSegmentCategory } from "@aitube/clap"
|
2 |
|
3 |
+
/**
 * Describes one machine of a cluster.
 */
export type ClusterMachine = {
  /** numeric identifier of the machine */
  id: number

  /** base URL of the machine */
  url: string

  /** true while the machine is reserved, false when it is available */
  busy: boolean
}
|
8 |
+
|
9 |
export type LatentEntity = {
|
10 |
name: string
|
11 |
category: ClapSegmentCategory
|