File size: 32,964 Bytes
8b3be6a
5cfd8a9
917b084
 
5cfd8a9
8b3be6a
 
917b084
5cfd8a9
8b3be6a
 
5cfd8a9
917b084
 
 
 
 
 
 
 
 
 
 
 
5cfd8a9
 
 
 
 
 
 
 
 
917b084
5cfd8a9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
917b084
 
 
5cfd8a9
 
 
 
 
 
 
 
 
8b3be6a
 
 
 
 
5cfd8a9
917b084
8b3be6a
5cfd8a9
 
 
 
 
 
 
 
8b3be6a
 
 
5cfd8a9
 
917b084
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5cfd8a9
917b084
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5cfd8a9
 
917b084
 
5cfd8a9
 
 
917b084
5cfd8a9
917b084
5cfd8a9
 
 
917b084
 
 
5cfd8a9
917b084
 
 
 
 
 
 
5cfd8a9
917b084
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8b3be6a
5cfd8a9
 
917b084
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5cfd8a9
 
917b084
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5cfd8a9
 
917b084
 
5cfd8a9
 
 
 
 
 
 
917b084
5cfd8a9
917b084
5cfd8a9
 
917b084
 
 
 
 
 
 
 
 
 
5cfd8a9
 
917b084
 
 
 
5cfd8a9
 
917b084
 
 
5cfd8a9
 
917b084
 
 
 
5cfd8a9
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
from typing import List, Tuple, Dict
import gradio as gr
import os
import json

from utils.song_utils import generate_song_seed, get_starting_messages, messages_to_history, update_song_details, get_sections
from chat import model_chat
from gradio_modal import Modal

History = List[Tuple[str, str]] # a type: pairs of (query, response), where query is user input and response is system output
Messages = List[Dict[str, str]] # a type: list of messages with role and content

css = """
#audio-group {
    max-height: 800px;
    overflow-y: scroll;
}
"""

# The chat input and Send button are created here, outside the Blocks context,
# because they are referenced while the Generation tab is being built (as event
# triggers and in `typical_responses`) and are only placed in the layout later
# through their .render() calls.
textbox = gr.Textbox(lines=1, label='Send a message', show_label=False, placeholder='Send a message', scale=4, visible=True)
submit = gr.Button("Send", scale=2, visible=True)


with gr.Blocks(css=css) as demo:
    gr.Markdown("""<center><font size=8>AI Songwriter (alpha)</center>""")
    gr.Markdown("""<center><font size=4>Turning your stories into musical poetry. 2024 MIT Senior Thesis.</center>""")

    with gr.Tabs() as tabs:
        with gr.TabItem("Ideation", id=0): #index is 0
            gr.Markdown("""<center><font size=6>Let's write a song!</font></center>""")
            gr.Markdown("""<center><font size=4>First, let's try to find an interesting concept. Fill out the fields below and generate a song seed.</font></center>""")
            gr.Markdown("""<center><font size=3>If you're stuck, check out <a href="https://onestopforwriters.com/emotions" target="_blank">here</a>.</font></center>""")
            with gr.Row():
                feeling_input = gr.Textbox(label="What's an emotion(s) that you've been feeling a lot recently? And why?", placeholder='Enter your emotions', scale=2)
                # audio_input = gr.Audio(sources=["upload"], type="numpy", label="Instrumental",
                #                 interactive=True, elem_id="instrumental-input")
                
            generate_seed_button = gr.Button("STEP 1: Generate Song Seed")
            concept_desc = gr.Markdown("""<center><font size=4>Here it is! Hit 'Approve' to confirm this concept. Edit the concept directly or hit 'Try Again' to get another suggestion.</font></center>""", visible=False)
            with gr.Row(visible=False) as concept_row:
                instrumental_output = gr.TextArea(label="Suggested Song Concept", value="", max_lines=3, scale=2)
                with gr.Column():
                    approve_button = gr.Button("Approve")
                    try_again_button = gr.Button("Try Again")
            with gr.Row():
                with gr.Accordion("Generated Song Details", open=False) as accordion:
                    with gr.Row():
                        title_input = gr.Textbox(label='Title', placeholder='Enter a song title')
                        genre_input = gr.Textbox(label='Genre', placeholder='Enter a genre')
                        blurb_input = gr.Textbox(label='Blurb', placeholder='Enter a one-sentence blurb')
                        songwriter_style = gr.Dropdown(label='Songwriter Style', value = "GPT 4o", choices=["GPT 4o", "d4vd (Indie Rock Ballad - Male)", "Lizzy McAlpine (Indie Pop Folk - Female)", "Phoebe Bridgers (Pop Sad Rock - Female)", "Daniel Caesar (R&B/Soul - Male)"], interactive=True)
                
                        instrumental_textbox = gr.TextArea(label="Song Structure", value="Verse 1: 4 measures\nChorus 1: 8 measures\nVerse 2: 8 measures\nChorus 2: 8 measures\nVerse 3: 8 measures\nChorus 3: 8 measures", visible=False, interactive=True, max_lines=3)
                    gr.Markdown("""<center><font size=4>Edit these to your liking and hit 'Continue to Next Step' to start creating!</font></center>""")
                
                def open_accordion(x):
                    """Return an update that expands the "Generated Song Details" accordion.

                    `x` is the value of the triggering component and is ignored.
                    """
                    expanded_accordion = gr.Accordion("Generated Song Details", open=True)
                    return expanded_accordion
                approve_button.click(open_accordion, inputs=[approve_button], outputs=[accordion])
                  
            with gr.Row():
                continue_btn = gr.Button("Continue to Next Step", interactive=False)

            
            def clean_song_seed(song_seed):
                """Strip the "Suggested Song Concept:" label from a generated seed.

                When the label is present, the text following its first occurrence (up to
                any later occurrence) is returned with surrounding whitespace removed.
                Seeds without the label are returned unchanged.
                """
                marker = "Suggested Song Concept:"
                if marker not in song_seed:
                    return song_seed
                after_marker = song_seed.split(marker)[1]
                return after_marker.strip()
            generate_seed_button.click(generate_song_seed, inputs=[feeling_input], outputs=[instrumental_output]).then(clean_song_seed, inputs=[instrumental_output], outputs=[instrumental_output])
            feeling_input.submit(generate_song_seed, inputs=[feeling_input], outputs=[instrumental_output]).then(clean_song_seed, inputs=[instrumental_output], outputs=[instrumental_output])
            
            def make_row_visible(x):
                """Return updates that reveal the concept row and its instruction text.

                `x` is the value of the triggering component and is ignored. The result is a
                pair: a visible `gr.Row` update and a visible `gr.Markdown` update.
                """
                instructions = """<center><font size=4>Here it is! Hit 'Approve' to confirm this concept. Edit the concept directly or hit 'Try Again' to get another suggestion.</font></center>"""
                row_update = gr.Row(visible=True)
                desc_update = gr.Markdown(instructions, visible=True)
                return row_update, desc_update
            def enable_button(x):
                """Return an update that makes the "Continue to Next Step" button clickable.

                `x` is the value of the triggering component and is ignored.
                """
                unlocked_button = gr.Button("Continue to Next Step", interactive=True)
                return unlocked_button
            # Reveal the concept row and its instructions once a seed has been requested.
            generate_seed_button.click(make_row_visible, inputs=[generate_seed_button], outputs=[concept_row, concept_desc])
            # Approving the concept unlocks the "Continue to Next Step" button.
            approve_button.click(enable_button, inputs=[approve_button], outputs=[continue_btn])

            # 'Try Again' regenerates the seed. Run the result through clean_song_seed,
            # exactly like the initial generation paths do, so the "Suggested Song Concept:"
            # prefix never shows up in the concept box after a retry.
            try_again_button.click(generate_song_seed, inputs=[feeling_input], outputs=[instrumental_output]).then(clean_song_seed, inputs=[instrumental_output], outputs=[instrumental_output])
            
            def change_tab(id):
                """Return a `gr.Tabs` update that selects the tab identified by `id`."""
                tab_update = gr.Tabs(selected=id)
                return tab_update
            continue_btn.click(change_tab, gr.Number(1, visible=False), tabs)

        # with gr.TabItem("Tutorial", id=1):
        #     gr.Markdown("""<center><font size=4>Now, let's walkthrough writing a verse together! Start chatting with the chatbot.</font></center>""")

        #     character = gr.State(value="A 18-year old boy who dreams of being a pop star that uplifts people going through the difficulties of life")

        #     starting_messages, starting_history = get_starting_messages("", "Home", "Missing home", "Ballad", instrumental_textbox.value)

        #     messages = gr.State(value=starting_messages)

        #     section_meanings = gr.State(value="")
        #     approve_button.click(update_song_details, inputs=[instrumental_output], outputs=[genre_input, title_input, blurb_input]).then(get_sections, inputs=[blurb_input, instrumental_output], outputs=[section_meanings])
            
        #     # lyrics_display = gr.State("")
            
        #     generated_audios = gr.State([])
        #     def reset_textbox(textbox):
        #         return ""

        #     character = gr.State(value="A 18-year old boy who dreams of being a pop star that uplifts people going through the difficulties of life")

        #     starting_messages, starting_history = get_starting_messages("", "Home", "Missing home", "Ballad", instrumental_textbox.value)
        #     print(starting_history, "STARTING HISTORY")
        #     messages = gr.State(value=starting_messages)
        #     # messages += [{"role": "assistant", "content": "You are a songwriter. You write songs."}]
        #     # journal_messages = gr.State(value=[journal_starting_message])
        #     # journal_response = gr.State(value="")

        #     generated_audios = gr.State(value=[])
        #     tutorial_step = gr.Number(0, visible=False)

        #     with gr.Row():
        #         with gr.Column(scale=2):
        #             chatbot_history = gr.Chatbot(type="messages", value=starting_history, label='SongChat', placeholder=None, layout='bubble', bubble_full_width=False, height=500)
        #             with gr.Row():
        #                 typical_responses = [textbox, submit]
                        
        #                 def update_response_options(buttons, button_dict):
        #                     return [gr.Textbox(visible=len(buttons)==0, scale=4), gr.Button(visible=len(buttons)==0, scale=2)] + [gr.Button(visible=(x in buttons)) for x in button_dict.keys()]

        #                 button_options = gr.State([])
        #                 button_dict = gr.State({
        #                     "revise lyrics": "Can we revise the lyrics?", 
        #                     "generate audio snippet": "Can you generate an audio snippet?", 
        #                     "continue revising" : "Can we continue revising this section?", 
        #                     "generate audio snippet with new lyrics": "Can you generate an audio snippet with these new lyrics?", 
        #                     "return to original instrumental": "Can you use the original clip for this section instead?", 
        #                     "revise genre": "Can we revise the instrumental tags?",
        #                     "re-revise genre": "Can we revise the instrumental tags?", 
        #                     "continue to next section": "Looks good! Let's move on to the next section.",
        #                     "merge snippets": "Can you merge this snippet into its full song?"
        #                 })

        #                 for button in button_dict.value.keys():
        #                     btn = gr.Button(button, visible=(button in button_options.value))
        #                     typical_responses.append(btn)


        #         with gr.Column(elem_id="audio-group", scale=1) as audio_group_column:
        #             # songwriter_creativity = gr.Slider(label="Songwriter LLM Temperature", minimum=0, maximum=1, step=0.01, value=1)

        #             with gr.Group():
        #                 # loop thru all audio in audio_clips
        #                 gr.Markdown("""<center><font size=4>All Generations</font></center>""")

        #                 @gr.render(inputs=generated_audios, triggers=[demo.load, generated_audios.change, textbox.submit, submit.click] + [btn.click for btn in typical_responses[2:]])
        #                 def render_audio_group(generated_audios):
        #                     # audio_group = gr.Group()
        #                     for audio in generated_audios:
        #                         clip_path, lyrics, instrumental, title, status = audio
        #                         with gr.Accordion(title, open=False):
        #                             if status == 'complete':
        #                                 gr.Audio(value=clip_path, label=title, interactive=False, show_label=False, waveform_options={"show_controls": False})
        #                             else:
        #                                 gr.HTML(f'<audio controls><source src="{clip_path}" type="audio/mp3"></audio>')
        #                             gr.TextArea(label="Lyrics", value=lyrics, interactive=False, show_label=False)
        #                             gr.TextArea(label="Instrumental", value=instrumental, interactive=False, show_label=False, max_lines=1)

        #                 gr.Markdown("""<center><font size=4>Current Generation</font></center>""")
        #                 current_section = gr.Textbox(label="Current section", value="Verse 1", interactive=False, show_label=True)
        #                 current_lyrics = gr.Textbox(label="Lyrics", value="", interactive=True, show_label=True)
        #                 with gr.Row():
        #                     curr_tags = gr.Textbox(label="Instrumental Tags", value="", interactive=True, show_label=True)
        #                     # @gr.render(inputs=generated_audios, triggers=[demo.load])
        #                     # def render_clip_to_continue(generated_audios):
        #                     audio_clips = [x[3] for x in generated_audios.value]
        #                     clip_to_continue = gr.Dropdown(label='Clip to continue', value = "", choices=audio_clips+[""], interactive=True)
        #                 #clip_to_continue = gr.Dropdown(label='Clip to continue', value = "", choices=audio_clips+[""], interactive=True)
        #                 songwriter_style = gr.Dropdown(label='Songwriter Style', value= "GPT 4o", choices=["GPT 4o", "d4vd"], interactive=True)
        #                 with gr.Row():
        #                     #curr_audio = gr.State("")
        #                     curr_audio = gr.HTML(label="Generated section")
        #                     regen = gr.Button("Re-generate")
                        
            
        #     section_meanings = gr.State(value="")
        #     approve_button.click(update_song_details, inputs=[instrumental_output], outputs=[genre_input, title_input, blurb_input]).then(get_sections, inputs=[blurb_input, instrumental_output], outputs=[section_meanings])
        #     continue_btn.click(get_starting_messages, inputs=[instrumental_textbox, title_input, blurb_input, genre_input, section_meanings], outputs=[messages, chatbot_history])

        #     def set_response_buttons(button_dict, button_name):
        #         print(button_name)
        #         return button_dict[button_name]
                        

        #     with gr.Row(visible=True) as chat_row_0:
        #         textbox_0 = gr.Textbox(lines=1, label='Send a message', show_label=False, placeholder='Send a message', scale=4)
        #         submit_0 = gr.Button("Send", scale=2)
            
        #     for btn in typical_responses[2:]:
        #             btn.click(set_response_buttons, inputs=[button_dict, btn], outputs=[textbox]).then(model_chat, 
        #                             inputs=[genre_input, textbox, chatbot_history, messages, generated_audios], 
        #                             outputs=[textbox, chatbot_history, messages, current_section, current_lyrics, curr_tags, clip_to_continue, curr_audio, generated_audios, button_options]).then(reset_textbox, inputs=[textbox], outputs=[textbox]).then(
        #                             update_response_options, [button_options, button_dict], typical_responses
        #                     )
            


        #     with Modal(visible=False) as modal_0:
        #         gr.Markdown("Welcome to the AI songwriter! The AI songwriter will help you write a song. You can chat with the AI, generate lyrics, and listen to audio snippets. Let's start by chatting with the AI.")
        #     with Modal(visible=False) as modal:
        #         gr.Markdown("The chatbot is run by an AI songwriter. It can respond to your conversations, generate lyrics and audio, and edit prior generations.\n\nNow, continue and respond to this second question from the AI songwriter.")
        #     with Modal(visible=False) as modal_1:
        #         gr.Markdown("The AI songwriter has now proposed a first verse! You now have the option to hear an audio snippet, revise the lyrics, or continue to the next section. The latter two options continue the conversation, whereas the first starts audio generation models. Select the 'get audio snippet' button to continue to the next step.")
        #     with Modal(visible=False) as modal_2:
        #         gr.Markdown("Awesome! You generated your first audio snippet./n/n As you work thru each section, the generated snippets are populated on the right panel. You'll be able to listen thru snippets as you work thru the song. \n\n "
        #                     "The current section is also displayed on the right panel. You'll be able to revise sections via the chat or directly via the right panel. \n\n "
        #                     "You're ready to start your official song! Hit the 'Start' button to start.")
        #         start_button = gr.Button("Start")

        #     # start_button.click(change_tab, gr.Number(2, visible=False), tabs).then(update_generation_tab,
        #     #                 inputs=[],
        #     #                 outputs=[])
        #     continue_btn.click(lambda: Modal(visible=True), None, modal_0)

        #     def make_modal_visible(step_number):
        #         new_step_number = step_number + 1 if step_number in [0, 1, 2] else step_number
        #         modals = [Modal(visible=i == step_number) for i in range(3)]
        #         return new_step_number, *modals

                


        #     submit_0.click(update_textbox, [textbox_0, tutorial_step], [textbox_0]).then(model_chat,
        #                 inputs=[genre_input, textbox_0, chatbot_history, messages, generated_audios],
        #                 outputs=[textbox_0, chatbot_history, messages, current_section, current_lyrics, curr_tags, clip_to_continue, curr_audio, generated_audios, button_options]).then(reset_textbox, inputs=[textbox_0], outputs=[textbox_0]).then(
        #                 update_response_options, [button_options, button_dict], typical_responses
        #         ).then(
        #                     make_modal_visible, [tutorial_step], [tutorial_step, modal, modal_1, modal_2]
        #                 )
        #     textbox_0.submit(update_textbox, [textbox_0, tutorial_step], [textbox_0]).then(model_chat, 
        #                 inputs=[genre_input, textbox_0, chatbot_history, messages, generated_audios], 
        #                 outputs=[textbox_0, chatbot_history, messages, current_section, current_lyrics, curr_tags, clip_to_continue, curr_audio, generated_audios, button_options]).then(reset_textbox, inputs=[textbox_0], outputs=[textbox_0]).then(
        #                 update_response_options, [button_options, button_dict], typical_responses
        #         ).then(
        #                     make_modal_visible, [tutorial_step], [tutorial_step, modal, modal_1, modal_2]
        #                 )
            
                
        
        with gr.TabItem("Generation", id=1): #index is 1
            start_song_gen = gr.State(value=False)
            gr.Markdown("""<center><font size=4>Now, chat with an AI songwriter to make your song! Hit finish when ready to hear full song.</font></center>""")        

            character = gr.State(value="A 18-year old boy who dreams of being a pop star that uplifts people going through the difficulties of life")

            # Build the initial conversation from placeholder values so the chatbot and the
            # `messages` state have content before the user has approved a concept;
            # continue_btn.click calls get_starting_messages again with the real inputs.
            # NOTE(review): this call passes "" first and the song structure
            # (instrumental_textbox.value) last, whereas the continue_btn.click wiring passes
            # instrumental_textbox first and section_meanings last — confirm the intended
            # argument order against get_starting_messages' signature.
            starting_messages, starting_history = get_starting_messages("", "Home", "Missing home", "Ballad", instrumental_textbox.value)
            # Debug output of the initial chat history at startup.
            print(starting_history, "STARTING HISTORY")
            messages = gr.State(value=starting_messages)
            # messages += [{"role": "assistant", "content": "You are a songwriter. You write songs."}]
            # journal_messages = gr.State(value=[journal_starting_message])
            # journal_response = gr.State(value="")

            generated_audios = gr.State(value=[])
            tutorial_step = gr.Number(value=0, visible=False)

            with gr.Row():
                with gr.Column(scale=2):
                    chatbot_history = gr.Chatbot(type="messages", value=starting_history, label='SongChat', placeholder=None, layout='bubble', bubble_full_width=False, height=500)
                    with gr.Row():
                        typical_responses = [textbox, submit]
                        
                        def update_response_options(buttons, button_dict):
                            """Build visibility updates for the response widgets.

                            The free-text box and Send button are visible only when `buttons` is
                            empty; each quick-reply button is visible only when its key is listed
                            in `buttons`. The result is ordered textbox, Send button, then one
                            button per key of `button_dict`, which lines up with `typical_responses`.
                            """
                            updates = [
                                gr.Textbox(visible=len(buttons) == 0, scale=4),
                                gr.Button(visible=len(buttons) == 0, scale=2),
                            ]
                            for label in button_dict:
                                updates.append(gr.Button(visible=(label in buttons)))
                            return updates

                        button_options = gr.State([])
                        button_dict = gr.State({
                            "revise lyrics": "Can we revise the lyrics?", 
                            "generate audio snippet": "Can you generate an audio snippet?", 
                            "continue revising" : "Can we continue revising this section?", 
                            "generate audio snippet with new lyrics": "Can you generate an audio snippet with these new lyrics?", 
                            "return to original instrumental": "Can you use the original clip for this section instead?", 
                            "revise genre": "Can we revise the instrumental tags?",
                            "re-revise genre": "Can we revise the instrumental tags?", 
                            "continue to next section": "Looks good! Let's move on to the next section.",
                            "merge snippets": "Can you merge this snippet into its full song?"
                        })

                        for button in button_dict.value.keys():
                            btn = gr.Button(button, visible=(button in button_options.value))
                            typical_responses.append(btn)


                with gr.Column(elem_id="audio-group", scale=1):
                    # songwriter_creativity = gr.Slider(label="Songwriter LLM Temperature", minimum=0, maximum=1, step=0.01, value=1)

                    with gr.Group():
                        # loop thru all audio in audio_clips
                        gr.Markdown("""<center><font size=4>All Generations</font></center>""")

                        @gr.render(
                            inputs=generated_audios,
                            triggers=[demo.load, generated_audios.change, textbox.submit, submit.click]
                            + [quick_btn.click for quick_btn in typical_responses[2:]],
                        )
                        def render_audio_group(generated_audios):
                            """Render one collapsed accordion per generated audio entry.

                            Each entry is unpacked as (clip_path, lyrics, instrumental, title, status).
                            Entries whose status is 'complete' get a `gr.Audio` player; all others get
                            a raw HTML <audio> element pointing at `clip_path`. Lyrics and instrumental
                            text are shown read-only beneath the player.
                            """
                            for clip_path, lyrics, instrumental, title, status in generated_audios:
                                with gr.Accordion(title, open=False):
                                    if status != 'complete':
                                        gr.HTML(f'<audio controls><source src="{clip_path}" type="audio/mp3"></audio>')
                                    else:
                                        gr.Audio(value=clip_path, label=title, interactive=False, show_label=False, waveform_options={"show_controls": False})
                                    gr.TextArea(label="Lyrics", value=lyrics, interactive=False, show_label=False)
                                    gr.TextArea(label="Instrumental", value=instrumental, interactive=False, show_label=False, max_lines=1)

                        gr.Markdown("""<center><font size=4>Current Generation</font></center>""")
                        current_section = gr.Textbox(label="Current section", value="Verse 1", interactive=False, show_label=True)
                        current_lyrics = gr.Textbox(label="Lyrics", value="", interactive=True, show_label=True)
                        with gr.Row():
                            curr_tags = gr.Textbox(label="Instrumental Tags", value="", interactive=True, show_label=True)
                            # @gr.render(inputs=generated_audios, triggers=[demo.load])
                            # def render_clip_to_continue(generated_audios):
                            audio_clips = [x[3] for x in generated_audios.value]
                            clip_to_continue = gr.Dropdown(label='Clip to continue', value = "", choices=audio_clips+[""], interactive=True)
                        #clip_to_continue = gr.Dropdown(label='Clip to continue', value = "", choices=audio_clips+[""], interactive=True)
                        songwriter_style = gr.Dropdown(label='Songwriter Style', value= "GPT 4o", choices=["GPT 4o", "d4vd"], interactive=True)
                        with gr.Row():
                            #curr_audio = gr.State("")
                            curr_audio = gr.HTML(label="Generated section")
                            regen = gr.Button("Re-generate")
                        
            
            section_meanings = gr.State(value="")
            approve_button.click(update_song_details, inputs=[instrumental_output], outputs=[genre_input, title_input, blurb_input]).then(get_sections, inputs=[blurb_input, instrumental_output], outputs=[section_meanings])
            continue_btn.click(get_starting_messages, inputs=[instrumental_textbox, title_input, blurb_input, genre_input, section_meanings], outputs=[messages, chatbot_history])

            # Tutorial pop-ups. modal_0 is opened when the user continues from the Ideation tab;
            # modal, modal_1 and modal_2 are the outputs of make_modal_visible and are shown
            # one at a time as tutorial_step advances.
            with Modal(visible=False) as modal_0:
                gr.Markdown("Welcome to the AI songwriter! The AI songwriter will help you write a song. You can chat with the AI, generate lyrics, and listen to audio snippets. Let's start by chatting with the AI.")
            with Modal(visible=False) as modal:
                gr.Markdown("The chatbot is run by an AI songwriter. It can respond to your conversations, generate lyrics and audio, and edit prior generations.\n\nNow, continue and respond to this second question from the AI songwriter.")
            with Modal(visible=False) as modal_1:
                # The button label quoted here matches the "generate audio snippet" key in button_dict.
                gr.Markdown("The AI songwriter has now proposed a first verse! You now have the option to hear an audio snippet, revise the lyrics, or continue to the next section. The latter two options continue the conversation, whereas the first starts audio generation models. Select the 'generate audio snippet' button to continue to the next step.")
            with Modal(visible=False) as modal_2:
                # Paragraph breaks use real newlines ("\n\n"); the previous "/n/n" was rendered literally.
                gr.Markdown("Awesome! You generated your first audio snippet.\n\n As you work thru each section, the generated snippets are populated on the right panel. You'll be able to listen thru snippets as you work thru the song. \n\n "
                            "The current section is also displayed on the right panel. You'll be able to revise sections via the chat or directly via the right panel. \n\n "
                            "You're ready to start your official song! Hit the 'Start' button to start.")
                start_button = gr.Button("Start")

            # Open the welcome modal on leaving Ideation; 'Start' dismisses the final tutorial modal.
            continue_btn.click(lambda: Modal(visible=True), None, modal_0)
            start_button.click(lambda: Modal(visible=False), None, modal_2)

            def make_modal_visible(step_number):
                """Show the tutorial modal for `step_number` and advance the step counter.

                Returns the next step number followed by three `Modal` updates, of which only
                the one whose index equals `step_number` is visible. Steps 0, 1 and 2 advance
                by one; any other value is returned unchanged and all three modals are hidden.
                """
                if step_number in [0, 1, 2]:
                    next_step = step_number + 1
                else:
                    next_step = step_number
                modal_updates = []
                for idx in range(3):
                    modal_updates.append(Modal(visible=idx == step_number))
                return (next_step, *modal_updates)
            
            def update_textbox(textbox, step_number):
                """Append a tutorial steering hint to the user's message for steps 0 and 1.

                Step 0 adds a request for another question, step 1 adds a request to write a
                verse; for any other step the message is returned untouched.
                """
                if step_number not in (0, 1):
                    return textbox
                if step_number == 0:
                    hint = "\nAsk me another question to inform the verse"
                else:
                    hint = "\nUse this info to write a verse"
                return textbox + hint
            
            def set_response_buttons(button_dict, button_name):
                """Look up the canned chat message for the quick-reply button `button_name`.

                The button name is printed before the lookup; a name missing from
                `button_dict` raises KeyError.
                """
                print(button_name)
                canned_message = button_dict[button_name]
                return canned_message

            def set_regenerate_query(textbox, current_section, current_lyrics, curr_tags, clip_to_continue):
                """Build the chat message that asks to regenerate the current section.

                The message embeds the edited lyrics, the instrumental tags and the clip to
                continue, each on its own labelled line. `textbox` and `current_section` are
                accepted because they are wired as inputs, but they are not used in the message.
                """
                lyrics_text = f"{current_lyrics}"
                # Make sure the lyrics end with exactly the newline they need, so the
                # "Instrumental tags:" label starts on its own line instead of being glued
                # to the last lyric line.
                if not lyrics_text.endswith("\n"):
                    lyrics_text += "\n"
                return (
                    "Can you revise this section so it uses these lyrics and instrumentals and then generate an audio snippet using it?\n"
                    f"Lyrics:\n{lyrics_text}"
                    f"Instrumental tags: {curr_tags}\n"
                    f"Clip to continue: {clip_to_continue}"
                )
            def set_snippet_query(textbox):
                """Return the fixed chat message requesting an audio snippet; `textbox` is ignored."""
                snippet_request = "Can I have an audio snippet of what we have now?"
                return snippet_request
            def set_finish_query(textbox):
                """Return the fixed chat message requesting the full song; `textbox` is ignored."""
                finish_request = "I'm ready for the full song now! Can you finish it up?"
                return finish_request
            def reset_textbox(textbox):
                """Return an empty string to clear the chat input; the current text is ignored."""
                cleared = ""
                return cleared
            
            with gr.Row():
                textbox.render()
                submit.render()

                for btn in typical_responses[2:]:
                    btn.click(set_response_buttons, inputs=[button_dict, btn], outputs=[textbox]).then(model_chat, 
                                    inputs=[genre_input, textbox, chatbot_history, messages, generated_audios], 
                                    outputs=[textbox, chatbot_history, messages, current_section, current_lyrics, curr_tags, clip_to_continue, curr_audio, generated_audios, button_options]).then(reset_textbox, inputs=[textbox], outputs=[textbox]).then(
                                    update_response_options, [button_options, button_dict], typical_responses
                            ).then(
                            make_modal_visible, [tutorial_step], [tutorial_step, modal, modal_1, modal_2]
                        )

    


            submit.click(update_textbox, [textbox, tutorial_step], [textbox]).then(model_chat,
                inputs=[genre_input, textbox, chatbot_history, messages, generated_audios],
                outputs=[textbox, chatbot_history, messages, current_section, current_lyrics, curr_tags, clip_to_continue, curr_audio, generated_audios, button_options]).then(reset_textbox, inputs=[textbox], outputs=[textbox]).then(
                        update_response_options, [button_options, button_dict], typical_responses
                ).then(
                            make_modal_visible, [tutorial_step], [tutorial_step, modal, modal_1, modal_2]
                        )
            textbox.submit(update_textbox, [textbox, tutorial_step], [textbox]).then(model_chat, 
                inputs=[genre_input, textbox, chatbot_history, messages, generated_audios], 
                outputs=[textbox, chatbot_history, messages, current_section, current_lyrics, curr_tags, clip_to_continue, curr_audio, generated_audios, button_options]).then(reset_textbox, inputs=[textbox], outputs=[textbox]).then(
                        update_response_options, [button_options, button_dict], typical_responses
                ).then(
                            make_modal_visible, [tutorial_step], [tutorial_step, modal, modal_1, modal_2]
                        )
            
            
            regen.click(set_regenerate_query, inputs=[textbox, current_section, current_lyrics, curr_tags, clip_to_continue], outputs=[textbox]).then(model_chat,
                inputs=[genre_input, textbox, chatbot_history, messages, generated_audios],
                outputs=[textbox, chatbot_history, messages, current_section, current_lyrics, curr_tags, clip_to_continue, curr_audio, generated_audios, button_options]).then(reset_textbox, inputs=[textbox], outputs=[textbox]).then(
                        update_response_options, [button_options, button_dict], typical_responses
                ).then(
                            make_modal_visible, [tutorial_step], [tutorial_step, modal, modal_1, modal_2]
                        )

            with gr.Row():
                # get_snippet_button = gr.Button("Get Audio Snippet", scale=2)
                done = gr.Button("Finish Full Song 🎶", scale=4)
                #autoGPT_checkbox = gr.Checkbox(label="AutoGPT", value=True, info="Auto-generate responses from journal entry", interactive=True, scale=2)
                #journal_llm_creativity = gr.Slider(label="Journal LLM Temperature", minimum=0, maximum=1, step=0.01, value=1, interactive=True, scale=2)
                reset_button = gr.Button("Reset", scale=2)
            
                def reset_chat(messages, chatbot_history):
                    """Reset the conversation to its first two messages and clear generation state.

                    The incoming `chatbot_history` is not read; the history is rebuilt from the
                    truncated messages. Returns, in order: the truncated messages, the rebuilt
                    chat history, four empty strings (current section, lyrics, tags, clip to
                    continue), a placeholder HTML for the current audio, an empty list of
                    generated audios and an empty list of quick-reply button options.
                    """
                    starting_messages = messages[:2]
                    starting_history = messages_to_history(starting_messages)
                    return starting_messages, starting_history, '', '', '', '', gr.HTML('<center>generating...</center>'), [], []

                # Clearing button_options alone does not touch the widgets, so follow up with
                # update_response_options: otherwise quick-reply buttons from the previous turn
                # stay visible after a reset and the free-text box stays hidden.
                reset_button.click(reset_chat, inputs=[messages, chatbot_history], outputs=[messages, chatbot_history, current_section, current_lyrics, curr_tags, clip_to_continue, curr_audio, generated_audios, button_options]).then(
                    update_response_options, [button_options, button_dict], typical_responses
                )
            

            done.click(set_finish_query, inputs=[textbox], outputs=[textbox]).then(model_chat,
                inputs=[genre_input, textbox, chatbot_history, messages, generated_audios],
                outputs=[textbox, chatbot_history, messages, current_section, current_lyrics, curr_tags, clip_to_continue, curr_audio, generated_audios, button_options])

            # Start every page load from a clean chat. reset_chat returns nine values (the last
            # one resets button_options), so all nine outputs are listed here, the same as in
            # the reset_button wiring; previously the ninth value had no output to go to.
            demo.load(reset_chat, inputs=[messages, chatbot_history], outputs=[messages, chatbot_history, current_section, current_lyrics, curr_tags, clip_to_continue, curr_audio, generated_audios, button_options])
            
            
            # with gr.Row():
            #     song_link = gr.State(value="")
            #     song = gr.HTML()
            


            def download_conversation(messages):
                """Write `messages` as JSON to data/conversation_history.json.

                The path is relative to the current working directory. The data/ directory
                is created when it does not exist yet, and an existing file is overwritten.
                Nothing is returned.
                """
                out_dir = 'data'
                # Without this, the first export on a fresh checkout fails with FileNotFoundError.
                os.makedirs(out_dir, exist_ok=True)
                out_path = os.path.join(out_dir, 'conversation_history.json')
                with open(out_path, 'w', encoding='utf-8') as f:
                    json.dump(messages, f)

            
            with gr.Accordion("Admin", open=False):
                download_btn = gr.Button("Download Conversation")
                download_btn.click(download_conversation, [messages], None)
            #     story_textbox = gr.TextArea(label="Story to provide context to songwriter", value="", max_lines=3)

            
            # get_snippet_button.click(set_snippet_query, inputs=[textbox], outputs=[textbox]).then(model_chat,
            #             inputs=[genre_input, textbox, chatbot_history, messages, generated_audios],
            #             outputs=[textbox, chatbot_history, messages, current_section, current_lyrics, curr_tags, clip_to_continue, curr_audio, generated_audios]).then(reset_textbox, inputs=[textbox], outputs=[textbox])



# Configure the event queue (api_open=False) and start the app with max_threads=30.
demo.queue(api_open=False)
demo.launch(max_threads=30)