File size: 21,506 Bytes
8b3be6a
5cfd8a9
917b084
 
5cfd8a9
8b3be6a
 
917b084
5cfd8a9
8b3be6a
 
5cfd8a9
917b084
 
 
 
 
 
 
146352e
917b084
 
 
 
5cfd8a9
 
 
 
 
 
146352e
 
5cfd8a9
97045b3
5cfd8a9
 
 
146352e
5cfd8a9
 
 
 
 
 
 
 
 
 
 
 
917b084
 
 
1b4968b
5cfd8a9
 
 
 
 
 
1b4968b
5cfd8a9
8b3be6a
 
 
 
 
5cfd8a9
917b084
8b3be6a
5cfd8a9
97045b3
5cfd8a9
1b4968b
5cfd8a9
146352e
5cfd8a9
 
 
8b3be6a
 
 
f362ef8
917b084
 
5cfd8a9
 
146352e
917b084
5cfd8a9
 
 
917b084
5cfd8a9
917b084
5cfd8a9
 
 
917b084
 
 
5cfd8a9
917b084
97045b3
917b084
 
 
 
 
5cfd8a9
917b084
 
97045b3
 
146352e
917b084
 
 
 
97045b3
 
146352e
917b084
 
 
 
 
 
 
 
 
9ba3bfc
917b084
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f362ef8
917b084
 
 
 
 
 
 
 
 
f362ef8
917b084
 
 
f362ef8
917b084
 
8b3be6a
5cfd8a9
 
917b084
 
97045b3
917b084
97045b3
917b084
97045b3
917b084
97045b3
 
 
917b084
 
 
 
 
 
 
 
 
 
 
146352e
917b084
 
 
 
 
 
 
 
 
 
 
 
3482c4c
917b084
 
 
 
 
 
5cfd8a9
 
917b084
 
 
 
 
 
146352e
917b084
 
 
 
 
 
 
 
 
 
146352e
917b084
 
 
 
 
 
146352e
917b084
 
 
 
 
 
 
 
146352e
917b084
 
 
 
5cfd8a9
146352e
9ba3bfc
146352e
9ba3bfc
 
 
5cfd8a9
9ba3bfc
97045b3
 
9ba3bfc
5cfd8a9
97045b3
 
 
5cfd8a9
 
9ba3bfc
 
97045b3
 
 
917b084
97045b3
 
 
917b084
 
 
 
 
5cfd8a9
 
917b084
 
 
 
5cfd8a9
 
917b084
 
 
5cfd8a9
 
917b084
 
 
 
5cfd8a9
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
from typing import List, Tuple, Dict
import gradio as gr
import os
import json

from utils.song_utils import generate_song_seed, get_starting_messages, messages_to_history, update_song_details, get_sections
from chat import model_chat
from gradio_modal import Modal

History = List[Tuple[str, str]] # a type: pairs of (query, response), where query is user input and response is system output
Messages = List[Dict[str, str]] # a type: list of messages with role and content

css = """
#audio-group {
    max-height: 800px;
    overflow-y: scroll;
}
"""

textbox = gr.Textbox(lines=2, label='Send a message', show_label=False, placeholder='Send a message', scale=4, visible=True)
submit = gr.Button("Send", scale=2, visible=True)


with gr.Blocks(css=css) as demo:
    gr.Markdown("""<center><font size=8>AI Songwriter (alpha)</center>""")
    gr.Markdown("""<center><font size=4>Turning your stories into musical poetry. 2024 MIT Senior Thesis.</center>""")

    with gr.Tabs() as tabs:
        with gr.TabItem("Ideation", id=0): #index is 0
            gr.Markdown("""<center><font size=6>Let's write a song!</font></center>""")
            gr.Markdown("""<center><font size=4>But first, let's generate a song seed to provide context to the AI Songwriter.</font></center>""")
            gr.Markdown("""<center><font size=3>If you're stuck thinking of a song idea, check out <a href="https://onestopforwriters.com/emotions" target="_blank">here</a>.</font></center>""")
            with gr.Row():
                feeling_input = gr.Textbox(label="How are you feeling today?", placeholder='Enter your emotions', scale=2)
                # audio_input = gr.Audio(sources=["upload"], type="numpy", label="Instrumental",
                #                 interactive=True, elem_id="instrumental-input")
                
            generate_seed_button = gr.Button("Click to Generate Song Seed")
            concept_desc = gr.Markdown("""<center><font size=4>Here it is! Hit 'Approve' to confirm this concept. Edit the concept directly or hit 'Try Again' to get another suggestion.</font></center>""", visible=False)
            with gr.Row(visible=False) as concept_row:
                instrumental_output = gr.TextArea(label="Suggested Song Concept", value="", max_lines=3, scale=2)
                with gr.Column():
                    approve_button = gr.Button("Approve")
                    try_again_button = gr.Button("Try Again")
            with gr.Row():
                with gr.Accordion("Generated Song Details", open=False) as accordion:
                    with gr.Row():
                        title_input = gr.Textbox(label='Title', placeholder='Enter a song title')
                        genre_input = gr.Textbox(label='Genre', placeholder='Enter a genre')
                        blurb_input = gr.Textbox(label='Blurb', placeholder='Enter a one-sentence blurb')
                        songwriter_style = gr.Dropdown(label='Songwriter Style', value = "GPT 4o", choices=["GPT 4o", "d4vd (Indie Rock Ballad - Male)", "Lizzy McAlpine (Indie Pop Folk - Female)", "Phoebe Bridgers (Pop Sad Rock - Female)", "Daniel Caesar (R&B/Soul - Male)"], interactive=True)
                
                        instrumental_textbox = gr.TextArea(label="Song Structure", value="Verse 1: 4 measures\nChorus 1: 8 measures\nVerse 2: 8 measures\nChorus 2: 8 measures\nVerse 3: 8 measures\nChorus 3: 8 measures", visible=False, interactive=True, max_lines=3)
                    gr.Markdown("""<center><font size=4>Edit these to your liking and hit 'Start Creating' to continue onto generation!</font></center>""")
                
                def open_accordion(x):
                    """Return an update that opens the "Generated Song Details" accordion.

                    `x` is the value of the triggering component and is not used.
                    """
                    expanded = gr.Accordion("Generated Song Details", open=True)
                    return expanded
                approve_button.click(open_accordion, inputs=[approve_button], outputs=[accordion])
                  
            with gr.Row():
                continue_btn = gr.Button("Start Creating", interactive=False)

            
            def clean_song_seed(song_seed):
                """Strip any preamble up to and including the "Suggested Song Concept:" label.

                Args:
                    song_seed: Raw concept text, possibly prefixed with the label.

                Returns:
                    The text after the first occurrence of the label with surrounding
                    whitespace removed, or `song_seed` unchanged when the label is
                    absent or the input is empty/None.
                """
                marker = "Suggested Song Concept:"
                if not song_seed:
                    # Nothing to clean; avoid a TypeError on None.
                    return song_seed
                # Partition on the FIRST marker only so a concept that happens to
                # repeat the label later on is not truncated.
                _, found, concept = song_seed.partition(marker)
                return concept.strip() if found else song_seed
            generate_seed_button.click(generate_song_seed, inputs=[feeling_input], outputs=[instrumental_output]).then(clean_song_seed, inputs=[instrumental_output], outputs=[instrumental_output])
            feeling_input.submit(generate_song_seed, inputs=[feeling_input], outputs=[instrumental_output]).then(clean_song_seed, inputs=[instrumental_output], outputs=[instrumental_output])
            
            def make_row_visible(x):
                """Build the updates for the concept row and its description text.

                Returns a pair: a Row update with visible=True and a Markdown update
                carrying the description text with visible=False. `x` is not used.
                """
                # NOTE(review): the description is returned with visible=False, so this
                # handler never reveals it -- confirm that is intended.
                description_html = """<center><font size=4>Here it is! Hit 'Approve' to confirm this concept. Edit the concept directly or hit 'Try Again' to get another suggestion.</font></center>"""
                row_update = gr.Row(visible=True)
                description_update = gr.Markdown(description_html, visible=False)
                return row_update, description_update
            def enable_button(x):
                """Return an interactive "Start Creating" button update; `x` is not used."""
                enabled_button = gr.Button("Start Creating", interactive=True)
                return enabled_button
            generate_seed_button.click(make_row_visible, inputs=[generate_seed_button], outputs=[concept_row, concept_desc])
            feeling_input.submit(make_row_visible, inputs=[generate_seed_button], outputs=[concept_row, concept_desc])
            approve_button.click(enable_button, inputs=[approve_button], outputs=[continue_btn])
            
            # "Try Again" regenerates the concept. Run clean_song_seed afterwards, as the
            # generate-button and Enter-key triggers do, so the "Suggested Song Concept:"
            # label is stripped from the regenerated text as well.
            try_again_button.click(generate_song_seed, inputs=[feeling_input], outputs=[instrumental_output]).then(clean_song_seed, inputs=[instrumental_output], outputs=[instrumental_output])
            
            def change_tab(id):
                """Return a Tabs update that selects the tab whose id is `id`."""
                # The parameter name shadows the builtin `id`; it is kept as-is because
                # it is part of the function's signature.
                tab_selection = gr.Tabs(selected=id)
                return tab_selection
            continue_btn.click(change_tab, gr.Number(1, visible=False), tabs)            
                
        
        with gr.TabItem("Generation", id=1): #index is 1
            start_song_gen = gr.State(value=False)
            gr.Markdown("""<center><font size=4>Now, chat with an AI songwriter to make your song!</font></center>""")        

            character = gr.State(value="A 18-year old boy who dreams of being a pop star that uplifts people going through the difficulties of life")

            # Seed the conversation state from hard-coded song details at UI build time.
            # NOTE(review): the positional arguments here are ("", "Home", "Missing home",
            # "Ballad", structure text), whereas the continue_btn.click wiring further down
            # passes the structure textbox first and section_meanings last -- confirm both
            # calls match get_starting_messages' signature.
            starting_messages, starting_history = get_starting_messages("", "Home", "Missing home", "Ballad", instrumental_textbox.value)
            # Debug output of the initial chatbot history.
            print(starting_history, "STARTING HISTORY")
            messages = gr.State(value=starting_messages)
            # messages += [{"role": "assistant", "content": "You are a songwriter. You write songs."}]
            # journal_messages = gr.State(value=[journal_starting_message])
            # journal_response = gr.State(value="")

            generated_audios = gr.State(value=[])
            tutorial_step = gr.Number(value=0, visible=False)

            with gr.Row():
                with gr.Column(scale=2):
                    chatbot_history = gr.Chatbot(type="messages", value=starting_history, label='SongChat', placeholder=None, layout='bubble', bubble_full_width=False, height=500)
                    with gr.Row():
                        typical_responses = [textbox, submit]
                        
                        def update_response_options(buttons, button_dict):
                            """Compute visibility updates for the free-text controls and quick-reply buttons.

                            Returns a list holding a Textbox update, a Button update for the
                            send button, and then one Button update per key of `button_dict`
                            (in key order). The textbox and send button are visible only when
                            `buttons` is empty; each quick-reply button is visible only when
                            its key appears in `buttons`.
                            """
                            free_text_visible = len(buttons) == 0
                            updates = [
                                gr.Textbox(visible=free_text_visible, scale=4),
                                gr.Button(visible=free_text_visible, scale=2),
                            ]
                            for label in button_dict:
                                updates.append(gr.Button(visible=(label in buttons)))
                            return updates

                        button_options = gr.State([])
                        button_dict = gr.State({
                            "revise lyrics": "Can we revise the lyrics together?",
                            "re-revise lyrics": "Can we revise the lyrics together?", 
                            "edit lyrics directly": "Can I edit the lyrics directly for the whole section?",
                            "generate audio snippet": "Can you generate an audio snippet?", 
                            "continue revising" : "Can we continue revising this section?", 
                            "generate audio snippet with new lyrics": "Can you generate an audio snippet with these new lyrics?", 
                            "return to original instrumental": "Can you use the original clip for this section instead?", 
                            "revise genre": "Can we revise the instrumental tags together?",
                            "re-revise genre": "Can we revise the instrumental tags together?", 
                            "revise genre directly": "Can I edit the genre directly for the whole song?",
                            "continue to next section": "Looks good! Let's move on to the next section.",
                            "merge snippets": "Can you merge this snippet into its full song?"
                        })

                        for button in button_dict.value.keys():
                            btn = gr.Button(button, visible=(button in button_options.value))
                            typical_responses.append(btn)


                with gr.Column(elem_id="audio-group", scale=1, visible=False):
                    # songwriter_creativity = gr.Slider(label="Songwriter LLM Temperature", minimum=0, maximum=1, step=0.01, value=1)

                    with gr.Group():
                        # loop thru all audio in audio_clips
                        gr.Markdown("""<center><font size=4>All Generations</font></center>""")

                        @gr.render(inputs=generated_audios, triggers=[demo.load, generated_audios.change, textbox.submit, submit.click] + [btn.click for btn in typical_responses[2:]])
                        def render_audio_group(generated_audios):
                            """Render one collapsed accordion per generated audio entry.

                            Each entry is unpacked as (clip_path, lyrics, instrumental, title,
                            status). Entries whose status is 'complete' get a gr.Audio player;
                            any other status gets a raw HTML <audio> element pointing at
                            clip_path. Lyrics and instrumental text are shown read-only below.
                            """
                            for clip_path, lyrics, instrumental, title, status in generated_audios:
                                with gr.Accordion(title, open=False):
                                    if status != 'complete':
                                        gr.HTML(f'<audio controls><source src="{clip_path}" type="audio/mp3"></audio>')
                                    else:
                                        gr.Audio(value=clip_path, label=title, interactive=False, show_label=False, waveform_options={"show_controls": False})
                                    gr.TextArea(label="Lyrics", value=lyrics, interactive=False, show_label=False)
                                    gr.TextArea(label="Instrumental", value=instrumental, interactive=False, show_label=False, max_lines=1)

                        gr.Markdown("""<center><font size=4>Edit Current Generation</font></center>""")
                        current_section = gr.Textbox(label="Current section", value="Verse 1", interactive=False, show_label=True)
                        current_lyrics = gr.Textbox(label="Lyrics", value="", interactive=True, show_label=True)
                        with gr.Row():
                            curr_tags = gr.Textbox(label="Instrumental Tags", value="", interactive=True, show_label=True)
                            # @gr.render(inputs=generated_audios, triggers=[demo.load])
                            # def render_clip_to_continue(generated_audios):
                            audio_clips = [x[3] for x in generated_audios.value]
                            clip_to_continue = gr.Dropdown(label='Clip to continue', value = "", choices=audio_clips+[""], interactive=True)
                        #clip_to_continue = gr.Dropdown(label='Clip to continue', value = "", choices=audio_clips+[""], interactive=True)
                        songwriter_style = gr.Dropdown(label='Songwriter Style', value= "GPT 4o", choices=["GPT 4o", "d4vd (Indie Rock Ballad - Male)", "Lizzy McAlpine (Indie Pop Folk - Female)", "Phoebe Bridgers (Pop Sad Rock - Female)", "Daniel Caesar (R&B/Soul - Male)"], interactive=True)
                        with gr.Row():
                            #curr_audio = gr.State("")
                            curr_audio = gr.HTML(label="Generated section")
                            regen = gr.Button("Submit edits")
                        
            
            section_meanings = gr.State(value="")
            approve_button.click(update_song_details, inputs=[instrumental_output], outputs=[genre_input, title_input, blurb_input]).then(get_sections, inputs=[blurb_input, instrumental_output], outputs=[section_meanings])
            continue_btn.click(get_starting_messages, inputs=[instrumental_textbox, title_input, blurb_input, genre_input, section_meanings], outputs=[messages, chatbot_history])

            with Modal(visible=False) as modal_0:
                gr.Markdown("Welcome to the AI songwriter! The AI songwriter is a chatbot that will help you write a song. You can chat with the AI and guide it however you'd like. Let's start by chatting with the AI.")
            with Modal(visible=False) as modal:
                gr.Markdown("The AI songwriter can respond to your stories and requests, generate lyrics and audio, and edit prior generations.\n\nNow, continue and respond to this second question from the AI songwriter to get to know you.")
            with Modal(visible=False) as modal_1:
                gr.Markdown("The AI songwriter has now proposed a first verse! After each generation from the AI, you'll receive a list of buttons to guide it further. Select the 'get audio snippet' button to continue to the next step.")
            with Modal(visible=False) as modal_2:
                gr.Markdown("Awesome! You generated your first audio snippet. The songwriter will continue for the each section for the rest of the song, revising and iterating with you. \n"
                            "As the song gets generated, feel free to ask the songwriter any questions or guide it in any direction. \n"
                            "You're ready to start your study with the AI Songwriter! Hit the 'Start' button to start.")
                start_button = gr.Button("Start")
            
            continue_btn.click(lambda: Modal(visible=True), None, modal_0)
            start_button.click(lambda: Modal(visible=False), None, modal_2)

            def make_modal_visible(step_number):
                """Advance the tutorial step and pick which tutorial modal to show.

                Returns a tuple of the next step number followed by three Modal
                updates; the update at index i is visible only when i == step_number.
                The step is incremented while it is 0, 1 or 2 and left unchanged
                otherwise.
                """
                if step_number in [0, 1, 2]:
                    next_step = step_number + 1
                else:
                    next_step = step_number
                modal_updates = tuple(Modal(visible=(index == step_number)) for index in range(3))
                return (next_step,) + modal_updates
            
            def update_textbox(textbox, step_number):
                """Append a tutorial instruction to the user's message for steps 0 and 1.

                Args:
                    textbox: The message text typed by the user.
                    step_number: Current tutorial step.

                Returns:
                    `textbox` with a step-specific line appended for steps 0 and 1,
                    or `textbox` unchanged for any other step.
                """
                print("on step number", step_number)
                step_hints = (
                    (0, "\nAsk me another question to inform the verse"),
                    (1, "\nUse this info to write a verse"),
                )
                for step, hint in step_hints:
                    if step_number == step:
                        return textbox + hint
                return textbox
            
            def set_response_buttons(button_dict, button_name):
                """Look up the chat message mapped to a quick-reply button label.

                Prints the label, then returns `button_dict[button_name]`; a missing
                label raises KeyError.
                """
                print(button_name)
                canned_message = button_dict[button_name]
                return canned_message

            def set_regenerate_query(textbox, current_section, current_lyrics, curr_tags, clip_to_continue):
                """Build the chat query asking to regenerate a section from edited lyrics and tags.

                Only `current_lyrics` and `curr_tags` are used; the remaining
                parameters are accepted to match the event's inputs and are ignored.

                Returns:
                    The query string. The lyrics are always terminated by a newline
                    so the "Instrumental tags:" label starts on its own line instead
                    of being glued to the last lyric line.
                """
                lyrics_text = f"{current_lyrics}"
                if not lyrics_text.endswith("\n"):
                    lyrics_text += "\n"
                return (
                    "Can you revise this section so it uses these lyrics and instrumentals and then generate an audio snippet using it?\n"
                    f"Lyrics:\n{lyrics_text}Instrumental tags: {curr_tags}"
                )
            def set_snippet_query(textbox):
                """Return the fixed request for an audio snippet; `textbox` is not used."""
                snippet_request = "Can I have an audio snippet of what we have now?"
                return snippet_request
            def set_finish_query(textbox):
                """Return the fixed request to finish the full song; `textbox` is not used."""
                finish_request = "I'm ready for the full song now! Can you finish it up?"
                return finish_request
            def reset_textbox(textbox):
                """Return an empty string regardless of the current `textbox` value."""
                cleared = str()
                return cleared
            
            with gr.Row():
                textbox.render()
                submit.render()

                # Wire every quick-reply button (typical_responses[2:] skips the textbox and
                # the send button). On click: put the button's mapped message into the
                # textbox, run model_chat, refresh which reply controls are visible, then
                # advance the tutorial step and its modals.
                for btn in typical_responses[2:]:
                    btn.click(set_response_buttons, inputs=[button_dict, btn], outputs=[textbox]).then(model_chat, 
                                    inputs=[genre_input, textbox, chatbot_history, messages, generated_audios], 
                                    outputs=[textbox, chatbot_history, messages, current_section, current_lyrics, curr_tags, clip_to_continue, curr_audio, generated_audios, button_options]).then(
                                    update_response_options, [button_options, button_dict], typical_responses
                            ).then(
                            make_modal_visible, [tutorial_step], [tutorial_step, modal, modal_1, modal_2]
                        )

    


            submit.click(update_textbox, [textbox, tutorial_step], [textbox]).then(model_chat,
                inputs=[genre_input, textbox, chatbot_history, messages, generated_audios],
                outputs=[textbox, chatbot_history, messages, current_section, current_lyrics, curr_tags, clip_to_continue, curr_audio, generated_audios, button_options]).then(
                        update_response_options, [button_options, button_dict], typical_responses
                ).then(
                            make_modal_visible, [tutorial_step], [tutorial_step, modal, modal_1, modal_2]
                        )
            textbox.submit(update_textbox, [textbox, tutorial_step], [textbox]).then(model_chat, 
                inputs=[genre_input, textbox, chatbot_history, messages, generated_audios], 
                outputs=[textbox, chatbot_history, messages, current_section, current_lyrics, curr_tags, clip_to_continue, curr_audio, generated_audios, button_options]).then(
                        update_response_options, [button_options, button_dict], typical_responses
                ).then(
                            make_modal_visible, [tutorial_step], [tutorial_step, modal, modal_1, modal_2]
                        )
            
            
            regen.click(set_regenerate_query, inputs=[textbox, current_section, current_lyrics, curr_tags, clip_to_continue], outputs=[textbox]).then(model_chat,
                inputs=[genre_input, textbox, chatbot_history, messages, generated_audios],
                outputs=[textbox, chatbot_history, messages, current_section, current_lyrics, curr_tags, clip_to_continue, curr_audio, generated_audios, button_options]).then(
                        update_response_options, [button_options, button_dict], typical_responses
                ).then(
                            make_modal_visible, [tutorial_step], [tutorial_step, modal, modal_1, modal_2]
                        )

            with gr.Row(visible=True):
                # get_snippet_button = gr.Button("Get Audio Snippet", scale=2)
                done = gr.Button("Complete User Study 🎶", scale=4)
                #autoGPT_checkbox = gr.Checkbox(label="AutoGPT", value=True, info="Auto-generate responses from journal entry", interactive=True, scale=2)
                #journal_llm_creativity = gr.Slider(label="Journal LLM Temperature", minimum=0, maximum=1, step=0.01, value=1, interactive=True, scale=2)
                reset_button = gr.Button("Reset", scale=2)
            
                def reset_chat(messages, chatbot_history):
                    """Trim the conversation to its first three messages and clear generation state.

                    Returns a 9-tuple: the trimmed messages, the history rebuilt from them
                    via messages_to_history, four empty strings, an HTML update reading
                    "generating...", and two empty lists. The incoming `chatbot_history`
                    is not used.
                    """
                    opening_messages = messages[:3]
                    # Hand the converter its own copy, as the original call did.
                    rebuilt_history = messages_to_history(opening_messages[:3])
                    placeholder_audio = gr.HTML('<center>generating...</center>')
                    return (
                        opening_messages,
                        rebuilt_history,
                        '',
                        '',
                        '',
                        '',
                        placeholder_audio,
                        [],
                        [],
                    )
                
                reset_button.click(reset_chat, inputs=[messages, chatbot_history], outputs=[messages, chatbot_history, current_section, current_lyrics, curr_tags, clip_to_continue, curr_audio, generated_audios, button_options]).then(
                        update_response_options, [button_options, button_dict], typical_responses
                )
            

            done.click(set_finish_query, inputs=[textbox], outputs=[textbox]).then(model_chat,
                inputs=[genre_input, textbox, chatbot_history, messages, generated_audios],
                outputs=[textbox, chatbot_history, messages, current_section, current_lyrics, curr_tags, clip_to_continue, curr_audio, generated_audios, button_options]).then(
                        update_response_options, [button_options, button_dict], typical_responses
                )

            demo.load(reset_chat, inputs=[messages, chatbot_history], outputs=[messages, chatbot_history, current_section, current_lyrics, curr_tags, clip_to_continue, curr_audio, generated_audios, button_options]).then(
                        update_response_options, [button_options, button_dict], typical_responses
                )
            
            
            # with gr.Row():
            #     song_link = gr.State(value="")
            #     song = gr.HTML()
            


            def download_conversation(messages):
                """Write the conversation messages to data/conversation_history.json.

                Args:
                    messages: JSON-serialisable conversation state to dump.

                The `data` directory is created first when it does not exist, so the
                write does not fail with FileNotFoundError on a fresh checkout. An
                existing file is overwritten.
                """
                output_path = os.path.join('data', 'conversation_history.json')
                os.makedirs(os.path.dirname(output_path), exist_ok=True)
                with open(output_path, 'w', encoding='utf-8') as f:
                    json.dump(messages, f)

            
            with gr.Accordion("Admin", open=False):
                download_btn = gr.Button("Download Conversation")
                download_btn.click(download_conversation, [messages], None)
            #     story_textbox = gr.TextArea(label="Story to provide context to songwriter", value="", max_lines=3)

            
            # get_snippet_button.click(set_snippet_query, inputs=[textbox], outputs=[textbox]).then(model_chat,
            #             inputs=[genre_input, textbox, chatbot_history, messages, generated_audios],
            #             outputs=[textbox, chatbot_history, messages, current_section, current_lyrics, curr_tags, clip_to_continue, curr_audio, generated_audios]).then(reset_textbox, inputs=[textbox], outputs=[textbox])



# Configure the queue with api_open=False, then start the app with max_threads=30.
# Both calls run at import time; there is no `if __name__ == "__main__"` guard.
demo.queue(api_open=False)
demo.launch(max_threads=30)