Spaces:

swimmiing
/

ACL-SSL-zeroshot-demo

Running

swimmiing committed on Dec 15, 2023

Commit

fd01b14

•

1 Parent(s): 59910f7

Add more examples v3

Files changed (1) hide show

app.py CHANGED Viewed

@@ -81,13 +81,15 @@ title = "Audio-Grounded Contrastive Learning"
 description = """<p>
 This is a simple demo of our WACV'24 paper 'Can CLIP Help Sound Source Localization?', zero-shot visual sound localization.<br><br>
 To use it simply upload an image and corresponding audio to mask (identify in the image), or use one of the examples below and click ‘submit’.<br><br>
-Results will show up in a few seconds.
 </p>"""
 article = "<p style='text-align: center'><a href='https://arxiv.org/abs/2311.04066'>Can CLIP Help Sound Source Localization?</a> | <a href='https://github.com/swimmiing/ACL-SSL'>Offical Github repo</a></p>"
 examples = [['./asset/web_image1.jpeg', './asset/web_dog_barking.wav'],
             ['./asset/web_image1.jpeg', './asset/web_childs_laugh.wav'],
             ['./asset/web_image1.jpeg', './asset/web_motorcycle_pass_by.wav'],
             ['./asset/web_image2.jpeg', './asset/web_dog_barking.wav'],
             ['./asset/web_image2.jpeg', './asset/web_female_speech.wav'],

 description = """<p>
 This is a simple demo of our WACV'24 paper 'Can CLIP Help Sound Source Localization?', zero-shot visual sound localization.<br><br>
 To use it simply upload an image and corresponding audio to mask (identify in the image), or use one of the examples below and click ‘submit’.<br><br>
+Results will show up in a few seconds. <br><br>
+It is recommended to use audio sources with a sample rate of 16 kHz or higher, and the model does not utilize audio beyond the initial 10 seconds.
 </p>"""
 article = "<p style='text-align: center'><a href='https://arxiv.org/abs/2311.04066'>Can CLIP Help Sound Source Localization?</a> | <a href='https://github.com/swimmiing/ACL-SSL'>Offical Github repo</a></p>"
 examples = [['./asset/web_image1.jpeg', './asset/web_dog_barking.wav'],
             ['./asset/web_image1.jpeg', './asset/web_childs_laugh.wav'],
+            ['./asset/web_image1.jpeg', './asset/web_car_horns.wav'],
             ['./asset/web_image1.jpeg', './asset/web_motorcycle_pass_by.wav'],
             ['./asset/web_image2.jpeg', './asset/web_dog_barking.wav'],
             ['./asset/web_image2.jpeg', './asset/web_female_speech.wav'],