Spaces:
Running
Running
Add more examples v3
Browse files
app.py
CHANGED
@@ -81,13 +81,15 @@ title = "Audio-Grounded Contrastive Learning"
|
|
81 |
description = """<p>
|
82 |
This is a simple demo of our WACV'24 paper 'Can CLIP Help Sound Source Localization?', zero-shot visual sound localization.<br><br>
|
83 |
To use it, simply upload an image and corresponding audio to mask (identify in the image), or use one of the examples below and click ‘submit’.<br><br>
|
84 |
-
Results will show up in a few seconds.
|
|
|
85 |
</p>"""
|
86 |
|
87 |
# HTML snippet with centered links to the arXiv paper and the GitHub repository.
article = "<p style='text-align: center'><a href='https://arxiv.org/abs/2311.04066'>Can CLIP Help Sound Source Localization?</a> | <a href='https://github.com/swimmiing/ACL-SSL'>Official Github repo</a></p>"
|
88 |
|
89 |
examples = [['./asset/web_image1.jpeg', './asset/web_dog_barking.wav'],
|
90 |
['./asset/web_image1.jpeg', './asset/web_childs_laugh.wav'],
|
|
|
91 |
['./asset/web_image1.jpeg', './asset/web_motorcycle_pass_by.wav'],
|
92 |
['./asset/web_image2.jpeg', './asset/web_dog_barking.wav'],
|
93 |
['./asset/web_image2.jpeg', './asset/web_female_speech.wav'],
|
|
|
81 |
description = """<p>
|
82 |
This is a simple demo of our WACV'24 paper 'Can CLIP Help Sound Source Localization?', zero-shot visual sound localization.<br><br>
|
83 |
To use it, simply upload an image and corresponding audio to mask (identify in the image), or use one of the examples below and click ‘submit’.<br><br>
|
84 |
+
Results will show up in a few seconds. <br><br>
|
85 |
+
It is recommended to use audio sources with a sample rate of 16 kHz or higher, and the model does not utilize audio beyond the initial 10 seconds.
|
86 |
</p>"""
|
87 |
|
88 |
# HTML snippet with centered links to the arXiv paper and the GitHub repository.
article = "<p style='text-align: center'><a href='https://arxiv.org/abs/2311.04066'>Can CLIP Help Sound Source Localization?</a> | <a href='https://github.com/swimmiing/ACL-SSL'>Official Github repo</a></p>"
|
89 |
|
90 |
examples = [['./asset/web_image1.jpeg', './asset/web_dog_barking.wav'],
|
91 |
['./asset/web_image1.jpeg', './asset/web_childs_laugh.wav'],
|
92 |
+
['./asset/web_image1.jpeg', './asset/web_car_horns.wav'],
|
93 |
['./asset/web_image1.jpeg', './asset/web_motorcycle_pass_by.wav'],
|
94 |
['./asset/web_image2.jpeg', './asset/web_dog_barking.wav'],
|
95 |
['./asset/web_image2.jpeg', './asset/web_female_speech.wav'],
|