|
{
|
|
"architectures": [
|
|
"ASTForAudioClassification"
|
|
],
|
|
"attention_probs_dropout_prob": 0.0,
|
|
"frequency_stride": 16,
|
|
"hidden_act": "gelu",
|
|
"hidden_dropout_prob": 0.0,
|
|
"hidden_size": 192,
|
|
"id2label": {
|
|
"0": "Speech",
|
|
"1": "Male speech, man speaking",
|
|
"2": "Female speech, woman speaking",
|
|
"3": "Child speech, kid speaking",
|
|
"4": "Conversation",
|
|
"5": "Narration, monologue",
|
|
"6": "Babbling",
|
|
"7": "Speech synthesizer",
|
|
"8": "Shout",
|
|
"9": "Bellow",
|
|
"10": "Whoop",
|
|
"11": "Yell",
|
|
"12": "Battle cry",
|
|
"13": "Children shouting",
|
|
"14": "Screaming",
|
|
"15": "Whispering",
|
|
"16": "Laughter",
|
|
"17": "Baby laughter",
|
|
"18": "Giggle",
|
|
"19": "Snicker",
|
|
"20": "Belly laugh",
|
|
"21": "Chuckle, chortle",
|
|
"22": "Crying, sobbing",
|
|
"23": "Baby cry, infant cry",
|
|
"24": "Whimper",
|
|
"25": "Wail, moan",
|
|
"26": "Sigh",
|
|
"27": "Singing",
|
|
"28": "Choir",
|
|
"29": "Yodeling",
|
|
"30": "Chant",
|
|
"31": "Mantra",
|
|
"32": "Male singing",
|
|
"33": "Female singing",
|
|
"34": "Child singing",
|
|
"35": "Synthetic singing",
|
|
"36": "Rapping",
|
|
"37": "Humming",
|
|
"38": "Groan",
|
|
"39": "Grunt",
|
|
"40": "Whistling",
|
|
"41": "Breathing",
|
|
"42": "Wheeze",
|
|
"43": "Snoring",
|
|
"44": "Gasp",
|
|
"45": "Pant",
|
|
"46": "Snort",
|
|
"47": "Cough",
|
|
"48": "Throat clearing",
|
|
"49": "Sneeze",
|
|
"50": "Sniff",
|
|
"51": "Run",
|
|
"52": "Shuffle",
|
|
"53": "Walk, footsteps",
|
|
"54": "Chewing, mastication",
|
|
"55": "Biting",
|
|
"56": "Gargling",
|
|
"57": "Stomach rumble",
|
|
"58": "Burping, eructation",
|
|
"59": "Hiccup",
|
|
"60": "Fart",
|
|
"61": "Hands",
|
|
"62": "Finger snapping",
|
|
"63": "Clapping",
|
|
"64": "Heart sounds, heartbeat",
|
|
"65": "Heart murmur",
|
|
"66": "Cheering",
|
|
"67": "Applause",
|
|
"68": "Chatter",
|
|
"69": "Crowd",
|
|
"70": "Hubbub, speech noise, speech babble",
|
|
"71": "Children playing",
|
|
"72": "Animal",
|
|
"73": "Domestic animals, pets",
|
|
"74": "Dog",
|
|
"75": "Bark",
|
|
"76": "Yip",
|
|
"77": "Howl",
|
|
"78": "Bow-wow",
|
|
"79": "Growling",
|
|
"80": "Whimper (dog)",
|
|
"81": "Cat",
|
|
"82": "Purr",
|
|
"83": "Meow",
|
|
"84": "Hiss",
|
|
"85": "Caterwaul",
|
|
"86": "Livestock, farm animals, working animals",
|
|
"87": "Horse",
|
|
"88": "Clip-clop",
|
|
"89": "Neigh, whinny",
|
|
"90": "Cattle, bovinae",
|
|
"91": "Moo",
|
|
"92": "Cowbell",
|
|
"93": "Pig",
|
|
"94": "Oink",
|
|
"95": "Goat",
|
|
"96": "Bleat",
|
|
"97": "Sheep",
|
|
"98": "Fowl",
|
|
"99": "Chicken, rooster",
|
|
"100": "Cluck",
|
|
"101": "Crowing, cock-a-doodle-doo",
|
|
"102": "Turkey",
|
|
"103": "Gobble",
|
|
"104": "Duck",
|
|
"105": "Quack",
|
|
"106": "Goose",
|
|
"107": "Honk",
|
|
"108": "Wild animals",
|
|
"109": "Roaring cats (lions, tigers)",
|
|
"110": "Roar",
|
|
"111": "Bird",
|
|
"112": "Bird vocalization, bird call, bird song",
|
|
"113": "Chirp, tweet",
|
|
"114": "Squawk",
|
|
"115": "Pigeon, dove",
|
|
"116": "Coo",
|
|
"117": "Crow",
|
|
"118": "Caw",
|
|
"119": "Owl",
|
|
"120": "Hoot",
|
|
"121": "Bird flight, flapping wings",
|
|
"122": "Canidae, dogs, wolves",
|
|
"123": "Rodents, rats, mice",
|
|
"124": "Mouse",
|
|
"125": "Patter",
|
|
"126": "Insect",
|
|
"127": "Cricket",
|
|
"128": "Mosquito",
|
|
"129": "Fly, housefly",
|
|
"130": "Buzz",
|
|
"131": "Bee, wasp, etc.",
|
|
"132": "Frog",
|
|
"133": "Croak",
|
|
"134": "Snake",
|
|
"135": "Rattle",
|
|
"136": "Whale vocalization",
|
|
"137": "Music",
|
|
"138": "Musical instrument",
|
|
"139": "Plucked string instrument",
|
|
"140": "Guitar",
|
|
"141": "Electric guitar",
|
|
"142": "Bass guitar",
|
|
"143": "Acoustic guitar",
|
|
"144": "Steel guitar, slide guitar",
|
|
"145": "Tapping (guitar technique)",
|
|
"146": "Strum",
|
|
"147": "Banjo",
|
|
"148": "Sitar",
|
|
"149": "Mandolin",
|
|
"150": "Zither",
|
|
"151": "Ukulele",
|
|
"152": "Keyboard (musical)",
|
|
"153": "Piano",
|
|
"154": "Electric piano",
|
|
"155": "Organ",
|
|
"156": "Electronic organ",
|
|
"157": "Hammond organ",
|
|
"158": "Synthesizer",
|
|
"159": "Sampler",
|
|
"160": "Harpsichord",
|
|
"161": "Percussion",
|
|
"162": "Drum kit",
|
|
"163": "Drum machine",
|
|
"164": "Drum",
|
|
"165": "Snare drum",
|
|
"166": "Rimshot",
|
|
"167": "Drum roll",
|
|
"168": "Bass drum",
|
|
"169": "Timpani",
|
|
"170": "Tabla",
|
|
"171": "Cymbal",
|
|
"172": "Hi-hat",
|
|
"173": "Wood block",
|
|
"174": "Tambourine",
|
|
"175": "Rattle (instrument)",
|
|
"176": "Maraca",
|
|
"177": "Gong",
|
|
"178": "Tubular bells",
|
|
"179": "Mallet percussion",
|
|
"180": "Marimba, xylophone",
|
|
"181": "Glockenspiel",
|
|
"182": "Vibraphone",
|
|
"183": "Steelpan",
|
|
"184": "Orchestra",
|
|
"185": "Brass instrument",
|
|
"186": "French horn",
|
|
"187": "Trumpet",
|
|
"188": "Trombone",
|
|
"189": "Bowed string instrument",
|
|
"190": "String section",
|
|
"191": "Violin, fiddle",
|
|
"192": "Pizzicato",
|
|
"193": "Cello",
|
|
"194": "Double bass",
|
|
"195": "Wind instrument, woodwind instrument",
|
|
"196": "Flute",
|
|
"197": "Saxophone",
|
|
"198": "Clarinet",
|
|
"199": "Harp",
|
|
"200": "Bell",
|
|
"201": "Church bell",
|
|
"202": "Jingle bell",
|
|
"203": "Bicycle bell",
|
|
"204": "Tuning fork",
|
|
"205": "Chime",
|
|
"206": "Wind chime",
|
|
"207": "Change ringing (campanology)",
|
|
"208": "Harmonica",
|
|
"209": "Accordion",
|
|
"210": "Bagpipes",
|
|
"211": "Didgeridoo",
|
|
"212": "Shofar",
|
|
"213": "Theremin",
|
|
"214": "Singing bowl",
|
|
"215": "Scratching (performance technique)",
|
|
"216": "Pop music",
|
|
"217": "Hip hop music",
|
|
"218": "Beatboxing",
|
|
"219": "Rock music",
|
|
"220": "Heavy metal",
|
|
"221": "Punk rock",
|
|
"222": "Grunge",
|
|
"223": "Progressive rock",
|
|
"224": "Rock and roll",
|
|
"225": "Psychedelic rock",
|
|
"226": "Rhythm and blues",
|
|
"227": "Soul music",
|
|
"228": "Reggae",
|
|
"229": "Country",
|
|
"230": "Swing music",
|
|
"231": "Bluegrass",
|
|
"232": "Funk",
|
|
"233": "Folk music",
|
|
"234": "Middle Eastern music",
|
|
"235": "Jazz",
|
|
"236": "Disco",
|
|
"237": "Classical music",
|
|
"238": "Opera",
|
|
"239": "Electronic music",
|
|
"240": "House music",
|
|
"241": "Techno",
|
|
"242": "Dubstep",
|
|
"243": "Drum and bass",
|
|
"244": "Electronica",
|
|
"245": "Electronic dance music",
|
|
"246": "Ambient music",
|
|
"247": "Trance music",
|
|
"248": "Music of Latin America",
|
|
"249": "Salsa music",
|
|
"250": "Flamenco",
|
|
"251": "Blues",
|
|
"252": "Music for children",
|
|
"253": "New-age music",
|
|
"254": "Vocal music",
|
|
"255": "A capella",
|
|
"256": "Music of Africa",
|
|
"257": "Afrobeat",
|
|
"258": "Christian music",
|
|
"259": "Gospel music",
|
|
"260": "Music of Asia",
|
|
"261": "Carnatic music",
|
|
"262": "Music of Bollywood",
|
|
"263": "Ska",
|
|
"264": "Traditional music",
|
|
"265": "Independent music",
|
|
"266": "Song",
|
|
"267": "Background music",
|
|
"268": "Theme music",
|
|
"269": "Jingle (music)",
|
|
"270": "Soundtrack music",
|
|
"271": "Lullaby",
|
|
"272": "Video game music",
|
|
"273": "Christmas music",
|
|
"274": "Dance music",
|
|
"275": "Wedding music",
|
|
"276": "Happy music",
|
|
"277": "Funny music",
|
|
"278": "Sad music",
|
|
"279": "Tender music",
|
|
"280": "Exciting music",
|
|
"281": "Angry music",
|
|
"282": "Scary music",
|
|
"283": "Wind",
|
|
"284": "Rustling leaves",
|
|
"285": "Wind noise (microphone)",
|
|
"286": "Thunderstorm",
|
|
"287": "Thunder",
|
|
"288": "Water",
|
|
"289": "Rain",
|
|
"290": "Raindrop",
|
|
"291": "Rain on surface",
|
|
"292": "Stream",
|
|
"293": "Waterfall",
|
|
"294": "Ocean",
|
|
"295": "Waves, surf",
|
|
"296": "Steam",
|
|
"297": "Gurgling",
|
|
"298": "Fire",
|
|
"299": "Crackle",
|
|
"300": "Vehicle",
|
|
"301": "Boat, Water vehicle",
|
|
"302": "Sailboat, sailing ship",
|
|
"303": "Rowboat, canoe, kayak",
|
|
"304": "Motorboat, speedboat",
|
|
"305": "Ship",
|
|
"306": "Motor vehicle (road)",
|
|
"307": "Car",
|
|
"308": "Vehicle horn, car horn, honking",
|
|
"309": "Toot",
|
|
"310": "Car alarm",
|
|
"311": "Power windows, electric windows",
|
|
"312": "Skidding",
|
|
"313": "Tire squeal",
|
|
"314": "Car passing by",
|
|
"315": "Race car, auto racing",
|
|
"316": "Truck",
|
|
"317": "Air brake",
|
|
"318": "Air horn, truck horn",
|
|
"319": "Reversing beeps",
|
|
"320": "Ice cream truck, ice cream van",
|
|
"321": "Bus",
|
|
"322": "Emergency vehicle",
|
|
"323": "Police car (siren)",
|
|
"324": "Ambulance (siren)",
|
|
"325": "Fire engine, fire truck (siren)",
|
|
"326": "Motorcycle",
|
|
"327": "Traffic noise, roadway noise",
|
|
"328": "Rail transport",
|
|
"329": "Train",
|
|
"330": "Train whistle",
|
|
"331": "Train horn",
|
|
"332": "Railroad car, train wagon",
|
|
"333": "Train wheels squealing",
|
|
"334": "Subway, metro, underground",
|
|
"335": "Aircraft",
|
|
"336": "Aircraft engine",
|
|
"337": "Jet engine",
|
|
"338": "Propeller, airscrew",
|
|
"339": "Helicopter",
|
|
"340": "Fixed-wing aircraft, airplane",
|
|
"341": "Bicycle",
|
|
"342": "Skateboard",
|
|
"343": "Engine",
|
|
"344": "Light engine (high frequency)",
|
|
"345": "Dental drill, dentist's drill",
|
|
"346": "Lawn mower",
|
|
"347": "Chainsaw",
|
|
"348": "Medium engine (mid frequency)",
|
|
"349": "Heavy engine (low frequency)",
|
|
"350": "Engine knocking",
|
|
"351": "Engine starting",
|
|
"352": "Idling",
|
|
"353": "Accelerating, revving, vroom",
|
|
"354": "Door",
|
|
"355": "Doorbell",
|
|
"356": "Ding-dong",
|
|
"357": "Sliding door",
|
|
"358": "Slam",
|
|
"359": "Knock",
|
|
"360": "Tap",
|
|
"361": "Squeak",
|
|
"362": "Cupboard open or close",
|
|
"363": "Drawer open or close",
|
|
"364": "Dishes, pots, and pans",
|
|
"365": "Cutlery, silverware",
|
|
"366": "Chopping (food)",
|
|
"367": "Frying (food)",
|
|
"368": "Microwave oven",
|
|
"369": "Blender",
|
|
"370": "Water tap, faucet",
|
|
"371": "Sink (filling or washing)",
|
|
"372": "Bathtub (filling or washing)",
|
|
"373": "Hair dryer",
|
|
"374": "Toilet flush",
|
|
"375": "Toothbrush",
|
|
"376": "Electric toothbrush",
|
|
"377": "Vacuum cleaner",
|
|
"378": "Zipper (clothing)",
|
|
"379": "Keys jangling",
|
|
"380": "Coin (dropping)",
|
|
"381": "Scissors",
|
|
"382": "Electric shaver, electric razor",
|
|
"383": "Shuffling cards",
|
|
"384": "Typing",
|
|
"385": "Typewriter",
|
|
"386": "Computer keyboard",
|
|
"387": "Writing",
|
|
"388": "Alarm",
|
|
"389": "Telephone",
|
|
"390": "Telephone bell ringing",
|
|
"391": "Ringtone",
|
|
"392": "Telephone dialing, DTMF",
|
|
"393": "Dial tone",
|
|
"394": "Busy signal",
|
|
"395": "Alarm clock",
|
|
"396": "Siren",
|
|
"397": "Civil defense siren",
|
|
"398": "Buzzer",
|
|
"399": "Smoke detector, smoke alarm",
|
|
"400": "Fire alarm",
|
|
"401": "Foghorn",
|
|
"402": "Whistle",
|
|
"403": "Steam whistle",
|
|
"404": "Mechanisms",
|
|
"405": "Ratchet, pawl",
|
|
"406": "Clock",
|
|
"407": "Tick",
|
|
"408": "Tick-tock",
|
|
"409": "Gears",
|
|
"410": "Pulleys",
|
|
"411": "Sewing machine",
|
|
"412": "Mechanical fan",
|
|
"413": "Air conditioning",
|
|
"414": "Cash register",
|
|
"415": "Printer",
|
|
"416": "Camera",
|
|
"417": "Single-lens reflex camera",
|
|
"418": "Tools",
|
|
"419": "Hammer",
|
|
"420": "Jackhammer",
|
|
"421": "Sawing",
|
|
"422": "Filing (rasp)",
|
|
"423": "Sanding",
|
|
"424": "Power tool",
|
|
"425": "Drill",
|
|
"426": "Explosion",
|
|
"427": "Gunshot, gunfire",
|
|
"428": "Machine gun",
|
|
"429": "Fusillade",
|
|
"430": "Artillery fire",
|
|
"431": "Cap gun",
|
|
"432": "Fireworks",
|
|
"433": "Firecracker",
|
|
"434": "Burst, pop",
|
|
"435": "Eruption",
|
|
"436": "Boom",
|
|
"437": "Wood",
|
|
"438": "Chop",
|
|
"439": "Splinter",
|
|
"440": "Crack",
|
|
"441": "Glass",
|
|
"442": "Chink, clink",
|
|
"443": "Shatter",
|
|
"444": "Liquid",
|
|
"445": "Splash, splatter",
|
|
"446": "Slosh",
|
|
"447": "Squish",
|
|
"448": "Drip",
|
|
"449": "Pour",
|
|
"450": "Trickle, dribble",
|
|
"451": "Gush",
|
|
"452": "Fill (with liquid)",
|
|
"453": "Spray",
|
|
"454": "Pump (liquid)",
|
|
"455": "Stir",
|
|
"456": "Boiling",
|
|
"457": "Sonar",
|
|
"458": "Arrow",
|
|
"459": "Whoosh, swoosh, swish",
|
|
"460": "Thump, thud",
|
|
"461": "Thunk",
|
|
"462": "Electronic tuner",
|
|
"463": "Effects unit",
|
|
"464": "Chorus effect",
|
|
"465": "Basketball bounce",
|
|
"466": "Bang",
|
|
"467": "Slap, smack",
|
|
"468": "Whack, thwack",
|
|
"469": "Smash, crash",
|
|
"470": "Breaking",
|
|
"471": "Bouncing",
|
|
"472": "Whip",
|
|
"473": "Flap",
|
|
"474": "Scratch",
|
|
"475": "Scrape",
|
|
"476": "Rub",
|
|
"477": "Roll",
|
|
"478": "Crushing",
|
|
"479": "Crumpling, crinkling",
|
|
"480": "Tearing",
|
|
"481": "Beep, bleep",
|
|
"482": "Ping",
|
|
"483": "Ding",
|
|
"484": "Clang",
|
|
"485": "Squeal",
|
|
"486": "Creak",
|
|
"487": "Rustle",
|
|
"488": "Whir",
|
|
"489": "Clatter",
|
|
"490": "Sizzle",
|
|
"491": "Clicking",
|
|
"492": "Clickety-clack",
|
|
"493": "Rumble",
|
|
"494": "Plop",
|
|
"495": "Jingle, tinkle",
|
|
"496": "Hum",
|
|
"497": "Zing",
|
|
"498": "Boing",
|
|
"499": "Crunch",
|
|
"500": "Silence",
|
|
"501": "Sine wave",
|
|
"502": "Harmonic",
|
|
"503": "Chirp tone",
|
|
"504": "Sound effect",
|
|
"505": "Pulse",
|
|
"506": "Inside, small room",
|
|
"507": "Inside, large room or hall",
|
|
"508": "Inside, public space",
|
|
"509": "Outside, urban or manmade",
|
|
"510": "Outside, rural or natural",
|
|
"511": "Reverberation",
|
|
"512": "Echo",
|
|
"513": "Noise",
|
|
"514": "Environmental noise",
|
|
"515": "Static",
|
|
"516": "Mains hum",
|
|
"517": "Distortion",
|
|
"518": "Sidetone",
|
|
"519": "Cacophony",
|
|
"520": "White noise",
|
|
"521": "Pink noise",
|
|
"522": "Throbbing",
|
|
"523": "Vibration",
|
|
"524": "Television",
|
|
"525": "Radio",
|
|
"526": "Field recording"
|
|
},
|
|
"initializer_range": 0.02,
|
|
"intermediate_size": 768,
|
|
"label2id": {
|
|
"A capella": 255,
|
|
"Accelerating, revving, vroom": 353,
|
|
"Accordion": 209,
|
|
"Acoustic guitar": 143,
|
|
"Afrobeat": 257,
|
|
"Air brake": 317,
|
|
"Air conditioning": 413,
|
|
"Air horn, truck horn": 318,
|
|
"Aircraft": 335,
|
|
"Aircraft engine": 336,
|
|
"Alarm": 388,
|
|
"Alarm clock": 395,
|
|
"Ambient music": 246,
|
|
"Ambulance (siren)": 324,
|
|
"Angry music": 281,
|
|
"Animal": 72,
|
|
"Applause": 67,
|
|
"Arrow": 458,
|
|
"Artillery fire": 430,
|
|
"Babbling": 6,
|
|
"Baby cry, infant cry": 23,
|
|
"Baby laughter": 17,
|
|
"Background music": 267,
|
|
"Bagpipes": 210,
|
|
"Bang": 466,
|
|
"Banjo": 147,
|
|
"Bark": 75,
|
|
"Basketball bounce": 465,
|
|
"Bass drum": 168,
|
|
"Bass guitar": 142,
|
|
"Bathtub (filling or washing)": 372,
|
|
"Battle cry": 12,
|
|
"Beatboxing": 218,
|
|
"Bee, wasp, etc.": 131,
|
|
"Beep, bleep": 481,
|
|
"Bell": 200,
|
|
"Bellow": 9,
|
|
"Belly laugh": 20,
|
|
"Bicycle": 341,
|
|
"Bicycle bell": 203,
|
|
"Bird": 111,
|
|
"Bird flight, flapping wings": 121,
|
|
"Bird vocalization, bird call, bird song": 112,
|
|
"Biting": 55,
|
|
"Bleat": 96,
|
|
"Blender": 369,
|
|
"Bluegrass": 231,
|
|
"Blues": 251,
|
|
"Boat, Water vehicle": 301,
|
|
"Boiling": 456,
|
|
"Boing": 498,
|
|
"Boom": 436,
|
|
"Bouncing": 471,
|
|
"Bow-wow": 78,
|
|
"Bowed string instrument": 189,
|
|
"Brass instrument": 185,
|
|
"Breaking": 470,
|
|
"Breathing": 41,
|
|
"Burping, eructation": 58,
|
|
"Burst, pop": 434,
|
|
"Bus": 321,
|
|
"Busy signal": 394,
|
|
"Buzz": 130,
|
|
"Buzzer": 398,
|
|
"Cacophony": 519,
|
|
"Camera": 416,
|
|
"Canidae, dogs, wolves": 122,
|
|
"Cap gun": 431,
|
|
"Car": 307,
|
|
"Car alarm": 310,
|
|
"Car passing by": 314,
|
|
"Carnatic music": 261,
|
|
"Cash register": 414,
|
|
"Cat": 81,
|
|
"Caterwaul": 85,
|
|
"Cattle, bovinae": 90,
|
|
"Caw": 118,
|
|
"Cello": 193,
|
|
"Chainsaw": 347,
|
|
"Change ringing (campanology)": 207,
|
|
"Chant": 30,
|
|
"Chatter": 68,
|
|
"Cheering": 66,
|
|
"Chewing, mastication": 54,
|
|
"Chicken, rooster": 99,
|
|
"Child singing": 34,
|
|
"Child speech, kid speaking": 3,
|
|
"Children playing": 71,
|
|
"Children shouting": 13,
|
|
"Chime": 205,
|
|
"Chink, clink": 442,
|
|
"Chirp tone": 503,
|
|
"Chirp, tweet": 113,
|
|
"Choir": 28,
|
|
"Chop": 438,
|
|
"Chopping (food)": 366,
|
|
"Chorus effect": 464,
|
|
"Christian music": 258,
|
|
"Christmas music": 273,
|
|
"Chuckle, chortle": 21,
|
|
"Church bell": 201,
|
|
"Civil defense siren": 397,
|
|
"Clang": 484,
|
|
"Clapping": 63,
|
|
"Clarinet": 198,
|
|
"Classical music": 237,
|
|
"Clatter": 489,
|
|
"Clickety-clack": 492,
|
|
"Clicking": 491,
|
|
"Clip-clop": 88,
|
|
"Clock": 406,
|
|
"Cluck": 100,
|
|
"Coin (dropping)": 380,
|
|
"Computer keyboard": 386,
|
|
"Conversation": 4,
|
|
"Coo": 116,
|
|
"Cough": 47,
|
|
"Country": 229,
|
|
"Cowbell": 92,
|
|
"Crack": 440,
|
|
"Crackle": 299,
|
|
"Creak": 486,
|
|
"Cricket": 127,
|
|
"Croak": 133,
|
|
"Crow": 117,
|
|
"Crowd": 69,
|
|
"Crowing, cock-a-doodle-doo": 101,
|
|
"Crumpling, crinkling": 479,
|
|
"Crunch": 499,
|
|
"Crushing": 478,
|
|
"Crying, sobbing": 22,
|
|
"Cupboard open or close": 362,
|
|
"Cutlery, silverware": 365,
|
|
"Cymbal": 171,
|
|
"Dance music": 274,
|
|
"Dental drill, dentist's drill": 345,
|
|
"Dial tone": 393,
|
|
"Didgeridoo": 211,
|
|
"Ding": 483,
|
|
"Ding-dong": 356,
|
|
"Disco": 236,
|
|
"Dishes, pots, and pans": 364,
|
|
"Distortion": 517,
|
|
"Dog": 74,
|
|
"Domestic animals, pets": 73,
|
|
"Door": 354,
|
|
"Doorbell": 355,
|
|
"Double bass": 194,
|
|
"Drawer open or close": 363,
|
|
"Drill": 425,
|
|
"Drip": 448,
|
|
"Drum": 164,
|
|
"Drum and bass": 243,
|
|
"Drum kit": 162,
|
|
"Drum machine": 163,
|
|
"Drum roll": 167,
|
|
"Dubstep": 242,
|
|
"Duck": 104,
|
|
"Echo": 512,
|
|
"Effects unit": 463,
|
|
"Electric guitar": 141,
|
|
"Electric piano": 154,
|
|
"Electric shaver, electric razor": 382,
|
|
"Electric toothbrush": 376,
|
|
"Electronic dance music": 245,
|
|
"Electronic music": 239,
|
|
"Electronic organ": 156,
|
|
"Electronic tuner": 462,
|
|
"Electronica": 244,
|
|
"Emergency vehicle": 322,
|
|
"Engine": 343,
|
|
"Engine knocking": 350,
|
|
"Engine starting": 351,
|
|
"Environmental noise": 514,
|
|
"Eruption": 435,
|
|
"Exciting music": 280,
|
|
"Explosion": 426,
|
|
"Fart": 60,
|
|
"Female singing": 33,
|
|
"Female speech, woman speaking": 2,
|
|
"Field recording": 526,
|
|
"Filing (rasp)": 422,
|
|
"Fill (with liquid)": 452,
|
|
"Finger snapping": 62,
|
|
"Fire": 298,
|
|
"Fire alarm": 400,
|
|
"Fire engine, fire truck (siren)": 325,
|
|
"Firecracker": 433,
|
|
"Fireworks": 432,
|
|
"Fixed-wing aircraft, airplane": 340,
|
|
"Flamenco": 250,
|
|
"Flap": 473,
|
|
"Flute": 196,
|
|
"Fly, housefly": 129,
|
|
"Foghorn": 401,
|
|
"Folk music": 233,
|
|
"Fowl": 98,
|
|
"French horn": 186,
|
|
"Frog": 132,
|
|
"Frying (food)": 367,
|
|
"Funk": 232,
|
|
"Funny music": 277,
|
|
"Fusillade": 429,
|
|
"Gargling": 56,
|
|
"Gasp": 44,
|
|
"Gears": 409,
|
|
"Giggle": 18,
|
|
"Glass": 441,
|
|
"Glockenspiel": 181,
|
|
"Goat": 95,
|
|
"Gobble": 103,
|
|
"Gong": 177,
|
|
"Goose": 106,
|
|
"Gospel music": 259,
|
|
"Groan": 38,
|
|
"Growling": 79,
|
|
"Grunge": 222,
|
|
"Grunt": 39,
|
|
"Guitar": 140,
|
|
"Gunshot, gunfire": 427,
|
|
"Gurgling": 297,
|
|
"Gush": 451,
|
|
"Hair dryer": 373,
|
|
"Hammer": 419,
|
|
"Hammond organ": 157,
|
|
"Hands": 61,
|
|
"Happy music": 276,
|
|
"Harmonic": 502,
|
|
"Harmonica": 208,
|
|
"Harp": 199,
|
|
"Harpsichord": 160,
|
|
"Heart murmur": 65,
|
|
"Heart sounds, heartbeat": 64,
|
|
"Heavy engine (low frequency)": 349,
|
|
"Heavy metal": 220,
|
|
"Helicopter": 339,
|
|
"Hi-hat": 172,
|
|
"Hiccup": 59,
|
|
"Hip hop music": 217,
|
|
"Hiss": 84,
|
|
"Honk": 107,
|
|
"Hoot": 120,
|
|
"Horse": 87,
|
|
"House music": 240,
|
|
"Howl": 77,
|
|
"Hubbub, speech noise, speech babble": 70,
|
|
"Hum": 496,
|
|
"Humming": 37,
|
|
"Ice cream truck, ice cream van": 320,
|
|
"Idling": 352,
|
|
"Independent music": 265,
|
|
"Insect": 126,
|
|
"Inside, large room or hall": 507,
|
|
"Inside, public space": 508,
|
|
"Inside, small room": 506,
|
|
"Jackhammer": 420,
|
|
"Jazz": 235,
|
|
"Jet engine": 337,
|
|
"Jingle (music)": 269,
|
|
"Jingle bell": 202,
|
|
"Jingle, tinkle": 495,
|
|
"Keyboard (musical)": 152,
|
|
"Keys jangling": 379,
|
|
"Knock": 359,
|
|
"Laughter": 16,
|
|
"Lawn mower": 346,
|
|
"Light engine (high frequency)": 344,
|
|
"Liquid": 444,
|
|
"Livestock, farm animals, working animals": 86,
|
|
"Lullaby": 271,
|
|
"Machine gun": 428,
|
|
"Mains hum": 516,
|
|
"Male singing": 32,
|
|
"Male speech, man speaking": 1,
|
|
"Mallet percussion": 179,
|
|
"Mandolin": 149,
|
|
"Mantra": 31,
|
|
"Maraca": 176,
|
|
"Marimba, xylophone": 180,
|
|
"Mechanical fan": 412,
|
|
"Mechanisms": 404,
|
|
"Medium engine (mid frequency)": 348,
|
|
"Meow": 83,
|
|
"Microwave oven": 368,
|
|
"Middle Eastern music": 234,
|
|
"Moo": 91,
|
|
"Mosquito": 128,
|
|
"Motor vehicle (road)": 306,
|
|
"Motorboat, speedboat": 304,
|
|
"Motorcycle": 326,
|
|
"Mouse": 124,
|
|
"Music": 137,
|
|
"Music for children": 252,
|
|
"Music of Africa": 256,
|
|
"Music of Asia": 260,
|
|
"Music of Bollywood": 262,
|
|
"Music of Latin America": 248,
|
|
"Musical instrument": 138,
|
|
"Narration, monologue": 5,
|
|
"Neigh, whinny": 89,
|
|
"New-age music": 253,
|
|
"Noise": 513,
|
|
"Ocean": 294,
|
|
"Oink": 94,
|
|
"Opera": 238,
|
|
"Orchestra": 184,
|
|
"Organ": 155,
|
|
"Outside, rural or natural": 510,
|
|
"Outside, urban or manmade": 509,
|
|
"Owl": 119,
|
|
"Pant": 45,
|
|
"Patter": 125,
|
|
"Percussion": 161,
|
|
"Piano": 153,
|
|
"Pig": 93,
|
|
"Pigeon, dove": 115,
|
|
"Ping": 482,
|
|
"Pink noise": 521,
|
|
"Pizzicato": 192,
|
|
"Plop": 494,
|
|
"Plucked string instrument": 139,
|
|
"Police car (siren)": 323,
|
|
"Pop music": 216,
|
|
"Pour": 449,
|
|
"Power tool": 424,
|
|
"Power windows, electric windows": 311,
|
|
"Printer": 415,
|
|
"Progressive rock": 223,
|
|
"Propeller, airscrew": 338,
|
|
"Psychedelic rock": 225,
|
|
"Pulleys": 410,
|
|
"Pulse": 505,
|
|
"Pump (liquid)": 454,
|
|
"Punk rock": 221,
|
|
"Purr": 82,
|
|
"Quack": 105,
|
|
"Race car, auto racing": 315,
|
|
"Radio": 525,
|
|
"Rail transport": 328,
|
|
"Railroad car, train wagon": 332,
|
|
"Rain": 289,
|
|
"Rain on surface": 291,
|
|
"Raindrop": 290,
|
|
"Rapping": 36,
|
|
"Ratchet, pawl": 405,
|
|
"Rattle": 135,
|
|
"Rattle (instrument)": 175,
|
|
"Reggae": 228,
|
|
"Reverberation": 511,
|
|
"Reversing beeps": 319,
|
|
"Rhythm and blues": 226,
|
|
"Rimshot": 166,
|
|
"Ringtone": 391,
|
|
"Roar": 110,
|
|
"Roaring cats (lions, tigers)": 109,
|
|
"Rock and roll": 224,
|
|
"Rock music": 219,
|
|
"Rodents, rats, mice": 123,
|
|
"Roll": 477,
|
|
"Rowboat, canoe, kayak": 303,
|
|
"Rub": 476,
|
|
"Rumble": 493,
|
|
"Run": 51,
|
|
"Rustle": 487,
|
|
"Rustling leaves": 284,
|
|
"Sad music": 278,
|
|
"Sailboat, sailing ship": 302,
|
|
"Salsa music": 249,
|
|
"Sampler": 159,
|
|
"Sanding": 423,
|
|
"Sawing": 421,
|
|
"Saxophone": 197,
|
|
"Scary music": 282,
|
|
"Scissors": 381,
|
|
"Scrape": 475,
|
|
"Scratch": 474,
|
|
"Scratching (performance technique)": 215,
|
|
"Screaming": 14,
|
|
"Sewing machine": 411,
|
|
"Shatter": 443,
|
|
"Sheep": 97,
|
|
"Ship": 305,
|
|
"Shofar": 212,
|
|
"Shout": 8,
|
|
"Shuffle": 52,
|
|
"Shuffling cards": 383,
|
|
"Sidetone": 518,
|
|
"Sigh": 26,
|
|
"Silence": 500,
|
|
"Sine wave": 501,
|
|
"Singing": 27,
|
|
"Singing bowl": 214,
|
|
"Single-lens reflex camera": 417,
|
|
"Sink (filling or washing)": 371,
|
|
"Siren": 396,
|
|
"Sitar": 148,
|
|
"Sizzle": 490,
|
|
"Ska": 263,
|
|
"Skateboard": 342,
|
|
"Skidding": 312,
|
|
"Slam": 358,
|
|
"Slap, smack": 467,
|
|
"Sliding door": 357,
|
|
"Slosh": 446,
|
|
"Smash, crash": 469,
|
|
"Smoke detector, smoke alarm": 399,
|
|
"Snake": 134,
|
|
"Snare drum": 165,
|
|
"Sneeze": 49,
|
|
"Snicker": 19,
|
|
"Sniff": 50,
|
|
"Snoring": 43,
|
|
"Snort": 46,
|
|
"Sonar": 457,
|
|
"Song": 266,
|
|
"Soul music": 227,
|
|
"Sound effect": 504,
|
|
"Soundtrack music": 270,
|
|
"Speech": 0,
|
|
"Speech synthesizer": 7,
|
|
"Splash, splatter": 445,
|
|
"Splinter": 439,
|
|
"Spray": 453,
|
|
"Squawk": 114,
|
|
"Squeak": 361,
|
|
"Squeal": 485,
|
|
"Squish": 447,
|
|
"Static": 515,
|
|
"Steam": 296,
|
|
"Steam whistle": 403,
|
|
"Steel guitar, slide guitar": 144,
|
|
"Steelpan": 183,
|
|
"Stir": 455,
|
|
"Stomach rumble": 57,
|
|
"Stream": 292,
|
|
"String section": 190,
|
|
"Strum": 146,
|
|
"Subway, metro, underground": 334,
|
|
"Swing music": 230,
|
|
"Synthesizer": 158,
|
|
"Synthetic singing": 35,
|
|
"Tabla": 170,
|
|
"Tambourine": 174,
|
|
"Tap": 360,
|
|
"Tapping (guitar technique)": 145,
|
|
"Tearing": 480,
|
|
"Techno": 241,
|
|
"Telephone": 389,
|
|
"Telephone bell ringing": 390,
|
|
"Telephone dialing, DTMF": 392,
|
|
"Television": 524,
|
|
"Tender music": 279,
|
|
"Theme music": 268,
|
|
"Theremin": 213,
|
|
"Throat clearing": 48,
|
|
"Throbbing": 522,
|
|
"Thump, thud": 460,
|
|
"Thunder": 287,
|
|
"Thunderstorm": 286,
|
|
"Thunk": 461,
|
|
"Tick": 407,
|
|
"Tick-tock": 408,
|
|
"Timpani": 169,
|
|
"Tire squeal": 313,
|
|
"Toilet flush": 374,
|
|
"Tools": 418,
|
|
"Toot": 309,
|
|
"Toothbrush": 375,
|
|
"Traditional music": 264,
|
|
"Traffic noise, roadway noise": 327,
|
|
"Train": 329,
|
|
"Train horn": 331,
|
|
"Train wheels squealing": 333,
|
|
"Train whistle": 330,
|
|
"Trance music": 247,
|
|
"Trickle, dribble": 450,
|
|
"Trombone": 188,
|
|
"Truck": 316,
|
|
"Trumpet": 187,
|
|
"Tubular bells": 178,
|
|
"Tuning fork": 204,
|
|
"Turkey": 102,
|
|
"Typewriter": 385,
|
|
"Typing": 384,
|
|
"Ukulele": 151,
|
|
"Vacuum cleaner": 377,
|
|
"Vehicle": 300,
|
|
"Vehicle horn, car horn, honking": 308,
|
|
"Vibraphone": 182,
|
|
"Vibration": 523,
|
|
"Video game music": 272,
|
|
"Violin, fiddle": 191,
|
|
"Vocal music": 254,
|
|
"Wail, moan": 25,
|
|
"Walk, footsteps": 53,
|
|
"Water": 288,
|
|
"Water tap, faucet": 370,
|
|
"Waterfall": 293,
|
|
"Waves, surf": 295,
|
|
"Wedding music": 275,
|
|
"Whack, thwack": 468,
|
|
"Whale vocalization": 136,
|
|
"Wheeze": 42,
|
|
"Whimper": 24,
|
|
"Whimper (dog)": 80,
|
|
"Whip": 472,
|
|
"Whir": 488,
|
|
"Whispering": 15,
|
|
"Whistle": 402,
|
|
"Whistling": 40,
|
|
"White noise": 520,
|
|
"Whoop": 10,
|
|
"Whoosh, swoosh, swish": 459,
|
|
"Wild animals": 108,
|
|
"Wind": 283,
|
|
"Wind chime": 206,
|
|
"Wind instrument, woodwind instrument": 195,
|
|
"Wind noise (microphone)": 285,
|
|
"Wood": 437,
|
|
"Wood block": 173,
|
|
"Writing": 387,
|
|
"Yell": 11,
|
|
"Yip": 76,
|
|
"Yodeling": 29,
|
|
"Zing": 497,
|
|
"Zipper (clothing)": 378,
|
|
"Zither": 150
|
|
},
|
|
"layer_norm_eps": 1e-12,
|
|
"max_length": 1024,
|
|
"model_type": "audio-spectrogram-transformer",
|
|
"num_attention_heads": 3,
|
|
"num_hidden_layers": 12,
|
|
"num_mel_bins": 128,
|
|
"patch_size": 16,
|
|
"qkv_bias": true,
|
|
"time_stride": 16,
|
|
"torch_dtype": "float32",
|
|
"transformers_version": "4.36.1"
|
|
}
|
|
|