Spaces:
Runtime error
Runtime error
nsthorat-lilac
committed on
Commit
•
308a822
1
Parent(s):
39638a5
5e0c8617b3f7c000255740d8cc83e9b97f3540affbf45d9107328eec05f9afca
Browse files
- .gitattributes +6 -0
- data/datasets/local/spotify/data-00000-of-00001.parquet +3 -0
- data/datasets/local/spotify/manifest.json +27 -0
- data/datasets/local/spotify/settings.json +1 -0
- data/datasets/local/spotify/text/lang_detection/data-00000-of-00001.parquet +3 -0
- data/datasets/local/spotify/text/lang_detection/signal_manifest.json +36 -0
- data/datasets/local/spotify/text/sbert/data-00000-of-00001.parquet +3 -0
- data/datasets/local/spotify/text/sbert/embedding/local/outerspace/v34/data-00000-of-00001.parquet +3 -0
- data/datasets/local/spotify/text/sbert/embedding/local/outerspace/v34/signal_manifest.json +64 -0
- data/datasets/local/spotify/text/sbert/embeddings-00000-of-00001.keys.pkl +3 -0
- data/datasets/local/spotify/text/sbert/embeddings-00000-of-00001.npy +3 -0
- data/datasets/local/spotify/text/sbert/signal_manifest.json +37 -0
.gitattributes
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
data/datasets/local/spotify/data-00000-of-00001.parquet filter=lfs diff=lfs merge=lfs -text
|
2 |
+
data/datasets/local/spotify/text/lang_detection/data-00000-of-00001.parquet filter=lfs diff=lfs merge=lfs -text
|
3 |
+
data/datasets/local/spotify/text/sbert/data-00000-of-00001.parquet filter=lfs diff=lfs merge=lfs -text
|
4 |
+
data/datasets/local/spotify/text/sbert/embedding/local/outerspace/v34/data-00000-of-00001.parquet filter=lfs diff=lfs merge=lfs -text
|
5 |
+
data/datasets/local/spotify/text/sbert/embeddings-00000-of-00001.keys.pkl filter=lfs diff=lfs merge=lfs -text
|
6 |
+
data/datasets/local/spotify/text/sbert/embeddings-00000-of-00001.npy filter=lfs diff=lfs merge=lfs -text
|
data/datasets/local/spotify/data-00000-of-00001.parquet
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:32224657332b09187a737c73ab634f9d14c9ba9a240bd105f1b9819cde2afcef
|
3 |
+
size 37128682
|
data/datasets/local/spotify/manifest.json
ADDED
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"files": [
|
3 |
+
"data-00000-of-00001.parquet"
|
4 |
+
],
|
5 |
+
"data_schema": {
|
6 |
+
"fields": {
|
7 |
+
"artist": {
|
8 |
+
"dtype": "string"
|
9 |
+
},
|
10 |
+
"song": {
|
11 |
+
"dtype": "string"
|
12 |
+
},
|
13 |
+
"link": {
|
14 |
+
"dtype": "string"
|
15 |
+
},
|
16 |
+
"text": {
|
17 |
+
"dtype": "string"
|
18 |
+
},
|
19 |
+
"__line_number__": {
|
20 |
+
"dtype": "int64"
|
21 |
+
},
|
22 |
+
"__rowid__": {
|
23 |
+
"dtype": "string"
|
24 |
+
}
|
25 |
+
}
|
26 |
+
}
|
27 |
+
}
|
data/datasets/local/spotify/settings.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"ui": {"media_paths": [["text"]]}}
|
data/datasets/local/spotify/text/lang_detection/data-00000-of-00001.parquet
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0f1555427c8dc3b2f1e9310f5e71b46297e607f710365e107c73c894d5a8e1b0
|
3 |
+
size 2033407
|
data/datasets/local/spotify/text/lang_detection/signal_manifest.json
ADDED
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"files": [
|
3 |
+
"data-00000-of-00001.parquet"
|
4 |
+
],
|
5 |
+
"parquet_id": "lang_detection(text)",
|
6 |
+
"data_schema": {
|
7 |
+
"fields": {
|
8 |
+
"__rowid__": {
|
9 |
+
"dtype": "string"
|
10 |
+
},
|
11 |
+
"text": {
|
12 |
+
"fields": {
|
13 |
+
"lang_detection": {
|
14 |
+
"repeated_field": {
|
15 |
+
"fields": {
|
16 |
+
"lang_code": {
|
17 |
+
"dtype": "string"
|
18 |
+
}
|
19 |
+
},
|
20 |
+
"dtype": "string_span"
|
21 |
+
},
|
22 |
+
"signal": {
|
23 |
+
"signal_name": "lang_detection"
|
24 |
+
}
|
25 |
+
}
|
26 |
+
}
|
27 |
+
}
|
28 |
+
}
|
29 |
+
},
|
30 |
+
"signal": {
|
31 |
+
"signal_name": "lang_detection"
|
32 |
+
},
|
33 |
+
"enriched_path": [
|
34 |
+
"text"
|
35 |
+
]
|
36 |
+
}
|
data/datasets/local/spotify/text/sbert/data-00000-of-00001.parquet
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9796beb630cc3503f3c2ac9db8f71e4c1604570836d78bbf364e801cd427c39e
|
3 |
+
size 2709987
|
data/datasets/local/spotify/text/sbert/embedding/local/outerspace/v34/data-00000-of-00001.parquet
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d1ba0fe68cc02849b0a20b7f72047c8e9cb8e5ef5b57b0cd642fa0b0be8a6e06
|
3 |
+
size 3340135
|
data/datasets/local/spotify/text/sbert/embedding/local/outerspace/v34/signal_manifest.json
ADDED
@@ -0,0 +1,64 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"files": [
|
3 |
+
"data-00000-of-00001.parquet"
|
4 |
+
],
|
5 |
+
"parquet_id": "local/outerspace/v34(text.sbert.*.embedding)",
|
6 |
+
"data_schema": {
|
7 |
+
"fields": {
|
8 |
+
"__rowid__": {
|
9 |
+
"dtype": "string"
|
10 |
+
},
|
11 |
+
"text": {
|
12 |
+
"fields": {
|
13 |
+
"sbert": {
|
14 |
+
"repeated_field": {
|
15 |
+
"fields": {
|
16 |
+
"embedding": {
|
17 |
+
"fields": {
|
18 |
+
"local/outerspace/v34": {
|
19 |
+
"dtype": "float32",
|
20 |
+
"signal": {
|
21 |
+
"signal_name": "concept_score",
|
22 |
+
"embedding": "sbert",
|
23 |
+
"namespace": "local",
|
24 |
+
"concept_name": "outerspace",
|
25 |
+
"draft": "main",
|
26 |
+
"num_negative_examples": 100
|
27 |
+
},
|
28 |
+
"bins": [
|
29 |
+
[
|
30 |
+
"Not in concept",
|
31 |
+
null,
|
32 |
+
0.5
|
33 |
+
],
|
34 |
+
[
|
35 |
+
"In concept",
|
36 |
+
0.5,
|
37 |
+
null
|
38 |
+
]
|
39 |
+
]
|
40 |
+
}
|
41 |
+
}
|
42 |
+
}
|
43 |
+
}
|
44 |
+
}
|
45 |
+
}
|
46 |
+
}
|
47 |
+
}
|
48 |
+
}
|
49 |
+
},
|
50 |
+
"signal": {
|
51 |
+
"signal_name": "concept_score",
|
52 |
+
"embedding": "sbert",
|
53 |
+
"namespace": "local",
|
54 |
+
"concept_name": "outerspace",
|
55 |
+
"draft": "main",
|
56 |
+
"num_negative_examples": 100
|
57 |
+
},
|
58 |
+
"enriched_path": [
|
59 |
+
"text",
|
60 |
+
"sbert",
|
61 |
+
"*",
|
62 |
+
"embedding"
|
63 |
+
]
|
64 |
+
}
|
data/datasets/local/spotify/text/sbert/embeddings-00000-of-00001.keys.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d5df43291782b8c731d4ce56537946654c642a01dc9a4e37de394836362f6b45
|
3 |
+
size 3727400
|
data/datasets/local/spotify/text/sbert/embeddings-00000-of-00001.npy
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:94e10c23d7229541e1f60b791a659d13673b10a03649abf0ae092e0e18c5aee3
|
3 |
+
size 170446976
|
data/datasets/local/spotify/text/sbert/signal_manifest.json
ADDED
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"files": [
|
3 |
+
"data-00000-of-00001.parquet"
|
4 |
+
],
|
5 |
+
"parquet_id": "sbert(text)",
|
6 |
+
"data_schema": {
|
7 |
+
"fields": {
|
8 |
+
"__rowid__": {
|
9 |
+
"dtype": "string"
|
10 |
+
},
|
11 |
+
"text": {
|
12 |
+
"fields": {
|
13 |
+
"sbert": {
|
14 |
+
"repeated_field": {
|
15 |
+
"fields": {
|
16 |
+
"embedding": {
|
17 |
+
"dtype": "embedding"
|
18 |
+
}
|
19 |
+
},
|
20 |
+
"dtype": "string_span"
|
21 |
+
},
|
22 |
+
"signal": {
|
23 |
+
"signal_name": "sbert"
|
24 |
+
}
|
25 |
+
}
|
26 |
+
}
|
27 |
+
}
|
28 |
+
}
|
29 |
+
},
|
30 |
+
"signal": {
|
31 |
+
"signal_name": "sbert"
|
32 |
+
},
|
33 |
+
"enriched_path": [
|
34 |
+
"text"
|
35 |
+
],
|
36 |
+
"embedding_filename_prefix": "embeddings-00000-of-00001"
|
37 |
+
}
|