MarkusStoll committed on
Commit
6290607
1 Parent(s): 3e9cab9

use prerelease version, cache dataset

Browse files
Files changed (2) hide show
  1. Dockerfile +6 -3
  2. run.py +21 -5
Dockerfile CHANGED
@@ -2,11 +2,14 @@ FROM python:3.9
2
 
3
  WORKDIR /code
4
  ENV HOME=/code
5
- COPY ./requirements.txt /code/requirements.txt
 
6
 
7
- RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
 
 
8
 
9
  COPY . .
10
  RUN mkdir -p /code/.cache
11
  RUN chmod -R 777 /code
12
- CMD ["python", "run.py"]
 
2
 
3
  WORKDIR /code
4
  ENV HOME=/code
5
+ # COPY ./requirements.txt /code/requirements.txt
6
+ # RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
7
 
8
+ RUN apt install curl
9
+ RUN curl -LJO https://spotlightpublic.blob.core.windows.net/github-public/Renumics/renumics_spotlight-1.1.0.post16+c447f37-py3-none-any.whl
10
+ RUN pip install renumics_spotlight-1.1.0.post16+c447f37-py3-none-any.whl
11
 
12
  COPY . .
13
  RUN mkdir -p /code/.cache
14
  RUN chmod -R 777 /code
15
+ CMD ["python", "run.py"]
run.py CHANGED
@@ -1,9 +1,25 @@
 
 
 
1
  if __name__ == "__main__":
2
- import datasets
3
- from renumics import spotlight
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
 
5
- dataset = datasets.load_dataset("renumics/cifar100-enriched", split="train")
6
-
7
  df = dataset.to_pandas()
8
  df_show = df.drop(columns=['embedding', 'probabilities'])
9
- spotlight.show(df_show.sample(5000, random_state=1), port=7860, host="0.0.0.0", dtype={"image": spotlight.Image, "embedding_reduced": spotlight.Embedding})
 
 
1
+ import pickle
2
+ import datasets
3
+ from renumics import spotlight
4
  if __name__ == "__main__":
5
+ cache_file = "dataset_cache.pkl"
6
+ if os.path.exists(cache_file):
7
+ # Load dataset from cache
8
+ with open(cache_file, "rb") as file:
9
+ dataset = pickle.load(file)
10
+ print("Dataset loaded from cache.")
11
+ else:
12
+ # Load dataset using datasets.load_dataset()
13
+ dataset = datasets.load_dataset("renumics/cifar100-enriched", split="train")
14
+ print("Dataset loaded using datasets.load_dataset().")
15
+
16
+ # Save dataset to cache
17
+ with open(cache_file, "wb") as file:
18
+ pickle.dump(dataset, file)
19
+ print("Dataset saved to cache.")
20
+
21
 
 
 
22
  df = dataset.to_pandas()
23
  df_show = df.drop(columns=['embedding', 'probabilities'])
24
+ spotlight.show(df_show.sample(5000, random_state=1), port=7860, host="0.0.0.0",
25
+ dtype={"image": spotlight.Image, "embedding_reduced": spotlight.Embedding}, allow_filebrowser=False)