Commit e6062ad by Pierre Chapuis
Parent: e8bb239
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+*.jpg filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,5 @@
+gradio_cached_examples/
+requirements-dev.lock
+
+__pycache__/
+*.py[cod]
LICENSE ADDED
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2024 Lagon Technologies
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
README.md CHANGED
@@ -1,13 +1,21 @@
 ---
 title: Finegrain Object Cutter
-emoji: 🔥
+emoji: ✂️
 colorFrom: indigo
 colorTo: indigo
 sdk: gradio
 sdk_version: 4.42.0
-app_file: app.py
+app_file: src/app.py
 pinned: false
 license: mit
+short_description: Create high-quality HD cutouts with just a text prompt
+tags:
+- refiners
 ---
 
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+# Finegrain Object Cutter
+
+## Links
+
+- https://github.com/finegrain-ai/refiners
+- https://finegrain.ai/
examples/black-lamp.jpg ADDED
Git LFS Details
• SHA256: fab22747ed54f6fedeb40d729ebf48f2135d1facba05f0b7ee171cfbe986b28e
• Pointer size: 132 Bytes
• Size of remote file: 1.6 MB

examples/chair.jpg ADDED
Git LFS Details
• SHA256: 5e85c3c45a51e45fc9cd9db06a8944cd9951cdb9a4ca8491486ab1c87a4d37df
• Pointer size: 132 Bytes
• Size of remote file: 1.12 MB

examples/potted-plant.jpg ADDED
Git LFS Details
• SHA256: 3582746acb3a3c1f1619203e65347c4d7520c3c2b664c75ed444c22669393c23
• Pointer size: 132 Bytes
• Size of remote file: 1.33 MB
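
The SHA256 digests above make it easy to confirm the example images were materialized by Git LFS rather than left as pointer files. A minimal check, standard library only, with paths relative to the repo root:

```python
import hashlib

# Digest listed in the LFS details for examples/black-lamp.jpg above.
expected = "fab22747ed54f6fedeb40d729ebf48f2135d1facba05f0b7ee171cfbe986b28e"

with open("examples/black-lamp.jpg", "rb") as f:
    digest = hashlib.sha256(f.read()).hexdigest()

# A mismatch usually means the file is still a ~132-byte LFS pointer.
assert digest == expected, f"unexpected digest: {digest}"
```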
gradio_imageslider-0.0.20-py3-none-any.whl ADDED
Binary file (85.4 kB).
pyproject.toml ADDED
@@ -0,0 +1,56 @@
+[project]
+name = "cutter"
+version = "0.1.0"
+description = "Finegrain Object Cutter (Gradio)"
+authors = [
+    { name = "Pierre Chapuis", email = "pierre@lagon.tech" }
+]
+dependencies = [
+    "gradio>=4.42.0",
+    "gradio-image-annotation>=0.2.3",
+    "pillow>=10.4.0",
+    "pillow-heif>=0.18.0",
+    "refiners @ git+https://github.com/finegrain-ai/refiners",
+    "numba>=0.60.0",
+    "pymatting>=1.1.12",
+    "transformers>=4.44.2",
+    "spaces>=0.29.3",
+    "numpy<2.0.0",
+    "gradio-imageslider @ https://fg-cwl-public.s3.eu-west-1.amazonaws.com/9zlqqobl/gradio_imageslider-0.0.20-py3-none-any.whl",
+]
+readme = "README.md"
+requires-python = ">= 3.12, <3.13"
+
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+
+[tool.rye]
+managed = true
+dev-dependencies = []
+
+[tool.hatch.metadata]
+allow-direct-references = true
+
+[tool.hatch.build.targets.wheel]
+packages = ["src/cutter"]
+
+[tool.ruff]
+line-length = 120
+target-version = "py312"
+
+[tool.ruff.lint]
+select = [
+    "E",  # pycodestyle errors
+    "W",  # pycodestyle warnings
+    "F",  # pyflakes
+    "UP", # pyupgrade
+    "A",  # flake8-builtins
+    "B",  # flake8-bugbear
+    "Q",  # flake8-quotes
+    "I",  # isort
+]
+
+[tool.pyright]
+include = ["src"]
+exclude = ["**/__pycache__"]
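
Since `requirements.txt` and `requirements.lock` below must mirror these pins, one quick way to read the declared dependencies back is the standard-library `tomllib`. A sketch, assuming it runs from the repo root:

```python
import tomllib

# Load the [project] table from pyproject.toml (tomllib is stdlib in 3.11+,
# and this project pins Python 3.12).
with open("pyproject.toml", "rb") as f:
    project = tomllib.load(f)["project"]

print(project["name"], project["version"])
for dep in project["dependencies"]:
    print("-", dep)
```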
requirements.lock ADDED
@@ -0,0 +1,267 @@
+# generated by rye
+# use `rye lock` or `rye sync` to update this lockfile
+#
+# last locked with the following flags:
+#   pre: false
+#   features: []
+#   all-features: false
+#   with-sources: false
+#   generate-hashes: false
+#   universal: false
+
+-e file:.
+aiofiles==23.2.1
+    # via gradio
+annotated-types==0.7.0
+    # via pydantic
+anyio==4.4.0
+    # via gradio
+    # via httpx
+    # via starlette
+build==1.2.1
+    # via gradio-imageslider
+certifi==2024.7.4
+    # via httpcore
+    # via httpx
+    # via requests
+charset-normalizer==3.3.2
+    # via requests
+click==8.1.7
+    # via typer
+    # via uvicorn
+contourpy==1.3.0
+    # via matplotlib
+cycler==0.12.1
+    # via matplotlib
+fastapi==0.112.2
+    # via gradio
+ffmpy==0.4.0
+    # via gradio
+filelock==3.15.4
+    # via huggingface-hub
+    # via torch
+    # via transformers
+    # via triton
+fonttools==4.53.1
+    # via matplotlib
+fsspec==2024.6.1
+    # via gradio-client
+    # via huggingface-hub
+    # via torch
+gradio==4.42.0
+    # via cutter
+    # via gradio-image-annotation
+    # via gradio-imageslider
+    # via spaces
+gradio-client==1.3.0
+    # via gradio
+gradio-image-annotation==0.2.3
+    # via cutter
+gradio-imageslider @ https://fg-cwl-public.s3.eu-west-1.amazonaws.com/9zlqqobl/gradio_imageslider-0.0.20-py3-none-any.whl
+    # via cutter
+h11==0.14.0
+    # via httpcore
+    # via uvicorn
+httpcore==1.0.5
+    # via httpx
+httpx==0.27.2
+    # via gradio
+    # via gradio-client
+    # via spaces
+huggingface-hub==0.24.6
+    # via gradio
+    # via gradio-client
+    # via tokenizers
+    # via transformers
+idna==3.8
+    # via anyio
+    # via httpx
+    # via requests
+importlib-resources==6.4.4
+    # via gradio
+jaxtyping==0.2.33
+    # via refiners
+jinja2==3.1.4
+    # via gradio
+    # via torch
+kiwisolver==1.4.5
+    # via matplotlib
+llvmlite==0.43.0
+    # via numba
+markdown-it-py==3.0.0
+    # via rich
+markupsafe==2.1.5
+    # via gradio
+    # via jinja2
+matplotlib==3.9.2
+    # via gradio
+mdurl==0.1.2
+    # via markdown-it-py
+mpmath==1.3.0
+    # via sympy
+networkx==3.3
+    # via torch
+numba==0.60.0
+    # via cutter
+    # via pymatting
+numpy==1.26.4
+    # via contourpy
+    # via cutter
+    # via gradio
+    # via matplotlib
+    # via numba
+    # via pandas
+    # via pymatting
+    # via refiners
+    # via scipy
+    # via transformers
+nvidia-cublas-cu12==12.1.3.1
+    # via nvidia-cudnn-cu12
+    # via nvidia-cusolver-cu12
+    # via torch
+nvidia-cuda-cupti-cu12==12.1.105
+    # via torch
+nvidia-cuda-nvrtc-cu12==12.1.105
+    # via torch
+nvidia-cuda-runtime-cu12==12.1.105
+    # via torch
+nvidia-cudnn-cu12==9.1.0.70
+    # via torch
+nvidia-cufft-cu12==11.0.2.54
+    # via torch
+nvidia-curand-cu12==10.3.2.106
+    # via torch
+nvidia-cusolver-cu12==11.4.5.107
+    # via torch
+nvidia-cusparse-cu12==12.1.0.106
+    # via nvidia-cusolver-cu12
+    # via torch
+nvidia-nccl-cu12==2.20.5
+    # via torch
+nvidia-nvjitlink-cu12==12.6.20
+    # via nvidia-cusolver-cu12
+    # via nvidia-cusparse-cu12
+nvidia-nvtx-cu12==12.1.105
+    # via torch
+orjson==3.10.7
+    # via gradio
+packaging==24.1
+    # via build
+    # via gradio
+    # via gradio-client
+    # via huggingface-hub
+    # via matplotlib
+    # via refiners
+    # via spaces
+    # via transformers
+pandas==2.2.2
+    # via gradio
+pillow==10.4.0
+    # via cutter
+    # via gradio
+    # via gradio-imageslider
+    # via matplotlib
+    # via pillow-heif
+    # via pymatting
+    # via refiners
+pillow-heif==0.18.0
+    # via cutter
+psutil==5.9.8
+    # via spaces
+pydantic==2.8.2
+    # via fastapi
+    # via gradio
+    # via spaces
+pydantic-core==2.20.1
+    # via pydantic
+pydub==0.25.1
+    # via gradio
+pygments==2.18.0
+    # via rich
+pymatting==1.1.12
+    # via cutter
+pyparsing==3.1.4
+    # via matplotlib
+pyproject-hooks==1.1.0
+    # via build
+python-dateutil==2.9.0.post0
+    # via matplotlib
+    # via pandas
+python-multipart==0.0.9
+    # via gradio
+pytz==2024.1
+    # via pandas
+pyyaml==6.0.2
+    # via gradio
+    # via huggingface-hub
+    # via transformers
+refiners @ git+https://github.com/finegrain-ai/refiners@7ca1774b5f8f172708db647a26c3be68858f285a
+    # via cutter
+regex==2024.7.24
+    # via transformers
+requests==2.32.3
+    # via huggingface-hub
+    # via spaces
+    # via transformers
+rich==13.8.0
+    # via typer
+ruff==0.6.2
+    # via gradio
+safetensors==0.4.4
+    # via refiners
+    # via transformers
+scipy==1.14.1
+    # via pymatting
+semantic-version==2.10.0
+    # via gradio
+setuptools==74.0.0
+    # via torch
+shellingham==1.5.4
+    # via typer
+six==1.16.0
+    # via python-dateutil
+sniffio==1.3.1
+    # via anyio
+    # via httpx
+spaces==0.29.3
+    # via cutter
+starlette==0.38.2
+    # via fastapi
+sympy==1.13.2
+    # via torch
+tokenizers==0.19.1
+    # via transformers
+tomlkit==0.12.0
+    # via gradio
+torch==2.4.0
+    # via refiners
+tqdm==4.66.5
+    # via huggingface-hub
+    # via transformers
+transformers==4.44.2
+    # via cutter
+triton==3.0.0
+    # via torch
+typeguard==2.13.3
+    # via jaxtyping
+typer==0.12.5
+    # via gradio
+typing-extensions==4.12.2
+    # via fastapi
+    # via gradio
+    # via gradio-client
+    # via huggingface-hub
+    # via pydantic
+    # via pydantic-core
+    # via spaces
+    # via torch
+    # via typer
+tzdata==2024.1
+    # via pandas
+urllib3==2.2.2
+    # via gradio
+    # via requests
+uvicorn==0.30.6
+    # via gradio
+websockets==12.0
+    # via gradio-client
requirements.txt ADDED
@@ -0,0 +1,10 @@
+gradio_image_annotation==0.2.3
+https://huggingface.co/spaces/finegrain/finegrain-object-cutter/resolve/main/gradio_imageslider-0.0.20-py3-none-any.whl
+pillow>=10.4.0
+pillow-heif>=0.18.0
+git+https://github.com/finegrain-ai/refiners@7ca1774b5f8f172708db647a26c3be68858f285a
+numba>=0.60.0
+pymatting>=1.1.12
+transformers>=4.44.2
+spaces>=0.29.3
+numpy<2.0.0
src/app.py ADDED
@@ -0,0 +1,343 @@
+import tempfile
+import time
+from collections.abc import Sequence
+from typing import Any, cast
+
+import gradio as gr
+import numpy as np
+import pillow_heif
+import spaces
+import torch
+from gradio_image_annotation import image_annotator
+from gradio_imageslider import ImageSlider
+from PIL import Image
+from pymatting.foreground.estimate_foreground_ml import estimate_foreground_ml
+from refiners.fluxion.utils import no_grad
+from refiners.solutions import BoxSegmenter
+from transformers import GroundingDinoForObjectDetection, GroundingDinoProcessor
+
+BoundingBox = tuple[int, int, int, int]
+
+pillow_heif.register_heif_opener()
+pillow_heif.register_avif_opener()
+
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+# weird dance because ZeroGPU
+segmenter = BoxSegmenter(device="cpu")
+segmenter.device = device
+segmenter.model = segmenter.model.to(device=segmenter.device)
+
+gd_model_path = "IDEA-Research/grounding-dino-base"
+gd_processor = GroundingDinoProcessor.from_pretrained(gd_model_path)
+gd_model = GroundingDinoForObjectDetection.from_pretrained(gd_model_path, torch_dtype=torch.float32)
+gd_model = gd_model.to(device=device)  # type: ignore
+assert isinstance(gd_model, GroundingDinoForObjectDetection)
+
+
+def bbox_union(bboxes: Sequence[list[int]]) -> BoundingBox | None:
+    if not bboxes:
+        return None
+    for bbox in bboxes:
+        assert len(bbox) == 4
+        assert all(isinstance(x, int) for x in bbox)
+    return (
+        min(bbox[0] for bbox in bboxes),
+        min(bbox[1] for bbox in bboxes),
+        max(bbox[2] for bbox in bboxes),
+        max(bbox[3] for bbox in bboxes),
+    )
+
+
+def corners_to_pixels_format(bboxes: torch.Tensor, width: int, height: int) -> torch.Tensor:
+    x1, y1, x2, y2 = bboxes.round().to(torch.int32).unbind(-1)
+    return torch.stack((x1.clamp_(0, width), y1.clamp_(0, height), x2.clamp_(0, width), y2.clamp_(0, height)), dim=-1)
+
+
+def gd_detect(img: Image.Image, prompt: str) -> BoundingBox | None:
+    assert isinstance(gd_processor, GroundingDinoProcessor)
+
+    # Grounding Dino expects a dot after each category.
+    inputs = gd_processor(images=img, text=f"{prompt}.", return_tensors="pt").to(device=device)
+
+    with no_grad():
+        outputs = gd_model(**inputs)
+        width, height = img.size
+        results: dict[str, Any] = gd_processor.post_process_grounded_object_detection(
+            outputs,
+            inputs["input_ids"],
+            target_sizes=[(height, width)],
+        )[0]
+        assert "boxes" in results and isinstance(results["boxes"], torch.Tensor)
+
+    bboxes = corners_to_pixels_format(results["boxes"].cpu(), width, height)
+    return bbox_union(bboxes.numpy().tolist())
+
+
+def apply_mask(
+    img: Image.Image,
+    mask_img: Image.Image,
+    defringe: bool = True,
+) -> Image.Image:
+    assert img.size == mask_img.size
+    img = img.convert("RGB")
+    mask_img = mask_img.convert("L")
+
+    if defringe:
+        # Mitigate edge halo effects via color decontamination
+        rgb, alpha = np.asarray(img) / 255.0, np.asarray(mask_img) / 255.0
+        foreground = cast(np.ndarray[Any, np.dtype[np.uint8]], estimate_foreground_ml(rgb, alpha))
+        img = Image.fromarray((foreground * 255).astype("uint8"))
+
+    result = Image.new("RGBA", img.size)
+    result.paste(img, (0, 0), mask_img)
+    return result
+
+
+@spaces.GPU
+def _gpu_process(
+    img: Image.Image,
+    prompt: str | BoundingBox | None,
+) -> tuple[Image.Image, BoundingBox | None, list[str]]:
+    # Because of ZeroGPU shenanigans, we need a *single* function with the
+    # `spaces.GPU` decorator that *does not* contain postprocessing.
+
+    time_log: list[str] = []
+
+    if isinstance(prompt, str):
+        t0 = time.time()
+        bbox = gd_detect(img, prompt)
+        time_log.append(f"detect: {time.time() - t0}")
+        if not bbox:
+            print(time_log[0])
+            raise gr.Error("No object detected")
+    else:
+        bbox = prompt
+
+    t0 = time.time()
+    mask = segmenter(img, bbox)
+    time_log.append(f"segment: {time.time() - t0}")
+
+    return mask, bbox, time_log
+
+
+def _process(
+    img: Image.Image,
+    prompt: str | BoundingBox | None,
+) -> tuple[tuple[Image.Image, Image.Image], gr.DownloadButton]:
+    # enforce max dimensions for pymatting performance reasons
+    if img.width > 2048 or img.height > 2048:
+        orig_width = img.width
+        img.thumbnail((2048, 2048))
+        if isinstance(prompt, tuple):
+            x0, y0, x1, y1 = (int(x * 2048 / orig_width) for x in prompt)
+            prompt = (x0, y0, x1, y1)
+
+    mask, bbox, time_log = _gpu_process(img, prompt)
+
+    t0 = time.time()
+    masked_alpha = apply_mask(img, mask, defringe=True)
+    time_log.append(f"crop: {time.time() - t0}")
+    print(", ".join(time_log))
+
+    masked_rgb = Image.alpha_composite(Image.new("RGBA", masked_alpha.size, "white"), masked_alpha)
+
+    thresholded = mask.point(lambda p: 255 if p > 10 else 0)
+    bbox = thresholded.getbbox()
+    to_dl = masked_alpha.crop(bbox)
+
+    temp = tempfile.NamedTemporaryFile(delete=False, suffix=".png")
+    to_dl.save(temp, format="PNG")
+    temp.close()
+
+    return (img, masked_rgb), gr.DownloadButton(value=temp.name, interactive=True)
+
+
+def process_bbox(prompts: dict[str, Any]) -> tuple[tuple[Image.Image, Image.Image], gr.DownloadButton]:
+    assert isinstance(img := prompts["image"], Image.Image)
+    assert isinstance(boxes := prompts["boxes"], list)
+    if len(boxes) == 1:
+        assert isinstance(box := boxes[0], dict)
+        bbox = tuple(box[k] for k in ["xmin", "ymin", "xmax", "ymax"])
+    else:
+        assert len(boxes) == 0
+        bbox = None
+    return _process(img, bbox)
+
+
+def on_change_bbox(img: Image.Image | None):
+    return gr.update(interactive=bool(img))
+
+
+def process_prompt(img: Image.Image, prompt: str) -> tuple[tuple[Image.Image, Image.Image], gr.DownloadButton]:
+    return _process(img, prompt)
+
+
+def on_change_prompt(img: Image.Image | None, prompt: str | None):
+    return gr.update(interactive=bool(img and prompt))
+
+
+TITLE = """
+<center>
+
+<h1 style="font-size: 1.5rem; margin-bottom: 0.5rem;">
+Object Cutter Powered By Refiners
+</h1>
+
+<div style="
+display: flex;
+align-items: center;
+justify-content: center;
+gap: 0.5rem;
+margin-bottom: 0.5rem;
+font-size: 1.25rem;
+flex-wrap: wrap;
+">
+<a href="https://github.com/finegrain-ai/refiners" target="_blank">[Refiners]</a>
+<a href="https://finegrain.ai/" target="_blank">[Finegrain]</a>
+<a
+href="https://huggingface.co/spaces/finegrain/finegrain-object-eraser"
+target="_blank"
+>[Finegrain Object Eraser]</a>
+<a
+href="https://huggingface.co/spaces/finegrain/finegrain-image-enhancer"
+target="_blank"
+>[Finegrain Image Enhancer]</a>
+</div>
+
+<p>
+Create high-quality HD cutouts for any object in your image with just a text prompt — no manual work required!
+<br>
+The object will be available on a transparent background, ready to paste elsewhere.
+</p>
+
+<p>
+This space uses the
+<a
+href="https://huggingface.co/finegrain/finegrain-box-segmenter"
+target="_blank"
+>Finegrain Box Segmenter model</a>,
+trained with a mix of natural data curated by Finegrain and
+<a
+href="https://huggingface.co/datasets/Nfiniteai/product-masks-sample"
+target="_blank"
+>synthetic data provided by Nfinite</a>.
+<br>
+It is powered by Refiners, our open source micro-framework for simple foundation model adaptation.
+If you enjoyed it, please consider starring Refiners on GitHub!
+</p>
+
+<a href="https://github.com/finegrain-ai/refiners" target="_blank">
+<img src="https://img.shields.io/github/stars/finegrain-ai/refiners?style=social" />
+</a>
+
+</center>
+"""
+
+with gr.Blocks() as demo:
+    gr.HTML(TITLE)
+
+    with gr.Tab("By prompt", id="tab_prompt"):
+        with gr.Row():
+            with gr.Column():
+                iimg = gr.Image(type="pil", label="Input")
+                prompt = gr.Textbox(label="What should we cut?")
+                btn = gr.ClearButton(value="Cut Out Object", interactive=False)
+            with gr.Column():
+                oimg = ImageSlider(label="Before / After", show_download_button=False, interactive=False)
+                dlbt = gr.DownloadButton("Download Cutout", interactive=False)
+
+        btn.add(oimg)
+
+        for inp in [iimg, prompt]:
+            inp.change(
+                fn=on_change_prompt,
+                inputs=[iimg, prompt],
+                outputs=[btn],
+            )
+        btn.click(
+            fn=process_prompt,
+            inputs=[iimg, prompt],
+            outputs=[oimg, dlbt],
+            api_name=False,
+        )
+
+        examples = [
+            [
+                "examples/potted-plant.jpg",
+                "potted plant",
+            ],
+            [
+                "examples/chair.jpg",
+                "chair",
+            ],
+            [
+                "examples/black-lamp.jpg",
+                "black lamp",
+            ],
+        ]
+
+        ex = gr.Examples(
+            examples=examples,
+            inputs=[iimg, prompt],
+            outputs=[oimg, dlbt],
+            fn=process_prompt,
+            cache_examples=True,
+        )
+
+    with gr.Tab("By bounding box", id="tab_bb"):
+        with gr.Row():
+            with gr.Column():
+                annotator = image_annotator(
+                    image_type="pil",
+                    disable_edit_boxes=True,
+                    show_download_button=False,
+                    show_share_button=False,
+                    single_box=True,
+                    label="Input",
+                )
+                btn = gr.ClearButton(value="Cut Out Object", interactive=False)
+            with gr.Column():
+                oimg = ImageSlider(label="Before / After", show_download_button=False)
+                dlbt = gr.DownloadButton("Download Cutout", interactive=False)
+
+        btn.add(oimg)
+
+        iimg.change(
+            fn=on_change_bbox,
+            inputs=[iimg],
+            outputs=[btn],
+        )
+        btn.click(
+            fn=process_bbox,
+            inputs=[annotator],
+            outputs=[oimg, dlbt],
+            api_name=False,
+        )
+
+        examples = [
+            {
+                "image": "examples/potted-plant.jpg",
+                "boxes": [{"xmin": 51, "ymin": 511, "xmax": 639, "ymax": 1255}],
+            },
+            {
+                "image": "examples/chair.jpg",
+                "boxes": [{"xmin": 98, "ymin": 330, "xmax": 973, "ymax": 1468}],
+            },
+            {
+                "image": "examples/black-lamp.jpg",
+                "boxes": [{"xmin": 88, "ymin": 148, "xmax": 700, "ymax": 1414}],
+            },
+        ]
+
+        ex = gr.Examples(
+            examples=examples,
+            inputs=[annotator],
+            outputs=[oimg, dlbt],
+            fn=process_bbox,
+            cache_examples=True,
+        )
+
+
+demo.queue(max_size=30, api_open=False)
+demo.launch(show_api=False)
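
For reference, the detect → segment → defringe pipeline in `src/app.py` can also be driven headlessly. A minimal sketch reusing the functions defined above; the input and output paths are illustrative, and on ZeroGPU the `@spaces.GPU` path is normally entered through Gradio:

```python
# Hypothetical standalone use of gd_detect, segmenter, and apply_mask
# as defined in src/app.py (the Space itself drives them via Gradio callbacks).
from PIL import Image

img = Image.open("examples/chair.jpg").convert("RGB")

bbox = gd_detect(img, "chair")        # text prompt -> bounding box (Grounding DINO)
if bbox is not None:
    mask = segmenter(img, bbox)       # bounding box -> soft mask (BoxSegmenter)
    cutout = apply_mask(img, mask)    # color decontamination + alpha paste
    cutout.save("chair-cutout.png")   # RGBA cutout on a transparent background
```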