OasisArtisan committed on
Commit
90226e0
1 Parent(s): 9f70665
Files changed (4) hide show
  1. Dockerfile +57 -0
  2. README.md +5 -4
  3. __init__.py +0 -0
  4. main.py +276 -0
Dockerfile ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Image for the MIA data-engine demo: Ubuntu 20.04 with Python 3.9, the
# map-machine / OpenSfM / OrienterNet dependencies, and the demo app in /app.
FROM ubuntu:20.04

# Keep apt from prompting during the build.
ARG DEBIAN_FRONTEND=noninteractive

# Install apt-getable dependencies
RUN apt-get update \
    && apt-get install -y \
        build-essential \
        cmake \
        git \
        libeigen3-dev \
        libopencv-dev \
        libceres-dev \
        python3-dev \
        curl \
        pkg-config \
        libcairo2-dev \
        software-properties-common \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*

# Mapmachine requirements
RUN add-apt-repository ppa:ubuntugis/ppa && \
    apt-get update && \
    apt-get -y install libgeos-dev

# Python 3.9 from deadsnakes plus a pip bootstrapped for that interpreter.
# apt-get (not apt) is used throughout: apt's CLI is not stable for scripts.
RUN add-apt-repository ppa:deadsnakes/ppa && \
    apt-get update && \
    apt-get install -y python3.9-dev && \
    curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py && \
    python3.9 get-pip.py

# Change this build arg to invalidate the cache and reinstall map-machine.
ARG REINSTALL_MAPMACHINE=1
RUN pip3.9 install git+https://github.com/tonyzzzzzz/map-machine

WORKDIR /home/

# OrienterNet Requirements TODO: Install directly from our requirements once our repo is public

RUN git clone https://github.com/mapillary/OpenSfM.git && cd OpenSfM && \
    pip3.9 install -r requirements.txt

# pip3.9 (rather than a bare pip3) so every install targets the same interpreter.
RUN git clone https://github.com/facebookresearch/OrienterNet.git && cd OrienterNet && \
    pip3.9 install -r requirements/full.txt

# MapPerceptionNet extra requirements
RUN pip3.9 install geojson shapely geopandas mercantile turfpy vt2geojson folium fastapi \
    geopy gradio pyarrow cloudpickle==2.0.0 urllib3~=1.25.6 scikit-image filelock hydra-core

# Change this build arg to invalidate the cache for the layers below
# (e.g. to re-clone MapItAnywhere).
ARG CACHE_RESET=1
RUN useradd -m -u 1000 user
USER user
WORKDIR /app

RUN git clone https://github.com/MapItAnywhere/MapItAnywhere.git
COPY --chown=user . /app
CMD ["python3.9", "-m", "main"]
README.md CHANGED
@@ -1,10 +1,11 @@
1
  ---
2
- title: DataEngine
3
- emoji: 🌖
4
- colorFrom: indigo
5
- colorTo: red
6
  sdk: docker
7
  pinned: false
 
8
  ---
9
 
10
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
+ title: MIA Data Engine
3
+ emoji: 🗺️
4
+ colorFrom: yellow
5
+ colorTo: pink
6
  sdk: docker
7
  pinned: false
8
+ app_port: 7860
9
  ---
10
 
11
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
__init__.py ADDED
File without changes
main.py ADDED
@@ -0,0 +1,276 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import requests
2
+ import os
3
+ import sys
4
+ from pathlib import Path
5
+
6
+ import gradio as gr
7
+ import numpy as np
8
+ import matplotlib.pyplot as plt
9
+ import pandas as pd
10
+ import geopandas as gpd
11
+ from pyproj.transformer import Transformer
12
+
13
+ sys.path.append(os.path.dirname(os.path.realpath(__file__)))
14
+ from MapItAnywhere.mia.bev import get_bev
15
+ from MapItAnywhere.mia.fpv import get_fpv
16
+ from MapItAnywhere.mia.fpv import filters
17
+ from MapItAnywhere.mia import logger
18
+
19
def get_city_boundary(query, fetch_shape=False):
    """Look up the bounding box (and optionally the outline) of a place via Nominatim.

    Args:
        query: Free-form place name, sent as the ``q`` search parameter.
        fetch_shape: When true, also request the polygon GeoJSON of the top hit.

    Returns:
        ``None`` when the request fails, the status code is not 200, or the
        search has no results. Otherwise a dict with float ``west``, ``south``,
        ``east`` and ``north`` entries. When ``fetch_shape`` is true, a
        ``(bbox, feature_collection)`` tuple is returned instead, where the
        collection wraps the hit's geometry in a single GeoJSON Feature.
    """
    # Local import so this block does not depend on the file's import header.
    from urllib.parse import quote

    # Use Nominatim API to get the boundary of the city
    base_url = "https://nominatim.openstreetmap.org/search"
    params = {
        'q': query,
        'format': 'json',
        'limit': 1,
        'polygon_geojson': 1 if fetch_shape else 0,
    }

    # HTTP header values must be latin-1 encodable; percent-encode the query so
    # non-latin place names do not raise UnicodeEncodeError.
    headers = {
        'User-Agent': f'mapperceptionnet_{quote(str(query), safe="")}'
    }

    try:
        # A timeout keeps a stalled connection from hanging the caller forever.
        response = requests.get(base_url, params=params, headers=headers, timeout=30)
    except requests.RequestException as e:
        logger.error(f"Nominatim request failed when fetching boundary data for {query}.\n"
                     f"Error: {repr(e)}")
        return None

    if response.status_code != 200:
        logger.error(f"Nominatim error when fetching boundary data for {query}.\n"
                     f"Status code: {response.status_code}. Content: {response.content}")
        return None

    data = response.json()

    # An unmatched query yields an empty list, so test for emptiness, not just None.
    if not data:
        logger.warn(f"No data returned by Nominatim for {query}")
        return None

    top_hit = data[0]

    # Extract bbox data from the API response. The indices below treat the
    # 'boundingbox' list as [south, north, west, east].
    bbox_data = top_hit['boundingbox']
    bbox = {
        'west': float(bbox_data[2]),
        'south': float(bbox_data[0]),
        'east': float(bbox_data[3]),
        'north': float(bbox_data[1]),
    }

    if not fetch_shape:
        return bbox

    # Wrap the raw geometry in a FeatureCollection.
    boundary_geojson = {
        "type": "FeatureCollection",
        "features": [
            {"type": "Feature",
             "properties": {},
             "geometry": top_hit['geojson']}],
    }
    return bbox, boundary_geojson
67
+
68
def split_dataframe(df, chunk_size=100):
    """Split ``df`` into consecutive slices of at most ``chunk_size`` items.

    Works on anything that supports ``len()`` and ``obj[start:stop]`` slicing
    (for a pandas DataFrame, that is a row slice).

    Args:
        df: The sliceable object to split.
        chunk_size: Maximum number of items per chunk; must be at least 1.

    Returns:
        A list of non-empty chunks in order. An empty input yields ``[]``; no
        empty trailing chunk is produced when the length is an exact multiple
        of ``chunk_size``.

    Raises:
        ValueError: If ``chunk_size`` is smaller than 1.
    """
    if chunk_size < 1:
        raise ValueError(f"chunk_size must be >= 1, got {chunk_size}")
    # Stepping by chunk_size visits only start offsets that hold data.
    return [df[start:start + chunk_size] for start in range(0, len(df), chunk_size)]
74
+
75
def _format_metadata(row):
    """Render the display attributes of one image row as ``key: value`` lines."""
    keys = ["id", "geometry.long", "geometry.lat", "compass_angle",
            "loc_descrip", "angle_descrip",
            "make", "model", "camera_type",
            "quality_score"]
    lines = []
    for k in keys:
        v = row[k]
        if isinstance(v, float):
            v = f"{v:.4f}"
        lines.append(f"{k}: {v}")
    return "\n".join(lines)


async def fetch(location, filter_undistort, disable_cam_filter, map_length, mpp):
    """Fetch one random FPV image at ``location`` and render its BEV map.

    This is an async generator. It yields ``(metadata_text, fpv, bev)`` tuples
    progressively: first with only the metadata text, then with the FPV image,
    and finally with the BEV image as well. If no usable image can be found it
    yields a single explanatory message and stops.

    Args:
        location: Place name resolved to a bounding box by ``get_city_boundary``.
        filter_undistort: When true, apply a filter pipeline to the metadata and
            run ``get_fpv.process_sequence`` before loading the FPV image.
        disable_cam_filter: Selects ``no_cam_filter_pipeline`` instead of
            ``filter_pipeline`` when filtering is on.
        map_length: BEV side length (the "BEV Dimension" slider).
        mpp: Meters per pixel of the BEV (the "Meters Per Pixel" slider).
    """
    N = 1
    TOTAL_LOOKED_INTO_LIMIT = 10000

    ################ FPV
    downloader = get_fpv.MapillaryDownloader(os.getenv("MLY_TOKEN"))
    bbox = get_city_boundary(query=location)
    if bbox is None:
        # get_city_boundary returns None on lookup failure; report it instead
        # of passing None further down.
        yield (f"Could not find a bounding box for '{location}'."
               "\nPlease check the location and try again.", None, None)
        return

    tiles = get_fpv.get_tiles_from_boundary(
        boundary_info=dict(bound_type="auto_bbox", bbox=bbox), zoom=14)
    np.random.shuffle(tiles)
    total_looked_into = 0
    total_rows = 0
    dfs_meta = []
    for tile in tiles:
        image_points_response = await downloader.get_tiles_image_points([tile])
        if image_points_response is None:
            continue
        try:
            df_points = get_fpv.parse_image_points_json_data(image_points_response)
            if len(df_points) == 0:
                continue
            total_looked_into += len(df_points)

            for chunk in split_dataframe(df_points, chunk_size=100):
                if len(chunk) == 0:
                    # Nothing to query for an empty chunk.
                    continue
                image_ids = chunk["id"]
                image_infos, num_fail = await get_fpv.fetch_image_infos(image_ids, downloader, infos_dir)
                df_meta = get_fpv.geojson_feature_list_to_pandas(image_infos.values())

                # Some standardization of the data
                df_meta["model"] = df_meta["model"].str.lower().str.replace(' ', '').str.replace('_', '')
                df_meta["make"] = df_meta["make"].str.lower().str.replace(' ', '').str.replace('_', '')

                if filter_undistort:
                    fp = no_cam_filter_pipeline if disable_cam_filter else filter_pipeline
                    df_meta = fp(df_meta)

                dfs_meta.append(df_meta)
                total_rows = sum(len(x) for x in dfs_meta)
                if total_rows > N:
                    break
                elif total_looked_into > TOTAL_LOOKED_INTO_LIMIT:
                    yield (f"Went through {total_looked_into} images and could not find images satisfying the filters."
                           "\nPlease rerun or run the data engine locally for bulk time consuming operations.", None, None)
                    return
            if total_rows > N:
                break
        except Exception as e:
            # Best effort: a failing tile is skipped, but the error is logged.
            # Catching Exception (not a bare except) lets GeneratorExit and
            # task cancellation propagate out of this async generator.
            logger.error(f"Failed to process tile {tile} skipping it. Error: {repr(e)}.")

    total_rows = sum(len(x) for x in dfs_meta)
    if total_rows < N:
        # pd.concat([]) and DataFrame.sample(N) would both raise here.
        yield (f"Went through {total_looked_into} images and could not find images satisfying the filters."
               "\nPlease rerun or run the data engine locally for bulk time consuming operations.", None, None)
        return

    df_meta = pd.concat(dfs_meta)
    df_meta = df_meta.sample(N)

    # Calc derivative attributes
    df_meta["loc_descrip"] = filters.haversine_np(
        lon1=df_meta["geometry.long"], lat1=df_meta["geometry.lat"],
        lon2=df_meta["computed_geometry.long"], lat2=df_meta["computed_geometry.lat"]
    )

    df_meta["angle_descrip"] = filters.angle_dist(
        df_meta["compass_angle"],
        df_meta["computed_compass_angle"]
    )

    # The demo shows a single image: the last sampled row (the only one, N=1).
    row = df_meta.iloc[-1]
    image_id = row["id"]
    metadata_fmt = _format_metadata(row)

    yield metadata_fmt, None, None
    image_urls = list(df_meta.set_index("id")["thumb_2048_url"].items())
    num_fail = await get_fpv.fetch_images_pixels(image_urls, downloader, raw_image_dir)
    if num_fail > 0:
        logger.error(f"Failed to download {num_fail} images.")

    seq_to_image_ids = df_meta.groupby('sequence')['id'].agg(list).to_dict()
    lon_center = (bbox['east'] + bbox['west']) / 2
    lat_center = (bbox['north'] + bbox['south']) / 2
    projection = get_fpv.Projection(lat_center, lon_center, max_extent=200e3)

    df_meta.index = df_meta["id"]
    image_infos = df_meta.to_dict(orient="index")
    process_sequence_args = get_fpv.default_cfg

    if filter_undistort:
        for seq_id, seq_image_ids in seq_to_image_ids.items():
            try:
                d, pi = get_fpv.process_sequence(
                    seq_image_ids,
                    image_infos,
                    projection,
                    process_sequence_args,
                    raw_image_dir,
                    out_image_dir,
                )
                if d is None or pi is None:
                    raise Exception("process_sequence returned None")
            except Exception as e:
                logger.error(f"Failed to process sequence {seq_id} skipping it. Error: {repr(e)}.")

        fpv = plt.imread(out_image_dir / f"{image_id}_undistorted.jpg")
    else:
        fpv = plt.imread(raw_image_dir / f"{image_id}.jpg")
    yield metadata_fmt, fpv, None

    ################ BEV
    df = df_meta
    # convert pandas dataframe to geopandas dataframe
    gdf = gpd.GeoDataFrame(df,
                           geometry=gpd.points_from_xy(
                               df['computed_geometry.long'],
                               df['computed_geometry.lat']),
                           crs=4326)

    # convert the geopandas dataframe to UTM
    utm_crs = gdf.estimate_utm_crs()
    gdf_utm = gdf.to_crs(utm_crs)
    transformer = Transformer.from_crs(utm_crs, 4326)

    # Extra margin added on every side of each box, in UTM units.
    padding = 50
    # calculate the required distance from the center to the edge of the image
    # so that the image will not be out of bounds when we rotate it
    map_length = np.ceil(np.sqrt(map_length**2 + map_length**2))
    distance = map_length * mpp

    # create bounding boxes for each point
    gdf_utm['bounding_box_utm_p1'] = gdf_utm.apply(lambda row: (
        row.geometry.x - distance - padding,
        row.geometry.y - distance - padding,
    ), axis=1)

    gdf_utm['bounding_box_utm_p2'] = gdf_utm.apply(lambda row: (
        row.geometry.x + distance + padding,
        row.geometry.y + distance + padding,
    ), axis=1)

    # convert the bounding box back to lat, long
    gdf_utm['bounding_box_lat_long_p1'] = gdf_utm.apply(lambda row: transformer.transform(*row['bounding_box_utm_p1']), axis=1)
    gdf_utm['bounding_box_lat_long_p2'] = gdf_utm.apply(lambda row: transformer.transform(*row['bounding_box_utm_p2']), axis=1)
    gdf_utm['bbox_min_lat'] = gdf_utm['bounding_box_lat_long_p1'].apply(lambda x: x[0])
    gdf_utm['bbox_min_long'] = gdf_utm['bounding_box_lat_long_p1'].apply(lambda x: x[1])
    gdf_utm['bbox_max_lat'] = gdf_utm['bounding_box_lat_long_p2'].apply(lambda x: x[0])
    gdf_utm['bbox_max_long'] = gdf_utm['bounding_box_lat_long_p2'].apply(lambda x: x[1])
    gdf_utm['bbox_formatted'] = gdf_utm.apply(lambda row: f"{row['bbox_min_long']},{row['bbox_min_lat']},{row['bbox_max_long']},{row['bbox_max_lat']}", axis=1)

    # iterate over the dataframe and get BEV images
    # only the id, bbox_formatted and compass angle columns are needed for the jobs
    jobs = gdf_utm[['id', 'bbox_formatted', 'computed_compass_angle']].to_dict(orient='records')

    # NOTE(review): the trailing positional flags are opaque from here; confirm
    # their meaning against get_bev.get_bev_from_bbox_worker_init.
    get_bev.get_bev_from_bbox_worker_init(osm_cache_dir, bev_dir, semantic_mask_dir, rendered_mask_dir,
                                          "MapItAnywhere/mia/bev/styles/mia.yml", map_length, mpp,
                                          None, True, False, True, True, 1)
    for job_dict in jobs:
        get_bev.get_bev_from_bbox_worker(job_dict)

    bev = plt.imread(rendered_mask_dir / f"{image_id}.png")

    yield metadata_fmt, fpv, bev
241
+
242
# Filter pipelines used by fetch(): the default one, and the one selected when
# camera-model filtering is disabled in the UI.
filter_pipeline = filters.FilterPipeline.load_from_yaml("MapItAnywhere/mia/fpv/filter_pipelines/mia.yaml")
filter_pipeline.verbose = False
no_cam_filter_pipeline = filters.FilterPipeline.load_from_yaml("MapItAnywhere/mia/fpv/filter_pipelines/mia_rural.yaml")
no_cam_filter_pipeline.verbose = False

# Working directories, all relative to the current working directory.
loc = Path(".")
infos_dir = loc / "infos_dir"
raw_image_dir = loc / "raw_images"
out_image_dir = loc / "images"
osm_cache_dir = loc / "osm_cache"
bev_dir = loc / "bev_raw"
semantic_mask_dir = loc / "semantic_masks"
rendered_mask_dir = loc / "rendered_semantic_masks"

# infos_dir is passed to get_fpv.fetch_image_infos by fetch(), so create it
# up front together with the other working directories.
all_dirs = [loc, infos_dir, osm_cache_dir, bev_dir, semantic_mask_dir,
            rendered_mask_dir, out_image_dir, raw_image_dir]
for d in all_dirs:
    os.makedirs(d, exist_ok=True)

logger.info(f"Current working directory: {os.getcwd()}, listdir: {os.listdir('.')}")

# Gradio UI: five inputs matching fetch()'s parameters, three outputs matching
# the (metadata, FPV, BEV) tuples it yields.
demo = gr.Interface(
    fn=fetch,
    inputs=[gr.Text("Pittsburgh, PA, United States", label="Location"),
            gr.Checkbox(value=False, label="Filter & Undistort"),
            gr.Checkbox(value=False, label="Disable camera model filtering"),
            gr.Slider(minimum=64, maximum=512, step=1, label="BEV Dimension", value=224),
            gr.Slider(minimum=0.1, maximum=2, label="Meters Per Pixel", value=0.5)],
    outputs=[gr.Text(label="METADATA"), gr.Image(label="FPV"), gr.Image(label="BEV")],
    title="MapItAnywhere (Data Engine)",
    # The leading space on the second literal separates the two sentences.
    description="A demo showcasing samples of MIA's capability to retrieve FPV-BEV pairs worldwide."
                " For bulk download/heavy filtering please visit the github and follow the instructions to run locally"
)

logger.info("Starting server")
demo.launch(server_name="0.0.0.0", server_port=7860, share=False)