Skip to content

Commit 83ae08b

Browse files
matt-deboer authored and jsbroks committed
COCO importer for imports in background with progress updates (#91)
* added coco importer for imports in background with progress updates
* Add configuration options for importer; cleanup code
* should be min
* add separate method for area/bbox
* update ann area/bbox, as well as image categories
* images_and_categories as list
* add exception logging for bg tasks
* apply exception logging fixes
* Formatting
* fix comment
* print to stderr by default; also, return result of called function
1 parent 45053a7 commit 83ae08b

File tree

6 files changed: +410 -105 lines changed

app/api/datasets.py

Lines changed: 17 additions & 101 deletions
Original file line number | Diff line number | Diff line change
@@ -9,7 +9,7 @@
99
from ..util.pagination_util import Pagination
1010
from ..util import query_util, coco_util
1111
from ..models import *
12-
12+
from ..util.coco_importer import CocoImporter
1313

1414
import datetime
1515
import json
@@ -284,114 +284,30 @@ def post(self, dataset_id):
284284
coco = args['coco']
285285

286286
dataset = current_user.datasets.filter(id=dataset_id).first()
287-
images = ImageModel.objects(dataset_id=dataset_id)
288-
categories = CategoryModel.objects
289-
290287
if dataset is None:
291288
return {'message': 'Invalid dataset ID'}, 400
292289

293-
coco_json = json.load(coco)
294-
coco_images = coco_json.get('images')
295-
coco_annotations = coco_json.get('annotations')
296-
coco_categories = coco_json.get('categories')
297-
298-
errors = []
299-
300-
categories_id = {}
301-
images_id = {}
302-
303-
# Create any missing categories
304-
for category in coco_categories:
305-
category_name = category.get('name')
306-
print("Loading category {}".format(category_name), flush=True)
307-
308-
category_id = category.get('id')
309-
category_model = categories.filter(name__exact=category_name).all()
310-
311-
if len(category_model) == 0:
312-
errors.append({'category': category_name,
313-
'message': 'Creating category ' + category_name + '.'})
290+
import_id = CocoImporter.import_coco(
291+
coco, dataset_id, current_user.username)
314292

315-
new_category = CategoryModel(name=category_name, color=color_util.random_color_hex())
316-
new_category.save()
317-
categories_id[category_id] = new_category.id
318-
print("Category not found! (Creating new one)", flush=True)
319-
continue
320-
321-
if len(category_model) > 1:
322-
errors.append({'category': category_name,
323-
'message': 'To many categories found with file name.'})
324-
continue
293+
return {
294+
"import_id": import_id
295+
}
325296

326-
category_model = category_model[0]
327-
categories_id[category_id] = category_model.id
328297

329-
# Add any new categories to dataset
330-
for key, value in categories_id.items():
331-
if value not in dataset.categories:
332-
dataset.categories.append(value)
298+
@api.route('/coco/<int:import_id>')
299+
class ImageCocoId(Resource):
333300

334-
dataset.update(set__categories=dataset.categories)
301+
@login_required
302+
def get(self, import_id):
303+
""" Returns current progress and errors of a coco import """
304+
coco_import = CocoImportModel.objects(
305+
id=import_id, creator=current_user.username).first()
335306

336-
# Find all images
337-
for image in coco_images:
338-
image_id = image.get('id')
339-
image_filename = image.get('file_name')
340-
341-
print("Loading image {}".format(image_filename), flush=True)
342-
image_model = images.filter(file_name__exact=image_filename).all()
343-
344-
if len(image_model) == 0:
345-
errors.append({'file_name': image_filename,
346-
'message': 'Could not find image.'})
347-
continue
348-
349-
if len(image_model) > 1:
350-
errors.append({'file_name': image_filename,
351-
'message': 'To many images found with the same file name.'})
352-
continue
353-
354-
image_model = image_model[0]
355-
print("Image found", flush=True)
356-
images_id[image_id] = image_model
357-
358-
# Generate annotations
359-
for annotation in coco_annotations:
360-
image_id = annotation.get('image_id')
361-
category_id = annotation.get('category_id')
362-
segmentation = annotation.get('segmentation', [])
363-
is_crowd = annotation.get('iscrowed', False)
364-
365-
if len(segmentation) == 0:
366-
continue
367-
368-
print("Loading annotation data (image:{} category:{})".format(image_id, category_id), flush=True)
369-
370-
try:
371-
image_model = images_id[image_id]
372-
category_model_id = categories_id[category_id]
373-
except KeyError:
374-
continue
375-
376-
# Check if annotation already exists
377-
annotation = AnnotationModel.objects(image_id=image_model.id,
378-
category_id=category_model_id,
379-
segmentation=segmentation, delete=False).first()
380-
# Create annotation
381-
if annotation is None:
382-
print("Creating annotation", flush=True)
383-
annotation = AnnotationModel(image_id=image_model.id)
384-
annotation.category_id = category_model_id
385-
# annotation.iscrowd = is_crowd
386-
annotation.segmentation = segmentation
387-
annotation.color = color_util.random_color_hex()
388-
annotation.save()
389-
390-
image_model.update(set__annotated=True)
391-
else:
392-
print("Annotation already exists", flush=True)
307+
if not coco_import:
308+
return {'message': 'No such coco import'}, 400
393309

394310
return {
395-
'errors': errors
311+
"progress": coco_import.progress,
312+
"errors": coco_import.errors
396313
}
397-

app/config.py

Lines changed: 8 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -21,8 +21,14 @@ class Config:
2121
INITIALIZE_FROM_FILE = os.getenv("INITIALIZE_FROM_FILE")
2222
LOAD_IMAGES_ON_START = os.getenv("LOAD_IMAGES_ON_START", False)
2323

24+
# Coco Importer Options
25+
COCO_IMPORTER_VERBOSE = os.getenv("COCO_IMPORTER_VERBOSE", False)
26+
COCO_IMPORTER_MAX_WORKERS = int(os.getenv("COCO_IMPORTER_MAX_WORKERS", 4))
27+
COCO_IMPORTER_IMAGE_BATCH_SIZE = int(
28+
os.getenv("COCO_IMPORTER_IMAGE_BATCH_SIZE", 1000))
29+
COCO_IMPORTER_ANNOTATION_BATCH_SIZE = int(
30+
os.getenv("COCO_IMPORTER_ANNOTATION_BATCH_SIZE", 1000))
31+
2432
# User Options
2533
LOGIN_DISABLED = os.getenv('LOGIN_DISABLED', False)
2634
ALLOW_REGISTRATION = True
27-
28-

app/models.py

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -317,6 +317,13 @@ class LicenseModel(db.DynamicDocument):
317317
url = db.StringField()
318318

319319

320+
class CocoImportModel(db.DynamicDocument):
321+
id = db.SequenceField(primary_key=True)
322+
creator = db.StringField(required=True)
323+
progress = db.FloatField(default=0.0, min_value=0.0, max_value=1.0)
324+
errors = db.ListField(default=[])
325+
326+
320327
class UserModel(db.DynamicDocument, UserMixin):
321328
password = db.StringField(required=True)
322329
username = db.StringField(max_length=25, required=True, unique=True)

0 commit comments

Comments
 (0)