Merge pull request #12 from doccano/feature/GCPLabelDetection

Hironsan · web-flow · commit d17c31ecb418 · 2021-06-08T12:40:42.000+09:00
Feature/gcp label detection
diff --git a/auto_labeling_pipeline/mappings.py b/auto_labeling_pipeline/mappings.py
@@ -44,6 +44,11 @@ class AmazonComprehendSentimentTemplate(MappingTemplate):
     template_file = 'amazon_comprehend_sentiment.jinja2'
 
 
+class GCPImageLabelDetectionTemplate(MappingTemplate):
+    # Mapping template for Cloud Vision label-detection responses: the result
+    # is a ClassificationLabels collection, and the mapping itself lives in
+    # the gcp_image_label_detection.jinja2 template file.
+    label_collection = ClassificationLabels
+    template_file = 'gcp_image_label_detection.jinja2'
+
+
 class AmazonComprehendEntityTemplate(MappingTemplate):
     label_collection = SequenceLabels
     template_file = 'amazon_comprehend_entity.jinja2'
diff --git a/auto_labeling_pipeline/menu.py b/auto_labeling_pipeline/menu.py
@@ -2,20 +2,17 @@
 
 from pydantic import BaseModel
 
-from auto_labeling_pipeline.mappings import (AmazonComprehendEntityTemplate, AmazonComprehendSentimentTemplate,
-                                             GCPEntitiesTemplate, MappingTemplate)
-from auto_labeling_pipeline.models import (AmazonComprehendEntityRequestModel, AmazonComprehendPIIEntityRequestModel,
-                                           AmazonComprehendSentimentRequestModel, CustomRESTRequestModel,
-                                           GCPEntitiesRequestModel, RequestModel)
-from auto_labeling_pipeline.task import DocumentClassification, GenericTask, SequenceLabeling, Task, TaskFactory
+from auto_labeling_pipeline import mappings as mp
+from auto_labeling_pipeline import models as mo
+from auto_labeling_pipeline import task as t
 
 
 class Option(BaseModel):
     name: str
     description: str
-    task: Type[Task]
-    model: Type[RequestModel]
-    template: Type[MappingTemplate]
+    task: Type[t.Task]
+    model: Type[mo.RequestModel]
+    template: Type[mp.MappingTemplate]
 
     class Config:
         arbitrary_types_allowed = True
@@ -34,8 +31,8 @@ class Options:
 
     @classmethod
     def filter_by_task(cls, task_name: str) -> List[Option]:
-        task = TaskFactory.create(task_name)
-        return [option for option in cls.options if option.task == task or option.task == GenericTask]
+        task = t.TaskFactory.create(task_name)
+        return [option for option in cls.options if option.task == task or option.task == t.GenericTask]
 
     @classmethod
     def find(cls, option_name: str) -> Option:
@@ -45,7 +42,7 @@ def find(cls, option_name: str) -> Option:
         raise ValueError('Option {} is not found.'.format(option_name))
 
     @classmethod
-    def register(cls, task: Type[Task], model: Type[RequestModel], template: Type[MappingTemplate]):
+    def register(cls, task: Type[t.Task], model: Type[mo.RequestModel], template: Type[mp.MappingTemplate]):
         schema = model.schema()
         cls.options.append(
             Option(
@@ -58,8 +55,33 @@ def register(cls, task: Type[Task], model: Type[RequestModel], template: Type[Ma
         )
 
 
-Options.register(GenericTask, CustomRESTRequestModel, MappingTemplate)
-Options.register(DocumentClassification, AmazonComprehendSentimentRequestModel, AmazonComprehendSentimentTemplate)
-Options.register(SequenceLabeling, GCPEntitiesRequestModel, GCPEntitiesTemplate)
-Options.register(SequenceLabeling, AmazonComprehendEntityRequestModel, AmazonComprehendEntityTemplate)
-Options.register(SequenceLabeling, AmazonComprehendPIIEntityRequestModel, AmazonComprehendEntityTemplate)
+# Register the built-in options. Each call pairs a task type with a request
+# model and a mapping template. Options are appended in call order, which is
+# also the order in which Options.filter_by_task() returns them.
+Options.register(
+    t.GenericTask,
+    mo.CustomRESTRequestModel,
+    mp.MappingTemplate
+)
+Options.register(
+    t.DocumentClassification,
+    mo.AmazonComprehendSentimentRequestModel,
+    mp.AmazonComprehendSentimentTemplate
+)
+Options.register(
+    t.SequenceLabeling,
+    mo.GCPEntitiesRequestModel,
+    mp.GCPEntitiesTemplate
+)
+Options.register(
+    t.SequenceLabeling,
+    mo.AmazonComprehendEntityRequestModel,
+    mp.AmazonComprehendEntityTemplate
+)
+Options.register(
+    t.SequenceLabeling,
+    mo.AmazonComprehendPIIEntityRequestModel,
+    mp.AmazonComprehendEntityTemplate
+)
+# Image classification backed by Cloud Vision label detection.
+Options.register(
+    t.ImageClassification,
+    mo.GCPImageLabelDetectionRequestModel,
+    mp.GCPImageLabelDetectionTemplate
+)
diff --git a/auto_labeling_pipeline/models.py b/auto_labeling_pipeline/models.py
@@ -206,3 +206,36 @@ def send(self, text: str):
             LanguageCode=self.language_code
         )
         return response
+
+
+class GCPImageLabelDetectionRequestModel(RequestModel):
+    """
+    This allows you to detect labels for an image by
+    <a href="https://cloud.google.com/vision/docs/labels">Cloud Vision API</a>.
+    """
+    # API key, sent to the Vision endpoint as the `key` query parameter.
+    key: str
+
+    class Config:
+        title = 'GCP Image Label Detection'
+
+    def send(self, b64_image: str):
+        """Request label detection for a single base64-encoded image.
+
+        Args:
+            b64_image: The image content encoded as a base64 string.
+
+        Returns:
+            The decoded JSON body of the API response.
+
+        Raises:
+            requests.exceptions.Timeout: If the endpoint does not answer
+                within the timeout.
+        """
+        url = 'https://vision.googleapis.com/v1/images:annotate'
+        headers = {'Content-Type': 'application/json'}
+        params = {'key': self.key}
+        body = {
+            'requests': [
+                {
+                    'image': {
+                        'content': b64_image
+                    },
+                    'features': [
+                        {
+                            'maxResults': 5,
+                            'type': 'LABEL_DETECTION'
+                        }
+                    ]
+                }
+            ]
+        }
+        # Bound the wait so an unresponsive endpoint cannot block the caller
+        # indefinitely; requests applies no timeout unless one is given.
+        response = requests.post(url, headers=headers, params=params, json=body, timeout=60)
+        return response.json()
diff --git a/auto_labeling_pipeline/task.py b/auto_labeling_pipeline/task.py
@@ -24,12 +24,17 @@ class Seq2seq(Task):
     label_collection = Seq2seqLabels
 
 
+class ImageClassification(Task):
+    # Image classification results are collected as ClassificationLabels.
+    label_collection = ClassificationLabels
+
+
 class TaskFactory:
 
     @classmethod
     def create(cls, task_name: str) -> Type[Task]:
         return {
             'DocumentClassification': DocumentClassification,
             'SequenceLabeling': SequenceLabeling,
-            'Seq2seq': Seq2seq
+            'Seq2seq': Seq2seq,
+            'ImageClassification': ImageClassification
         }.get(task_name, GenericTask)
diff --git a/auto_labeling_pipeline/templates/gcp_image_label_detection.jinja2 b/auto_labeling_pipeline/templates/gcp_image_label_detection.jinja2
@@ -0,0 +1,5 @@
+{# Emit the first returned label; render an empty list when the response carries no labelAnnotations. #}
+[
+  {% if input.responses[0].labelAnnotations %}
+  {
+    "label": "{{ input.responses[0].labelAnnotations[0].description }}"
+  }
+  {% endif %}
+]
diff --git a/tests/data/gcp_image_label_detection.json b/tests/data/gcp_image_label_detection.json
@@ -0,0 +1,38 @@
+{
+  "responses":[
+    {
+      "labelAnnotations":[
+        {
+          "description":"Cat",
+          "mid":"/m/01yrx",
+          "score":0.945612,
+          "topicality":0.945612
+        },
+        {
+          "description":"Eye",
+          "mid":"/m/014sv8",
+          "score":0.9400194,
+          "topicality":0.9400194
+        },
+        {
+          "description":"Felidae",
+          "mid":"/m/0307l",
+          "score":0.8835683,
+          "topicality":0.8835683
+        },
+        {
+          "description":"Carnivore",
+          "mid":"/m/01lrl",
+          "score":0.8821837,
+          "topicality":0.8821837
+        },
+        {
+          "description":"Plant",
+          "mid":"/m/05s2s",
+          "score":0.8714797,
+          "topicality":0.8714797
+        }
+      ]
+    }
+  ]
+}
diff --git a/tests/data/images/1500x500.jpeg b/tests/data/images/1500x500.jpeg
diff --git a/tests/fixtures/cassettes/gcp_label_detection.yaml b/tests/fixtures/cassettes/gcp_label_detection.yaml
diff --git a/tests/fixtures/cassettes/pipeline_gcp_label_detection.yaml b/tests/fixtures/cassettes/pipeline_gcp_label_detection.yaml
diff --git a/tests/test_mappings.py b/tests/test_mappings.py
@@ -1,7 +1,7 @@
 import json
 
 from auto_labeling_pipeline.mappings import (AmazonComprehendEntityTemplate, AmazonComprehendSentimentTemplate,
-                                             GCPEntitiesTemplate)
+                                             GCPEntitiesTemplate, GCPImageLabelDetectionTemplate)
 
 
 def load_json(filepath):
@@ -86,3 +86,12 @@ def test_amazon_comprehend_entities(data_path):
         }
     ]
     assert labels == expected
+
+
+def test_gcp_image_label_detection(data_path):
+    """The GCP label-detection template maps the sample response to its first label."""
+    sample_response = load_json(data_path / 'gcp_image_label_detection.json')
+    rendered = GCPImageLabelDetectionTemplate().render(sample_response)
+    assert rendered.dict() == [{'label': 'Cat'}]
diff --git a/tests/test_menu.py b/tests/test_menu.py
@@ -23,7 +23,8 @@ def test_filter_task(task, expected):
         'Custom REST Request',
         'Amazon Comprehend Sentiment Analysis',
         'Amazon Comprehend Entity Recognition',
-        'GCP Entity Analysis'
+        'GCP Entity Analysis',
+        'GCP Image Label Detection'
     ]
 )
 def test_find_option(option_name):
diff --git a/tests/test_models.py b/tests/test_models.py
@@ -1,10 +1,17 @@
+import base64
 import os
 
 import pytest
 import vcr
 
-from auto_labeling_pipeline.models import (AmazonComprehendSentimentRequestModel, GCPEntitiesRequestModel, RequestModel,
-                                           RequestModelFactory)
+from auto_labeling_pipeline.models import (AmazonComprehendSentimentRequestModel, GCPEntitiesRequestModel,
+                                           GCPImageLabelDetectionRequestModel, RequestModel, RequestModelFactory)
+
+
+def load_image_as_b64(filepath):
+    """Read the file at *filepath* and return its bytes as a base64 text string."""
+    with open(filepath, 'rb') as image_file:
+        raw_bytes = image_file.read()
+    return base64.b64encode(raw_bytes).decode('utf-8')
 
 
 def test_request_model_raises_type_error_on_instantiation():
@@ -36,6 +43,21 @@ def test_gcp_entities_request(cassettes_path):
         assert 'entities' in response
 
 
+def test_gcp_image_label_detection(data_path, cassettes_path):
+    """Send a sample image to the label-detection model and check the response shape."""
+    cassette = str(cassettes_path / 'gcp_label_detection.yaml')
+    # Filter the `key` query parameter so the API key is not stored in the cassette.
+    with vcr.use_cassette(cassette, mode='once', filter_query_parameters=['key']):
+        request_model = GCPImageLabelDetectionRequestModel(key=os.environ.get('API_KEY_GCP', ''))
+        encoded_image = load_image_as_b64(data_path / 'images/1500x500.jpeg')
+        result = request_model.send(b64_image=encoded_image)
+        assert 'responses' in result
+        assert 'labelAnnotations' in result['responses'][0]
+
+
 def test_amazon_comprehend_sentiment_request(cassettes_path):
     with vcr.use_cassette(str(cassettes_path / 'amazon_comprehend_sentiment.yaml'),
                           mode='once',
diff --git a/tests/test_pipeline.py b/tests/test_pipeline.py
@@ -2,11 +2,13 @@
 
 import vcr
 
-from auto_labeling_pipeline.mappings import AmazonComprehendSentimentTemplate
-from auto_labeling_pipeline.models import AmazonComprehendSentimentRequestModel
+from auto_labeling_pipeline.mappings import AmazonComprehendSentimentTemplate, GCPImageLabelDetectionTemplate
+from auto_labeling_pipeline.models import AmazonComprehendSentimentRequestModel, GCPImageLabelDetectionRequestModel
 from auto_labeling_pipeline.pipeline import pipeline
 from auto_labeling_pipeline.postprocessing import PostProcessor
 
+from .test_models import load_image_as_b64
+
 
 def test_amazon_pipeline(cassettes_path):
     with vcr.use_cassette(str(cassettes_path / 'amazon_comprehend_sentiment.yaml'),
@@ -30,3 +32,25 @@ def test_amazon_pipeline(cassettes_path):
         assert isinstance(labels, list)
         assert len(labels) == 1
         assert 'label' in labels[0]
+
+
+def test_gcp_label_detection_pipeline(data_path, cassettes_path):
+    """Run the whole pipeline on a sample image and expect exactly one label."""
+    cassette = str(cassettes_path / 'pipeline_gcp_label_detection.yaml')
+    # Filter the `key` query parameter so the API key is not stored in the cassette.
+    with vcr.use_cassette(cassette, mode='once', filter_query_parameters=['key']):
+        result = pipeline(
+            request_model=GCPImageLabelDetectionRequestModel(key=os.environ.get('API_KEY_GCP', '')),
+            mapping_template=GCPImageLabelDetectionTemplate(),
+            text=load_image_as_b64(data_path / 'images/1500x500.jpeg'),
+            post_processing=PostProcessor({})
+        )
+        label_list = result.dict()
+        assert isinstance(label_list, list)
+        assert len(label_list) == 1
+        assert 'label' in label_list[0]
diff --git a/tests/test_task.py b/tests/test_task.py
@@ -1,15 +1,16 @@
 import pytest
 
 from auto_labeling_pipeline.labels import ClassificationLabels, Seq2seqLabels, SequenceLabels
-from auto_labeling_pipeline.task import DocumentClassification, Seq2seq, SequenceLabeling
+from auto_labeling_pipeline.task import DocumentClassification, ImageClassification, Seq2seq, SequenceLabeling
 
 
 @pytest.mark.parametrize(
     'task, expected',
     [
         (DocumentClassification, ClassificationLabels),
         (SequenceLabeling, SequenceLabels),
-        (Seq2seq, Seq2seqLabels)
+        (Seq2seq, Seq2seqLabels),
+        (ImageClassification, ClassificationLabels)
     ]
 )
 def test_return_correct_label_collection(task, expected):

-Original file line number
+Diff line change
@@ @@ -0,0 +1,5 @@ @@
 +[
 +  {
 +    "label": "{{ input.responses[0].labelAnnotations[0].description }}"
 +  }
 +]