open-mmlab · pangyyyyy · Jan 9, 2023 · Jan 9, 2023 · Jan 9, 2023 · Jan 9, 2023
diff --git a/mmhuman3d/data/data_converters/__init__.py b/mmhuman3d/data/data_converters/__init__.py
@@ -1,4 +1,5 @@
 from .agora import AgoraConverter
+from .aic import AicConverter
 from .amass import AmassConverter
 from .builder import build_data_converter
 from .coco import CocoConverter
@@ -17,20 +18,27 @@
 from .h36m_smplx import H36mSMPLXConverter
 from .humman import HuMManConverter
 from .insta_vibe import InstaVibeConverter
+from .instavariety import InstaVarietyConverter
 from .lsp import LspConverter
 from .lsp_extended import LspExtendedConverter
 from .mpi_inf_3dhp import MpiInf3dhpConverter
 from .mpi_inf_3dhp_hybrik import MpiInf3dhpHybrIKConverter
 from .mpii import MpiiConverter
+from .mtp import MtpConverter
+from .muco3dhp import Muco3dhpConverter
+from .ochuman import OCHumanConverter
+from .oh50k3d import OH50k3DConverter
 from .penn_action import PennActionConverter
 from .posetrack import PosetrackConverter
+from .prox import ProxConverter
 from .pw3d import Pw3dConverter
 from .pw3d_hybrik import Pw3dHybrIKConverter
 from .spin import SpinConverter
 from .stirling import StirlingConverter
 from .surreal import SurrealConverter
 from .up3d import Up3dConverter
 from .vibe import VibeConverter
+from .vlog import VlogConverter
 
 __all__ = [
     'build_data_converter', 'AgoraConverter', 'MpiiConverter', 'H36mConverter',
@@ -43,5 +51,7 @@
     'SurrealConverter', 'InstaVibeConverter', 'SpinConverter', 'VibeConverter',
     'HuMManConverter', 'FFHQFlameConverter', 'ExposeCuratedFitsConverter',
     'ExposeSPINSMPLXConverter', 'FreihandConverter', 'StirlingConverter',
-    'EHFConverter'
+    'EHFConverter', 'ProxConverter', 'OH50k3DConverter', 'Muco3dhpConverter',
+    'AicConverter', 'InstaVarietyConverter', 'VlogConverter',
+    'OCHumanConverter', 'MtpConverter'
 ]
diff --git a/mmhuman3d/data/data_converters/aic.py b/mmhuman3d/data/data_converters/aic.py
@@ -0,0 +1,104 @@
+import json
+import os
+from typing import List
+
+import numpy as np
+from tqdm import tqdm
+
+from mmhuman3d.core.conventions.keypoints_mapping import convert_kps
+from mmhuman3d.data.data_structures.human_data import HumanData
+from .base_converter import BaseModeConverter
+from .builder import DATA_CONVERTERS
+
+
+@DATA_CONVERTERS.register_module()
+class AicConverter(BaseModeConverter):
+    """AI Challenger dataset `Ai challenger: A large-scale dataset for going
+    deeper in image understanding` arXiv'2017. More details can be found in
+    the `paper <https://arxiv.org/abs/1711.06475>`__ .
+
+    Args:
+        modes (list): 'validation' and/or 'train' for
+            accepted modes
+    """
+    ACCEPTED_MODES = ['validation', 'train']
+
+    def __init__(self, modes: List = []) -> None:
+        super(AicConverter, self).__init__(modes)
+        # Per mode: [date suffix used in the root directory and annotation
+        # file names, date suffix used in the image directory name].
+        self.json_mapping_dict = {
+            'train': ['20170909', '20170902'],
+            'validation': ['20170911', '20170911'],
+        }
+
+    def convert_by_mode(self, dataset_path: str, out_path: str,
+                        mode: str) -> dict:
+        """Convert the annotations of one mode and dump them to an npz file.
+
+        Args:
+            dataset_path (str): Path to directory where raw images and
+                annotations are stored.
+            out_path (str): Path to directory to save preprocessed npz file
+            mode (str): Mode in accepted modes
+
+        Returns:
+            Nothing is returned (the ``dict`` annotation is kept unchanged).
+            A HumanData() with keys image_path, bbox_xywh, keypoints2d,
+            keypoints2d_mask and config is dumped to
+            ``<out_path>/aic_<mode>.npz``.
+        """
+        # use HumanData to store all data
+        human_data = HumanData()
+
+        # structs we need
+        image_path_, keypoints2d_, bbox_xywh_ = [], [], []
+
+        # json annotation file and image directory for this mode
+        aid, iid = self.json_mapping_dict[mode]
+        root_dir = f'ai_challenger_keypoint_{mode}_{aid}'
+        json_path = os.path.join(dataset_path, root_dir,
+                                 f'keypoint_{mode}_annotations_{aid}.json')
+        # kept relative to dataset_path; this is what gets stored
+        img_dir = f'{root_dir}/keypoint_{mode}_images_{iid}'
+
+        # context manager so the file handle is closed after parsing
+        with open(json_path, 'r') as f:
+            json_data = json.load(f)
+
+        for annot in tqdm(json_data):
+
+            # image name
+            image_id = annot['image_id']
+            img_path = os.path.join(img_dir, f'{image_id}.jpg')
+            # a missing image is only reported; its annotations are kept
+            if not os.path.exists(os.path.join(dataset_path, img_path)):
+                print('image path does not exist')
+            keypoints_annot = annot['keypoint_annotations']
+            bbox_annot = annot['human_annotations']
+
+            # one entry per annotated person, keyed by person id
+            for pid, person_kps in keypoints_annot.items():
+                keypoints2d = np.array(person_kps).reshape(14, 3)
+                # NOTE(review): stored as-is under 'bbox_xywh'; confirm the
+                # raw annotation is xywh rather than corner coordinates.
+                bbox_xywh = np.array(bbox_annot[pid]).reshape(-1)
+                # clamp negative values in the third column to 0
+                keypoints2d[keypoints2d[:, 2] < 0, 2] = 0
+                # keep the person only if all 14 keypoints are annotated
+                if np.all(keypoints2d[:, 2] > 0):
+                    # store data
+                    image_path_.append(img_path)
+                    keypoints2d_.append(keypoints2d)
+                    bbox_xywh_.append(bbox_xywh)
+
+        # append a constant 1 as fifth bbox column, then convert keypoints
+        bbox_xywh_ = np.array(bbox_xywh_).reshape((-1, 4))
+        bbox_xywh_ = np.hstack([bbox_xywh_, np.ones([bbox_xywh_.shape[0], 1])])
+        keypoints2d_ = np.array(keypoints2d_).reshape((-1, 14, 3))
+        keypoints2d_, mask = convert_kps(keypoints2d_, 'aic', 'human_data')
+
+        human_data['image_path'] = image_path_
+        human_data['keypoints2d_mask'] = mask
+        human_data['keypoints2d'] = keypoints2d_
+        human_data['bbox_xywh'] = bbox_xywh_
+        human_data['config'] = 'aic'
+        human_data.compress_keypoints_by_mask()
+
+        # store the data struct
+        os.makedirs(out_path, exist_ok=True)
+        out_file = os.path.join(out_path, f'aic_{mode}.npz')
+        human_data.dump(out_file)
diff --git a/mmhuman3d/data/data_converters/instavariety.py b/mmhuman3d/data/data_converters/instavariety.py
@@ -0,0 +1,148 @@
+import glob
+import os
+from typing import List
+
+import cv2
+import numpy as np
+import tensorflow as tf
+from tqdm import tqdm
+
+from mmhuman3d.core.conventions.keypoints_mapping import convert_kps
+from mmhuman3d.data.data_structures.human_data import HumanData
+from .base_converter import BaseModeConverter
+from .builder import DATA_CONVERTERS
+
+
+@DATA_CONVERTERS.register_module()
+class InstaVarietyConverter(BaseModeConverter):
+    """InstaVariety dataset `Learning 3D Human Dynamics from Video`
+    CVPR'2019. More details can be found in the `paper
+    <https://arxiv.org/pdf/1812.01601.pdf>`__ .
+
+    Args:
+        modes (list): 'train' and/or 'test' for
+            accepted modes
+        extract_img (bool): Whether to decode the frames stored in the
+            tfrecords and write them as image files under the dataset
+            path. Default: False.
+    """
+    ACCEPTED_MODES = ['train', 'test']
+
+    def __init__(self, modes: List = [], extract_img: bool = False) -> None:
+        super(InstaVarietyConverter, self).__init__(modes)
+        self.extract_img = extract_img
+
+    def convert_by_mode(self, dataset_path: str, out_path: str,
+                        mode: str) -> dict:
+        """Convert the tfrecords of one mode and dump them to an npz file.
+
+        Args:
+            dataset_path (str): Path to directory where raw images and
+                annotations are stored.
+            out_path (str): Path to directory to save preprocessed npz file
+            mode (str): Mode in accepted modes
+
+        Returns:
+            Nothing is returned (the ``dict`` annotation is kept unchanged).
+            A HumanData() with keys image_path, bbox_xywh, keypoints2d,
+            keypoints2d_mask and config is dumped to
+            ``<out_path>/instavariety_<mode>.npz``.
+        """
+        # use HumanData to store all data
+        human_data = HumanData()
+
+        # structs we need
+        image_path_, keypoints2d_, bbox_xywh_ = [], [], []
+
+        # glob order is unspecified; sort for a deterministic sample order
+        filenames = sorted(
+            glob.glob(os.path.join(dataset_path, f'{mode}/*.tfrecord')))
+        raw_dataset = tf.data.TFRecordDataset(filenames)
+
+        # take(-1) iterates over every record of the dataset
+        for raw_record in raw_dataset.take(-1):
+            example = tf.train.Example()
+            example.ParseFromString(raw_record.numpy())
+            features = example.features.feature
+
+            # Now these are sequences: each record holds N frames.
+            N = int(features['meta/N'].int64_list.value[0])
+            # These are lists of length N
+            images_data = features['image/encoded'].bytes_list.value
+            images_name = features['image/filenames'].bytes_list.value
+
+            xys = np.array(features['image/xys'].float_list.value)
+            xys = xys.reshape(-1, 2, 14)
+
+            face_pts = np.array(features['image/face_pts'].float_list.value)
+            face_pts = face_pts.reshape(-1, 3, 5)
+
+            toe_pts = features['image/toe_pts'].float_list.value
+            if len(toe_pts) == 0:
+                # no toe annotations in this record: use zero placeholders
+                # (np.zeros expects the shape as a single tuple)
+                toe_pts = np.zeros((xys.shape[0], 3, 6))
+            toe_pts = np.array(toe_pts).reshape(-1, 3, 6)
+
+            vis = np.array(features['image/visibilities'].int64_list.value)
+            vis = vis.reshape(-1, 1, 14)
+
+            # presence of 'image/phis' marks preprocessed records whose
+            # keypoints are normalized to [-1, 1]
+            is_preprocessed = 'image/phis' in features.keys()
+            # decoding is costly, so only do it when the pixels are used
+            need_image = is_preprocessed or self.extract_img
+
+            for i in tqdm(range(N)):
+                image = None
+                if need_image:
+                    image = tf.image.decode_jpeg(images_data[i], channels=3)
+
+                # rows are (x, y, visibility); columns are the 14 body
+                # joints followed by 5 face and 6 toe points
+                kp = np.vstack((xys[i], vis[i]))
+                kp = np.hstack((kp, face_pts[i], toe_pts[i]))
+                if is_preprocessed:
+                    # Preprocessed, so kps are in [-1, 1]
+                    img_shape = image.shape[0]
+                    # separate name: must not clobber the per-sequence
+                    # `vis` array that later frames still index into
+                    kp_vis = kp[2, :]
+                    kp = ((kp[:2, :] + 1) * 0.5) * img_shape
+                    kp = np.vstack((kp, kp_vis))
+
+                keypoints2d = kp.T
+
+                # get bbox from visible keypoints
+                keypoints2d_vis = keypoints2d[keypoints2d[:, 2] == 1]
+                if len(keypoints2d_vis) == 0:
+                    # no visible keypoint, so no bbox can be derived
+                    continue
+                bbox_xyxy = [
+                    keypoints2d_vis[:, 0].min(),
+                    keypoints2d_vis[:, 1].min(),
+                    keypoints2d_vis[:, 0].max(),
+                    keypoints2d_vis[:, 1].max()
+                ]
+                bbox_xyxy = self._bbox_expand(bbox_xyxy, scale_factor=1.2)
+                bbox_xywh = self._xyxy2xywh(bbox_xyxy)
+
+                # map the absolute path recorded in the tfrecord to a path
+                # relative to dataset_path
+                image_path = images_name[i].decode('utf-8').replace(
+                    '/data2/Data/instagram_download/frames_raw/', 'images/')
+
+                if self.extract_img:
+                    image_abs_path = os.path.join(dataset_path, image_path)
+                    os.makedirs(
+                        os.path.dirname(image_abs_path), exist_ok=True)
+                    # decode_jpeg yields RGB while cv2.imwrite expects BGR
+                    image_bgr = cv2.cvtColor(
+                        np.array(image), cv2.COLOR_RGB2BGR)
+                    cv2.imwrite(image_abs_path, image_bgr)
+
+                image_path_.append(image_path)
+                keypoints2d_.append(keypoints2d)
+                bbox_xywh_.append(bbox_xywh)
+
+        # append a constant 1 as fifth bbox column, then convert keypoints
+        bbox_xywh_ = np.array(bbox_xywh_).reshape((-1, 4))
+        bbox_xywh_ = np.hstack([bbox_xywh_, np.ones([bbox_xywh_.shape[0], 1])])
+        keypoints2d_ = np.array(keypoints2d_).reshape((-1, 25, 3))
+        keypoints2d_, mask = convert_kps(keypoints2d_, 'instavariety_nop',
+                                         'human_data')
+        human_data['image_path'] = image_path_
+        human_data['bbox_xywh'] = bbox_xywh_
+        human_data['keypoints2d_mask'] = mask
+        human_data['keypoints2d'] = keypoints2d_
+        human_data['config'] = 'instavariety'
+        human_data.compress_keypoints_by_mask()
+
+        # store the data struct
+        os.makedirs(out_path, exist_ok=True)
+        out_file = os.path.join(out_path, f'instavariety_{mode}.npz')
+        human_data.dump(out_file)