Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 11 additions & 1 deletion mmhuman3d/data/data_converters/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from .agora import AgoraConverter
from .aic import AicConverter
from .amass import AmassConverter
from .builder import build_data_converter
from .coco import CocoConverter
Expand All @@ -17,20 +18,27 @@
from .h36m_smplx import H36mSMPLXConverter
from .humman import HuMManConverter
from .insta_vibe import InstaVibeConverter
from .instavariety import InstaVarietyConverter
from .lsp import LspConverter
from .lsp_extended import LspExtendedConverter
from .mpi_inf_3dhp import MpiInf3dhpConverter
from .mpi_inf_3dhp_hybrik import MpiInf3dhpHybrIKConverter
from .mpii import MpiiConverter
from .mtp import MtpConverter
from .muco3dhp import Muco3dhpConverter
from .ochuman import OCHumanConverter
from .oh50k3d import OH50k3DConverter
from .penn_action import PennActionConverter
from .posetrack import PosetrackConverter
from .prox import ProxConverter
from .pw3d import Pw3dConverter
from .pw3d_hybrik import Pw3dHybrIKConverter
from .spin import SpinConverter
from .stirling import StirlingConverter
from .surreal import SurrealConverter
from .up3d import Up3dConverter
from .vibe import VibeConverter
from .vlog import VlogConverter

__all__ = [
'build_data_converter', 'AgoraConverter', 'MpiiConverter', 'H36mConverter',
Expand All @@ -43,5 +51,7 @@
'SurrealConverter', 'InstaVibeConverter', 'SpinConverter', 'VibeConverter',
'HuMManConverter', 'FFHQFlameConverter', 'ExposeCuratedFitsConverter',
'ExposeSPINSMPLXConverter', 'FreihandConverter', 'StirlingConverter',
'EHFConverter'
'EHFConverter', 'ProxConverter', 'OH50k3DConverter', 'Muco3dhpConverter',
'AicConverter', 'InstaVarietyConverter', 'VlogConverter',
'OCHumanConverter', 'MtpConverter'
]
104 changes: 104 additions & 0 deletions mmhuman3d/data/data_converters/aic.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
import json
import os
from typing import List

import numpy as np
from tqdm import tqdm

from mmhuman3d.core.conventions.keypoints_mapping import convert_kps
from mmhuman3d.data.data_structures.human_data import HumanData
from .base_converter import BaseModeConverter
from .builder import DATA_CONVERTERS


@DATA_CONVERTERS.register_module()
class AicConverter(BaseModeConverter):
    """AI Challenger dataset `Ai challenger: A large-scale dataset for going
    deeper in image understanding' arXiv'2017 More details can be found in the
    `paper.

    <https://arxiv.org/abs/1711.06475>`__ .

    Args:
        modes (list): 'validation' and/or 'train' for
            accepted modes
    """
    ACCEPTED_MODES = ['validation', 'train']

    def __init__(self, modes: List = []) -> None:
        # Pass a copy so the shared default list (or a caller-owned list) is
        # never aliased by the converter instance.
        super(AicConverter, self).__init__(list(modes))
        # mode -> [annotation date stamp, image folder date stamp]
        self.json_mapping_dict = {
            'train': ['20170909', '20170902'],
            'validation': ['20170911', '20170911'],
        }

    def convert_by_mode(self, dataset_path: str, out_path: str,
                        mode: str) -> dict:
        """Convert one split of the AI Challenger keypoint annotations.

        Only persons whose 14 keypoints all carry a positive flag in the
        third column are kept. The result is dumped to
        ``<out_path>/aic_<mode>.npz``.

        Args:
            dataset_path (str): Path to directory where raw images and
                annotations are stored.
            out_path (str): Path to directory to save preprocessed npz file
            mode (str): Mode in accepted modes

        Returns:
            None. The HumanData() holding image_path, bbox_xywh,
            keypoints2d, keypoints2d_mask and config is written to disk
            rather than returned.
        """
        # use HumanData to store all data
        human_data = HumanData()

        # structs we need
        image_path_, keypoints2d_, bbox_xywh_ = [], [], []

        # json annotation file: the annotation and image folders carry
        # separate date stamps
        aid, iid = self.json_mapping_dict[mode]
        root_dir = f'ai_challenger_keypoint_{mode}_{aid}'
        json_path = os.path.join(dataset_path, root_dir,
                                 f'keypoint_{mode}_annotations_{aid}.json')
        img_dir = f'{root_dir}/keypoint_{mode}_images_{iid}'

        # context manager guarantees the annotation file gets closed
        with open(json_path, 'r') as json_file:
            json_data = json.load(json_file)

        for annot in tqdm(json_data):

            # image path is stored relative to dataset_path
            image_id = annot['image_id']
            img_path = os.path.join(img_dir, f'{image_id}.jpg')
            if not os.path.exists(os.path.join(dataset_path, img_path)):
                # best effort: warn but still record the annotation
                print('image path does not exist')
            keypoints_annot = annot['keypoint_annotations']
            bbox_annot = annot['human_annotations']

            for pid in keypoints_annot:
                keypoints2d = np.array(keypoints_annot[pid]).reshape(14, 3)
                # NOTE(review): the box is stored as-is under bbox_xywh;
                # confirm the raw annotation is not in xyxy layout.
                bbox_xywh = np.array(bbox_annot[pid]).reshape(-1)
                # clamp negative flags to zero
                keypoints2d[keypoints2d[:, 2] < 0, 2] = 0
                # check if all keypoints are annotated
                if sum(keypoints2d[:, 2] > 0) == 14:
                    # store data
                    image_path_.append(img_path)
                    keypoints2d_.append(keypoints2d)
                    bbox_xywh_.append(bbox_xywh)

        # append a constant 1 as fifth bbox column, then convert keypoints
        bbox_xywh_ = np.array(bbox_xywh_).reshape((-1, 4))
        bbox_xywh_ = np.hstack([bbox_xywh_, np.ones([bbox_xywh_.shape[0], 1])])
        keypoints2d_ = np.array(keypoints2d_).reshape((-1, 14, 3))
        keypoints2d_, mask = convert_kps(keypoints2d_, 'aic', 'human_data')

        human_data['image_path'] = image_path_
        human_data['keypoints2d_mask'] = mask
        human_data['keypoints2d'] = keypoints2d_
        human_data['bbox_xywh'] = bbox_xywh_
        human_data['config'] = 'aic'
        human_data.compress_keypoints_by_mask()

        # store the data struct
        os.makedirs(out_path, exist_ok=True)
        out_file = os.path.join(out_path, 'aic_{}.npz'.format(mode))
        human_data.dump(out_file)
148 changes: 148 additions & 0 deletions mmhuman3d/data/data_converters/instavariety.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,148 @@
import glob
import os
from typing import List

import cv2
import numpy as np
import tensorflow as tf
from tqdm import tqdm

from mmhuman3d.core.conventions.keypoints_mapping import convert_kps
from mmhuman3d.data.data_structures.human_data import HumanData
from .base_converter import BaseModeConverter
from .builder import DATA_CONVERTERS


@DATA_CONVERTERS.register_module()
class InstaVarietyConverter(BaseModeConverter):
    """Instavariety dataset `Learning 3D Human Dynamics from Video' CVPR'2019
    More details can be found in the `paper.

    <https://arxiv.org/pdf/1812.01601.pdf>`__ .

    Args:
        modes (list): 'train' and/or 'test' for
            accepted modes
        extract_img (bool): If True, the frames embedded in the tfrecords
            are also written to disk below ``dataset_path``.
            Default: False.
    """
    ACCEPTED_MODES = ['train', 'test']

    def __init__(self, modes: List = [], extract_img: bool = False) -> None:
        # Pass a copy so the shared default list (or a caller-owned list) is
        # never aliased by the converter instance.
        super(InstaVarietyConverter, self).__init__(list(modes))
        self.extract_img = extract_img

    def convert_by_mode(self, dataset_path: str, out_path: str,
                        mode: str) -> dict:
        """Convert one split of the InstaVariety tfrecords.

        Every ``<dataset_path>/<mode>/*.tfrecord`` file is parsed. For each
        frame, 14 body, 5 face and 6 toe keypoints are stacked into 25
        keypoints, and a bbox is derived from the keypoints whose flag
        equals 1. The result is dumped to
        ``<out_path>/instavariety_<mode>.npz``.

        Args:
            dataset_path (str): Path to directory where raw images and
                annotations are stored.
            out_path (str): Path to directory to save preprocessed npz file
            mode (str): Mode in accepted modes

        Returns:
            None. The HumanData() holding image_path, bbox_xywh,
            keypoints2d, keypoints2d_mask and config is written to disk
            rather than returned.
        """
        # use HumanData to store all data
        human_data = HumanData()

        # structs we need
        image_path_, keypoints2d_, bbox_xywh_ = [], [], []

        filenames = glob.glob(os.path.join(dataset_path, f'{mode}/*.tfrecord'))
        raw_dataset = tf.data.TFRecordDataset(filenames)

        for raw_record in raw_dataset.take(-1):
            example = tf.train.Example()
            example.ParseFromString(raw_record.numpy())
            features = example.features.feature

            # Each record is a sequence of N frames.
            N = int(features['meta/N'].int64_list.value[0])

            # These are lists of length N
            images_data = features['image/encoded'].bytes_list.value
            images_name = features['image/filenames'].bytes_list.value

            xys = features['image/xys'].float_list.value
            xys = np.array(xys).reshape(-1, 2, 14)

            face_pts = features['image/face_pts'].float_list.value
            face_pts = np.array(face_pts).reshape(-1, 3, 5)

            toe_pts = features['image/toe_pts'].float_list.value
            if len(toe_pts) == 0:
                # no toe annotation: zero-fill; the shape must be a tuple
                toe_pts = np.zeros((xys.shape[0], 3, 6))
            toe_pts = np.array(toe_pts).reshape(-1, 3, 6)

            vis = features['image/visibilities'].int64_list.value
            vis = np.array(vis).reshape(-1, 1, 14)

            # Per-record property, so evaluate it once outside the frame loop
            is_preprocessed = 'image/phis' in features
            # Pixels are only needed to rescale or to export the frame
            needs_image = is_preprocessed or self.extract_img

            for i in tqdm(range(N)):
                image = None
                if needs_image:
                    image = tf.image.decode_jpeg(images_data[i], channels=3)

                # rows: x, y, flag; columns: 14 body + 5 face + 6 toe points
                kp = np.vstack((xys[i], vis[i]))
                kp = np.hstack((kp, face_pts[i], toe_pts[i]))
                if is_preprocessed:
                    # Preprocessed, so kps are in [-1, 1]
                    img_shape = image.shape[0]
                    # use a separate name: rebinding `vis` here would break
                    # `vis[i]` for the next frame of this record
                    kp_vis = kp[2, :]
                    kp = ((kp[:2, :] + 1) * 0.5) * img_shape
                    kp = np.vstack((kp, kp_vis))

                keypoints2d = kp.T

                # get bbox from visible keypoints
                keypoints2d_vis = keypoints2d[keypoints2d[:, 2] == 1]
                bbox_xyxy = [
                    min(keypoints2d_vis[:, 0]),
                    min(keypoints2d_vis[:, 1]),
                    max(keypoints2d_vis[:, 0]),
                    max(keypoints2d_vis[:, 1])
                ]
                bbox_xyxy = self._bbox_expand(bbox_xyxy, scale_factor=1.2)
                bbox_xywh = self._xyxy2xywh(bbox_xyxy)

                # make the recorded path relative to the dataset root
                image_path = images_name[i].decode('utf-8').replace(
                    '/data2/Data/instagram_download/frames_raw/', 'images/')

                if self.extract_img:
                    image_abs_path = os.path.join(dataset_path, image_path)
                    os.makedirs(
                        os.path.dirname(image_abs_path), exist_ok=True)
                    # decode_jpeg yields RGB while cv2.imwrite expects BGR
                    frame_bgr = cv2.cvtColor(
                        np.array(image), cv2.COLOR_RGB2BGR)
                    cv2.imwrite(image_abs_path, frame_bgr)

                image_path_.append(image_path)
                keypoints2d_.append(keypoints2d)
                bbox_xywh_.append(bbox_xywh)

        # append a constant 1 as fifth bbox column, then convert keypoints
        bbox_xywh_ = np.array(bbox_xywh_).reshape((-1, 4))
        bbox_xywh_ = np.hstack([bbox_xywh_, np.ones([bbox_xywh_.shape[0], 1])])
        keypoints2d_ = np.array(keypoints2d_).reshape((-1, 25, 3))
        keypoints2d_, mask = convert_kps(keypoints2d_, 'instavariety_nop',
                                         'human_data')
        human_data['image_path'] = image_path_
        human_data['bbox_xywh'] = bbox_xywh_
        human_data['keypoints2d_mask'] = mask
        human_data['keypoints2d'] = keypoints2d_
        human_data['config'] = 'instavariety'
        human_data.compress_keypoints_by_mask()

        # store the data struct
        os.makedirs(out_path, exist_ok=True)
        out_file = os.path.join(out_path, f'instavariety_{mode}.npz')
        human_data.dump(out_file)
Loading