Skip to content

Commit

Permalink
Add pose estimation and refine code
Browse files Browse the repository at this point in the history
  • Loading branch information
cleardusk committed Dec 1, 2018
1 parent 09d138b commit e53a741
Show file tree
Hide file tree
Showing 16 changed files with 254 additions and 44 deletions.
3 changes: 1 addition & 2 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
.idea/
*.pyc
__pycache__/
utils/__pycache__/
test.data/
training/snapshot/
training/logs/
Expand All @@ -21,5 +22,3 @@ models/shape_predictor_68_face_landmarks.dat

demo_obama/
todo.md

utils/__pycacje__/
3 changes: 0 additions & 3 deletions benchmark_aflw.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,8 @@
#!/usr/bin/env python3
# coding: utf-8

import os
import os.path as osp
import numpy as np
import sys
from glob import glob
from math import sqrt
from utils.io import _load

Expand Down
34 changes: 25 additions & 9 deletions main.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
#!/usr/bin/env python3
# coding: utf-8
import sys

from utils.cv_plot import plot_pose_box

__author__ = 'cleardusk'

"""
Expand All @@ -10,8 +14,6 @@
1. CPU optimization: https://pmchojnacki.wordpress.com/2018/10/07/slow-pytorch-cpu-performance
"""

# import modules

import torch
import torchvision.transforms as transforms
import mobilenet_v1
Expand All @@ -22,9 +24,12 @@
import scipy.io as sio
from utils.inference import get_suffix, calc_roi_box, crop_img, predict_68pts, dump_to_ply, dump_vertex, draw_landmarks, \
predict_dense
from utils.estimate_pose import parse_pose
import argparse
import torch.backends.cudnn as cudnn

STD_SIZE = 120


def main(args):
# 1. load pre-trained model
Expand All @@ -51,6 +56,7 @@ def main(args):

# 3. forward
tri = sio.loadmat('visualize/tri.mat')['tri']
transform = transforms.Compose([ToTensorGjz(), NormalizeGjz(mean=127.5, std=128)])
for img_fp in args.files:
img_ori = cv2.imread(img_fp)
if args.dlib_bbox:
Expand All @@ -67,22 +73,21 @@ def main(args):
rect = dlib.rectangle(l, r, t, b)
rects.append(rect)

pts_dlib = []
pts_res = []
Ps = [] # Camera matrix collection
poses = [] # pose collection, [todo: validate it]
ind = 0
suffix = get_suffix(img_fp)
for rect in rects:
# landmark & crop
pts = face_regressor(img_ori, rect).parts()
pts = np.array([[pt.x, pt.y] for pt in pts]).T
pts_dlib.append(pts)

roi_box = calc_roi_box(pts)
img = crop_img(img_ori, roi_box)

# forward: one step
img = cv2.resize(img, dsize=(120, 120), interpolation=cv2.INTER_LINEAR)
transform = transforms.Compose([ToTensorGjz(), NormalizeGjz(mean=127.5, std=128)])
img = cv2.resize(img, dsize=(STD_SIZE, STD_SIZE), interpolation=cv2.INTER_LINEAR)
input = transform(img).unsqueeze(0)
with torch.no_grad():
if args.mode == 'gpu':
Expand All @@ -97,16 +102,20 @@ def main(args):
if args.box_init == 'two':
roi_box = calc_roi_box(pts68)
img_step2 = crop_img(img_ori, roi_box)
img_step2 = cv2.resize(img_step2, dsize=(120, 120), interpolation=cv2.INTER_LINEAR)
img_step2 = cv2.resize(img_step2, dsize=(STD_SIZE, STD_SIZE), interpolation=cv2.INTER_LINEAR)
input = transform(img_step2).unsqueeze(0)
with torch.no_grad():
if args.mode == 'gpu':
input = input.cuda()
param = model(input)
param = param.squeeze().cpu().numpy().flatten().astype(np.float32)

pts68 = predict_68pts(param, roi_box)

pts_res.append(pts68)
P, pose = parse_pose(param)
Ps.append(P)
poses.append(pose)

# dense face vertices
if args.dump_ply or args.dump_vertex:
Expand All @@ -123,8 +132,14 @@ def main(args):
wfp = '{}_{}.roibox'.format(img_fp.replace(suffix, ''), ind)
np.savetxt(wfp, roi_box, fmt='%.3f')
print('Save roi box to {}'.format(wfp))

ind += 1

if args.dump_pose:
# P, pose = parse_pose(param) # Camera matrix (without scale), and pose (yaw, pitch, roll, to verify)
img_pose = plot_pose_box(img_ori, Ps, pts_res)
wfp = img_fp.replace(suffix, '_pose.jpg')
cv2.imwrite(wfp, img_pose)
print('Dump to {}'.format(wfp))
if args.dump_res:
draw_landmarks(img_ori, pts_res, wfp=img_fp.replace(suffix, '_3DDFA.jpg'), show_flg=args.show_flg)

Expand All @@ -137,11 +152,12 @@ def main(args):
parser.add_argument('--show_flg', default='True', type=str2bool, help='whether show the visualization result')
parser.add_argument('--box_init', default='one', type=str, help='one|two: one-step bbox initialization or two-step')
parser.add_argument('--dump_res', default='true', type=str2bool, help='whether write out the visualization image')
parser.add_argument('--dump_vertex', default='true', type=str2bool,
parser.add_argument('--dump_vertex', default='false', type=str2bool,
help='whether write out the dense face vertices to mat')
parser.add_argument('--dump_ply', default='true', type=str2bool)
parser.add_argument('--dump_pts', default='true', type=str2bool)
parser.add_argument('--dump_roi_box', default='false', type=str2bool)
parser.add_argument('--dump_pose', default='true', type=str2bool)
parser.add_argument('--dlib_bbox', default='true', type=str2bool, help='whether use dlib to predict bbox')

args = parser.parse_args()
Expand Down
24 changes: 15 additions & 9 deletions readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

**\[Updates\]**

- `2018.12.1`: Add pose estimation and refine code, see [utils/estimate_pose.py](./utils/estimate_pose.py) for more details.
- `2018.11.17`: Refine code and map the 3d vertex to original image space.
- `2018.11.11`: **Update end-to-end inference pipeline: infer/serialize 3D face shape and 68 landmarks given one arbitrary image, please see readme.md below for more details.**
- `2018.11.9`: Update trained model with higher performance in [models](./models).
Expand Down Expand Up @@ -76,23 +77,26 @@ In addition, I strongly recommend using Python3.6+ instead of older version for
```
If you see the following log output in the terminal, it ran successfully.
```
Dump to samples/test1_0.ply
Dump to samples/test1_0.mat
Dump tp samples/test1_0.ply
Save 68 3d landmarks to samples/test1_0.txt
Dump to samples/test1_1.ply
Dump to samples/test1_1.mat
Dump tp samples/test1_1.ply
Save 68 3d landmarks to samples/test1_1.txt
Dump to samples/test1_pose.jpg
Save visualization result to samples/test1_3DDFA.jpg
```
Because `test1.jpg` contains two faces, two `mat` files (storing the dense face vertices; they can be rendered with Matlab, see [visualize](./visualize)) and two `ply` files (which can be rendered with Meshlab or Microsoft 3D Builder) are produced.
Please run `python3 main.py -h` or review the code for more details.
The result `samples/test1_3DDFA.jpg` is shown below
The 68 landmarks visualization result `samples/test1_3DDFA.jpg` and pose estimation result `samples/test1_pose.jpg` are shown below
<p align="center">
<img src="samples/test1_3DDFA.jpg" alt="samples" width="700px">
<img src="samples/test1_3DDFA.jpg" alt="samples" width="650px">
</p>
<p align="center">
<img src="samples/test1_pose.jpg" alt="samples" width="650px">
</p>
3. Additional example
Expand All @@ -102,7 +106,11 @@ In addition, I strongly recommend using Python3.6+ instead of older version for
```
<p align="center">
<img src="samples/emma_input_3DDFA.jpg" alt="samples" width="700px">
<img src="samples/emma_input_3DDFA.jpg" alt="samples" width="750px">
</p>
<p align="center">
<img src="samples/emma_input_pose.jpg" alt="samples" width="750px">
</p>
## Citation
Expand All @@ -122,8 +130,6 @@ In addition, I strongly recommend using Python3.6+ instead of older version for
}
## Inference speed
With a batch size of 128, MobileNet-V1 inference takes about 34.7ms per batch, i.e. an average of about **0.27ms/pic**.
Expand Down
Binary file modified samples/emma_input_3DDFA.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added samples/emma_input_pose.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified samples/test1_3DDFA.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added samples/test1_pose.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
99 changes: 99 additions & 0 deletions utils/cv_plot.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
#!/usr/bin/env python3
# coding: utf-8


"""
Modified from: https://sourcegraph.com/github.com/YadiraF/PRNet@master/-/blob/utils/cv_plot.py
"""

import numpy as np
import cv2

from utils.inference import calc_hypotenuse

end_list = np.array([17, 22, 27, 42, 48, 31, 36, 68], dtype=np.int32) - 1


def plot_kpt(image, kpt):
    """Draw key points and the line segments joining consecutive points.

    Drawing happens on a copy of ``image``; the input array is not modified.

    Args:
        image: the input image.
        kpt: key points, one row per point with x and y in the first two
            columns. The intended shape is (68, 3).

    Returns:
        The annotated copy of ``image``.
    """
    image = image.copy()
    kpt = np.round(kpt).astype(np.int32)
    n_pts = kpt.shape[0]
    for i in range(n_pts):
        st = kpt[i, :2]
        image = cv2.circle(image, (st[0], st[1]), 1, (0, 0, 255), 2)
        # No segment starts at an index listed in end_list, nor at the final
        # row: the latter has no successor, and indexing kpt[i + 1] there
        # would raise IndexError for inputs that do not have exactly 68 rows.
        if i in end_list or i + 1 >= n_pts:
            continue
        ed = kpt[i + 1, :2]
        image = cv2.line(image, (st[0], st[1]), (ed[0], ed[1]), (255, 255, 255), 1)
    return image


def build_camera_box(rear_size=90):
    """Build the 3D vertices of the box drawn to visualise pose.

    The box consists of two square loops: a rear square at depth 0 whose
    corners are at +/- ``rear_size``, and a front square whose corner offset
    and depth are both ``int(4 / 3 * rear_size)``. Each loop is closed by
    repeating its first corner, so ten points are produced in total.

    Args:
        rear_size: corner offset (half side length) of the rear square.

    Returns:
        A float64 array of shape (10, 3); rows 0-4 are the rear loop and
        rows 5-9 are the front loop.
    """
    rear_depth = 0
    front_size = int(4 / 3 * rear_size)
    front_depth = int(4 / 3 * rear_size)

    def _square_loop(half, depth):
        # Four corners plus the first corner repeated to close the loop.
        return [
            (-half, -half, depth),
            (-half, half, depth),
            (half, half, depth),
            (half, -half, depth),
            (-half, -half, depth),
        ]

    point_3d = _square_loop(rear_size, rear_depth) + _square_loop(front_size, front_depth)
    # The np.float alias was deprecated in NumPy 1.20 and removed in 1.24;
    # np.float64 is the dtype it used to resolve to.
    return np.array(point_3d, dtype=np.float64).reshape(-1, 3)


def plot_pose_box(image, Ps, pts68s, color=(40, 255, 0), line_width=2):
    """Draw one 3D box per face as an annotation of its pose.

    Ref: https://github.com/yinguobing/head-pose-estimation/blob/master/pose_estimator.py

    Args:
        image: the input image; drawing happens on a copy of it.
        Ps: a camera matrix, or a list of them with one entry per face.
            Each is expected to be a (3, 4) affine camera matrix.
        pts68s: a landmark array, or a list of them with one entry per face.
            Each is expected to be (2, 68) or (3, 68).
        color: line color handed to the OpenCV drawing calls.
        line_width: line thickness handed to the OpenCV drawing calls.

    Returns:
        The annotated copy of ``image``.
    """
    canvas = image.copy()
    # Accept a single item as well as a list for both per-face arguments.
    landmark_sets = pts68s if isinstance(pts68s, list) else [pts68s]
    cameras = Ps if isinstance(Ps, list) else [Ps]

    for idx, pts68 in enumerate(landmark_sets):
        # The box is sized from the landmarks of the current face.
        box_3d = build_camera_box(calc_hypotenuse(pts68))
        P = cameras[idx]

        # Map the box to 2D image points via homogeneous coordinates (n x 4).
        ones_col = np.ones([box_3d.shape[0], 1])
        box_homo = np.hstack((box_3d, ones_col))
        box_2d = box_homo.dot(P.T)[:, :2]

        # Negate y, then translate the box so that the mean of its first four
        # corners lands on the mean of the first 27 landmarks.
        box_2d[:, 1] = -box_2d[:, 1]
        rear_center = np.mean(box_2d[:4, :2], 0)
        anchor = np.mean(pts68[:2, :27], 1)
        box_2d[:, :2] = box_2d[:, :2] - rear_center + anchor
        box_2d = np.int32(box_2d.reshape(-1, 2))

        # Outline through all box points, then the edges joining the rear
        # loop to the front loop.
        cv2.polylines(canvas, [box_2d], True, color, line_width, cv2.LINE_AA)
        for rear_idx, front_idx in ((1, 6), (2, 7), (3, 8)):
            cv2.line(canvas, tuple(box_2d[rear_idx]), tuple(box_2d[front_idx]),
                     color, line_width, cv2.LINE_AA)

    return canvas


def main():
    """Placeholder entry point; performs no work."""
    return None


if __name__ == '__main__':
    main()
15 changes: 9 additions & 6 deletions utils/ddfa.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,14 @@
import pickle
import argparse
from .io import _numpy_to_tensor, _load_cpu, _load_gpu
from params import *
from utils.params import *


def reconstruct_vertex(param, whitening=True, dense=False):
def reconstruct_vertex(param, whitening=True, dense=False, transform=True):
"""Whitening param -> 3d vertex, based on the 3dmm param: u_base, w_shp, w_exp
dense: if True, return dense vertex, else return 68 sparse landmarks. All dense or sparse vertex is transformed to
image coordinate space, but without alignment caused by face cropping.
transform: whether transform to image space
"""
if len(param) == 12:
param = np.concatenate((param, [0] * 50))
Expand All @@ -36,14 +37,16 @@ def reconstruct_vertex(param, whitening=True, dense=False):
if dense:
vertex = p @ (u + w_shp @ alpha_shp + w_exp @ alpha_exp).reshape(3, -1, order='F') + offset

# transform to image coordinate space
vertex[1, :] = std_size + 1 - vertex[1, :]
if transform:
# transform to image coordinate space
vertex[1, :] = std_size + 1 - vertex[1, :]
else:
"""For 68 pts"""
vertex = p @ (u_base + w_shp_base @ alpha_shp + w_exp_base @ alpha_exp).reshape(3, -1, order='F') + offset

# transform to image coordinate space
vertex[1, :] = std_size + 1 - vertex[1, :]
if transform:
# transform to image coordinate space
vertex[1, :] = std_size + 1 - vertex[1, :]

return vertex

Expand Down
Loading

0 comments on commit e53a741

Please sign in to comment.