add HRNet/human_keypoint_inference
xuewen committed Nov 19, 2020
1 parent b541834 commit 3254aa2
Showing 34 changed files with 4,394 additions and 1 deletion.
60 changes: 60 additions & 0 deletions HRNet/README.md
# Keypoint Detection by HRNet

`original code`: cloned from https://github.com/leoxiaobin/deep-high-resolution-net.pytorch

Reference: https://github.com/lxy5513/hrnet



## Demo

```bash
python tools/human_keypoint_inference.py
```



## Model Download
+ Download the pose_hrnet_*.pth model files and save them to the models/pytorch/pose_coco folder
+ address: https://drive.google.com/drive/folders/1nzM_OBV9LbAEA7HClC0chEyf_7ECDXYA

+ Download the YOLOv3 object detection model file and save it to the /lib/detector/yolo folder
+ yolov3 model download: wget https://pjreddie.com/media/files/yolov3.weights



## Main Steps

1. Detect human bounding boxes:

```python
bboxs, scores = yolo_det(args.img_input, human_model, confidence=0.5) # bboxes (N, 4) [x0, y0, x1, y1]
```



2. Crop each detected person from the image using the bboxes from step 1:

```python
inputs, origin_img, center, scale = preprocess(args.img_input, bboxs, scores, cfg)
```



3. Keypoint detection, producing one heatmap per keypoint:

```python
output = model(inputs)
```



4. Post-process the heatmaps to obtain the keypoint coordinates (a combined sketch of all four steps follows the list):

```python
preds, maxvals = get_final_preds(cfg, output.clone().cpu().numpy(), np.asarray(center), np.asarray(scale))
```
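Putting the four steps together, a minimal end-to-end sketch might look like the following. This is a sketch only: the helper names (`yolo_det`, `preprocess`, `get_final_preds`) come from the snippets above, while `img_path`, `human_model`, `model`, and `cfg` stand in for setup this repo performs elsewhere and are assumptions, not its exact API.

```python
import numpy as np

# 1. Detect people: bboxs has shape (N, 4) as [x0, y0, x1, y1]
bboxs, scores = yolo_det(img_path, human_model, confidence=0.5)
# 2. Crop and normalize each detected person for HRNet
inputs, origin_img, center, scale = preprocess(img_path, bboxs, scores, cfg)
# 3. Run HRNet to get one heatmap per keypoint
output = model(inputs)
# 4. Decode heatmaps into (x, y) coordinates plus per-joint confidences
preds, maxvals = get_final_preds(cfg, output.clone().cpu().numpy(),
                                 np.asarray(center), np.asarray(scale))
```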



6 changes: 6 additions & 0 deletions HRNet/lib/detector/yolo/README.md
## Used to obtain human bounding boxes from images

`original code`: cloned from https://github.com/lxy5513/hrnet

https://github.com/lxy5513/hrnet/tree/master/lib/detector/yolo
115 changes: 115 additions & 0 deletions HRNet/lib/detector/yolo/bbox.py
from __future__ import division

import torch
import random

import numpy as np
import cv2

def confidence_filter(result, confidence):
    """Zero out predictions whose objectness score (index 4) falls below `confidence`."""
    conf_mask = (result[:,:,4] > confidence).float().unsqueeze(2)
    result = result*conf_mask

    return result
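# Example (sketch; `raw` is a hypothetical prediction tensor of shape
# (batch, num_boxes, 5 + num_classes)):
#   kept = confidence_filter(raw, 0.5)  # low-objectness rows zeroed, shape unchanged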

def confidence_filter_cls(result, confidence):
    """Keep predictions whose objectness exceeds `confidence` or whose best class score is very high."""
    max_scores = torch.max(result[:,:,5:25], 2)[0]
    res = torch.cat((result, max_scores.unsqueeze(2)), 2)  # append best class score as column 25

    cond_1 = (res[:,:,4] > confidence).float()
    cond_2 = (res[:,:,25] > 0.995).float()

    conf = cond_1 + cond_2
    conf = torch.clamp(conf, 0.0, 1.0)
    conf = conf.unsqueeze(2)
    result = result*conf
    return result



def get_abs_coord(box):
    """Convert a (cx, cy, w, h) box to (x1, y1, x2, y2) corner coordinates."""
    box[2], box[3] = abs(box[2]), abs(box[3])
    x1 = (box[0] - box[2]/2) - 1
    y1 = (box[1] - box[3]/2) - 1
    x2 = (box[0] + box[2]/2) - 1
    y2 = (box[1] + box[3]/2) - 1
    return x1, y1, x2, y2



def sanity_fix(box):
    """Swap coordinates if needed so that x1 <= x2 and y1 <= y2."""
    if (box[0] > box[2]):
box[0], box[2] = box[2], box[0]

if (box[1] > box[3]):
box[1], box[3] = box[3], box[1]

return box

def bbox_iou(box1, box2):
"""
    Returns the IoU of two sets of bounding boxes in (x1, y1, x2, y2) corner format
"""
#Get the coordinates of bounding boxes
b1_x1, b1_y1, b1_x2, b1_y2 = box1[:,0], box1[:,1], box1[:,2], box1[:,3]
b2_x1, b2_y1, b2_x2, b2_y2 = box2[:,0], box2[:,1], box2[:,2], box2[:,3]

#get the corrdinates of the intersection rectangle
inter_rect_x1 = torch.max(b1_x1, b2_x1)
inter_rect_y1 = torch.max(b1_y1, b2_y1)
inter_rect_x2 = torch.min(b1_x2, b2_x2)
inter_rect_y2 = torch.min(b1_y2, b2_y2)

    #Intersection area (clamped at zero so disjoint boxes give zero overlap, on CPU or GPU)
    inter_area = torch.clamp(inter_rect_x2 - inter_rect_x1 + 1, min=0) * torch.clamp(inter_rect_y2 - inter_rect_y1 + 1, min=0)

#Union Area
b1_area = (b1_x2 - b1_x1 + 1)*(b1_y2 - b1_y1 + 1)
b2_area = (b2_x2 - b2_x1 + 1)*(b2_y2 - b2_y1 + 1)

iou = inter_area / (b1_area + b2_area - inter_area)

return iou
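# Example (sketch, using the +1 pixel convention above):
#   bbox_iou(torch.tensor([[0., 0., 10., 10.]]), torch.tensor([[5., 5., 15., 15.]]))
#   -> tensor([0.1748])  (6*6 = 36 intersection over 121 + 121 - 36 = 206 union)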


def pred_corner_coord(prediction):
    """Convert every nonzero-confidence box from (cx, cy, w, h) to corner format, in place."""
    #Get indices of non-zero confidence bboxes
    ind_nz = torch.nonzero(prediction[:,:,4]).transpose(0,1).contiguous()

box = prediction[ind_nz[0], ind_nz[1]]


box_a = box.new(box.shape)
box_a[:,0] = (box[:,0] - box[:,2]/2)
box_a[:,1] = (box[:,1] - box[:,3]/2)
box_a[:,2] = (box[:,0] + box[:,2]/2)
box_a[:,3] = (box[:,1] + box[:,3]/2)
box[:,:4] = box_a[:,:4]

prediction[ind_nz[0], ind_nz[1]] = box

return prediction




def write(x, batches, results, colors, classes):
    """Draw one detection `x` (row [batch_idx, x1, y1, x2, y2, ..., cls]) onto its source image."""
    c1 = tuple(x[1:3].int())
c2 = tuple(x[3:5].int())
img = results[int(x[0])]
cls = int(x[-1])
label = "{0}".format(classes[cls])
color = random.choice(colors)
cv2.rectangle(img, c1, c2,color, 1)
t_size = cv2.getTextSize(label, cv2.FONT_HERSHEY_PLAIN, 1 , 1)[0]
c2 = c1[0] + t_size[0] + 3, c1[1] + t_size[1] + 4
cv2.rectangle(img, c1, c2,color, -1)
    cv2.putText(img, label, (c1[0], c1[1] + t_size[1] + 4), cv2.FONT_HERSHEY_PLAIN, 1, [225,255,255], 1)
return img
168 changes: 168 additions & 0 deletions HRNet/lib/detector/yolo/cam_demo.py
from __future__ import division
import time
import torch
import torch.nn as nn
from torch.autograd import Variable
import numpy as np
import cv2
from util import *
from darknet import Darknet
from preprocess import prep_image, inp_to_image
import pandas as pd
import random
import argparse
import pickle as pkl

def get_test_input(input_dim, CUDA):
img = cv2.imread("imgs/messi.jpg")
img = cv2.resize(img, (input_dim, input_dim))
img_ = img[:,:,::-1].transpose((2,0,1))
img_ = img_[np.newaxis,:,:,:]/255.0
img_ = torch.from_numpy(img_).float()
img_ = Variable(img_)

if CUDA:
img_ = img_.cuda()

return img_

def prep_image(img, inp_dim):
"""
    Prepare an OpenCV image for input to the network.
    Returns the input tensor, the original image, and its (width, height).
"""

orig_im = img
dim = orig_im.shape[1], orig_im.shape[0]
img = cv2.resize(orig_im, (inp_dim, inp_dim))
img_ = img[:,:,::-1].transpose((2,0,1)).copy()
img_ = torch.from_numpy(img_).float().div(255.0).unsqueeze(0)
return img_, orig_im, dim
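# Example (sketch): `frame` comes straight from cv2 (BGR, HWC); 416 is a typical
# YOLO input size (any multiple of 32 greater than 32 works here):
#   tensor, orig_im, (w, h) = prep_image(frame, 416)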

def write(x, img):
c1 = tuple(x[1:3].int())
c2 = tuple(x[3:5].int())
cls = int(x[-1])
label = "{0}".format(classes[cls])
color = random.choice(colors)
cv2.rectangle(img, c1, c2,color, 1)
t_size = cv2.getTextSize(label, cv2.FONT_HERSHEY_PLAIN, 1 , 1)[0]
c2 = c1[0] + t_size[0] + 3, c1[1] + t_size[1] + 4
cv2.rectangle(img, c1, c2,color, -1)
    cv2.putText(img, label, (c1[0], c1[1] + t_size[1] + 4), cv2.FONT_HERSHEY_PLAIN, 1, [225,255,255], 1)
return img

def arg_parse():
"""
    Parse arguments for the detect module
"""


parser = argparse.ArgumentParser(description='YOLO v3 Cam Demo')
parser.add_argument("--confidence", dest = "confidence", help = "Object Confidence to filter predictions", default = 0.25)
parser.add_argument("--nms_thresh", dest = "nms_thresh", help = "NMS Threshhold", default = 0.4)
parser.add_argument("--reso", dest = 'reso', help =
"Input resolution of the network. Increase to increase accuracy. Decrease to increase speed",
default = "160", type = str)
return parser.parse_args()
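# Example invocation (flags as defined above):
#   python cam_demo.py --confidence 0.5 --nms_thresh 0.4 --reso 320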



if __name__ == '__main__':
cfgfile = "cfg/yolov3.cfg"
weightsfile = "yolov3.weights"
num_classes = 80

args = arg_parse()
confidence = float(args.confidence)
nms_thesh = float(args.nms_thresh)
start = 0
    CUDA = torch.cuda.is_available()

    bbox_attrs = 5 + num_classes

model = Darknet(cfgfile)
model.load_weights(weightsfile)

model.net_info["height"] = args.reso
inp_dim = int(model.net_info["height"])

assert inp_dim % 32 == 0
assert inp_dim > 32

if CUDA:
model.cuda()

model.eval()

videofile = 'video.avi'

cap = cv2.VideoCapture(0)

assert cap.isOpened(), 'Cannot capture source'

frames = 0
start = time.time()
while cap.isOpened():

ret, frame = cap.read()
if ret:

            img, orig_im, dim = prep_image(frame, inp_dim)

            if CUDA:
                img = img.cuda()


output = model(Variable(img), CUDA)
output = write_results(output, confidence, num_classes, nms = True, nms_conf = nms_thesh)

if type(output) == int:
frames += 1
print("FPS of the video is {:5.2f}".format( frames / (time.time() - start)))
cv2.imshow("frame", orig_im)
key = cv2.waitKey(1)
if key & 0xFF == ord('q'):
break
continue



            # Clamp boxes to the network input, then rescale to original frame coordinates
            output[:,1:5] = torch.clamp(output[:,1:5], 0.0, float(inp_dim))/inp_dim
            output[:,[1,3]] *= frame.shape[1]
            output[:,[2,4]] *= frame.shape[0]


            # Class names and drawing colours (loading these once, before the loop, would avoid re-reading them each frame)
            classes = load_classes('data/coco.names')
            colors = pkl.load(open("pallete", "rb"))

list(map(lambda x: write(x, orig_im), output))


cv2.imshow("frame", orig_im)
key = cv2.waitKey(1)
if key & 0xFF == ord('q'):
break
frames += 1
print("FPS of the video is {:5.2f}".format( frames / (time.time() - start)))


else:
break




