add HRNet/human_keypoint_inference
xuewen committed Nov 19, 2020
1 parent b541834 commit 3254aa2
Showing 34 changed files with 4,394 additions and 1 deletion.
60 changes: 60 additions & 0 deletions HRNet/README.md
# Keypoint Detection by HRNet

`original code`: cloned from https://github.com/leoxiaobin/deep-high-resolution-net.pytorch

Reference: https://github.com/lxy5513/hrnet



## Demo

```bash
python tools/human_keypoint_inference.py
```



## Model Download
+ Download the pose_hrnet_*.pth model files and save them to the models/pytorch/pose_coco folder
+ address: https://drive.google.com/drive/folders/1nzM_OBV9LbAEA7HClC0chEyf_7ECDXYA

+ Download the YOLOv3 object detection model file and save it to the /lib/detector/yolo folder
+ yolov3 model download: wget https://pjreddie.com/media/files/yolov3.weights



## Main Steps

1. Detect human bounding boxes:

```python
bboxs, scores = yolo_det(args.img_input, human_model, confidence=0.5) # bboxes (N, 4) [x0, y0, x1, y1]
```



2. Crop each detected person from the image using the bboxes from step 1:

```python
inputs, origin_img, center, scale = preprocess(args.img_input, bboxs, scores, cfg)
```



3. Keypoint detection, producing one heatmap per keypoint:

```python
output = model(inputs)
```



4. Post-process the heatmaps to obtain the keypoint coordinates (a combined sketch of all four steps follows the list):

```python
preds, maxvals = get_final_preds(cfg, output.clone().cpu().numpy(), np.asarray(center), np.asarray(scale))
```
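Putting the four steps together, a minimal end-to-end sketch might look like the following. This is a sketch only: the helper names (`yolo_det`, `preprocess`, `get_final_preds`) come from the snippets above, while `img_path`, `human_model`, `model`, and `cfg` stand in for setup this repo performs elsewhere and are assumptions, not its exact API.

```python
import numpy as np

# 1. Detect people: bboxs has shape (N, 4) as [x0, y0, x1, y1]
bboxs, scores = yolo_det(img_path, human_model, confidence=0.5)
# 2. Crop and normalize each detected person for HRNet
inputs, origin_img, center, scale = preprocess(img_path, bboxs, scores, cfg)
# 3. Run HRNet to get one heatmap per keypoint
output = model(inputs)
# 4. Decode heatmaps into (x, y) coordinates plus per-joint confidences
preds, maxvals = get_final_preds(cfg, output.clone().cpu().numpy(),
                                 np.asarray(center), np.asarray(scale))
```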



6 changes: 6 additions & 0 deletions HRNet/lib/detector/yolo/README.md
## Used to obtain human bounding boxes from images

`original code`: cloned from https://github.com/lxy5513/hrnet

https://github.com/lxy5513/hrnet/tree/master/lib/detector/yolo
115 changes: 115 additions & 0 deletions HRNet/lib/detector/yolo/bbox.py
from __future__ import division

import torch
import random

import numpy as np
import cv2

def confidence_filter(result, confidence):
    """Zero out predictions whose objectness score (index 4) falls below `confidence`."""
    conf_mask = (result[:,:,4] > confidence).float().unsqueeze(2)
    result = result*conf_mask

    return result
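# Example (sketch; `raw` is a hypothetical prediction tensor of shape
# (batch, num_boxes, 5 + num_classes)):
#   kept = confidence_filter(raw, 0.5)  # low-objectness rows zeroed, shape unchanged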

def confidence_filter_cls(result, confidence):
    """Keep predictions whose objectness exceeds `confidence` or whose best class score is very high."""
    max_scores = torch.max(result[:,:,5:25], 2)[0]
    res = torch.cat((result, max_scores.unsqueeze(2)), 2)  # append best class score as column 25

    cond_1 = (res[:,:,4] > confidence).float()
    cond_2 = (res[:,:,25] > 0.995).float()

    conf = cond_1 + cond_2
    conf = torch.clamp(conf, 0.0, 1.0)
    conf = conf.unsqueeze(2)
    result = result*conf
    return result



def get_abs_coord(box):
    """Convert a (cx, cy, w, h) box to (x1, y1, x2, y2) corner coordinates."""
    box[2], box[3] = abs(box[2]), abs(box[3])
    x1 = (box[0] - box[2]/2) - 1
    y1 = (box[1] - box[3]/2) - 1
    x2 = (box[0] + box[2]/2) - 1
    y2 = (box[1] + box[3]/2) - 1
    return x1, y1, x2, y2



def sanity_fix(box):
    """Swap coordinates if needed so that x1 <= x2 and y1 <= y2."""
    if (box[0] > box[2]):
box[0], box[2] = box[2], box[0]

if (box[1] > box[3]):
box[1], box[3] = box[3], box[1]

return box

def bbox_iou(box1, box2):
"""
    Returns the IoU of two sets of bounding boxes in (x1, y1, x2, y2) corner format
"""
#Get the coordinates of bounding boxes
b1_x1, b1_y1, b1_x2, b1_y2 = box1[:,0], box1[:,1], box1[:,2], box1[:,3]
b2_x1, b2_y1, b2_x2, b2_y2 = box2[:,0], box2[:,1], box2[:,2], box2[:,3]

#get the corrdinates of the intersection rectangle
inter_rect_x1 = torch.max(b1_x1, b2_x1)
inter_rect_y1 = torch.max(b1_y1, b2_y1)
inter_rect_x2 = torch.min(b1_x2, b2_x2)
inter_rect_y2 = torch.min(b1_y2, b2_y2)

    #Intersection area (clamped at zero so disjoint boxes give zero overlap, on CPU or GPU)
    inter_area = torch.clamp(inter_rect_x2 - inter_rect_x1 + 1, min=0) * torch.clamp(inter_rect_y2 - inter_rect_y1 + 1, min=0)

#Union Area
b1_area = (b1_x2 - b1_x1 + 1)*(b1_y2 - b1_y1 + 1)
b2_area = (b2_x2 - b2_x1 + 1)*(b2_y2 - b2_y1 + 1)

iou = inter_area / (b1_area + b2_area - inter_area)

return iou
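# Example (sketch, using the +1 pixel convention above):
#   bbox_iou(torch.tensor([[0., 0., 10., 10.]]), torch.tensor([[5., 5., 15., 15.]]))
#   -> tensor([0.1748])  (6*6 = 36 intersection over 121 + 121 - 36 = 206 union)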


def pred_corner_coord(prediction):
    """Convert every nonzero-confidence box from (cx, cy, w, h) to corner format, in place."""
    #Get indices of non-zero confidence bboxes
    ind_nz = torch.nonzero(prediction[:,:,4]).transpose(0,1).contiguous()

box = prediction[ind_nz[0], ind_nz[1]]


box_a = box.new(box.shape)
box_a[:,0] = (box[:,0] - box[:,2]/2)
box_a[:,1] = (box[:,1] - box[:,3]/2)
box_a[:,2] = (box[:,0] + box[:,2]/2)
box_a[:,3] = (box[:,1] + box[:,3]/2)
box[:,:4] = box_a[:,:4]

prediction[ind_nz[0], ind_nz[1]] = box

return prediction




def write(x, batches, results, colors, classes):
    """Draw one detection `x` (row [batch_idx, x1, y1, x2, y2, ..., cls]) onto its source image."""
    c1 = tuple(x[1:3].int())
c2 = tuple(x[3:5].int())
img = results[int(x[0])]
cls = int(x[-1])
label = "{0}".format(classes[cls])
color = random.choice(colors)
cv2.rectangle(img, c1, c2,color, 1)
t_size = cv2.getTextSize(label, cv2.FONT_HERSHEY_PLAIN, 1 , 1)[0]
c2 = c1[0] + t_size[0] + 3, c1[1] + t_size[1] + 4
cv2.rectangle(img, c1, c2,color, -1)
    cv2.putText(img, label, (c1[0], c1[1] + t_size[1] + 4), cv2.FONT_HERSHEY_PLAIN, 1, [225,255,255], 1)
return img
168 changes: 168 additions & 0 deletions HRNet/lib/detector/yolo/cam_demo.py
from __future__ import division
import time
import torch
import torch.nn as nn
from torch.autograd import Variable
import numpy as np
import cv2
from util import *
from darknet import Darknet
from preprocess import prep_image, inp_to_image
import pandas as pd
import random
import argparse
import pickle as pkl

def get_test_input(input_dim, CUDA):
img = cv2.imread("imgs/messi.jpg")
img = cv2.resize(img, (input_dim, input_dim))
img_ = img[:,:,::-1].transpose((2,0,1))
img_ = img_[np.newaxis,:,:,:]/255.0
img_ = torch.from_numpy(img_).float()
img_ = Variable(img_)

if CUDA:
img_ = img_.cuda()

return img_

def prep_image(img, inp_dim):
"""
    Prepare an OpenCV image for input to the network.
    Returns the input tensor, the original image, and its (width, height).
"""

orig_im = img
dim = orig_im.shape[1], orig_im.shape[0]
img = cv2.resize(orig_im, (inp_dim, inp_dim))
img_ = img[:,:,::-1].transpose((2,0,1)).copy()
img_ = torch.from_numpy(img_).float().div(255.0).unsqueeze(0)
return img_, orig_im, dim
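# Example (sketch): `frame` comes straight from cv2 (BGR, HWC); 416 is a typical
# YOLO input size (any multiple of 32 greater than 32 works here):
#   tensor, orig_im, (w, h) = prep_image(frame, 416)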

def write(x, img):
c1 = tuple(x[1:3].int())
c2 = tuple(x[3:5].int())
cls = int(x[-1])
label = "{0}".format(classes[cls])
color = random.choice(colors)
cv2.rectangle(img, c1, c2,color, 1)
t_size = cv2.getTextSize(label, cv2.FONT_HERSHEY_PLAIN, 1 , 1)[0]
c2 = c1[0] + t_size[0] + 3, c1[1] + t_size[1] + 4
cv2.rectangle(img, c1, c2,color, -1)
    cv2.putText(img, label, (c1[0], c1[1] + t_size[1] + 4), cv2.FONT_HERSHEY_PLAIN, 1, [225,255,255], 1)
return img

def arg_parse():
"""
    Parse arguments for the detect module
"""


parser = argparse.ArgumentParser(description='YOLO v3 Cam Demo')
parser.add_argument("--confidence", dest = "confidence", help = "Object Confidence to filter predictions", default = 0.25)
parser.add_argument("--nms_thresh", dest = "nms_thresh", help = "NMS Threshhold", default = 0.4)
parser.add_argument("--reso", dest = 'reso', help =
"Input resolution of the network. Increase to increase accuracy. Decrease to increase speed",
default = "160", type = str)
return parser.parse_args()
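# Example invocation (flags as defined above):
#   python cam_demo.py --confidence 0.5 --nms_thresh 0.4 --reso 320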



if __name__ == '__main__':
cfgfile = "cfg/yolov3.cfg"
weightsfile = "yolov3.weights"
num_classes = 80

args = arg_parse()
confidence = float(args.confidence)
nms_thesh = float(args.nms_thresh)
start = 0
    CUDA = torch.cuda.is_available()

    bbox_attrs = 5 + num_classes

model = Darknet(cfgfile)
model.load_weights(weightsfile)

model.net_info["height"] = args.reso
inp_dim = int(model.net_info["height"])

assert inp_dim % 32 == 0
assert inp_dim > 32

if CUDA:
model.cuda()

model.eval()

videofile = 'video.avi'

cap = cv2.VideoCapture(0)

assert cap.isOpened(), 'Cannot capture source'

frames = 0
start = time.time()
while cap.isOpened():

ret, frame = cap.read()
if ret:

            img, orig_im, dim = prep_image(frame, inp_dim)

            if CUDA:
                img = img.cuda()


output = model(Variable(img), CUDA)
output = write_results(output, confidence, num_classes, nms = True, nms_conf = nms_thesh)

if type(output) == int:
frames += 1
print("FPS of the video is {:5.2f}".format( frames / (time.time() - start)))
cv2.imshow("frame", orig_im)
key = cv2.waitKey(1)
if key & 0xFF == ord('q'):
break
continue



            # Clamp boxes to the network input, then rescale to original frame coordinates
            output[:,1:5] = torch.clamp(output[:,1:5], 0.0, float(inp_dim))/inp_dim
            output[:,[1,3]] *= frame.shape[1]
            output[:,[2,4]] *= frame.shape[0]


            # Class names and drawing colours (loading these once, before the loop, would avoid re-reading them each frame)
            classes = load_classes('data/coco.names')
            colors = pkl.load(open("pallete", "rb"))

list(map(lambda x: write(x, orig_im), output))


cv2.imshow("frame", orig_im)
key = cv2.waitKey(1)
if key & 0xFF == ord('q'):
break
frames += 1
print("FPS of the video is {:5.2f}".format( frames / (time.time() - start)))


else:
break




