run_video.py (forked from Zumbalamambo/tf-openpose)
import argparse
import logging
import time
import cv2
import numpy as np
import sys
from tf_pose.estimator import TfPoseEstimator
from tf_pose.networks import get_graph_path, model_wh
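
# Run tf-pose-estimation on a video file: read frames with OpenCV, estimate
# poses per frame, draw the detected skeletons, and optionally write the
# annotated video and per-frame JSON keypoints.
#
# Example invocation (file names are placeholders):
#   python run_video.py --video input.mp4 --model mobilenet_thin --write_video output.avi
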
logger = logging.getLogger('TfPoseEstimator-Video')
logger.setLevel(logging.DEBUG)
ch = logging.StreamHandler()
ch.setLevel(logging.DEBUG)
formatter = logging.Formatter('[%(asctime)s] [%(name)s] [%(levelname)s] %(message)s')
ch.setFormatter(formatter)
logger.addHandler(ch)
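
# timestamp of the previously rendered frame, used for the on-screen FPS overlay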
fps_time = 0
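
# argparse treats any non-empty string as truthy, so boolean flags passed as
# strings (e.g. --tensorrt) are parsed manually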
def str2bool(v):
    return v.lower() in ("yes", "true", "t", "1")

if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='tf-pose-estimation Video')
    parser.add_argument('--video', type=str, default='', help='path to the input video file')
    parser.add_argument('--resolution', type=str, default='432x368',
                        help='network input resolution. default=432x368')
    parser.add_argument('--model', type=str, default='mobilenet_thin',
                        help='cmu / mobilenet_thin / mobilenet_v2_large / mobilenet_v2_small')
    parser.add_argument('--show-process', action='store_true',
                        help='for debugging; if enabled, inference speed drops.')
    parser.add_argument('--no_bg', action='store_true', help='show the skeleton only, on a black background.')
    parser.add_argument('--write_json', type=str, default='/tmp/', help='directory to write per-frame JSON keypoints to')
    parser.add_argument('--no_display', action='store_true', help='do not display the result window')
    parser.add_argument('--resize_out_ratio', type=float, default=4.0,
                        help='factor by which to upsample heatmaps before post-processing')
    parser.add_argument('--number_people_max', type=int, default=1, help='maximum number of people to keep per frame')
    parser.add_argument('--frame_first', type=int, default=0, help='first frame to analyze')
    parser.add_argument('--write_video', type=str, default=None, help='output video file')
    parser.add_argument('--tensorrt', type=str, default="False",
                        help='use TensorRT for inference (true/false).')
    args = parser.parse_args()
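
    # load the frozen graph for the chosen model and build the estimator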
    logger.debug('initialization %s : %s' % (args.model, get_graph_path(args.model)))
    w, h = model_wh(args.resolution)
    e = TfPoseEstimator(get_graph_path(args.model), target_size=(w, h), trt_bool=str2bool(args.tensorrt))

    cap = cv2.VideoCapture(args.video)
    if not cap.isOpened():
        logger.error("Error opening input video stream or file: {0}".format(args.video))
        sys.exit(1)
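
    # the writer must be created with the input's frame size; the 30 fps rate
    # is a fixed assumption here, not read from the input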
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    if args.write_video is not None:
        fourcc = cv2.VideoWriter_fourcc(*'XVID')
        out = cv2.VideoWriter(args.write_video, fourcc, 30.0, (width, height))
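
    # main loop: skip frames before frame_first, then run inference and render each frame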
sys.stdout.write("frame: ")
frame = 0
detected = False
while cap.isOpened():
ret_val, image = cap.read()
if not ret_val:
break
if frame < args.frame_first:
frame += 1
continue
sys.stdout.write('\rframe: {:5}'.format(frame))
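
        # run the pose network on this frame; a larger resize_out_ratio upsamples
        # the part heatmaps before post-processing (more precise, but slower)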
        humans = e.inference(image, resize_to_default=(w > 0 and h > 0), upsample_size=args.resize_out_ratio)
        if len(humans) > 0:
            detected = True
        del humans[args.number_people_max:]  # keep at most number_people_max detections

        if args.no_bg:
            image = np.zeros(image.shape, dtype=np.uint8)
        image = TfPoseEstimator.draw_humans(image, humans, imgcopy=False, frame=frame, output_json_dir=args.write_json)
        frame += 1

        elapsed = max(time.time() - fps_time, 1e-6)  # guard against a zero interval
        cv2.putText(image, "FPS: %f" % (1.0 / elapsed), (10, 10),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
        if not args.no_display:
            cv2.imshow('tf-pose-estimation result', image)
        if args.write_video is not None:
            out.write(image)
        fps_time = time.time()
        if cv2.waitKey(1) == 27:  # ESC quits
            break
sys.stdout.write("\n")
if args.write_video is not None:
out.release()
if frame <= args.frame_first:
logger.error('No frame is processed: frame_first = {0}, frame = {1}'.format(args.frame_first, frame))
sys.exit(1)
if not detected:
logger.error('No human is detected in the video: {0}'.format(args.video))
sys.exit(1)
cv2.destroyAllWindows()
logger.debug('finished+')