# SocialDist_app.py

import os
import math
from itertools import combinations

# comment out below line to enable tensorflow logging outputs
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

import time
import tensorflow as tf
# GPU configuration: when at least one GPU is visible, enable memory growth
# on the first one.
physical_devices = tf.config.experimental.list_physical_devices('GPU')
if physical_devices:
    tf.config.experimental.set_memory_growth(physical_devices[0], True)
    
from absl import app,logging
import core.utils as utils
from core.yolov4 import filter_boxes
from tensorflow.python.saved_model import tag_constants
from core.config import cfg
from PIL import Image
import cv2
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.compat.v1 import ConfigProto
from tensorflow.compat.v1 import InteractiveSession


# deep sort imports
from deep_sort import preprocessing, nn_matching
from deep_sort.detection import Detection
from deep_sort.tracker import Tracker
from tools import generate_detections as gdet

#API imports
from flask_ngrok import run_with_ngrok
from flask import Flask, flash, request, redirect, url_for, render_template, Response
from werkzeug.utils import secure_filename

# customize API with the parameters
# 'tflite' makes main() use the TFLite interpreter; any other value loads a
# TensorFlow SavedModel from `weights`.
framework = 'tf'
weights = './checkpoints/yolov4-416'
# Frames are resized to size x size before being fed to the detector.
size = 416
# `tiny` and `model` are only consulted in main()'s tflite branch.
tiny = False
model = 'yolov4'
# Path of the annotated output video; a falsy value disables writing.
output = './static/outputs/output.avi'
# FourCC code intended for the output video.
output_format = 'XVID'
# IoU and score thresholds passed to combined non-max suppression.
iou = 0.45
score = 0.50
# When False, processed frames are also shown in an OpenCV window.
dont_show =  True
# When True, main() prints details about detected violations.
info = True
count = True
# Shared state reported by the /stream2 monitor page.
violations = []
track_cnt = 0
fps = 0

UPLOAD_FOLDER = './static/uploads'
ALLOWED_EXTENSIONS = {'gif', 'webm', 'mp4', 'mov', 'avi'}

app1 = Flask(__name__)
app1.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
# flash() stores its messages in the session, and Flask refuses to use the
# session without a secret key. Allow an override from the environment and
# otherwise fall back to a random per-process key.
app1.secret_key = os.environ.get('FLASK_SECRET_KEY') or os.urandom(24)

run_with_ngrok(app1)

def allowed_file(filename):
    """Return True when `filename` has an extension listed in ALLOWED_EXTENSIONS.

    The extension is the text after the last '.', compared case-insensitively.
    A name without any '.' is rejected.
    """
    _, dot, extension = filename.rpartition('.')
    if not dot:
        return False
    return extension.lower() in ALLOWED_EXTENSIONS

@app1.route('/', methods=['GET', 'POST'])
def upload_file():
    """Serve the upload form (GET) and accept a video upload (POST).

    On a valid POST the file is saved under the configured upload folder, the
    module globals ``vid_stream1`` (saved path) and ``cap_stream1`` (an OpenCV
    capture opened on that path) are set, and the client is redirected to the
    ``stream`` view. A POST without a file part or with an empty filename
    flashes a message and redirects back to this page. Any other case
    (including a disallowed extension) falls through to the upload form.
    """
    global cap_stream1
    global vid_stream1
    if request.method == 'POST':
        # check if the post request has the file part
        if 'file' not in request.files:
            flash('No file part')
            return redirect(request.url)
        file = request.files['file']
        # If the user does not select a file, the browser submits an
        # empty file without a filename.
        if file.filename == '':
            flash('No selected file')
            return redirect(request.url)
        if file and allowed_file(file.filename):
            filename = secure_filename(file.filename)
            upload_dir = app1.config['UPLOAD_FOLDER']
            # file.save() fails if the target directory is missing, so make
            # sure it exists before writing the upload.
            os.makedirs(upload_dir, exist_ok=True)
            vid_stream1 = os.path.join(upload_dir, filename)
            file.save(vid_stream1)
            cap_stream1 = cv2.VideoCapture(vid_stream1)
            return redirect(url_for('stream', file='show_vdo.html'))
            #return "Uploaded successfully"
            #return redirect(url_for('download_file', name=filename))
    return '''
    <!doctype html>
    <title>uploading</title>
    <h1>Please choose a video from Stanford Dataset</h1>
    <form method=post enctype=multipart/form-data>
      <input type=file name=file>
      <input type=submit value=Upload>
    </form>
    '''

#cap = cv2.VideoCapture(vid)

def generate_frames(cap):
    """Yield frames read from `cap` as JPEG parts of a multipart stream.

    Each yielded chunk is one ``--frame`` part carrying a JPEG-encoded frame.
    The generator stops at the first read that reports failure.
    """
    part_header = b'--frame\r\n' b'Content-Type: image/jpeg\r\n\r\n'
    success, frame = cap.read()
    while success:
        _, encoded = cv2.imencode('.jpg', frame)
        yield part_header + encoded.tobytes() + b'\r\n'
        success, frame = cap.read()
                   
def generate_output(f):
    """Yield the single frame `f` as one JPEG part of a multipart stream."""
    _, encoded = cv2.imencode('.jpg', f)
    jpeg_bytes = encoded.tobytes()
    yield b'--frame\r\n' b'Content-Type: image/jpeg\r\n\r\n' + jpeg_bytes + b'\r\n'
            
@app1.route("/stream")
def stream():
    """Render the ``show_vdo.html`` template.

    NOTE(review): presumably this page embeds the /video stream; confirm
    against the template.
    """
    template_name = 'show_vdo.html'
    return render_template(template_name)

            
@app1.route('/video')
def stream_video():
    """Stream the uploaded video as a multipart JPEG response.

    Frames come from the module-global capture ``cap_stream1`` that
    ``upload_file`` creates. If no upload has happened yet that global does
    not exist, so a 404 response is returned instead of failing with a
    NameError.
    """
    capture = globals().get('cap_stream1')
    if capture is None:
        return Response('No video has been uploaded yet.', status=404)
    return Response(generate_frames(capture), mimetype='multipart/x-mixed-replace; boundary=frame')

@app1.route("/stream2")
def stream2():
    """Render ``monitor.html`` with the current module-level statistics.

    The template receives ``v`` (number of recorded violations),
    ``track_cnt`` (as a string) and ``_fp`` (the module-level ``fps`` value).
    """
    template_context = {
        'v': len(violations),
        'track_cnt': str(track_cnt),
        '_fp': fps,
    }
    return render_template('monitor.html', **template_context)

def _open_video_capture(video_path):
    """Open an OpenCV capture from a numeric device index or a file path."""
    try:
        source = int(video_path)
    except (TypeError, ValueError):
        # Not a device index: treat it as a path/URL.
        source = video_path
    return cv2.VideoCapture(source)


@app1.route('/violations')
def main():
    """Run detection + tracking on the uploaded video and count distance violations.

    For every frame the detector output is filtered to the allowed classes,
    fed to the Deep SORT tracker, and every pair of confirmed tracks whose
    centroids are at most 45 pixels apart is marked on the frame and both
    track IDs are recorded in the module-level ``violations`` list. The
    annotated frames are written to ``output`` when that setting is truthy.

    Module-level state updated while running (read by the ``/stream2`` view):
    ``violations`` (reset at the start of each run, because a fresh tracker
    restarts its IDs), ``track_cnt`` (number of distinct confirmed track IDs
    seen so far) and ``fps`` (processing rate of the latest frame).

    Returns:
        An HTML summary string once the video has been processed, the
        failure message if not a single frame could be read, or a
        ``(message, 400)`` tuple when no video has been uploaded yet.
    """
    global track_cnt, fps

    # Definition of the parameters
    max_cosine_distance = 0.4
    nn_budget = None
    nms_max_overlap = 0.8
    # Centroid distance (in pixels) at or below which a pair is a violation.
    min_distance_px = 45

    # `vid_stream1` only exists after a successful upload.
    video_path = globals().get('vid_stream1')
    if video_path is None:
        return 'No video has been uploaded yet.', 400

    # Start each run from a clean slate; clear in place so other references
    # to the shared list stay valid.
    violations.clear()
    track_cnt = 0
    fps = 0

    # initialize deep sort
    model_filename = 'model_data/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    # calculate cosine distance metric
    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
    # initialize tracker
    tracker = Tracker(metric)

    # load configuration for object detector
    config = ConfigProto()
    config.gpu_options.allow_growth = True
    session = InteractiveSession(config=config)
    input_size = size

    vid = None
    out = None
    frame_num = 0
    try:
        # load tflite model if flag is set
        if framework == 'tflite':
            interpreter = tf.lite.Interpreter(model_path=weights)
            interpreter.allocate_tensors()
            input_details = interpreter.get_input_details()
            output_details = interpreter.get_output_details()
            print(input_details)
            print(output_details)
        # otherwise load standard tensorflow saved model
        else:
            saved_model_loaded = tf.saved_model.load(weights, tags=[tag_constants.SERVING])
            infer = saved_model_loaded.signatures['serving_default']

        # begin video capture
        vid = _open_video_capture(video_path)

        # get video ready to save locally if flag is set
        if output:
            output_dir = os.path.dirname(output)
            if output_dir:
                os.makedirs(output_dir, exist_ok=True)
            # by default VideoCapture returns float instead of int
            height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
            width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
            source_fps = int(vid.get(cv2.CAP_PROP_FPS))
            codec = cv2.VideoWriter_fourcc(*output_format)
            out = cv2.VideoWriter(output, codec, source_fps, (width, height))

        # Loop-invariant lookups: read the class names once, not per frame.
        class_names = utils.read_class_names(cfg.YOLO.CLASSES)
        # custom allowed classes
        allowed_classes = ['pedestrian', 'biker']
        # distinct confirmed track IDs seen during this run
        total_ped = set()

        # while video is running
        while True:
            return_value, frame = vid.read()
            if not return_value:
                # End of video (or read failure): leave the loop so that the
                # capture/writer are released and a summary can be returned.
                break
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            frame_num += 1
            print('Frame #: ', frame_num)
            image_data = cv2.resize(frame, (input_size, input_size))
            image_data = image_data / 255.
            image_data = image_data[np.newaxis, ...].astype(np.float32)
            start_time = time.time()

            # run detections on tflite if flag is set
            if framework == 'tflite':
                interpreter.set_tensor(input_details[0]['index'], image_data)
                interpreter.invoke()
                pred = [interpreter.get_tensor(detail['index']) for detail in output_details]
                # run detections using yolov3 if flag is set
                if model == 'yolov3' and tiny:
                    boxes, pred_conf = filter_boxes(pred[1], pred[0], score_threshold=0.25,
                                                    input_shape=tf.constant([input_size, input_size]))
                else:
                    boxes, pred_conf = filter_boxes(pred[0], pred[1], score_threshold=0.25,
                                                    input_shape=tf.constant([input_size, input_size]))
            else:
                batch_data = tf.constant(image_data)
                pred_bbox = infer(batch_data)
                for key, value in pred_bbox.items():
                    boxes = value[:, :, 0:4]
                    pred_conf = value[:, :, 4:]

            boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression(
                boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)),
                scores=tf.reshape(
                    pred_conf, (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])),
                max_output_size_per_class=50,
                max_total_size=50,
                iou_threshold=iou,
                score_threshold=score
            )

            # convert data to numpy arrays and slice out unused elements
            num_objects = int(valid_detections.numpy()[0])
            bboxes = boxes.numpy()[0][0:num_objects]
            scores = scores.numpy()[0][0:num_objects]
            classes = classes.numpy()[0][0:num_objects]

            # format bounding boxes from normalized ymin, xmin, ymax, xmax ---> xmin, ymin, width, height
            original_h, original_w, _ = frame.shape
            bboxes = utils.format_boxes(bboxes, original_h, original_w)

            # use class index to get class name, allow only classes in allowed_classes list
            names = []
            deleted_indx = []
            for i in range(num_objects):
                class_name = class_names[int(classes[i])]
                if class_name in allowed_classes:
                    names.append(class_name)
                else:
                    deleted_indx.append(i)
            names = np.array(names)
            tracked_count = len(names)
            # `count` is the module-level display flag; the per-frame number
            # lives in its own local so the flag is not shadowed.
            if count and tracked_count:
                cv2.putText(frame, "Objects being tracked: {}".format(tracked_count), (5, 35), cv2.FONT_HERSHEY_COMPLEX_SMALL, 2, (0, 255, 0), 2)
                print("Objects being tracked: {}".format(tracked_count))
            # delete detections that are not in allowed_classes
            bboxes = np.delete(bboxes, deleted_indx, axis=0)
            scores = np.delete(scores, deleted_indx, axis=0)

            # encode yolo detections and feed to tracker
            features = encoder(frame, bboxes)
            detections = [Detection(bbox, det_score, class_name, feature)
                          for bbox, det_score, class_name, feature in zip(bboxes, scores, names, features)]

            # run non-maxima supression
            boxs = np.array([d.tlwh for d in detections])
            scores = np.array([d.confidence for d in detections])
            classes = np.array([d.class_name for d in detections])
            indices = preprocessing.non_max_suppression(boxs, classes, nms_max_overlap, scores)
            detections = [detections[i] for i in indices]

            # Call the tracker
            tracker.predict()
            tracker.update(detections)

            # Collect (track id, class name, centroid) for every confirmed,
            # recently updated track so violations can be attributed to the
            # tracks that actually form each pair.
            tracked = []
            for track in tracker.tracks:
                if not track.is_confirmed() or track.time_since_update > 1:
                    continue
                bbox = track.to_tlbr()
                x_min = int(bbox[0])
                y_min = int(bbox[1])
                x_max = int(bbox[2])
                y_max = int(bbox[3])
                centroid = (int(x_min + ((x_max - x_min) / 2)),
                            int(y_min + ((y_max - y_min) / 2)))
                tracked.append((track.track_id, track.get_class(), centroid))
                total_ped.add(track.track_id)

            for (id1, class1, c1), (id2, class2, c2) in combinations(tracked, 2):
                if social_dist(c1, c2) > min_distance_px:
                    continue
                cv2.circle(frame, c1, radius=7, color=(255, 0, 0), thickness=-1)
                cv2.circle(frame, c2, radius=7, color=(255, 0, 0), thickness=-1)
                # Record both members of the pair, independent of logging.
                for ped in (id1, id2):
                    if ped not in violations:
                        violations.append(ped)
                if info:
                    print("Tracker IDs: {} and {}, Classes: {} and {},  BBox Coords (x_center. y_center): {}".format(
                        id1, id2, class1, class2, (c1, c2)))

            # Draw the summary overlay once per frame.
            track_cnt = len(total_ped)
            cv2.rectangle(frame, (10, 5), (315, 70), (102, 102, 255), -1)
            cv2.putText(frame, "violations: " + str(len(violations)), (30, 30), 0, 1, (255, 255, 255), 2)
            cv2.putText(frame, "Total person: " + str(track_cnt), (30, 60), 0, 1, (255, 255, 255), 2)

            # calculate frames per second of running detections
            elapsed = time.time() - start_time
            fps = 1.0 / elapsed if elapsed > 0 else 0.0
            print("FPS: %.2f" % fps)
            result = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
            if not dont_show:
                cv2.imshow("Output Video", result)

            # if output flag is set, save video file
            if out is not None:
                out.write(result)
            # The 'q' shortcut only makes sense when a window is shown.
            if not dont_show and cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Release native resources even on errors so the output file is
        # finalized and the capture/session are not leaked between requests.
        if vid is not None:
            vid.release()
        if out is not None:
            out.release()
        if not dont_show:
            cv2.destroyAllWindows()
        session.close()

    if frame_num == 0:
        return 'Video has ended or failed, try a different video format!'
    return '''<!doctype html>
    <p>{} Violations found. Total {} person tracked</p>'''.format(len(violations), track_cnt)
    
# calculate the social distances
def social_dist(center1, center2):
    """Return the Euclidean distance between two (x, y) points."""
    (x1, y1), (x2, y2) = center1, center2
    delta_x = x2 - x1
    delta_y = y2 - y1
    return math.sqrt(delta_x**2 + delta_y**2)

# Start the Flask application only when this file is executed as a script
# (not when it is imported).
if __name__ == '__main__':
    app1.run()