본문 바로가기
STUDY/YOLO

opencv with CUDA실행

by brown_board 2022. 5. 25.
728x90

GPU와 CPU를 사용할 때의 FPS를 측정해보았다.

참고로 GPU를 사용할 때는

YOLO_net.setPreferableBackend(cv2.dnn.DNN_BACKEND_CUDA)
YOLO_net.setPreferableTarget(cv2.dnn.DNN_TARGET_CUDA)
위의 두 줄을 추가하면 된다. 필자의 경우는 YOLO를 사용해서 변수 이름을 YOLO_net으로 지정했지만, 보통은 net이라는 이름을 사용한다.
그리고 fps 측정을 위해 추가한 구문은 다음과 같다.

import time

# Excerpt: these lines go inside the per-frame loop, after `blob` has been
# created from the current frame and `YOLO_net` has been loaded.
# The same blob is pushed through the network 100 times so that the average
# time of a single forward pass can be measured.
start = time.time()
for i in range(100):
    YOLO_net.setInput(blob)
    detections = YOLO_net.forward(YOLO_net.getUnconnectedOutLayersNames())
end = time.time()

# Elapsed seconds -> milliseconds, averaged over the 100 runs.
ms_per_image = (end - start) * 1000 / 100

print("Time per inference: %f ms" % (ms_per_image))
print("FPS: ", 1000.0 / ms_per_image)

 

- GPU사용 시

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
import cv2                       # image processing and DNN inference
import numpy as np               # argmax over class scores, index handling
import time

# Script purpose: run YOLO detection on webcam frames with the CUDA backend,
# draw the detected boxes, and print the average inference time / FPS
# measured over repeated forward passes on each frame.

# Thresholds and sizes (same values as the original listing).
CONF_THRESHOLD = 0.5         # minimum class score for a candidate box
NMS_SCORE_THRESHOLD = 0.8    # score threshold handed to NMSBoxes
NMS_IOU_THRESHOLD = 0.4      # overlap threshold handed to NMSBoxes
INPUT_SIZE = (416, 416)      # frame size and network input size
BENCHMARK_RUNS = 100         # forward passes timed per frame

# Open the webcam (device 0).
VideoSignal = cv2.VideoCapture(0)

# ---------- network / weight files ----------
YOLO_net = cv2.dnn.readNet('yolov4-tiny-hyeok_last.weights', 'yolov4-tiny-hyeok.cfg')

# Class names, one per line. They are loaded but this script only draws boxes.
with open('package.names', "r") as f:
    classes = [line.strip() for line in f.readlines()]

# Ask the network for its output layer names directly instead of indexing
# getLayerNames() with getUnconnectedOutLayers(): the shape of the latter's
# return value differs between OpenCV versions (i[0] - 1 vs. i - 1).
output_layers = YOLO_net.getUnconnectedOutLayersNames()

# Use the GPU.
YOLO_net.setPreferableBackend(cv2.dnn.DNN_BACKEND_CUDA)
YOLO_net.setPreferableTarget(cv2.dnn.DNN_TARGET_CUDA)
# ---------- network / weight files ----------

while True:
    # Grab one webcam frame; stop if the camera delivers nothing, because
    # cv2.resize would otherwise fail on an empty frame.
    ret, frame = VideoSignal.read()
    if not ret:
        break
    frame = cv2.resize(frame, INPUT_SIZE)
    h, w, c = frame.shape

    # YOLO input blob: scale by 0.00392, no mean subtraction, swap R and B.
    blob = cv2.dnn.blobFromImage(frame, 0.00392, INPUT_SIZE, (0, 0, 0), True, crop=False)
    YOLO_net.setInput(blob)
    outs = YOLO_net.forward(output_layers)

    class_ids = []
    confidences = []
    boxes = []

    for out in outs:
        for detection in out:
            # Elements 0-3 are used as the box (centre x, centre y, width,
            # height, relative to the frame); elements from 5 on are the
            # per-class scores.
            scores = detection[5:]
            class_id = np.argmax(scores)
            confidence = scores[class_id]

            if confidence > CONF_THRESHOLD:
                # Object detected: convert relative values to pixels.
                center_x = int(detection[0] * w)
                center_y = int(detection[1] * h)
                dw = int(detection[2] * w)
                dh = int(detection[3] * h)
                # Top-left corner of the rectangle.
                x = int(center_x - dw / 2)
                y = int(center_y - dh / 2)
                boxes.append([x, y, dw, dh])
                confidences.append(float(confidence))
                class_ids.append(class_id)

    indexes = cv2.dnn.NMSBoxes(boxes, confidences, NMS_SCORE_THRESHOLD, NMS_IOU_THRESHOLD)

    # NMSBoxes may return an empty tuple, an (N, 1) array or a flat array
    # depending on the OpenCV version; flatten it to plain indices.
    for i in np.array(indexes, dtype=int).reshape(-1):
        # Separate names so the frame's w / h are not overwritten.
        x, y, bw, bh = boxes[i]
        # Draw the bounding box on the frame.
        cv2.rectangle(frame, (x, y), (x + bw, y + bh), (0, 0, 255), 1)

    # Benchmark: repeat the forward pass on the same blob and average.
    start = time.time()
    for _ in range(BENCHMARK_RUNS):
        YOLO_net.setInput(blob)
        YOLO_net.forward(output_layers)
    end = time.time()

    ms_per_image = (end - start) * 1000 / BENCHMARK_RUNS

    print("Time per inference: %f ms" % (ms_per_image))
    print("FPS: ", 1000.0 / ms_per_image)

    cv2.imshow("YOLOv4", frame)
    # Quit when 'q' is pressed.
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

VideoSignal.release()
cv2.destroyAllWindows()
cs

FPS가 240이 나온다.

-CPU사용 시

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
import cv2                       # image processing and DNN inference
import numpy as np               # argmax over class scores, index handling
import time

# Script purpose: run YOLO detection on webcam frames WITHOUT selecting the
# CUDA backend, draw the detected boxes, and print the average inference
# time / FPS measured over repeated forward passes on each frame.

# Thresholds and sizes (same values as the original listing).
CONF_THRESHOLD = 0.5         # minimum class score for a candidate box
NMS_SCORE_THRESHOLD = 0.8    # score threshold handed to NMSBoxes
NMS_IOU_THRESHOLD = 0.4      # overlap threshold handed to NMSBoxes
INPUT_SIZE = (416, 416)      # frame size and network input size
BENCHMARK_RUNS = 100         # forward passes timed per frame

# Open the webcam (device 0).
VideoSignal = cv2.VideoCapture(0)

# ---------- network / weight files ----------
YOLO_net = cv2.dnn.readNet('yolov4-tiny-hyeok_last.weights', 'yolov4-tiny-hyeok.cfg')

# Class names, one per line. They are loaded but this script only draws boxes.
with open('package.names', "r") as f:
    classes = [line.strip() for line in f.readlines()]

# Ask the network for its output layer names directly instead of indexing
# getLayerNames() with getUnconnectedOutLayers(): the shape of the latter's
# return value differs between OpenCV versions (i[0] - 1 vs. i - 1).
output_layers = YOLO_net.getUnconnectedOutLayersNames()

# CPU run: setPreferableBackend(DNN_BACKEND_CUDA) and
# setPreferableTarget(DNN_TARGET_CUDA) are deliberately not called here, so
# the network keeps its default backend and target.
# ---------- network / weight files ----------

while True:
    # Grab one webcam frame; stop if the camera delivers nothing, because
    # cv2.resize would otherwise fail on an empty frame.
    ret, frame = VideoSignal.read()
    if not ret:
        break
    frame = cv2.resize(frame, INPUT_SIZE)
    h, w, c = frame.shape

    # YOLO input blob: scale by 0.00392, no mean subtraction, swap R and B.
    blob = cv2.dnn.blobFromImage(frame, 0.00392, INPUT_SIZE, (0, 0, 0), True, crop=False)
    YOLO_net.setInput(blob)
    outs = YOLO_net.forward(output_layers)

    class_ids = []
    confidences = []
    boxes = []

    for out in outs:
        for detection in out:
            # Elements 0-3 are used as the box (centre x, centre y, width,
            # height, relative to the frame); elements from 5 on are the
            # per-class scores.
            scores = detection[5:]
            class_id = np.argmax(scores)
            confidence = scores[class_id]

            if confidence > CONF_THRESHOLD:
                # Object detected: convert relative values to pixels.
                center_x = int(detection[0] * w)
                center_y = int(detection[1] * h)
                dw = int(detection[2] * w)
                dh = int(detection[3] * h)
                # Top-left corner of the rectangle.
                x = int(center_x - dw / 2)
                y = int(center_y - dh / 2)
                boxes.append([x, y, dw, dh])
                confidences.append(float(confidence))
                class_ids.append(class_id)

    indexes = cv2.dnn.NMSBoxes(boxes, confidences, NMS_SCORE_THRESHOLD, NMS_IOU_THRESHOLD)

    # NMSBoxes may return an empty tuple, an (N, 1) array or a flat array
    # depending on the OpenCV version; flatten it to plain indices.
    for i in np.array(indexes, dtype=int).reshape(-1):
        # Separate names so the frame's w / h are not overwritten.
        x, y, bw, bh = boxes[i]
        # Draw the bounding box on the frame.
        cv2.rectangle(frame, (x, y), (x + bw, y + bh), (0, 0, 255), 1)

    # Benchmark: repeat the forward pass on the same blob and average.
    start = time.time()
    for _ in range(BENCHMARK_RUNS):
        YOLO_net.setInput(blob)
        YOLO_net.forward(output_layers)
    end = time.time()

    ms_per_image = (end - start) * 1000 / BENCHMARK_RUNS

    print("Time per inference: %f ms" % (ms_per_image))
    print("FPS: ", 1000.0 / ms_per_image)

    cv2.imshow("YOLOv4", frame)
    # Quit when 'q' is pressed.
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

VideoSignal.release()
cv2.destroyAllWindows()
cs

FPS가 11정도가 나온다.

참고 블로그

https://github.com/opencv/opencv/issues/16348

https://learnopencv.com/opencv-dnn-with-gpu-support/

728x90

'STUDY > YOLO' 카테고리의 다른 글

우분투 그래픽드라이버 cuda cudnn  (0) 2022.05.28
우분투 yolo 환경구축  (0) 2022.05.28
opencv with CUDA 정리  (0) 2022.05.25
opencv gpu (3)  (0) 2022.05.24
opencv gpu (2)  (0) 2022.05.24

댓글