Custom Class Using PyTorch Faster-RCNN Model not working
I have been trying the pre-trained faster-rcnn resnet50 PyTorch model in my project, and when I define my function get_detection() as seen below within the same file as where I'm calling it, it works fine. The inference will work on any image I use as input.
import torchvision
from torchvision import transforms as T
import os
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
model.eval()
COCO_INSTANCE_CATEGORY_NAMES = [
'__background__', 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'N/A', 'stop sign',
'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
'elephant', 'bear', 'zebra', 'giraffe', 'N/A', 'backpack', 'umbrella', 'N/A', 'N/A',
'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball',
'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket',
'bottle', 'N/A', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl',
'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza',
'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'N/A', 'dining table',
'N/A', 'N/A', 'toilet', 'N/A', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone',
'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'N/A', 'book',
'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush'
]
def get_prediction(image, threshold=0.5):
transform = T.Compose([T.ToTensor()])
input = transform(image)
pred = model([input])
pred_class = [COCO_INSTANCE_CATEGORY_NAMES[i] for i in list(pred[0]['labels'].numpy())]
pred_boxes = [[(int(i[0]), int(i[1])), (int(i[2]), int(i[3]))] for i in list(pred[0]['boxes'].detach().numpy())]
pred_score = list(pred[0]['scores'].detach().numpy())
if(max(pred_score) threshold):
pred_box = []
pred_class = []
else:
pred_t = [pred_score.index(x) for x in pred_score if x threshold][-1]
pred_box = pred_boxes[:pred_t+1]
pred_class = pred_class[:pred_t+1]
return (pred_box, pred_class)
However, when I define the same method within a custom class, it always gives me:
IndexError: list index out of range
when called on an image that has no prediction score above the given threshold.
I am using the same inputs for both, the only difference is that my class method will only work on images with scores above the threshold while the function within my main ipynb file will work on all the inputs.
I'm really lost as to why they are behaving differently.
Class code:
import torchvision
from torchvision import transforms as T
import matplotlib.pyplot as plt
import cv2
class PersonDetector:
# Class names that the image classifier being used is trained on (resnet50)
COCO_INSTANCE_CATEGORY_NAMES = [
'__background__', 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'N/A', 'stop sign',
'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
'elephant', 'bear', 'zebra', 'giraffe', 'N/A', 'backpack', 'umbrella', 'N/A', 'N/A',
'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball',
'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket',
'bottle', 'N/A', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl',
'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza',
'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'N/A', 'dining table',
'N/A', 'N/A', 'toilet', 'N/A', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone',
'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'N/A', 'book',
'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush'
]
# Loads pre-trained model and sets it to evaluation mode
def __init__(self):
self.model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
self.model.eval()
def get_prediction(self, image, threshold):
# Transforms the loaded image (with normalization) into a float tensor of shape (C x H x W)
transform = T.Compose([T.ToTensor()])
imgTensor = transform(image)
# Perform the prediction on the transformed image (tensor)
pred = self.model([imgTensor])
# Adding each predicted class to the pred_class array
pred_class = [self.COCO_INSTANCE_CATEGORY_NAMES[i] for i in list(pred[0]['labels'].numpy())] # If using CPU, you would have to add .cpu()
# Adding each predicted bounding box to the pred_boxes array
pred_boxes = [[(int(i[0]), int(i[1])), (int(i[2]), int(i[3]))] for i in list(pred[0]['boxes'].detach().numpy())]
# Adding each prediction score to the pred_score array
pred_score = list(pred[0]['scores'].detach().numpy())
# If there is no score that is above the threshold, set box and class arrays to empty
if(max(pred_score) threshold):
print(No score above threshold)
pred_box = [}
pred_class = []
# Else filter through pred_scores and get the last index where the score threshold
# Assign the box and class arrays with index values until pred_t index
else:
pred_t = [pred_score.index(x) for x in pred_score if x threshold][-1]
pred_box = pred_boxes[:pred_t+1]
pred_class = pred_class[:pred_t+1]
# Returns a tuple containing the final arrays storing the inference results
return (pred_box, pred_class)
Topic faster-rcnn object-detection pytorch computer-vision machine-learning
Category Data Science