Custom Class Using PyTorch Faster-RCNN Model not working

Question

Custom Class Using PyTorch Faster-RCNN Model not working

BencleBoy

2022年2月14日 23:49

I have been trying the pre-trained faster-rcnn resnet50 PyTorch model in my project, and when I define my function get_detection() as seen below within the same file as where I'm calling it, it works fine. The inference will work on any image I use as input.

import torchvision
from torchvision import transforms as T
import os

model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
model.eval()

COCO_INSTANCE_CATEGORY_NAMES = [
'__background__', 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'N/A', 'stop sign',
'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
'elephant', 'bear', 'zebra', 'giraffe', 'N/A', 'backpack', 'umbrella', 'N/A', 'N/A',
'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball',
'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket',
'bottle', 'N/A', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl',
'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza',
'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'N/A', 'dining table',
'N/A', 'N/A', 'toilet', 'N/A', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone',
'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'N/A', 'book',
'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush'
]

def get_prediction(image, threshold=0.5):
    transform = T.Compose([T.ToTensor()])
    input = transform(image)
    pred = model([input])
    pred_class = [COCO_INSTANCE_CATEGORY_NAMES[i] for i in list(pred[0]['labels'].numpy())]
    pred_boxes = [[(int(i[0]), int(i[1])), (int(i[2]), int(i[3]))] for i in list(pred[0]['boxes'].detach().numpy())]
    pred_score = list(pred[0]['scores'].detach().numpy()) 

    if(max(pred_score)  threshold):
        pred_box = []
        pred_class = []
    else:
        pred_t = [pred_score.index(x) for x in pred_score if x  threshold][-1]
        pred_box = pred_boxes[:pred_t+1]
        pred_class = pred_class[:pred_t+1]
    return (pred_box, pred_class)

However, when I define the same method within a custom class, it always gives me: IndexError: list index out of range when called on an image that has no prediction score above the given threshold.

I am using the same inputs for both, the only difference is that my class method will only work on images with scores above the threshold while the function within my main ipynb file will work on all the inputs.

I'm really lost as to why they are behaving differently.

Class code:

import torchvision
from torchvision import transforms as T
import matplotlib.pyplot as plt
import cv2

class PersonDetector:
    # Class names that the image classifier being used is trained on (resnet50)
    COCO_INSTANCE_CATEGORY_NAMES = [
    '__background__', 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
    'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'N/A', 'stop sign',
    'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
    'elephant', 'bear', 'zebra', 'giraffe', 'N/A', 'backpack', 'umbrella', 'N/A', 'N/A',
    'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball',
    'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket',
    'bottle', 'N/A', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl',
    'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza',
    'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'N/A', 'dining table',
    'N/A', 'N/A', 'toilet', 'N/A', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone',
    'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'N/A', 'book',
    'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush'
    ]

    # Loads pre-trained model and sets it to evaluation mode
    def __init__(self):
        self.model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
        self.model.eval()

    def get_prediction(self, image, threshold):
        # Transforms the loaded image (with normalization) into a float tensor of shape (C x H x W)
        transform = T.Compose([T.ToTensor()])
        imgTensor = transform(image)

        # Perform the prediction on the transformed image (tensor)
        pred = self.model([imgTensor])

        # Adding each predicted class to the pred_class array
        pred_class = [self.COCO_INSTANCE_CATEGORY_NAMES[i] for i in list(pred[0]['labels'].numpy())] # If using CPU, you would have to add .cpu()

        # Adding each predicted bounding box to the pred_boxes array
        pred_boxes = [[(int(i[0]), int(i[1])), (int(i[2]), int(i[3]))] for i in list(pred[0]['boxes'].detach().numpy())]

        # Adding each prediction score to the pred_score array
        pred_score = list(pred[0]['scores'].detach().numpy())
    
        # If there is no score that is above the threshold, set box and class arrays to empty
        if(max(pred_score)  threshold):
            print(No score above threshold)
            pred_box = [}
            pred_class = []

        # Else filter through pred_scores and get the last index where the score  threshold
        # Assign the box and class arrays with index values until pred_t index
        else:
            pred_t = [pred_score.index(x) for x in pred_score if x  threshold][-1]
            pred_box = pred_boxes[:pred_t+1]
            pred_class = pred_class[:pred_t+1]

        # Returns a tuple containing the final arrays storing the inference results    
        return (pred_box, pred_class)

Topic faster-rcnn object-detection pytorch computer-vision machine-learning

Category Data Science

Custom Class Using PyTorch Faster-RCNN Model not working

About