Installing Dependencies for the Detectron.

Creating Conda Environment

  • conda create -n detectron python=3.8 -y
  • conda activate detectron

Installing Pytorch

  • conda install pytorch torchvision torchaudio cudatoolkit=11.0 -c pytorch

Clone the Detectron2 Repository Into Your Current Working Directory With the Following Command

  • git clone https://github.com/facebookresearch/detectron2.git

Move Into the Cloned Repository and Install It by Running the Following Commands.

  • cd detectron2
  • pip install -e .

After following all of the steps above, you are ready to go.

Performing Object Detection Using Detectron.

Importing Necessary Modules

from detectron2 import model_zoo
from detectron2.engine import DefaultTrainer, DefaultPredictor
from detectron2.data import MetadataCatalog
from detectron2.config import get_cfg
from detectron2.utils.visualizer import ColorMode, Visualizer
import matplotlib.pyplot as plt
import cv2 
import numpy as np

Loading Pre-Trained model weights and config file.

  • In this cell we are doing four tasks:

    • Object Detection :

      • Multiple objects are detected in the image using a model pretrained on the COCO dataset: COCO-Detection/faster_rcnn_R_101_FPN_3x.yaml.
      • In the figure below, the model detects a bike and a person.

        bike-detection

    • Keypoints Detection :

      • In this step we detect objects and simultaneously detect their keypoints. This pretrained model only detects the keypoints of people.
      • The pretrained model is COCO-Keypoints/keypoint_rcnn_R_101_FPN_3x.yaml, which will be downloaded during execution.

        bikekeypoint

    • Instance Segmentation :

      • Instance segmentation is the task of detecting and delineating each distinct object of interest appearing in an image.
      • This is done with the pretrained model COCO-InstanceSegmentation/mask_rcnn_R_101_FPN_3x.yaml, which is downloaded during execution.

        bikesegment

    • Panoptic Segmentation :

      • Panoptic segmentation is an image segmentation task that combines the prediction from both instance and semantic segmentation into a general unified output.
      • This is done with the pretrained model COCO-PanopticSegmentation/panoptic_fpn_R_101_3x.yaml, which is downloaded during execution.

        bikepanoptic

class Detector:
    """Run a pretrained COCO Detectron2 model on an image or a video.

    The task is selected with ``model_type``:

    * ``"OD"`` - object detection (Faster R-CNN R101-FPN)
    * ``"IS"`` - instance segmentation (Mask R-CNN R101-FPN)
    * ``"KP"`` - keypoint detection (Keypoint R-CNN R101-FPN)
    * ``"PS"`` - panoptic segmentation (Panoptic FPN R101)

    Attributes:
        cfg: The Detectron2 config built for the selected model.
        model_type: The task code this detector was created with.
        predictor: The ``DefaultPredictor`` built from ``cfg``.
    """

    # Maps every supported ``model_type`` code to its model-zoo config file.
    # The same path is used to look up the matching pretrained checkpoint.
    _MODEL_CONFIGS = {
        "OD": "COCO-Detection/faster_rcnn_R_101_FPN_3x.yaml",
        "IS": "COCO-InstanceSegmentation/mask_rcnn_R_101_FPN_3x.yaml",
        "KP": "COCO-Keypoints/keypoint_rcnn_R_101_FPN_3x.yaml",
        "PS": "COCO-PanopticSegmentation/panoptic_fpn_R_101_3x.yaml",
    }

    def __init__(self, model_type="OD", device=None, score_thresh=0.7):
        """Load the config and pretrained weights for ``model_type``.

        Args:
            model_type: One of ``"OD"``, ``"IS"``, ``"KP"`` or ``"PS"``.
            device: Value for ``cfg.MODEL.DEVICE`` (e.g. ``"cuda"`` or
                ``"cpu"``). When ``None``, ``"cuda"`` is used if
                ``torch.cuda.is_available()`` and ``"cpu"`` otherwise.
            score_thresh: Value for ``cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST``.

        Raises:
            ValueError: If ``model_type`` is not a supported code.
        """
        # Validate first so a typo fails immediately instead of building a
        # predictor from an empty config.
        config_path = self._resolve_config(model_type)

        if device is None:
            # Imported lazily: only needed for auto-detection.
            import torch

            device = "cuda" if torch.cuda.is_available() else "cpu"

        self.model_type = model_type
        self.cfg = get_cfg()
        self.cfg.merge_from_file(model_zoo.get_config_file(config_path))
        self.cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(config_path)
        self.cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = score_thresh
        self.cfg.MODEL.DEVICE = device

        self.predictor = DefaultPredictor(self.cfg)

    @classmethod
    def _resolve_config(cls, model_type):
        """Return the model-zoo config path for ``model_type`` or raise ValueError."""
        try:
            return cls._MODEL_CONFIGS[model_type]
        except KeyError:
            raise ValueError(
                f"Unknown model_type {model_type!r}; "
                f"expected one of {sorted(cls._MODEL_CONFIGS)}"
            ) from None

    def _render(self, frame):
        """Run the predictor on one frame and return the annotated frame.

        ``frame`` is an array as returned by ``cv2.imread`` /
        ``VideoCapture.read``. The channel axis is reversed before the frame
        is handed to ``Visualizer`` and reversed back on the way out, so the
        returned array can be passed straight to ``cv2.imshow``.
        """
        metadata = MetadataCatalog.get(self.cfg.DATASETS.TRAIN[0])
        flipped = frame[:, :, ::-1]

        if self.model_type == "PS":
            # Panoptic models return a (segmentation map, segment info) pair.
            panoptic_seg, segments_info = self.predictor(frame)["panoptic_seg"]
            viz = Visualizer(flipped, metadata=metadata)
            output = viz.draw_panoptic_seg_predictions(
                panoptic_seg.to("cpu"), segments_info
            )
        else:
            predictions = self.predictor(frame)
            viz = Visualizer(
                flipped, metadata=metadata, instance_mode=ColorMode.IMAGE_BW
            )
            output = viz.draw_instance_predictions(
                predictions["instances"].to("cpu")
            )

        return output.get_image()[:, :, ::-1]

    def image(self, imagePath):
        """Run the selected task on one image file and show the result.

        The annotated image is displayed in a window titled ``"Result"``;
        the call blocks until a key is pressed, then closes the window.

        Args:
            imagePath: Path of the image file to read.

        Raises:
            FileNotFoundError: If ``cv2.imread`` cannot read ``imagePath``.
        """
        image = cv2.imread(imagePath)
        if image is None:
            # cv2.imread signals failure by returning None, not by raising.
            raise FileNotFoundError(f"Could not read image: {imagePath}")

        cv2.imshow("Result", self._render(image))
        cv2.waitKey(0)
        cv2.destroyAllWindows()

    def video(self, videoPath):
        """Run the selected task on every frame of a video and show it live.

        Frames are displayed in a window titled ``"Frame"``. Playback stops
        at the end of the stream or when ``q`` is pressed. If the video
        cannot be opened, a message is printed and the method returns.

        Args:
            videoPath: Path (or stream identifier) given to
                ``cv2.VideoCapture``.
        """
        vid = cv2.VideoCapture(videoPath)

        if not vid.isOpened():
            print("Error opening video stream or file")
            vid.release()
            return

        try:
            success, frame = vid.read()
            while success:
                cv2.imshow('Frame', self._render(frame))
                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break
                success, frame = vid.read()
        finally:
            # Always free the capture handle and the preview window, even if
            # inference raises midway through the stream.
            vid.release()
            cv2.destroyAllWindows()
                
        

What Does the image Method Do in the Detector Class?

  • This method takes the path of an image, runs the task selected by model_type on it, and displays the annotated result. The supported model_type values are:
    • OD : Object Detection
    • KP : Keypoints Detection
    • IS : Instance Segmentation
    • PS : Panoptic Segmentation
"""
    def image(self, imagePath):
        image = cv2.imread(imagePath)
        if self.model_type != "PS":
            predictions = self.predictor(image)
            viz = Visualizer(image[:, :, ::-1], metadata = MetadataCatalog.get(self.cfg.DATASETS.TRAIN[0]),
                            instance_mode = ColorMode.IMAGE_BW)
            output = viz.draw_instance_predictions(predictions["instances"].to("cpu"))
        else:
          predictions, segmentInfo = self.predictor(image)["panoptic_seg"]
          viz = Visualizer(image[:, :, ::-1], metadata = MetadataCatalog.get(self.cfg.DATASETS.TRAIN[0]))
          output = viz.draw_panoptic_seg_predictions(predictions.to("cpu"), segmentInfo)
                            
        gray = cv2.imshow("Result", output.get_image()[:, :, ::-1])
        img = cv2.imread(gray)
        cv2.waitKey(0)
"""

What Does the video Method Do in the Detector Class?

  • This method takes the path of a video, runs the task selected by model_type on every frame, and displays the annotated frames until the video ends or q is pressed. The supported model_type values are:
    • OD : Object Detection
    • KP : Keypoints Detection
    • IS : Instance Segmentation
    • PS : Panoptic Segmentation
"""
def video(self, videoPath):
        vid = cv2.VideoCapture(videoPath)
        
        if (vid.isOpened() == False):
            print("Error opening video stream or file")
            return
        (success, image) = vid.read()
        while success:
            if self.model_type != "PS":
                predictions = self.predictor(image)
                viz = Visualizer(image[:, :, ::-1], metadata = MetadataCatalog.get(self.cfg.DATASETS.TRAIN[0]),
                            instance_mode = ColorMode.IMAGE_BW)
                output = viz.draw_instance_predictions(predictions["instances"].to("cpu"))
            
            else:
                predictions, segmentInfo = self.predictor(image)["panoptic_seg"]
                viz = Visualizer(image[:, :, ::-1], metadata = MetadataCatalog.get(self.cfg.DATASETS.TRAIN[0]))
                output = viz.draw_panoptic_seg_predictions(predictions.to("cpu"), segmentInfo)
            cv2.imshow('Frame', output.get_image()[:, :, ::-1])
            key=cv2.waitKey(1) & 0xFF
            if key == ord('q'):
                break
            (success, image) = vid.read()
"""

Now we perform a task on an image by creating a detector object of the Detector class.

  • We have to specify which task the detector will perform through model_type.
# Build a panoptic-segmentation ("PS") detector and display its result for
# the sample image; the call blocks until a key is pressed in the window.
detector = Detector("PS")
detector.image(imagePath="./images/bike.png")

This code clears the cached GPU memory held by PyTorch so that a CUDA out-of-memory error is less likely to occur.

import gc

import torch

# Collect unreachable Python objects first so that any GPU tensors they still
# hold are freed and their blocks become unused in PyTorch's caching allocator.
gc.collect()

# Hand the allocator's unused cached blocks back to the GPU. Memory that is
# still referenced (for example by a live model object) is NOT released by
# this call; drop such references beforehand if that memory must be reclaimed.
torch.cuda.empty_cache()

Here we will perform a specific task on video.

# Run object detection ("OD") frame by frame on the sample video; press "q"
# in the preview window to stop early.
videoPath = "D://detectron_project//images//vidpred.mp4"
detector = Detector("OD")
detector.video(videoPath=videoPath)