Instance Segmentation, Object Detection, Keypoints Detection and Panoptic Segmentation Using Detectron
All about Implementation of Detectron.
- Installing Dependencies for the Detectron.
- Performing Object Detection Using Detectron.
- Loading Pre-Trained model weights and config file.
Creating Conda Environment
conda create -n detecron python=3.8 -y
conda activate detecron
Installing Pytorch
conda install pytorch torchvision torchaudio cudatoolkit=11.0 -c pytorch
Cloning Detectron Repository From Following Command In Your Current Working Directory
git clone https://github.com/facebookresearch/detectron2.git
Move To The Cloned Repository In Your Current Working Directory And Install All the Setup By Running Following Command.
cd detectron2
pip install -e .
After completing all of the steps above, you are ready to go.
from detectron2 import model_zoo
from detectron2.engine import DefaultTrainer, DefaultPredictor
from detectron2.data import MetadataCatalog
from detectron2.config import get_cfg
from detectron2.utils.visualizer import ColorMode, Visualizer
import matplotlib.pyplot as plt
import cv2
import numpy as np
Loading Pre-Trained model weights and config file.
-
In this cell we are doing four tasks. They are:
-
Object Detection :
- Here multiple objects are detected in the image simply by loading a pretrained model that was trained on the COCO dataset: COCO-Detection/faster_rcnn_R_101_FPN_3x.yaml.
-
In figure below model detects bike and a person.
-
Keypoints Detection :
- In this step we detect the objects and simultaneously detect their keypoints. This pretrained model only detects the keypoints of persons.
-
The pretrained model is COCO-Keypoints/keypoint_rcnn_R_101_FPN_3x.yaml, which will be downloaded during execution.
-
Instance Segmentation :
- Instance segmentation is the task of detecting and delineating each distinct object of interest appearing in an image.
-
This is done by downloading pretrained model that is COCO-InstanceSegmentation/mask_rcnn_R_101_FPN_3x.yaml
-
Panoptic Segmentation :
- Panoptic segmentation is an image segmentation task that combines the prediction from both instance and semantic segmentation into a general unified output.
-
This is done by downloading pretrained model that is COCO-PanopticSegmentation/panoptic_fpn_R_101_3x.yaml
-
class Detector:
    """Run a pretrained Detectron2 COCO model on images or videos.

    The task is selected with ``model_type``:

    - ``"OD"``: object detection (Faster R-CNN R101-FPN)
    - ``"IS"``: instance segmentation (Mask R-CNN R101-FPN)
    - ``"KP"``: keypoint detection (Keypoint R-CNN R101-FPN)
    - ``"PS"``: panoptic segmentation (Panoptic FPN R101)
    """

    # Maps every supported task code to its model-zoo config path. The same
    # path is used to fetch both the config file and the checkpoint URL.
    _MODEL_CONFIGS = {
        "OD": "COCO-Detection/faster_rcnn_R_101_FPN_3x.yaml",
        "IS": "COCO-InstanceSegmentation/mask_rcnn_R_101_FPN_3x.yaml",
        "KP": "COCO-Keypoints/keypoint_rcnn_R_101_FPN_3x.yaml",
        "PS": "COCO-PanopticSegmentation/panoptic_fpn_R_101_3x.yaml",
    }

    def __init__(self, model_type="OD", device=None, score_threshold=0.7):
        """Load the config and pretrained weights for ``model_type``.

        Args:
            model_type: One of ``"OD"``, ``"IS"``, ``"KP"`` or ``"PS"``.
            device: Device string written to ``cfg.MODEL.DEVICE`` (e.g.
                ``"cuda"`` or ``"cpu"``). When ``None``, CUDA is used if
                PyTorch reports it as available, otherwise the CPU.
            score_threshold: Value written to
                ``cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST``.

        Raises:
            ValueError: If ``model_type`` is not a supported task code.
        """
        # Validate first: an unknown code would otherwise build a predictor
        # from the default config without any pretrained weights.
        if model_type not in self._MODEL_CONFIGS:
            supported = ", ".join(sorted(self._MODEL_CONFIGS))
            raise ValueError(
                f"Unsupported model_type {model_type!r}; expected one of: {supported}"
            )

        self.model_type = model_type
        self.cfg = get_cfg()

        # Load model config and pretrained model weights.
        config_path = self._MODEL_CONFIGS[model_type]
        self.cfg.merge_from_file(model_zoo.get_config_file(config_path))
        self.cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(config_path)
        self.cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = score_threshold

        if device is None:
            # Imported locally so the class does not depend on a module-level
            # torch import being present in the file.
            import torch

            device = "cuda" if torch.cuda.is_available() else "cpu"
        self.cfg.MODEL.DEVICE = device

        self.predictor = DefaultPredictor(self.cfg)

    def _annotate(self, frame):
        """Run the predictor on one BGR frame and return the annotated BGR image."""
        metadata = MetadataCatalog.get(self.cfg.DATASETS.TRAIN[0])
        outputs = self.predictor(frame)
        # OpenCV frames are BGR; the channel order is reversed before the
        # frame is handed to the Visualizer and reversed back on return.
        rgb_frame = frame[:, :, ::-1]

        if self.model_type == "PS":
            panoptic_seg, segments_info = outputs["panoptic_seg"]
            viz = Visualizer(rgb_frame, metadata=metadata)
            drawn = viz.draw_panoptic_seg_predictions(
                panoptic_seg.to("cpu"), segments_info
            )
        else:
            viz = Visualizer(
                rgb_frame, metadata=metadata, instance_mode=ColorMode.IMAGE_BW
            )
            drawn = viz.draw_instance_predictions(outputs["instances"].to("cpu"))

        return drawn.get_image()[:, :, ::-1]

    def image(self, imagePath):
        """Run the configured task on one image and display the result.

        The annotated image is shown in a window named "Result"; the call
        blocks until a key is pressed and then closes that window.

        Args:
            imagePath: Path of the image file to read.

        Raises:
            FileNotFoundError: If OpenCV cannot read ``imagePath``.
        """
        frame = cv2.imread(imagePath)
        if frame is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise FileNotFoundError(f"Could not read image: {imagePath}")

        cv2.imshow("Result", self._annotate(frame))
        cv2.waitKey(0)
        cv2.destroyWindow("Result")

    def video(self, videoPath):
        """Run the configured task on every frame of a video and display it.

        Annotated frames are shown in a window named 'Frame'. Playback stops
        when the stream ends or when 'q' is pressed.

        Args:
            videoPath: Path (or stream identifier) passed to
                ``cv2.VideoCapture``.
        """
        vid = cv2.VideoCapture(videoPath)
        if not vid.isOpened():
            print("Error opening video stream or file")
            vid.release()
            return

        window_shown = False
        try:
            success, frame = vid.read()
            while success:
                cv2.imshow('Frame', self._annotate(frame))
                window_shown = True
                key = cv2.waitKey(1) & 0xFF
                if key == ord('q'):
                    break
                success, frame = vid.read()
        finally:
            # Always free the capture handle, even if inference raises.
            vid.release()
            if window_shown:
                cv2.destroyWindow('Frame')
What Does the image Method Do in the Detector Class?
- This method simply takes an image as input and displays the result of the task that was selected in the previous steps. The available tasks are:
- OD : Object Detection
- KP : Keypoints Detection
- IS : Instance Segmentation
- PS : Panoptic Segmentation
"""
def image(self, imagePath):
image = cv2.imread(imagePath)
if self.model_type != "PS":
predictions = self.predictor(image)
viz = Visualizer(image[:, :, ::-1], metadata = MetadataCatalog.get(self.cfg.DATASETS.TRAIN[0]),
instance_mode = ColorMode.IMAGE_BW)
output = viz.draw_instance_predictions(predictions["instances"].to("cpu"))
else:
predictions, segmentInfo = self.predictor(image)["panoptic_seg"]
viz = Visualizer(image[:, :, ::-1], metadata = MetadataCatalog.get(self.cfg.DATASETS.TRAIN[0]))
output = viz.draw_panoptic_seg_predictions(predictions.to("cpu"), segmentInfo)
gray = cv2.imshow("Result", output.get_image()[:, :, ::-1])
img = cv2.imread(gray)
cv2.waitKey(0)
"""
What Does the video Method Do in the Detector Class?
- This method takes a video as input and displays each frame with the result of the task that was selected in the previous steps. The available tasks are:
- OD : Object Detection
- KP : Keypoints Detection
- IS : Instance Segmentation
- PS : Panoptic Segmentation
"""
def video(self, videoPath):
vid = cv2.VideoCapture(videoPath)
if (vid.isOpened() == False):
print("Error opening video stream or file")
return
(success, image) = vid.read()
while success:
if self.model_type != "PS":
predictions = self.predictor(image)
viz = Visualizer(image[:, :, ::-1], metadata = MetadataCatalog.get(self.cfg.DATASETS.TRAIN[0]),
instance_mode = ColorMode.IMAGE_BW)
output = viz.draw_instance_predictions(predictions["instances"].to("cpu"))
else:
predictions, segmentInfo = self.predictor(image)["panoptic_seg"]
viz = Visualizer(image[:, :, ::-1], metadata = MetadataCatalog.get(self.cfg.DATASETS.TRAIN[0]))
output = viz.draw_panoptic_seg_predictions(predictions.to("cpu"), segmentInfo)
cv2.imshow('Frame', output.get_image()[:, :, ::-1])
key=cv2.waitKey(1) & 0xFF
if key == ord('q'):
break
(success, image) = vid.read()
"""
Now we perform a task on an image by creating a detector object of class Detector.
- It will perform a specific task, but we have to specify which task the detector should perform.
# Create a detector configured for panoptic segmentation ("PS") and run it on a single image.
detector = Detector(model_type="PS")
detector.image("./images/bike.png")
This code clears the cached memory of the dedicated GPU so that a CUDA out-of-memory error does not occur.
# Ask PyTorch to release the GPU memory it is caching but not currently using.
import torch
torch.cuda.empty_cache()
Here we will perform a specific task on video.
# Create a detector configured for object detection ("OD") and run it on a video file.
detector = Detector(model_type="OD")
videoPath = "D://detectron_project//images//vidpred.mp4"
detector.video(videoPath)