Extracting Text Opencv

Extracting text OpenCV

You can detect text by finding close edge elements (inspired from a LPD):

#include "opencv2/opencv.hpp"

std::vector<cv::Rect> detectLetters(cv::Mat img)
{
std::vector<cv::Rect> boundRect;
cv::Mat img_gray, img_sobel, img_threshold, element;
cvtColor(img, img_gray, CV_BGR2GRAY);
cv::Sobel(img_gray, img_sobel, CV_8U, 1, 0, 3, 1, 0, cv::BORDER_DEFAULT);
cv::threshold(img_sobel, img_threshold, 0, 255, CV_THRESH_OTSU+CV_THRESH_BINARY);
element = getStructuringElement(cv::MORPH_RECT, cv::Size(17, 3) );
cv::morphologyEx(img_threshold, img_threshold, CV_MOP_CLOSE, element); //Does the trick
std::vector< std::vector< cv::Point> > contours;
cv::findContours(img_threshold, contours, 0, 1);
std::vector<std::vector<cv::Point> > contours_poly( contours.size() );
for( int i = 0; i < contours.size(); i++ )
if (contours[i].size()>100)
{
cv::approxPolyDP( cv::Mat(contours[i]), contours_poly[i], 3, true );
cv::Rect appRect( boundingRect( cv::Mat(contours_poly[i]) ));
if (appRect.width>appRect.height)
boundRect.push_back(appRect);
}
return boundRect;
}

Usage:

int main(int argc,char** argv)
{
//Read
cv::Mat img1=cv::imread("side_1.jpg");
cv::Mat img2=cv::imread("side_2.jpg");
//Detect
std::vector<cv::Rect> letterBBoxes1=detectLetters(img1);
std::vector<cv::Rect> letterBBoxes2=detectLetters(img2);
//Display
for(int i=0; i< letterBBoxes1.size(); i++)
cv::rectangle(img1,letterBBoxes1[i],cv::Scalar(0,255,0),3,8,0);
cv::imwrite( "imgOut1.jpg", img1);
for(int i=0; i< letterBBoxes2.size(); i++)
cv::rectangle(img2,letterBBoxes2[i],cv::Scalar(0,255,0),3,8,0);
cv::imwrite( "imgOut2.jpg", img2);
return 0;
}

Results:

a. element = getStructuringElement(cv::MORPH_RECT, cv::Size(17, 3) );
imgOut1
imgOut2

b. element = getStructuringElement(cv::MORPH_RECT, cv::Size(30, 30) );
imgOut1
imgOut2

Results are similar for the other image mentioned.

how to extract text using opencv and pytesseract python?

To get xmin you can use xpath() with '//annotation/object/bndbox/xmin' or even shorter '//xmin'

It always gives list (even if there is only one element or there are no elements) so it will need [0] to get first element or for-loop to work with all elements.

Using if list_of_elelemts: ... you can run code only when list has some elements.

You can also use len() to check how many elements you get.

text = '''
<annotation>
<folder>Test Images</folder>
<filename>FreKa.jpg</filename>
<path>/home/sumit/Desktop/office_works/Fusion_Code/BIS_Final/Test Images/FreKa.jpg</path>
<source>
<database>Unknown</database>
</source>
<size>
<width>679</width>
<height>341</height>
<depth>3</depth>
</size>
<segmented>0</segmented>
<object>
<name>Contact Type</name>
<pose>Unspecified</pose>
<truncated>1</truncated>
<difficult>0</difficult>
<bndbox>
<xmin>1</xmin>
<ymin>100</ymin>
<xmax>678</xmax>
<ymax>157</ymax>
</bndbox>
</object>
</annotation>
'''

import lxml.etree

tree = lxml.etree.fromstring(text)

print('xmin:', tree.xpath("//annotation/object/bndbox/xmin")[0].text)
print('xmin:', tree.xpath("//bndbox/xmin")[0].text)
print('xmin:', tree.xpath("//object//xmin")[0].text)
print('xmin:', tree.xpath("//xmin")[0].text)

print('xmin:', tree.xpath("//xmin/text()")[0]) # with `text()` instead of `.text`

for item in tree.xpath("//xmin/text()"):
print('xmin:', item) # with `text()` instead of `.text`

objects = tree.xpath("//object")
print('len(objects):', len(objects))

other = tree.xpath("//bndbox/other")
if other:
print('found', len(other), 'elements')
else:
print('there is no "other" elements')

How to process and extract text from image

Preprocessing to clean the image before performing text extraction can help. Here's a simple approach

  • Convert image to grayscale and sharpen image
  • Adaptive threshold
  • Perform morpholgical operations to clean image
  • Invert image

First we convert to grayscale then sharpen the image using a sharpening kernel

Sample Image

Next we adaptive threshold to obtain a binary image

Sample Image

Now we perform morphological transformations to smooth the image

Sample Image

Finally we invert the image

Sample Image

import cv2
import numpy as np

image = cv2.imread('1.jpg')
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
sharpen_kernel = np.array([[-1,-1,-1], [-1,9,-1], [-1,-1,-1]])
sharpen = cv2.filter2D(gray, -1, sharpen_kernel)
thresh = cv2.threshold(sharpen, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]

kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3,3))
close = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel, iterations=1)
result = 255 - close

cv2.imshow('sharpen', sharpen)
cv2.imshow('thresh', thresh)
cv2.imshow('close', close)
cv2.imshow('result', result)
cv2.waitKey()


Related Topics



Leave a reply



Submit