Opencv Adaptive Threshold Ocr

OpenCV Adaptive Threshold OCR

Here is my result:
Sample Image

Here is the code:

#include 
#include
#include
#include
#include "opencv2/opencv.hpp"
#include "fstream"
#include "iostream"
using namespace std;
using namespace cv;

//-----------------------------------------------------------------------------------------------------
//
//-----------------------------------------------------------------------------------------------------
/**
 * Builds a smooth background estimate of a grayscale page image.
 *
 * The image is scanned in square blocks. Blocks whose standard deviation
 * exceeds a small threshold (i.e. blocks that contain detail such as text)
 * are marked, and a down-scaled copy of the image is inpainted over the
 * marked cells so that they are filled in from the surrounding cells.
 * The inpainted small image is then scaled back to the input size.
 *
 * @param Img       Input image. NOTE(review): the x255 conversion below
 *                  suggests single-channel values in [0,1], which is what
 *                  main() passes - confirm for other callers.
 * @param Res       Output: CV_32FC1 image of the same size as Img, scaled to [0,1].
 * @param blockSide Block side in pixels (set greater for larger font on image).
 *                  Only the integer part is used; must be >= 1 and no larger
 *                  than either image dimension.
 */
void CalcBlockMeanVariance(Mat& Img,Mat& Res,float blockSide=21) // blockSide - the parameter (set greater for larger font on image)
{
    // Range() and Mat::at() take integer indices, so work with an integer block size.
    const int side = static_cast<int>(blockSide);
    CV_Assert(side >= 1 && Img.rows >= side && Img.cols >= side);

    Mat I;
    Img.convertTo(I, CV_32FC1);
    Res = Mat::zeros(Img.rows / side, Img.cols / side, CV_32FC1);
    Mat inpaintmask;
    Mat patch;
    Mat smallImg;
    Scalar m, s;

    // Stop one block short of the border so that the (side + 1)-wide patch
    // below never reaches past the last row/column.
    for (int i = 0; i < Img.rows - side; i += side)
    {
        for (int j = 0; j < Img.cols - side; j += side)
        {
            patch = I(Range(i, i + side + 1), Range(j, j + side + 1));
            cv::meanStdDev(patch, m, s);
            if (s[0] > 0.01) // Thresholding parameter (set smaller for lower contrast image)
            {
                Res.at<float>(i / side, j / side) = static_cast<float>(m[0]);
            }
            else
            {
                Res.at<float>(i / side, j / side) = 0;
            }
        }
    }

    // One pixel per block: this is the image that gets inpainted.
    cv::resize(I, smallImg, Res.size());

    // Cells that recorded a block mean above 0.02 become the inpainting mask.
    cv::threshold(Res, inpaintmask, 0.02, 1.0, cv::THRESH_BINARY);

    Mat inpainted;
    smallImg.convertTo(smallImg, CV_8UC1, 255);

    inpaintmask.convertTo(inpaintmask, CV_8UC1);
    inpaint(smallImg, inpaintmask, inpainted, 5, INPAINT_TELEA);

    // Back to full resolution and to the [0,1] float range.
    cv::resize(inpainted, Res, Img.size());
    Res.convertTo(Res, CV_32FC1, 1.0 / 255.0);
}
//-----------------------------------------------------------------------------------------------------
//
//-----------------------------------------------------------------------------------------------------
int main( int argc, char** argv )
{
namedWindow("Img");
namedWindow("Edges");
//Mat Img=imread("D:\\ImagesForTest\\BookPage.JPG",0);
Mat Img=imread("Test2.JPG",0);
Mat res;
Img.convertTo(Img,CV_32FC1,1.0/255.0);
CalcBlockMeanVariance(Img,res);
res=1.0-res;
res=Img+res;
imshow("Img",Img);
cv::threshold(res,res,0.85,1,cv::THRESH_BINARY);
cv::resize(res,res,cv::Size(res.cols/2,res.rows/2));
imwrite("result.jpg",res*255);
imshow("Edges",res);
waitKey(0);

return 0;
}

And Python port:

import cv2 as cv
import numpy as np

#-----------------------------------------------------------------------------------------------------
#
#-----------------------------------------------------------------------------------------------------
def CalcBlockMeanVariance(Img,blockSide=21): # blockSide - the parameter (set greater for larger font on image)
    """Build a smooth background estimate of a grayscale page image.

    The image is scanned in square blocks. Blocks whose standard deviation
    exceeds a small threshold are marked, and a down-scaled copy of the image
    is inpainted over the marked cells. The inpainted image is then resized
    back to the input size.

    Args:
        Img: 2-D image array; it is divided by 255 below, so 8-bit input is
            presumably expected (the driver passes a grayscale imread result).
        blockSide: block side in pixels (set greater for larger font on image).

    Returns:
        float32 array with the same height and width as Img, scaled by 1/255.
    """
    I = np.float32(Img) / 255.0
    # np.float was removed in NumPy 1.24; float64 is the type it aliased.
    Res = np.zeros(shape=(Img.shape[0] // blockSide, Img.shape[1] // blockSide), dtype=np.float64)

    for i in range(0, Img.shape[0] - blockSide, blockSide):
        for j in range(0, Img.shape[1] - blockSide, blockSide):
            patch = I[i:i + blockSide + 1, j:j + blockSide + 1]
            m, s = cv.meanStdDev(patch)
            # meanStdDev returns (1, 1) arrays for single-channel input;
            # index down to scalars before comparing/assigning.
            if s[0, 0] > 0.001: # Thresholding parameter (set smaller for lower contrast image)
                Res[i // blockSide, j // blockSide] = m[0, 0]
            else:
                Res[i // blockSide, j // blockSide] = 0

    # One pixel per block: this is the image that gets inpainted.
    smallImg = cv.resize(I, (Res.shape[1], Res.shape[0]))
    # Cells that recorded a block mean above 0.02 become the inpainting mask.
    _, inpaintmask = cv.threshold(Res, 0.02, 1.0, cv.THRESH_BINARY)
    smallImg = np.uint8(smallImg * 255)

    inpaintmask = np.uint8(inpaintmask)
    inpainted = cv.inpaint(smallImg, inpaintmask, 5, cv.INPAINT_TELEA)
    Res = cv.resize(inpainted, (Img.shape[1], Img.shape[0]))
    Res = np.float32(Res) / 255
    return Res

#-----------------------------------------------------------------------------------------------------
#
#-----------------------------------------------------------------------------------------------------

# Driver: load a page image, remove the estimated background, binarise,
# then show and save the half-size result.
cv.namedWindow("Img")
cv.namedWindow("Edges")
Img = cv.imread("F:\\ImagesForTest\\BookPage.JPG", 0)
if Img is None:
    # cv.imread returns None instead of raising when the file cannot be read.
    raise SystemExit("Could not read image: F:\\ImagesForTest\\BookPage.JPG")

res = CalcBlockMeanVariance(Img)
res = 1.0 - res
Img = np.float32(Img) / 255
# Add the inverted background estimate to the [0,1] image.
res = Img + res
cv.imshow("Img", Img)
_, res = cv.threshold(res, 0.85, 1, cv.THRESH_BINARY)
res = cv.resize(res, (res.shape[1] // 2, res.shape[0] // 2))
cv.imwrite("result.jpg", res * 255)
cv.imshow("Edges", res)
cv.waitKey(0)

OpenCV binary adaptive threshold OCR

I think you can do your thresholding using Otsu method. You can apply it on your whole image or on the blocks of the image. I did the following steps:

  • thresholding using Otsu method on desired input.
  • Closing the result.

Python Code

image = cv2.imread('image4.png', cv2.IMREAD_GRAYSCALE) # reading image
if image is None:
    # Function-call form of print works on both Python 2 and Python 3.
    print('Can not find the image!')
    exit(-1)
# thresholding image using Otsu's method
ret, thresh = cv2.threshold(image, 0, 255, cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU)
# applying closing operation using ellipse kernel
N = 3
kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (N, N))
thresh = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel)
# showing the result
cv2.imshow('thresh', thresh)
cv2.waitKey(0)
cv2.destroyAllWindows()

Explanation

In the first part, I read the input image using imread and checked that the image was opened correctly.

image = cv2.imread('image4.png', cv2.IMREAD_GRAYSCALE) # reading image
if image is None:
    # Function-call form of print works on both Python 2 and Python 3.
    print('Can not find the image!')
    exit(-1)

Next, I threshold the image with Otsu's method by passing THRESH_BINARY_INV | THRESH_OTSU as the type argument of cv2.threshold. Otsu's method solves an optimization problem to find the best threshold value for the image. Because the threshold is computed automatically, the 0 passed as the threshold argument is ignored, and 255 is the maximum value, i.e. the value written to the pixels selected by the threshold.

# Inverted binary threshold combined with the Otsu flag; `ret` receives the
# first value returned by cv2.threshold and `thresh` the thresholded image.
ret, thresh = cv2.threshold(image, 0, 255, cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU)

And a closing operation is done for removing black holes in the image using an Ellipse kernel.

# N x N elliptical structuring element (the full listing sets N = 3).
kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (N, N))
# Morphological closing of the thresholded image with that kernel.
thresh = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel)

Result

Figure 1

Proper image thresholding to prepare it for OCR in python using opencv

You can try applying medianBlur to the gray image with different kernel sizes (such as 3 and 51), dividing the blurred results, and thresholding the quotient. Something like this:

Sample Image


#!/usr/bin/python3
# 2018/09/23 17:29 (CST)
# (Happy Mid-Autumn Festival)

import cv2
import numpy as np

# Divide a lightly blurred copy of the image by a heavily blurred copy,
# rescale the quotient to 0..255, threshold it, and save every stage
# stacked vertically in one image.
source_path = "color.png"
first_channel = cv2.imread(source_path)[..., 0]  # keep only channel 0 of the loaded image

fine_blur = cv2.medianBlur(first_channel, 3)
coarse_blur = cv2.medianBlur(first_channel, 51)

# Masked division; .data takes the underlying array of the masked result.
ratio = np.ma.divide(fine_blur, coarse_blur).data
peak = ratio.max()
scaled = np.uint8(255 * ratio / peak)
_, binary = cv2.threshold(scaled, 100, 255, cv2.THRESH_OTSU)

stages = (first_channel, fine_blur, coarse_blur, scaled, binary)
cv2.imwrite("dst.png", np.vstack(stages))

The result:

Sample Image

OpenCV output of Adaptive Threshold

The main issue is that the result of adaptiveThreshold has gaps in the external edge, so you can't use it as input to findContours.

I think that using GaussianBlur makes things worse, because it blurs the edge between the hand and the background.

You may use the following stages:

  • Convert frame to Grayscale.
  • Apply adaptiveThreshold with large kernel size (I used size 51).

    Using a large kernel size, keeps a thick edge line without gaps (except from a small gap at the fingernail).
  • Find contours.

    Find the contour with the maximum area.
  • Draw the contour (fill with solid value of 255) on a zeros image.

    There is a problem: the inner part of the hand is not filled due to the weird shape of the contour.
  • To complete the filling:

    Find the center of the contour, and fill it using floodFill.

Here is a Python code sample:

import numpy as np
import cv2

frame = cv2.imread("hand.jpg") # Read image from file (for testing).
if frame is None:
    # cv2.imread returns None instead of raising when the file cannot be read.
    raise SystemExit("Could not read image: hand.jpg")

gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY) # Use BGR to Gray conversion (not RGBA, because image is read from file)

# Apply adaptiveThreshold with large filter size.
thres_gray = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV, 51, 2)

# Find contours (external contours).
# OpenCV 3.x returns (image, contours, hierarchy) while 2.x/4.x return
# (contours, hierarchy); taking the last two items works on all of them.
cnts, hier = cv2.findContours(thres_gray, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)[-2:]
if len(cnts) == 0:
    raise SystemExit("No contours found in hand.jpg")

# Find contour with the maximum area
c = max(cnts, key=cv2.contourArea)

res = np.zeros_like(gray) # Create new zeros images for storing the result.

# Fill the contour with white color - draw the filled contour on res image.
cv2.drawContours(res, [c], -1, 255, -1)

# Compute the center of the contour
# https://www.pyimagesearch.com/2016/02/01/opencv-center-of-contour/
M = cv2.moments(c)
if M["m00"] != 0:
    cX = int(M["m10"] / M["m00"])
    cY = int(M["m01"] / M["m00"])

    # Use floodFill for filling the center of the contour
    cv2.floodFill(res, None, (cX, cY), 255)
# else: a zero-area contour has no centroid, so there is nothing to flood-fill.

# Show images for testing
cv2.imshow('thres_gray', thres_gray)
cv2.imshow('res', res)
cv2.waitKey()
cv2.destroyAllWindows()

Results:

thres_gray:

Sample Image

res before floodFill:

Sample Image

res after floodFill:

Sample Image


JAVA implementation:

package myproject;

import org.opencv.core.Core;
import org.opencv.core.Mat;
import org.opencv.core.CvType;
import org.opencv.core.Scalar;
import org.opencv.core.Point;
import org.opencv.core.MatOfPoint;
import org.opencv.imgproc.Imgproc;
import org.opencv.imgproc.Moments;
import org.opencv.imgcodecs.Imgcodecs;
import java.util.List;
import java.util.ArrayList;

/**
 * Segments the largest external contour of "hand.jpg" into a filled binary
 * mask and writes it to "res.png".
 */
class Sample {

    static { System.loadLibrary(Core.NATIVE_LIBRARY_NAME); }

    /**
     * Runs the pipeline: grayscale, adaptive threshold, external contours,
     * draw the largest contour filled, then flood-fill from its centroid.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        Mat frame = Imgcodecs.imread("hand.jpg");
        if (frame.empty()) {
            // imread signals failure with an empty Mat rather than an exception.
            System.err.println("Could not read image: hand.jpg");
            return;
        }

        Mat gray = new Mat();
        Mat thres_gray = new Mat();

        Imgproc.cvtColor(frame, gray, Imgproc.COLOR_BGR2GRAY);

        //Apply adaptiveThreshold with large filter size.
        Imgproc.adaptiveThreshold(gray, thres_gray, 255, Imgproc.ADAPTIVE_THRESH_GAUSSIAN_C, Imgproc.THRESH_BINARY_INV, 51, 2);

        // Typed list: findContours and contourArea both work on MatOfPoint,
        // and a raw List would make contours.get(i) an Object.
        List<MatOfPoint> contours = new ArrayList<>();
        Mat hierarchy = new Mat();

        //Find contours
        Imgproc.findContours(thres_gray, contours, hierarchy, Imgproc.RETR_EXTERNAL, Imgproc.CHAIN_APPROX_NONE);
        if (contours.isEmpty()) {
            System.err.println("No contours found in hand.jpg");
            return;
        }

        //Find contour with the maximum area
        //https://stackoverflow.com/questions/38759925/how-to-find-largest-contour-in-java-opencv
        double maxVal = 0;
        int maxValIdx = 0;
        for (int contourIdx = 0; contourIdx < contours.size(); contourIdx++) {
            double contourArea = Imgproc.contourArea(contours.get(contourIdx));
            if (maxVal < contourArea) {
                maxVal = contourArea;
                maxValIdx = contourIdx;
            }
        }

        Mat res = Mat.zeros(gray.size(), CvType.CV_8UC1); //Create new zeros images for storing the result.

        Imgproc.drawContours(res, contours, maxValIdx, new Scalar(255), -1);

        //Compute the center of the contour
        //https://www.pyimagesearch.com/2016/02/01/opencv-center-of-contour/
        Moments M = Imgproc.moments(contours.get(maxValIdx));
        if (M.get_m00() != 0) {
            int cX = (int) (M.get_m10() / M.get_m00());
            int cY = (int) (M.get_m01() / M.get_m00());

            //Use floodFill for filling the center of the contour.
            // The mask is allocated 2 pixels larger than the image in each dimension.
            Mat mask = Mat.zeros(res.rows() + 2, res.cols() + 2, CvType.CV_8UC1);
            Imgproc.floodFill(res, mask, new Point(cX, cY), new Scalar(255));
        }
        // else: a zero-area contour has no centroid, so there is nothing to flood-fill.

        Imgcodecs.imwrite("res.png", res);
    }

}

Way to apply smart thresholding in images

One approach is division normalization in Python/OpenCV.

Input:

Sample Image

import cv2
import numpy as np

# load image
img = cv2.imread("hebrew_text.jpg")

# convert to grayscale
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

# blur
blur = cv2.GaussianBlur(gray, (99,99), 0)

# divide
divide = cv2.divide(gray, blur, scale=255)

# write result to disk
cv2.imwrite("hebrew_text_division.png", divide)

# display it
#cv2.imshow("thresh", thresh)
cv2.imshow("gray", gray)
cv2.imshow("divide", divide)
cv2.waitKey(0)
cv2.destroyAllWindows()

Result:

Sample Image

After doing this, you may want to threshold and then clean it up by getting contours and discarding any contour with an area less than the size of your smallest accent mark.

I would also suggest, if possible, saving your images as PNG rather than JPG. JPG has lossy compression and introduces changes in color. This may be the source of some of the issues you have with extraneous marks in the background.

OpenCV - How to apply adaptive threshold to an image on iOS

You can use the OpenCV framework and implement the code below:

 /// Returns a contrast-adjusted grayscale version of the given image.
 ///
 /// The image is converted to a single-channel matrix and every value p is
 /// remapped to 1.4 * p - 50 via cv::Mat::convertTo, keeping the same depth.
 ///
 /// @param processedImage The image to convert.
 /// @return A new UIImage built from the adjusted matrix.
 +(UIImage *)blackandWhite:(UIImage *)processedImage
 {
     cv::Mat original = [MMOpenCVHelper cvMatGrayFromAdjustedUIImage:processedImage];

     // convertTo allocates its destination, so no zero-filled matrix is needed
     // up front. cv::Mat is reference-counted and frees its buffer when it goes
     // out of scope, so explicit release() calls are unnecessary as well.
     cv::Mat adjusted;
     original.convertTo(adjusted, -1, 1.4, -50);

     return [MMOpenCVHelper UIImageFromCVMat:adjusted];
 }

/// Converts a UIImage to a single-channel 8-bit grayscale matrix.
///
/// @param image The source image.
/// @return The matrix from cvMatFromAdjustedUIImage: unchanged when it already
///         has one channel, otherwise its grayscale conversion.
+ (cv::Mat)cvMatGrayFromAdjustedUIImage:(UIImage *)image
{
    cv::Mat cvMat = [self cvMatFromAdjustedUIImage:image];
    if (cvMat.channels() == 1) {
        return cvMat;
    }

    // cvMatFromAdjustedUIImage: renders through a bitmap context created with
    // kCGImageAlphaNoneSkipLast | kCGBitmapByteOrderDefault, i.e. bytes are laid
    // out R, G, B, X. Using the BGR conversion here would swap the red and blue
    // luminance weights, so convert from RGBA instead.
    cv::Mat grayMat;
    cv::cvtColor(cvMat, grayMat, cv::COLOR_RGBA2GRAY);
    return grayMat;
}

/// Renders a UIImage into a 4-channel, 8-bit-per-component matrix.
///
/// The matrix has image.size.height rows and image.size.width columns
/// (fractions truncated). Pixels are written by Core Graphics with
/// kCGImageAlphaNoneSkipLast | kCGBitmapByteOrderDefault.
///
/// @param image The source image.
/// @return A CV_8UC4 matrix; it is left zero-filled if the image has no
///         CGImage or the bitmap context cannot be created.
+ (cv::Mat)cvMatFromAdjustedUIImage:(UIImage *)image
{
    CGImageRef cgImage = image.CGImage;
    CGFloat cols = image.size.width;
    CGFloat rows = image.size.height;

    // Zero-filled so that a failed render never exposes uninitialised memory.
    cv::Mat cvMat = cv::Mat::zeros((int)rows, (int)cols, CV_8UC4); // 8 bits per component, 4 channels
    if (cgImage == NULL || cvMat.empty()) {
        return cvMat;
    }

    // An 8-bit, 4-component skip-last bitmap needs an RGB colour space. Images
    // that carry a gray/indexed/CMYK space would make CGBitmapContextCreate
    // return NULL, so fall back to device RGB for those.
    CGColorSpaceRef colorSpace = CGImageGetColorSpace(cgImage);
    CGColorSpaceRef fallbackColorSpace = NULL;
    if (colorSpace == NULL || CGColorSpaceGetModel(colorSpace) != kCGColorSpaceModelRGB) {
        fallbackColorSpace = CGColorSpaceCreateDeviceRGB();
        colorSpace = fallbackColorSpace;
    }

    CGContextRef contextRef = CGBitmapContextCreate(cvMat.data,      // Pointer to backing data
                                                    cvMat.cols,      // Width of bitmap
                                                    cvMat.rows,      // Height of bitmap
                                                    8,               // Bits per component
                                                    cvMat.step[0],   // Bytes per row
                                                    colorSpace,      // Colorspace
                                                    kCGImageAlphaNoneSkipLast |
                                                    kCGBitmapByteOrderDefault);

    if (contextRef != NULL) {
        CGContextDrawImage(contextRef, CGRectMake(0, 0, cols, rows), cgImage);
        CGContextRelease(contextRef);
    }

    // Only the fallback space was created here; the image's own colour space
    // was obtained with a Get function and is not owned by this method.
    if (fallbackColorSpace != NULL) {
        CGColorSpaceRelease(fallbackColorSpace);
    }
    return cvMat;
}


/// Wraps the pixels of an 8-bit matrix in a new UIImage.
///
/// A matrix whose element size is 1 byte is treated as grayscale; anything
/// else is treated as RGB data.
///
/// @param cvMat The source matrix. NOTE(review): 8 bits per component is
///              hard-coded below, so 8-bit depth is assumed - confirm callers.
/// @return A UIImage backed by a copy of the matrix data.
+ (UIImage *)UIImageFromCVMat:(cv::Mat)cvMat
{
    // The byte count below assumes tightly packed rows; clone ROI/padded
    // matrices so that the data length and the row stride agree.
    cv::Mat pixels = cvMat.isContinuous() ? cvMat : cvMat.clone();

    NSData *data = [NSData dataWithBytes:pixels.data length:pixels.elemSize() * pixels.total()];

    CGColorSpaceRef colorSpace;
    CGImageAlphaInfo alphaInfo = kCGImageAlphaNone;

    if (pixels.elemSize() == 1) {
        colorSpace = CGColorSpaceCreateDeviceGray();
    } else {
        colorSpace = CGColorSpaceCreateDeviceRGB();
        if (pixels.channels() == 4) {
            // 32 bits per pixel cannot be described as "no alpha" with three
            // colour components; use the same skip-last layout that
            // cvMatFromAdjustedUIImage: writes.
            alphaInfo = kCGImageAlphaNoneSkipLast;
        }
    }

    CGDataProviderRef provider = CGDataProviderCreateWithCFData((__bridge CFDataRef)data);

    CGImageRef imageRef = CGImageCreate(pixels.cols,                             // Width
                                        pixels.rows,                             // Height
                                        8,                                       // Bits per component
                                        8 * pixels.elemSize(),                   // Bits per pixel
                                        pixels.step[0],                          // Bytes per row
                                        colorSpace,                              // Colorspace
                                        alphaInfo | kCGBitmapByteOrderDefault,   // Bitmap info flags
                                        provider,                                // CGDataProviderRef
                                        NULL,                                    // Decode
                                        false,                                   // Should interpolate
                                        kCGRenderingIntentDefault);              // Intent

    UIImage *image = [[UIImage alloc] initWithCGImage:imageRef];
    CGImageRelease(imageRef);
    CGDataProviderRelease(provider);
    CGColorSpaceRelease(colorSpace);

    return image;
}

It's working for me; check the output for your document: Sample Image



Related Topics



Leave a reply



Submit