Converting Cv::Mat for Tesseract

converting cv::Mat for tesseract

tesseract::TessBaseAPI tess; 
cv::Mat sub = image(cv::Rect(50, 200, 300, 100));
tess.SetImage((uchar*)sub.data, sub.size().width, sub.size().height, sub.channels(), sub.step1());
tess.Recognize(0);
const char* out = tess.GetUTF8Text();

Different Tesseract result for Mat and Pix

OpenCV imread function by default reads image as colored, which means you get pixels as BGRBGRBGR....

In your example you are assuming opencv image is grayscale, so there are 2 ways of fixing that:

Change your SetImage line according to number of channels in opencv image
ocr.SetImage((uchar*)image.data, image.size().width, simageb.size().height, image.channels(), image.step1());
Convert your opencv image to grayscale with 1 channel
cv::cvtColor(image, image, CV_BGR2GRAY);

Convert between OpenCV Mat and Leptonica Pix

I know that i'm about 4 years late and nobody cares, but this task has frustrated me a lot 'cause leptonica's documentation is just awful.

The problem with @loot's answer is that it does not work with colored images, so here is my (maybe crude) modification of his pix8ToMat:

cv::Mat pixToMat(Pix *pix) {
    int width = pixGetWidth(pix);
    int height = pixGetHeight(pix);
    int depth = pixGetDepth(pix);

    cv::Mat mat(cv::Size(width, height), depth == 1 ? CV_8UC1 : CV_8UC3);

    for (uint32_t y = 0; y < height; ++y) {
        for (uint32_t x = 0; x < width; ++x) {
            if (depth == 1) {
                l_uint32 val;
                pixGetPixel(pix, x, y, &val);
                mat.at<uchar>(cv::Point(x, y)) = static_cast<uchar>(255 * val);
            } else {
                l_int32 r, g, b;
                pixGetRGBPixel(pix, x, y, &r, &g, &b);

                cv::Vec3b color(b, g, r);
                mat.at<cv::Vec3b>(cv::Point(x, y)) = color;
            }
        }
    }

    return mat;
}

OpenCV / Tesseract: How to replace libpng, libtiff etc with GDI+ Bitmap (Load into cv::Mat via GDI+)

I could not find a ready class so I wrote my own:

I hope it will be useful for someone and I hope it will be included into the OpenCV project as an optional Add-On for Windows users.

Advantages:

Getting rid of several libraries that are already implemented into Windows,
Unicode support,
The Bitmap can be passed directly to C# applications.

When you study the code you will see that there are several pitfalls and that the conversion between cv::Mat and Gdiplus::Bitmap is not as trivial as it might seem.

NOTE: This code supports black/white (2 bit), gray scale palette (8 bit), 24 bit RGB and 32 bit ARGB images. Color palette images are not supported. But this does not matter as OpenCV doesn't support them neither and .NET also has a very limited support for them.

HEADER FILE:

#pragma once

#include <gdiplus.h>
#pragma comment(lib, "gdiplus.lib")

// IMPORTANT:
// This must be included AFTER gdiplus !!
// (OpenCV #undefine's min(), max())
#include "opencv2/core/core.hpp"
#include "opencv2/highgui/highgui.hpp"

using namespace cv;

class CGdiPlus
{
public:
    static void  Init();
    static Mat  ImgRead(const WCHAR* u16_File);
    static void ImgWrite(Mat i_Mat, const WCHAR* u16_File);
    static Mat  CopyBmpToMat(Gdiplus::Bitmap* pi_Bmp);
    static Mat  CopyBmpDataToMat(Gdiplus::BitmapData* pi_Data);
    static Gdiplus::Bitmap* CopyMatToBmp(Mat& i_Mat);

private:
    static CLSID GetEncoderClsid(const WCHAR* u16_File);

    static BOOL mb_InitDone;
};

CPP FILE:

#include "stdafx.h"
#include "CGdiPlus.h"

using namespace Gdiplus;

BOOL CGdiPlus::mb_InitDone = FALSE;

// Do not call this function in the DLL loader lock!
void CGdiPlus::Init()
{
    if (mb_InitDone)
        return;

    GdiplusStartupInput k_Input;
    ULONG_PTR u32_Token;
    if (Ok != GdiplusStartup(&u32_Token, &k_Input, NULL))
        throw L"Error initializing GDI+";

    mb_InitDone = TRUE;
}

Mat CGdiPlus::CopyBmpToMat(Bitmap* pi_Bmp)
{
    assert(mb_InitDone);

    BitmapData i_Data;
    Gdiplus::Rect k_Rect(0, 0, pi_Bmp->GetWidth(), pi_Bmp->GetHeight());
    if (Ok != pi_Bmp->LockBits(&k_Rect, ImageLockModeRead, pi_Bmp->GetPixelFormat(), &i_Data))
        throw L"Error locking Bitmap.";

    Mat i_Mat = CopyBmpDataToMat(&i_Data);

    pi_Bmp->UnlockBits(&i_Data);
    return i_Mat;
}

Mat CGdiPlus::CopyBmpDataToMat(BitmapData* pi_Data)
{
    assert(mb_InitDone);

    int s32_CvType;
    switch (pi_Data->PixelFormat)
    {
        case PixelFormat1bppIndexed:
        case PixelFormat8bppIndexed:
            // Special case treated separately below
            break;

        case PixelFormat24bppRGB:  // 24 bit
            s32_CvType = CV_8UC3; 
            break;

        case PixelFormat32bppRGB:  // 32 bit
        case PixelFormat32bppARGB: // 32 bit + Alpha channel    
            s32_CvType = CV_8UC4; 
            break; 

        default: 
            throw L"Image format not supported.";
    }

    Mat i_Mat;
    if (pi_Data->PixelFormat == PixelFormat1bppIndexed) // 1 bit (special case)
    {
        i_Mat = Mat(pi_Data->Height, pi_Data->Width, CV_8UC1);

        for (UINT Y=0; Y<pi_Data->Height; Y++)
        {
            BYTE* pu8_Src = (BYTE*)pi_Data->Scan0 + Y * pi_Data->Stride;
            BYTE* pu8_Dst = i_Mat.ptr<BYTE>(Y);

            BYTE u8_Mask = 0x80;
            for (UINT X=0; X<pi_Data->Width; X++)
            {
                pu8_Dst[0] = (pu8_Src[0] & u8_Mask) ? 255 : 0;
                pu8_Dst++;

                u8_Mask >>= 1;
                if (u8_Mask == 0)
                {
                    pu8_Src++;
                    u8_Mask = 0x80;
                }
            }
        }
    }
    else if (pi_Data->PixelFormat == PixelFormat8bppIndexed) // 8 bit gray scale palette (special case)
    {
        i_Mat = Mat(pi_Data->Height, pi_Data->Width, CV_8UC1);

        BYTE* u8_Src = (BYTE*)pi_Data->Scan0;
        BYTE* u8_Dst = i_Mat.data;

        for (UINT R=0; R<pi_Data->Height; R++)
        {
            memcpy(u8_Dst, u8_Src, pi_Data->Width);
            u8_Src += pi_Data->Stride;
            u8_Dst += i_Mat.step;
        }
    }
    else // 24 Bit / 32 Bit
    {
        // Create a Mat pointing to external memory
        Mat i_Ext(pi_Data->Height, pi_Data->Width, s32_CvType, pi_Data->Scan0, pi_Data->Stride);

        // Create a Mat with own memory
        i_Ext.copyTo(i_Mat);
    }
    return i_Mat;
}

Bitmap* CGdiPlus::CopyMatToBmp(Mat& i_Mat)
{
    assert(mb_InitDone);

    PixelFormat e_Format;
    switch (i_Mat.channels())
    {
        case 1: e_Format = PixelFormat8bppIndexed; break;
        case 3: e_Format = PixelFormat24bppRGB;    break;
        case 4: e_Format = PixelFormat32bppARGB;   break;
        default: throw L"Image format not supported.";
    }

    // Create Bitmap with own memory
    Bitmap* pi_Bmp = new Bitmap(i_Mat.cols, i_Mat.rows, e_Format);

    BitmapData i_Data;
    Gdiplus::Rect k_Rect(0, 0, i_Mat.cols, i_Mat.rows);
    if (Ok != pi_Bmp->LockBits(&k_Rect, ImageLockModeWrite, e_Format, &i_Data))
    {
        delete pi_Bmp;
        throw L"Error locking Bitmap.";
    }

    if (i_Mat.elemSize1() == 1) // 1 Byte per channel (8 bit gray scale palette)
    {
        BYTE* u8_Src = i_Mat.data;
        BYTE* u8_Dst = (BYTE*)i_Data.Scan0;

        int s32_RowLen = i_Mat.cols * i_Mat.channels(); // != i_Mat.step !!

        // The Windows Bitmap format requires all rows to be DWORD aligned (always!)
        // while OpenCV by default stores bitmap data sequentially.
        for (int R=0; R<i_Mat.rows; R++)
        {
            memcpy(u8_Dst, u8_Src, s32_RowLen);
            u8_Src += i_Mat.step;    // step may be e.g 3729
            u8_Dst += i_Data.Stride; // while Stride is 3732
        }
    }
    else // i_Mat may contain e.g. float data (CV_32F -> 4 Bytes per pixel grayscale)
    {
        int s32_Type;
        switch (i_Mat.channels())
        {
            case 1: s32_Type = CV_8UC1; break;
            case 3: s32_Type = CV_8UC3; break;
            default: throw L"Image format not supported.";
        }

        CvMat i_Dst;
        cvInitMatHeader(&i_Dst, i_Mat.rows, i_Mat.cols, s32_Type, i_Data.Scan0, i_Data.Stride);

        CvMat i_Img = i_Mat;
        cvConvertImage(&i_Img, &i_Dst, 0);
    }

    pi_Bmp->UnlockBits(&i_Data);

    // Add the grayscale palette if required.
    if (e_Format == PixelFormat8bppIndexed)
    {
        CByteArray i_Arr;
        i_Arr.SetSize(sizeof(ColorPalette) + 256 * sizeof(ARGB));
        ColorPalette* pk_Palette = (ColorPalette*)i_Arr.GetData();

        pk_Palette->Count = 256;
        pk_Palette->Flags = PaletteFlagsGrayScale;

        ARGB* pk_Color = &pk_Palette->Entries[0];
        for (int i=0; i<256; i++)
        {
            pk_Color[i] = Color::MakeARGB(255, i, i, i);
        }

        if (Ok != pi_Bmp->SetPalette(pk_Palette))
        {
            delete pi_Bmp;
            throw L"Error setting grayscale palette.";
        }
    }
    return pi_Bmp;
}

Mat CGdiPlus::ImgRead(const WCHAR* u16_File)
{
    assert(mb_InitDone);

    Bitmap i_Bmp(u16_File);
    if (!i_Bmp.GetWidth() || !i_Bmp.GetHeight())
        throw L"Error loading image from file.";

    return CopyBmpToMat(&i_Bmp);
}

void CGdiPlus::ImgWrite(Mat i_Mat, const WCHAR* u16_File)
{
    assert(mb_InitDone);

    CLSID k_Clsid = GetEncoderClsid(u16_File);

    Bitmap* pi_Bmp = CopyMatToBmp(i_Mat);

    Status e_Status = pi_Bmp->Save(u16_File, &k_Clsid);

    delete pi_Bmp;

    if (e_Status != Ok)
        throw L"Error saving image to file.";
}

// Get the class identifier of the image encoder for the given file extension.
// e.g. {557CF406-1A04-11D3-9A73-0000F81EF32E}  for PNG images
CLSID CGdiPlus::GetEncoderClsid(const WCHAR* u16_File)
{
    assert(mb_InitDone);

    UINT u32_Encoders, u32_Size;
    if (Ok != GetImageEncodersSize(&u32_Encoders, &u32_Size))
        throw L"Error obtaining image encoders size";

    CByteArray i_Arr;
    i_Arr.SetSize(u32_Size);
    ImageCodecInfo* pi_Info = (ImageCodecInfo*)i_Arr.GetData();

    if (Ok != GetImageEncoders(u32_Encoders, u32_Size, pi_Info))
        throw L"Error obtaining image encoders";

    CStringW s_Ext = u16_File;
    int Pos = s_Ext.ReverseFind('.');
    if (Pos < 0)
        throw L"Invalid image filename.";

    // s_Ext = "*.TIF;"
    s_Ext = L"*" + s_Ext.Mid(Pos) + L";";
    s_Ext.MakeUpper();

    // Search the file extension
    for (UINT i=0; i<u32_Encoders; i++)
    {
        CStringW s_Extensions = pi_Info->FilenameExtension;
        s_Extensions += ';';

        // s_Extensions = "*.TIFF;*.TIF;"
        if (s_Extensions.Find(s_Ext) >= 0)
            return pi_Info->Clsid;

        pi_Info ++;
    }

    throw L"No image encoder found for file extension " + s_Ext;
}

Converting Cv::Mat for Tesseract