Capturing Webpage as Image in C#, Ensuring JavaScript Rendered Elements Are Visible

Capturing webpage as image in c#, ensuring javascript rendered elements are visible

You have possibly tried IECapt. I think it is the right way to go. I created a modified version of it and use a timer instead of Thread.Sleep it captures your site as expected.

------EDIT------

Here is the ugly source. Just Add a reference to Microsoft HTML Object Library.

And this is the usage:

HtmlCapture capture = new HtmlCapture(@"c:\temp\myimg.png");
capture.HtmlImageCapture += new HtmlCapture.HtmlCaptureEvent(capture_HtmlImageCapture);
capture.Create("http://www.highcharts.com/demo/combo-dual-axes");

void capture_HtmlImageCapture(object sender, Uri url)
{
this.Close();
}

File1

using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Linq;
using System.Text;
using System.Windows.Forms;
using System.IO;

namespace MyIECapt
{
public class HtmlCapture
{
private WebBrowser web;
private Timer tready;
private Rectangle screen;
private Size? imgsize = null;

//an event that triggers when the html document is captured
public delegate void HtmlCaptureEvent(object sender, Uri url);

public event HtmlCaptureEvent HtmlImageCapture;

string fileName = "";

//class constructor
public HtmlCapture(string fileName)
{
this.fileName = fileName;

//initialise the webbrowser and the timer
web = new WebBrowser();
tready = new Timer();
tready.Interval = 2000;
screen = Screen.PrimaryScreen.Bounds;
//set the webbrowser width and hight
web.Width = 1024; //screen.Width;
web.Height = 768; // screen.Height;
//suppress script errors and hide scroll bars
web.ScriptErrorsSuppressed = true;
web.ScrollBarsEnabled = false;
//attached events
web.Navigating +=
new WebBrowserNavigatingEventHandler(web_Navigating);
web.DocumentCompleted += new
WebBrowserDocumentCompletedEventHandler(web_DocumentCompleted);
tready.Tick += new EventHandler(tready_Tick);
}

public void Create(string url)
{
imgsize = null;
web.Navigate(url);
}

public void Create(string url, Size imgsz)
{
this.imgsize = imgsz;
web.Navigate(url);
}

void web_DocumentCompleted(object sender,
WebBrowserDocumentCompletedEventArgs e)
{
//start the timer
tready.Start();
}

void web_Navigating(object sender, WebBrowserNavigatingEventArgs e)
{
//stop the timer
tready.Stop();
}

void tready_Tick(object sender, EventArgs e)
{
try
{
//stop the timer
tready.Stop();

mshtml.IHTMLDocument2 docs2 = (mshtml.IHTMLDocument2)web.Document.DomDocument;
mshtml.IHTMLDocument3 docs3 = (mshtml.IHTMLDocument3)web.Document.DomDocument;
mshtml.IHTMLElement2 body2 = (mshtml.IHTMLElement2)docs2.body;
mshtml.IHTMLElement2 root2 = (mshtml.IHTMLElement2)docs3.documentElement;

// Determine dimensions for the image; we could add minWidth here
// to ensure that we get closer to the minimal width (the width
// computed might be a few pixels less than what we want).
int width = Math.Max(body2.scrollWidth, root2.scrollWidth);
int height = Math.Max(root2.scrollHeight, body2.scrollHeight);

//get the size of the document's body
Rectangle docRectangle = new Rectangle(0, 0, width, height);

web.Width = docRectangle.Width;
web.Height = docRectangle.Height;

//if the imgsize is null, the size of the image will
//be the same as the size of webbrowser object
//otherwise set the image size to imgsize
Rectangle imgRectangle;
if (imgsize == null) imgRectangle = docRectangle;
else imgRectangle = new Rectangle() { Location = new Point(0, 0), Size = imgsize.Value };

//create a bitmap object
Bitmap bitmap = new Bitmap(imgRectangle.Width, imgRectangle.Height);
//get the viewobject of the WebBrowser
IViewObject ivo = web.Document.DomDocument as IViewObject;

using (Graphics g = Graphics.FromImage(bitmap))
{
//get the handle to the device context and draw
IntPtr hdc = g.GetHdc();
ivo.Draw(1, -1, IntPtr.Zero, IntPtr.Zero,
IntPtr.Zero, hdc, ref imgRectangle,
ref docRectangle, IntPtr.Zero, 0);
g.ReleaseHdc(hdc);
}
//invoke the HtmlImageCapture event
bitmap.Save(fileName);
bitmap.Dispose();
}
catch
{
//System.Diagnostics.Process.GetCurrentProcess().Kill();
}
if(HtmlImageCapture!=null) HtmlImageCapture(this, web.Url);
}
}
}

and File2

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Drawing;
using System.Runtime.InteropServices;

namespace MyIECapt
{
[ComVisible(true), ComImport()]
[GuidAttribute("0000010d-0000-0000-C000-000000000046")]
[InterfaceTypeAttribute(ComInterfaceType.InterfaceIsIUnknown)]
public interface IViewObject
{
[return: MarshalAs(UnmanagedType.I4)]
[PreserveSig]
int Draw(
[MarshalAs(UnmanagedType.U4)] UInt32 dwDrawAspect,
int lindex,
IntPtr pvAspect,
[In] IntPtr ptd,
IntPtr hdcTargetDev,
IntPtr hdcDraw,
[MarshalAs(UnmanagedType.Struct)] ref Rectangle lprcBounds,
[MarshalAs(UnmanagedType.Struct)] ref Rectangle lprcWBounds,
IntPtr pfnContinue,
[MarshalAs(UnmanagedType.U4)] UInt32 dwContinue);
[PreserveSig]
int GetColorSet([In, MarshalAs(UnmanagedType.U4)] int dwDrawAspect,
int lindex, IntPtr pvAspect, [In] IntPtr ptd,
IntPtr hicTargetDev, [Out] IntPtr ppColorSet);
[PreserveSig]
int Freeze([In, MarshalAs(UnmanagedType.U4)] int dwDrawAspect,
int lindex, IntPtr pvAspect, [Out] IntPtr pdwFreeze);
[PreserveSig]
int Unfreeze([In, MarshalAs(UnmanagedType.U4)] int dwFreeze);
}
}

Taking a Screen shot of WebPage using Windows Application

You can use a third party library called Selenium to wait for specific elements in the DOM to load before taking your screenshot. This library is normally used for automated testing of webpage user interfaces. More details can be found here.

Selenium is capable of working with different browser types (you did not specify which browser you are working with in your question), so you will need to do some further research based on this. Searching for something along the lines of "Selenium element visibility" with your browser type will lead you to the required functions.

How to get full height of web page in WebKit Browser

Here is the solution:

To scroll down to the end of the page, following code can be used:

   int prevx;
WebKit.WebBrowser browser; //Assume created by drag-and-drop.
while(true) {
prevx = browser.ScrollOffset.X;
browser.ScrollOffset = new Point(prevx+1000,0);
if(prevx == browser.ScrollOffset.X)
break;
}

So now, browser.ScrollOffset.X will return final scroll offset. By following code, we can take total width of a web page:

   int totalHeight = browser.ScrollOffset.X + browser.Width;

Same process should be done for Height, in Y-axis.

How to fix a opacity bug with DrawToBitmap on WebBrowser Control?

I'm looking for a built-in solution to take a screenshot from posted
URL in the question without HIDDEN TEXT. In other words a solution to
respect opacity.

The following code does just that: respects CSS opacity. Amongst other things, it uses a Metafile object and OleDraw API to render the web page's image.

The test HTML:

<!DOCTYPE html>
<body style='background-color: grey'>
<div style='background-color: blue; opacity: 0.2; color: yellow'>This is a text</div>
</body>

The output:

opacity

The code (console app):

using Microsoft.Win32;
using System;
using System.ComponentModel;
using System.Drawing;
using System.Drawing.Imaging;
using System.Runtime.InteropServices;
using System.Threading;
using System.Threading.Tasks;
using System.Windows.Forms;

namespace Console_21697048
{
// http://stackoverflow.com/q/21697048/1768303

class Program
{
const string HTML = "<!DOCTYPE html><body style='background-color: grey'><div style='background-color: blue; opacity: 0.2; color: yellow'>This is a text</div></body>";
const string FILE_NAME = "webpage.png";
readonly static Size IMAGE_SIZE = new Size(320, 200);

// Main
static void Main(string[] args)
{
try
{
// enable HTML5 etc (assuming we're running IE9+)
SetFeatureBrowserFeature("FEATURE_BROWSER_EMULATION", 9000);
// force software rendering
SetFeatureBrowserFeature("FEATURE_IVIEWOBJECTDRAW_DMLT9_WITH_GDI", 1);
SetFeatureBrowserFeature("FEATURE_GPU_RENDERING", 0);

using (var apartment = new MessageLoopApartment())
{
// create WebBrowser on a seprate thread with its own message loop
var webBrowser = apartment.Invoke(() => new WebBrowser());

// navigate and wait for the result
apartment.Invoke(() =>
{
var pageLoadedTcs = new TaskCompletionSource<bool>();
webBrowser.DocumentCompleted += (s, e) =>
pageLoadedTcs.TrySetResult(true);

webBrowser.DocumentText = HTML;
return pageLoadedTcs.Task;
}).Wait();

// save the picture
apartment.Invoke(() =>
{
webBrowser.Size = IMAGE_SIZE;
var rectangle = new Rectangle(0, 0, webBrowser.Width, webBrowser.Height);

// get reference DC
using (var screenGraphics = webBrowser.CreateGraphics())
{
var screenHdc = screenGraphics.GetHdc();
// create a metafile
using (var metafile = new Metafile(screenHdc, rectangle, MetafileFrameUnit.Pixel))
{
using (var graphics = Graphics.FromImage(metafile))
{
var hdc = graphics.GetHdc();
var rect = new Rectangle(0, 0, 320, 50);
OleDraw(webBrowser.ActiveXInstance, DVASPECT_CONTENT, hdc, ref rectangle);
graphics.ReleaseHdc(hdc);
}
// save the metafile as bitmap
metafile.Save(FILE_NAME, ImageFormat.Png);
}
screenGraphics.ReleaseHdc(screenHdc);
}
});

// dispose of webBrowser
apartment.Invoke(() => webBrowser.Dispose());
webBrowser = null;
}
}
catch (Exception ex)
{
Console.WriteLine(ex.ToString());
}
}

// interop
const uint DVASPECT_CONTENT = 1;

[DllImport("ole32.dll", PreserveSig = false)]
static extern void OleDraw(
[MarshalAs(UnmanagedType.IUnknown)] object pUnk,
uint dwAspect,
IntPtr hdcDraw,
[In] ref System.Drawing.Rectangle lprcBounds);

// WebBrowser Feature Control
// http://msdn.microsoft.com/en-us/library/ie/ee330733(v=vs.85).aspx
static void SetFeatureBrowserFeature(string feature, uint value)
{
if (LicenseManager.UsageMode != LicenseUsageMode.Runtime)
return;
var appName = System.IO.Path.GetFileName(System.Diagnostics.Process.GetCurrentProcess().MainModule.FileName);
Registry.SetValue(@"HKEY_CURRENT_USER\Software\Microsoft\Internet Explorer\Main\FeatureControl\" + feature,
appName, value, RegistryValueKind.DWord);
}
}

// MessageLoopApartment
// more info: http://stackoverflow.com/a/21808747/1768303

public class MessageLoopApartment : IDisposable
{
Thread _thread; // the STA thread

TaskScheduler _taskScheduler; // the STA thread's task scheduler

public TaskScheduler TaskScheduler { get { return _taskScheduler; } }

/// <summary>MessageLoopApartment constructor</summary>
public MessageLoopApartment()
{
var tcs = new TaskCompletionSource<TaskScheduler>();

// start an STA thread and gets a task scheduler
_thread = new Thread(startArg =>
{
EventHandler idleHandler = null;

idleHandler = (s, e) =>
{
// handle Application.Idle just once
Application.Idle -= idleHandler;
// return the task scheduler
tcs.SetResult(TaskScheduler.FromCurrentSynchronizationContext());
};

// handle Application.Idle just once
// to make sure we're inside the message loop
// and SynchronizationContext has been correctly installed
Application.Idle += idleHandler;
Application.Run();
});

_thread.SetApartmentState(ApartmentState.STA);
_thread.IsBackground = true;
_thread.Start();
_taskScheduler = tcs.Task.Result;
}

/// <summary>shutdown the STA thread</summary>
public void Dispose()
{
if (_taskScheduler != null)
{
var taskScheduler = _taskScheduler;
_taskScheduler = null;

// execute Application.ExitThread() on the STA thread
Task.Factory.StartNew(
() => Application.ExitThread(),
CancellationToken.None,
TaskCreationOptions.None,
taskScheduler).Wait();

_thread.Join();
_thread = null;
}
}

/// <summary>Task.Factory.StartNew wrappers</summary>
public void Invoke(Action action)
{
Task.Factory.StartNew(action,
CancellationToken.None, TaskCreationOptions.None, _taskScheduler).Wait();
}

public TResult Invoke<TResult>(Func<TResult> action)
{
return Task.Factory.StartNew(action,
CancellationToken.None, TaskCreationOptions.None, _taskScheduler).Result;
}
}
}


Related Topics



Leave a reply



Submit