Extract the Current DOM and Print It as a String, with Styles Intact

Extract the current DOM and print it as a string, with styles intact

I think this could be a solution (it took me nearly a whole day!).

It returns a string representing the DOM of any element,
with all external styles included in the "style" attributes except default values,
and does not permanently modify that element.

For example: console.log(document.body.serializeWithStyles());

You can load this code in Web Inspector command line or from a script tag in the body element but NOT in the head element because it requires the existence of document.body.

I have tested it on desktop Safari 5 (I don't have the mobile version).

It works like this:

For each element in the DOM:

1) caching the value of style.cssText property, which represents the inline style, in an array;

2) calling getComputedStyle on the element;

3) checking if we have the css default values lookup table corresponding to this element's tag name;

4) building it if not;

5) iterating through the result, finding which values are non default using the lookup table;

6) applying those non default style values to the element.

Then storing the outerHTML as the result;

For each element, restoring the inline styles from the cache;

Returning the previously stored result.

The code:

/**
 * Element.prototype.serializeWithStyles()
 *
 * Returns the outerHTML of the element it is called on, after temporarily
 * writing every computed style value that differs from the tag's default
 * into the inline style of that element and of each of its descendants.
 * The original inline styles are put back before the method returns, even
 * if serialization throws.
 *
 * Default values are measured by briefly appending a probe element of the
 * same tag name to document.body, so this script must be evaluated after
 * document.body exists.
 * NOTE(review): because the probe lives in the current document, rules from
 * the page's own style sheets that match a bare tag are presumably counted
 * as "defaults" too - confirm whether that is acceptable for your use case.
 *
 * @this {Element}
 * @returns {string} outerHTML with non-default computed styles inlined.
 * @throws {TypeError} when called on a node that is not an element.
 */
Element.prototype.serializeWithStyles = (function () {

    // Mapping between tag names and css default values lookup tables. This allows to exclude default values in the result.
    var defaultStylesByTagName = {};

    // Elements with these tag names are left untouched: no styles are inlined on them.
    // "NOFRAME" is kept for backward compatibility; the real tag name is "NOFRAMES".
    var noStyleTags = {"BASE":true,"HEAD":true,"HTML":true,"META":true,"NOFRAME":true,"NOFRAMES":true,"NOSCRIPT":true,"PARAM":true,"SCRIPT":true,"STYLE":true,"TITLE":true};

    // This list determines which css default values lookup tables are precomputed at load time
    // Lookup tables for other tag names will be automatically built at runtime if needed
    var tagNames = ["A","ABBR","ADDRESS","AREA","ARTICLE","ASIDE","AUDIO","B","BASE","BDI","BDO","BLOCKQUOTE","BODY","BR","BUTTON","CANVAS","CAPTION","CENTER","CITE","CODE","COL","COLGROUP","COMMAND","DATALIST","DD","DEL","DETAILS","DFN","DIV","DL","DT","EM","EMBED","FIELDSET","FIGCAPTION","FIGURE","FONT","FOOTER","FORM","H1","H2","H3","H4","H5","H6","HEAD","HEADER","HGROUP","HR","HTML","I","IFRAME","IMG","INPUT","INS","KBD","KEYGEN","LABEL","LEGEND","LI","LINK","MAP","MARK","MATH","MENU","META","METER","NAV","NOBR","NOSCRIPT","OBJECT","OL","OPTION","OPTGROUP","OUTPUT","P","PARAM","PRE","PROGRESS","Q","RP","RT","RUBY","S","SAMP","SCRIPT","SECTION","SELECT","SMALL","SOURCE","SPAN","STRONG","STYLE","SUB","SUMMARY","SUP","SVG","TABLE","TBODY","TD","TEXTAREA","TFOOT","TH","THEAD","TIME","TITLE","TR","TRACK","U","UL","VAR","VIDEO","WBR"];

    // Precompute the lookup tables.
    for (var i = 0; i < tagNames.length; i++) {
        if (!noStyleTags[tagNames[i]]) {
            defaultStylesByTagName[tagNames[i]] = computeDefaultStyleByTagName(tagNames[i]);
        }
    }

    // Measures the computed style of a freshly created element of the given tag name.
    function computeDefaultStyleByTagName(tagName) {
        var defaultStyle = {};
        var element = document.body.appendChild(document.createElement(tagName));
        try {
            var computedStyle = getComputedStyle(element);
            for (var i = 0; i < computedStyle.length; i++) {
                var propName = computedStyle[i];
                // getPropertyValue avoids depending on hyphenated names being
                // exposed as properties of the declaration object.
                defaultStyle[propName] = computedStyle.getPropertyValue(propName);
            }
        } finally {
            // Never leave the probe element in the page, even if measuring failed.
            document.body.removeChild(element);
        }
        return defaultStyle;
    }

    // Returns the cached default-style table for a tag name, building it on first use.
    function getDefaultStyleByTagName(tagName) {
        tagName = tagName.toUpperCase();
        if (!defaultStylesByTagName[tagName]) {
            defaultStylesByTagName[tagName] = computeDefaultStyleByTagName(tagName);
        }
        return defaultStylesByTagName[tagName];
    }

    return function serializeWithStyles() {
        if (this.nodeType !== Node.ELEMENT_NODE) {
            throw new TypeError("The serializeWithStyles method only works on elements.");
        }

        // querySelectorAll("*") only yields descendants, so the element itself
        // is added explicitly: it is part of outerHTML and needs its styles too.
        var elements = [this].concat(Array.prototype.slice.call(this.querySelectorAll("*")));

        // One record per element that was actually modified, so the restore
        // step never touches an element that was skipped.
        var touched = [];

        try {
            for (var i = 0; i < elements.length; i++) {
                var e = elements[i];
                // tagName is upper-cased for the lookup, matching getDefaultStyleByTagName.
                if (!e.style || noStyleTags[e.tagName.toUpperCase()]) {
                    continue;
                }
                var computedStyle = getComputedStyle(e);
                var defaultStyle = getDefaultStyleByTagName(e.tagName);
                touched.push({
                    element: e,
                    cssText: e.style.cssText,
                    hadStyleAttribute: e.hasAttribute("style")
                });
                for (var ii = 0; ii < computedStyle.length; ii++) {
                    var cssPropName = computedStyle[ii];
                    var value = computedStyle.getPropertyValue(cssPropName);
                    if (value !== defaultStyle[cssPropName]) {
                        e.style.setProperty(cssPropName, value);
                    }
                }
            }
            return this.outerHTML;
        } finally {
            // Put every modified element back exactly as it was found.
            for (var j = 0; j < touched.length; j++) {
                var entry = touched[j];
                entry.element.style.cssText = entry.cssText;
                if (!entry.hadStyleAttribute) {
                    // Writing cssText leaves a style attribute behind; drop it
                    // when the element did not have one to begin with.
                    entry.element.removeAttribute("style");
                }
            }
        }
    };
})();

Export CSS of DOM elements

Here is the code for an exportStyles() method that should return a CSS string including all inline and external styles for a given element, except default values (which was the main difficulty).

For example: console.log(someElement.exportStyles());

Since you are using Chrome, I did not bother making it compatible with IE.
Actually, it only requires that the browser supports the getComputedStyle(element) method.

/**
 * Element.prototype.exportStyles()
 *
 * Returns a CSS declaration block ("{ ... }", lines joined with "\r\n")
 * listing every computed style property of the element whose value differs
 * from the default measured for an element of the same tag name.
 *
 * Default values are measured by briefly appending a probe element of the
 * same tag name to document.body, so this script must be evaluated after
 * document.body exists.
 *
 * @this {Element}
 * @returns {string} the declaration block.
 * @throws {TypeError} when called on a non-element node, or on an element
 *         whose tag name is listed in noStyleTags.
 */
Element.prototype.exportStyles = (function () {

    // Mapping between tag names and css default values lookup tables. This allows to exclude default values in the result.
    var defaultStylesByTagName = {};

    // exportStyles refuses to run on elements with these tag names.
    // "NOFRAME" is kept for backward compatibility; the real tag name is "NOFRAMES".
    var noStyleTags = {"BASE":true,"HEAD":true,"HTML":true,"META":true,"NOFRAME":true,"NOFRAMES":true,"NOSCRIPT":true,"PARAM":true,"SCRIPT":true,"STYLE":true,"TITLE":true};

    // This list determines which css default values lookup tables are precomputed at load time
    // Lookup tables for other tag names will be automatically built at runtime if needed
    var tagNames = ["A","ABBR","ADDRESS","AREA","ARTICLE","ASIDE","AUDIO","B","BASE","BDI","BDO","BLOCKQUOTE","BODY","BR","BUTTON","CANVAS","CAPTION","CENTER","CITE","CODE","COL","COLGROUP","COMMAND","DATALIST","DD","DEL","DETAILS","DFN","DIV","DL","DT","EM","EMBED","FIELDSET","FIGCAPTION","FIGURE","FONT","FOOTER","FORM","H1","H2","H3","H4","H5","H6","HEAD","HEADER","HGROUP","HR","HTML","I","IFRAME","IMG","INPUT","INS","KBD","KEYGEN","LABEL","LEGEND","LI","LINK","MAP","MARK","MATH","MENU","META","METER","NAV","NOBR","NOSCRIPT","OBJECT","OL","OPTION","OPTGROUP","OUTPUT","P","PARAM","PRE","PROGRESS","Q","RP","RT","RUBY","S","SAMP","SCRIPT","SECTION","SELECT","SMALL","SOURCE","SPAN","STRONG","STYLE","SUB","SUMMARY","SUP","SVG","TABLE","TBODY","TD","TEXTAREA","TFOOT","TH","THEAD","TIME","TITLE","TR","TRACK","U","UL","VAR","VIDEO","WBR"];

    // Precompute the lookup tables.
    for (var i = 0; i < tagNames.length; i++) {
        if (!noStyleTags[tagNames[i]]) {
            defaultStylesByTagName[tagNames[i]] = computeDefaultStyleByTagName(tagNames[i]);
        }
    }

    // Measures the computed style of a freshly created element of the given tag name.
    function computeDefaultStyleByTagName(tagName) {
        var defaultStyle = {};
        var element = document.body.appendChild(document.createElement(tagName));
        try {
            var computedStyle = getComputedStyle(element);
            for (var i = 0; i < computedStyle.length; i++) {
                var propName = computedStyle[i];
                // getPropertyValue avoids depending on hyphenated names being
                // exposed as properties of the declaration object.
                defaultStyle[propName] = computedStyle.getPropertyValue(propName);
            }
        } finally {
            // Never leave the probe element in the page, even if measuring failed.
            document.body.removeChild(element);
        }
        return defaultStyle;
    }

    // Returns the cached default-style table for a tag name, building it on first use.
    function getDefaultStyleByTagName(tagName) {
        tagName = tagName.toUpperCase();
        if (!defaultStylesByTagName[tagName]) {
            defaultStylesByTagName[tagName] = computeDefaultStyleByTagName(tagName);
        }
        return defaultStylesByTagName[tagName];
    }

    return function exportStyles() {
        if (this.nodeType !== Node.ELEMENT_NODE) {
            throw new TypeError("The exportStyles method only works on elements, not on " + this.nodeType + " nodes.");
        }
        // tagName is upper-cased for the lookup, matching getDefaultStyleByTagName.
        if (noStyleTags[this.tagName.toUpperCase()]) {
            throw new TypeError("The exportStyles method does not work on " + this.tagName + " elements.");
        }

        var computedStyle = getComputedStyle(this);
        var defaultStyle = getDefaultStyleByTagName(this.tagName);

        // One "name: value;" line per property whose value is not the default.
        var lines = ["{"];
        for (var i = 0; i < computedStyle.length; i++) {
            var cssPropName = computedStyle[i];
            var value = computedStyle.getPropertyValue(cssPropName);
            if (value !== defaultStyle[cssPropName]) {
                lines.push(cssPropName + ": " + value + ";");
            }
        }
        lines.push("}");
        return lines.join("\r\n");
    };

})();

This code is based on my answer to a slightly related question: Extract the current DOM and print it as a string, with styles intact

Looking for a render function that will take HTML and apply styles to the string with React JS

According to the docs, render can return a ReactNode.

And it looks like you want to use dangerouslySetInnerHTML

render: text => {
return <span dangerouslySetInnerHTML={{__html: text}}></span>
}

Creating a new DOM element from an HTML string using built-in DOM methods or Prototype

Note: most current browsers support HTML <template> elements, which provide a more reliable way of creating elements from strings. See Mark Amery's answer below for details.

For older browsers, and for node/jsdom (which doesn't yet support <template> elements at the time of writing), use the following method. It's the same thing libraries used to do to get DOM elements from an HTML string (with some extra work for IE to work around bugs in its implementation of innerHTML):

/**
 * Parses an HTML string inside a detached <div> and returns the first node
 * it produces.
 *
 * @param {string} htmlString - markup to parse; surrounding whitespace is
 *        trimmed so that the first node is not a whitespace text node.
 * @returns {Node|null} the first child of the temporary <div>.
 */
function createElementFromHTML(htmlString) {
    var wrapper = document.createElement('div');
    var markup = htmlString.trim();
    wrapper.innerHTML = markup;

    // Return wrapper.childNodes instead to support multiple top-level nodes.
    return wrapper.firstChild;
}

Note that unlike HTML templates this won't work for some elements that cannot legally be children of a <div>, such as <td>s.

If you're already using a library, I would recommend you stick to the library-approved method of creating elements from HTML strings:

  • Prototype has this feature built-into its update() method.
  • jQuery has it implemented in its jQuery(html) and jQuery.parseHTML methods.

Adding an input tag with js and dom appendChild seems to have no margin

Add a whitespace text node between the inputs, or give them a class with a margin.

// Without any separator: two inputs appended directly after each other.
document.body.appendChild(document.createElement("input"));
document.body.appendChild(document.createElement("input"));
document.body.appendChild(document.createElement("hr"));

// With whitespace: a text node holding a single space sits between the inputs.
document.body.appendChild(document.createElement("input"));
document.body.appendChild(document.createTextNode(" "));
document.body.appendChild(document.createElement("input"));
document.body.appendChild(document.createElement("hr"));

// With CSS: both inputs carry the "spaced" class (the second is a clone of the first).
const input = document.createElement("input");
input.classList.add("spaced");
document.body.appendChild(input);
document.body.appendChild(input.cloneNode(true));
/* Class applied to the inputs in the "with css" example: adds a 5px right margin. */
.spaced { margin-right:5px; }

Extracting text from a contentEditable div

I forgot about this question until now, when Nico slapped a bounty on it.

I solved the problem by writing the function I needed myself, cribbing a function from the existing jQuery codebase and modifying it to work as I needed.

I've tested this function with Safari (WebKit), IE, Firefox and Opera. I didn't bother checking any other browsers since the whole contentEditable thing is non-standard. It is also possible that an update to any browser could break this function if they change how they implement contentEditable. So programmer beware.

/**
 * Extracts the text of the given nodes (and their descendants), inserting a
 * "\n" for each node the current browser uses to mark a line break inside a
 * contentEditable element.
 *
 * The line-break node name is chosen from jQuery's $.browser flags:
 * webkit -> "DIV", msie -> "P", mozilla -> "BR", opera -> "P".
 * When $ or $.browser is not available (jQuery.browser was removed in
 * jQuery 1.9) the default "BR" is used instead of throwing.
 *
 * @param {ArrayLike<Node>} elems - nodes to extract text from.
 * @returns {string} the extracted text.
 */
function extractTextWithWhitespace(elems)
{
    // Guard the lookup: without it a missing $.browser raises a TypeError.
    var browser = (typeof $ !== "undefined" && $ && $.browser) || {};

    var lineBreakNodeName = "BR"; // Use <br> as a default
    if (browser.webkit)
    {
        lineBreakNodeName = "DIV";
    }
    else if (browser.msie)
    {
        lineBreakNodeName = "P";
    }
    else if (browser.mozilla)
    {
        lineBreakNodeName = "BR";
    }
    else if (browser.opera)
    {
        lineBreakNodeName = "P";
    }

    return extractTextWithWhitespaceWorker(elems, lineBreakNodeName);
}

// Cribbed from jQuery 1.4.2 (getText) and modified to retain whitespace
/**
 * Walks a list of DOM nodes and their descendants, concatenating the value
 * of every text and CDATA node and emitting "\n" for every node whose
 * nodeName equals lineBreakNodeName. Comment nodes are not descended into.
 *
 * @param {ArrayLike<Node>} elems - nodes to walk; iteration stops at the
 *        first falsy entry.
 * @param {string} lineBreakNodeName - nodeName that stands for a line break.
 * @returns {string} the collected text.
 */
function extractTextWithWhitespaceWorker(elems, lineBreakNodeName)
{
    var ret = "";
    var elem;

    for (var i = 0; elems[i]; i++)
    {
        elem = elems[i];

        if (elem.nodeType === 3 // text node
            || elem.nodeType === 4) // CDATA node
        {
            ret += elem.nodeValue;
        }

        // The newline is emitted before the node's own children are visited.
        if (elem.nodeName === lineBreakNodeName)
        {
            ret += "\n";
        }

        if (elem.nodeType !== 8) // comment node
        {
            // Recurse into this worker directly so that lineBreakNodeName is
            // honoured for descendants; going through the browser-sniffing
            // wrapper would discard the argument and re-detect it per node.
            ret += extractTextWithWhitespaceWorker(elem.childNodes, lineBreakNodeName);
        }
    }

    return ret;
}

Render a string in HTML and preserve spaces and linebreaks

Just style the content with white-space: pre-wrap;.

/* Keep runs of spaces and line breaks as written, while still wrapping long lines. */
div {
    white-space: pre-wrap;
}
<!-- The spaces and line breaks inside this div are the point of the demo. -->
<div>This is some text   with some extra spacing    and a
few newlines along with some trailing spaces        
     and five leading spaces thrown in
for                                              good
measure                                              </div>

Serialize HTMLDocument and then rendering it in the server?

Serializing a complete web page is as simple as:

// Capture the markup of everything inside <body>; the <body> tag itself and the <head> are not included.
var serialized = document.body.innerHTML;

If you really need the full document, including the head, then:

// Rebuild the markup from the contents of the first <head> element and of
// <body>. The wrapping tags are written as literals, so any attributes on
// the real <head>/<body> elements are not part of the result.
var serialized = '<head>'.concat(
    document.getElementsByTagName('head')[0].innerHTML,
    '</head><body>',
    document.body.innerHTML,
    '</body>'
);

Now all you need to do is submit it via AJAX.

About server side rendering, it depends what you mean by rendering. I'm currently using wkhtmltopdf to implement a 'save as pdf' feature on my site. It uses webKit to render the HTML prior to generating the PDF so it fully supports CSS and javascript.

And if you need to save it to an image instead of a PDF file you can always use ghostscript to print the PDF to a JPG/PNG file.



Related Topics



Leave a reply



Submit