How to Calculate Md5 Hash of a File Using JavaScript

How to calculate md5 hash of a file using javascript

While there are JS implementations of the MD5 algorithm, older browsers are generally unable to read files from the local filesystem.

I wrote that in 2009. So what about new browsers?

With a browser that supports the FileAPI, you can read the contents of a file - the user has to have selected it, either with an <input> element or drag-and-drop. As of Jan 2013, here's how the major browsers stack up:

  • FF 3.6 supports FileReader, FF4 supports even more file based functionality
  • Chrome has supported the FileAPI since version 7.0.517.41
  • Internet Explorer 10 has partial FileAPI support
  • Opera 11.10 has partial support for FileAPI
  • Safari - I couldn't find a good official source for this, but this site suggests partial support from 5.1, full support for 6.0. Another article reports some inconsistencies with the older Safari versions

How?

See the answer below by Benny Neugebauer which uses the MD5 function of CryptoJS

Calculate MD5 hash of a large file using javascript

CryptoJS has a progressive API for hash digests. The rest is taken from alediaferia's answer with slight modifications.

/**
 * Click handler for the demo page: hashes the first file selected in the
 * "my-file-input" element, logging progress while reading and then either
 * the resulting hash or the failure.
 */
function process() {
  const selectedFile = document.getElementById("my-file-input").files[0];
  const reportProgress = (prog) => console.log("Progress: " + prog);
  const reportResult = (res) => console.log(res);
  const reportFailure = (err) => console.error(err);

  getMD5(selectedFile, reportProgress).then(reportResult, reportFailure);
}
/**
 * Reads `file` sequentially in slices and hands each slice to a callback.
 *
 * @param {Blob} file - Source to read; only `size` and `slice()` are used.
 * @param {function(string, number, number)} chunkCallback - Called for every
 *   slice with (binaryString, bytesReadSoFar, totalSize).
 * @param {function(*)} endCallback - Called exactly once: with `null` after
 *   the last slice, or with the error/event that stopped the read.
 * @param {number} [chunkSize] - Slice size in bytes; defaults to 4MB.
 */
function readChunked(file, chunkCallback, endCallback, chunkSize = 4 * 1024 * 1024) {
  const fileSize = file.size;
  let offset = 0;

  const reader = new FileReader();

  function readNext() {
    const fileSlice = file.slice(offset, offset + chunkSize);
    reader.readAsBinaryString(fileSlice);
  }

  reader.onload = function() {
    if (reader.error) {
      endCallback(reader.error || {});
      return;
    }
    offset += reader.result.length;
    try {
      // callback for handling read chunk
      chunkCallback(reader.result, offset, fileSize);
    } catch (err) {
      // A failing consumer must still end the read; otherwise callers
      // waiting on endCallback would wait forever.
      endCallback(err || {});
      return;
    }
    if (offset >= fileSize) {
      endCallback(null);
      return;
    }
    readNext();
  };

  reader.onerror = function(err) {
    endCallback(err || {});
  };

  readNext();
}
/**
 * Computes the MD5 of `blob` incrementally, feeding each chunk delivered by
 * readChunked() into CryptoJS's progressive hasher.
 *
 * @param {Blob} blob - Data to hash.
 * @param {function(number)} [cbProgress] - Optional; called after every chunk
 *   with the fraction read so far (bytesRead / totalSize).
 * @returns {Promise<string>} Resolves with the hex digest; rejects with the
 *   read error, or with any error thrown while finalizing the hash.
 */
function getMD5(blob, cbProgress) {
  return new Promise((resolve, reject) => {
    const md5 = CryptoJS.algo.MD5.create();
    readChunked(
      blob,
      (chunk, offs, total) => {
        // Chunks arrive as binary strings (one char per byte), so they must
        // be parsed as Latin1 rather than as text.
        md5.update(CryptoJS.enc.Latin1.parse(chunk));
        if (cbProgress) {
          // An empty file has total === 0; report it as complete, not NaN.
          cbProgress(total > 0 ? offs / total : 1);
        }
      },
      (err) => {
        if (err) {
          reject(err);
          return;
        }
        try {
          const hash = md5.finalize();
          resolve(hash.toString(CryptoJS.enc.Hex));
        } catch (finalizeErr) {
          // This runs outside the executor, so a throw here would otherwise
          // leave the promise pending.
          reject(finalizeErr);
        }
      }
    );
  });
}
<script src="https://cdnjs.cloudflare.com/ajax/libs/crypto-js/3.1.2/components/core.js"></script><script src="https://cdnjs.cloudflare.com/ajax/libs/crypto-js/3.1.2/components/md5.js"></script><input id="my-file-input" type="file"><button onclick="process()">Process</button>

MD5 hash of a file using javascript

I used the spark-md5.js from https://github.com/satazor/SparkMD5
It is awesome and pretty fast. This is the best solution if someone is trying to calculate the MD5 of any uploaded file.

How to generate an MD5 file hash in JavaScript/Node.js?

You could use crypto-js.

I would also recommend using SHA256, rather than MD5.

To install crypto-js via NPM:

npm install crypto-js

Alternatively you can use a CDN and reference the JS file.

Then to display a MD5 and SHA256 hash, you can do the following:

<script type="text/javascript">
// Hash two sample strings with the CryptoJS global (must already be loaded,
// e.g. from the NPM package or a CDN script tag).
var md5Hash = CryptoJS.MD5("Test");
var sha256Hash = CryptoJS.SHA256("Test1");

// Print the string form of each digest.
console.log(md5Hash.toString());
console.log(sha256Hash.toString());
</script>

Working example located here, JSFiddle

There are also other JS functions that will generate an MD5 hash, outlined below.

http://www.myersdaily.org/joseph/javascript/md5-text.html

http://pajhome.org.uk/crypt/md5/md5.html

/**
 * Processes one block `k` and folds the result into the running state `x`.
 *
 * x: array whose first four entries are the current state words; they are
 *    updated in place (nothing is returned).
 * k: array of the 16 words of the block (indices 0..15 are read).
 *
 * The body is four groups of 16 steps, using ff, gg, hh and ii respectively.
 * Each step overwrites one of a/b/c/d and feeds the next, so statement
 * order matters.
 */
function md5cycle(x, k) {
var a = x[0], b = x[1], c = x[2], d = x[3];

// Group 1 (ff): reads k[0]..k[15] in order; shifts cycle 7, 12, 17, 22.
a = ff(a, b, c, d, k[0], 7, -680876936);
d = ff(d, a, b, c, k[1], 12, -389564586);
c = ff(c, d, a, b, k[2], 17, 606105819);
b = ff(b, c, d, a, k[3], 22, -1044525330);
a = ff(a, b, c, d, k[4], 7, -176418897);
d = ff(d, a, b, c, k[5], 12, 1200080426);
c = ff(c, d, a, b, k[6], 17, -1473231341);
b = ff(b, c, d, a, k[7], 22, -45705983);
a = ff(a, b, c, d, k[8], 7, 1770035416);
d = ff(d, a, b, c, k[9], 12, -1958414417);
c = ff(c, d, a, b, k[10], 17, -42063);
b = ff(b, c, d, a, k[11], 22, -1990404162);
a = ff(a, b, c, d, k[12], 7, 1804603682);
d = ff(d, a, b, c, k[13], 12, -40341101);
c = ff(c, d, a, b, k[14], 17, -1502002290);
b = ff(b, c, d, a, k[15], 22, 1236535329);

// Group 2 (gg): starts at k[1] and steps the index by 5 (mod 16); shifts cycle 5, 9, 14, 20.
a = gg(a, b, c, d, k[1], 5, -165796510);
d = gg(d, a, b, c, k[6], 9, -1069501632);
c = gg(c, d, a, b, k[11], 14, 643717713);
b = gg(b, c, d, a, k[0], 20, -373897302);
a = gg(a, b, c, d, k[5], 5, -701558691);
d = gg(d, a, b, c, k[10], 9, 38016083);
c = gg(c, d, a, b, k[15], 14, -660478335);
b = gg(b, c, d, a, k[4], 20, -405537848);
a = gg(a, b, c, d, k[9], 5, 568446438);
d = gg(d, a, b, c, k[14], 9, -1019803690);
c = gg(c, d, a, b, k[3], 14, -187363961);
b = gg(b, c, d, a, k[8], 20, 1163531501);
a = gg(a, b, c, d, k[13], 5, -1444681467);
d = gg(d, a, b, c, k[2], 9, -51403784);
c = gg(c, d, a, b, k[7], 14, 1735328473);
b = gg(b, c, d, a, k[12], 20, -1926607734);

// Group 3 (hh): starts at k[5] and steps the index by 3 (mod 16); shifts cycle 4, 11, 16, 23.
a = hh(a, b, c, d, k[5], 4, -378558);
d = hh(d, a, b, c, k[8], 11, -2022574463);
c = hh(c, d, a, b, k[11], 16, 1839030562);
b = hh(b, c, d, a, k[14], 23, -35309556);
a = hh(a, b, c, d, k[1], 4, -1530992060);
d = hh(d, a, b, c, k[4], 11, 1272893353);
c = hh(c, d, a, b, k[7], 16, -155497632);
b = hh(b, c, d, a, k[10], 23, -1094730640);
a = hh(a, b, c, d, k[13], 4, 681279174);
d = hh(d, a, b, c, k[0], 11, -358537222);
c = hh(c, d, a, b, k[3], 16, -722521979);
b = hh(b, c, d, a, k[6], 23, 76029189);
a = hh(a, b, c, d, k[9], 4, -640364487);
d = hh(d, a, b, c, k[12], 11, -421815835);
c = hh(c, d, a, b, k[15], 16, 530742520);
b = hh(b, c, d, a, k[2], 23, -995338651);

// Group 4 (ii): starts at k[0] and steps the index by 7 (mod 16); shifts cycle 6, 10, 15, 21.
a = ii(a, b, c, d, k[0], 6, -198630844);
d = ii(d, a, b, c, k[7], 10, 1126891415);
c = ii(c, d, a, b, k[14], 15, -1416354905);
b = ii(b, c, d, a, k[5], 21, -57434055);
a = ii(a, b, c, d, k[12], 6, 1700485571);
d = ii(d, a, b, c, k[3], 10, -1894986606);
c = ii(c, d, a, b, k[10], 15, -1051523);
b = ii(b, c, d, a, k[1], 21, -2054922799);
a = ii(a, b, c, d, k[8], 6, 1873313359);
d = ii(d, a, b, c, k[15], 10, -30611744);
c = ii(c, d, a, b, k[6], 15, -1560198380);
b = ii(b, c, d, a, k[13], 21, 1309151649);
a = ii(a, b, c, d, k[4], 6, -145523070);
d = ii(d, a, b, c, k[11], 10, -1120210379);
c = ii(c, d, a, b, k[2], 15, 718787259);
b = ii(b, c, d, a, k[9], 21, -343485551);

// Add the transformed words back onto the incoming state, in place.
x[0] = add32(a, x[0]);
x[1] = add32(b, x[1]);
x[2] = add32(c, x[2]);
x[3] = add32(d, x[3]);

}

/**
 * Shared step used by ff/gg/hh/ii: sums a, q, x and t with add32, rotates
 * the 32-bit sum left by `s` bits, then adds `b`.
 */
function cmn(q, a, b, x, s, t) {
  const sum = add32(add32(a, q), add32(x, t));
  const rotated = (sum << s) | (sum >>> (32 - s));
  return add32(rotated, b);
}

/** Step using the mix (b AND c) OR (NOT b AND d), passed to cmn as `q`. */
function ff(a, b, c, d, x, s, t) {
  const mixed = (b & c) | (~b & d);
  return cmn(mixed, a, b, x, s, t);
}

/** Step using the mix (b AND d) OR (c AND NOT d), passed to cmn as `q`. */
function gg(a, b, c, d, x, s, t) {
  const mixed = (b & d) | (c & ~d);
  return cmn(mixed, a, b, x, s, t);
}

/** Step using the mix b XOR c XOR d, passed to cmn as `q`. */
function hh(a, b, c, d, x, s, t) {
  const mixed = b ^ c ^ d;
  return cmn(mixed, a, b, x, s, t);
}

/** Step using the mix c XOR (b OR NOT d), passed to cmn as `q`. */
function ii(a, b, c, d, x, s, t) {
  const mixed = c ^ (b | ~d);
  return cmn(mixed, a, b, x, s, t);
}

/**
 * Computes the raw MD5 state for string `s`.
 *
 * Every full 64-character block goes through md5blk() and md5cycle(); the
 * remaining characters are packed into a 16-word tail block together with
 * the 0x80 terminator and the message length in bits.
 *
 * Each character is treated as one byte via charCodeAt(), so characters
 * above 0xFF are not handled.
 *
 * @param {string} s - Input string.
 * @returns {number[]} The four state words.
 */
function md51(s) {
  const n = s.length;
  const state = [1732584193, -271733879, -1732584194, 271733878];

  // Consume all complete 64-character blocks.
  let i;
  for (i = 64; i <= n; i += 64) {
    md5cycle(state, md5blk(s.substring(i - 64, i)));
  }

  // Pack the leftover (< 64 chars) into words, first character in the low byte.
  const rest = s.substring(i - 64);
  const tail = new Array(16).fill(0);
  for (i = 0; i < rest.length; i++) {
    tail[i >> 2] |= rest.charCodeAt(i) << ((i % 4) << 3);
  }
  // Terminator byte goes directly after the last message byte.
  tail[i >> 2] |= 0x80 << ((i % 4) << 3);

  // With more than 55 leftover bytes the length word no longer fits in this
  // block, so flush it and continue with an all-zero block.
  if (i > 55) {
    md5cycle(state, tail);
    tail.fill(0);
  }
  tail[14] = n * 8; // message length in bits
  md5cycle(state, tail);
  return state;
}

/* there needs to be support for Unicode here,
* unless we pretend that we can redefine the MD-5
* algorithm for multi-byte characters (perhaps
* by adding every four 16-bit characters and
* shortening the sum to 32 bits). Otherwise
* I suggest performing MD-5 as if every character
* was two bytes--e.g., 0040 0025 = @%--but then
* how will an ordinary MD-5 sum be matched?
* There is no way to standardize text to something
* like UTF-8 before transformation; speed cost is
* utterly prohibitive. The JavaScript standard
* itself needs to look at this: it should start
* providing access to strings as preformed UTF-8
* 8-bit unsigned value arrays.
*/
/**
 * Packs a 64-character block into 16 words, four characters per word with
 * the first character in the lowest byte. Characters are read with
 * charCodeAt(), i.e. one character is treated as one byte.
 */
function md5blk(s) {
  return Array.from({ length: 16 }, (_, wordIndex) => {
    const base = wordIndex * 4;
    return s.charCodeAt(base)
      + (s.charCodeAt(base + 1) << 8)
      + (s.charCodeAt(base + 2) << 16)
      + (s.charCodeAt(base + 3) << 24);
  });
}

// Lookup table: nibble value (0-15) -> lowercase hex digit.
var hex_chr = Array.from('0123456789abcdef');

/**
 * Formats the low 32 bits of `n` as 8 hex digits, lowest byte first
 * (within each byte the high nibble comes first).
 */
function rhex(n) {
  let digits = '';
  for (let byteIndex = 0; byteIndex < 4; byteIndex += 1) {
    const shift = byteIndex * 8;
    const highNibble = (n >> (shift + 4)) & 0x0F;
    const lowNibble = (n >> shift) & 0x0F;
    digits += hex_chr[highNibble] + hex_chr[lowNibble];
  }
  return digits;
}

/**
 * Replaces every word of `x` with its rhex() string (in place) and returns
 * the strings joined together.
 */
function hex(x) {
  for (const index of x.keys()) {
    x[index] = rhex(x[index]);
  }
  return x.join('');
}

/** Returns the MD5 of string `s` as a hex string (md51 state formatted by hex). */
function md5(s) {
  const state = md51(s);
  return hex(state);
}

/* this function is much faster,
so if possible we use it. Some IEs
are the only ones I know of that
need the idiotic second function,
generated by an if clause. */

/** Adds two numbers and wraps the result to a signed 32-bit integer. */
function add32(a, b) {
  const sum = a + b;
  return sum | 0;
}

// Self-test: if the fast add32 yields a wrong digest for a known input,
// switch to a 32-bit add that works on 16-bit halves.
// The replacement is assigned rather than declared: a function declaration
// nested in an `if` block is block-scoped in strict mode / ES modules and
// would never replace the outer add32.
if (md5('hello') !== '5d41402abc4b2a76b9719d911017c592') {
  add32 = function (x, y) {
    var lsw = (x & 0xFFFF) + (y & 0xFFFF),
      msw = (x >> 16) + (y >> 16) + (lsw >> 16); // carry from the low half
    return (msw << 16) | (lsw & 0xFFFF);
  };
}

Then simply use the MD5 function, as shown below:

// Show the hash of a sample string in an alert dialog.
alert(md5("Test string"));

Another working JS Fiddle here

Calculate client side MD5 hash of an image file to match one calculated by Firebase?

I would have commented but reputation doesn't allow me to.

KeEEFMfHt622EzCwL4893A== is base64 encoded. Yours is hex encoded.

I just used an online converter and it checks out: the two values match.

How to calculate md5 checksum of blob using CryptoJS

Use the following code to create a correct md5 hash:

  /**
   * Reads `blob` as an ArrayBuffer and reports its MD5 checksum.
   *
   * @param {Blob} blob - Data to hash.
   * @param {function(?string, *=)} callback - Called once with the hash
   *   string on success, or with (null, error) when the read failed.
   */
  function calculateMd5(blob, callback) {
    const reader = new FileReader();
    reader.onloadend = function () {
      // onloadend fires after failed reads too; hashing the missing result
      // would silently report the checksum of empty data.
      if (reader.error || reader.result == null) {
        console.error("MD5 Checksum failed", reader.error);
        callback(null, reader.error);
        return;
      }
      const wordArray = CryptoJS.lib.WordArray.create(reader.result);
      const hash = CryptoJS.MD5(wordArray).toString();
      // or CryptoJS.SHA256(wordArray).toString(); for SHA-2
      console.log("MD5 Checksum", hash);
      callback(hash);
    };
    reader.readAsArrayBuffer(blob);
  }

Update (a bit simpler):

 /**
  * Reads `blob` as a binary string and reports its MD5 checksum.
  *
  * @param {Blob} blob - Data to hash.
  * @param {function(?string, *=)} callback - Called once with the hash
  *   string on success, or with (null, error) when the read failed.
  */
 function calculateMd5(blob, callback) {
   const reader = new FileReader();
   reader.onloadend = function () {
     // onloadend fires after failed reads too; do not hash a missing result.
     if (reader.error || reader.result == null) {
       console.error("MD5 Checksum failed", reader.error);
       callback(null, reader.error);
       return;
     }
     // A binary string holds one byte per character. Passing it to
     // CryptoJS.MD5 as a plain string would hash it as text, which changes
     // every byte >= 0x80; parse it as Latin1 to hash the bytes themselves.
     const bytes = CryptoJS.enc.Latin1.parse(reader.result);
     const hash = CryptoJS.MD5(bytes).toString();
     // or CryptoJS.SHA256(bytes).toString(); for SHA-2
     console.log("MD5 Checksum", hash);
     callback(hash);
   };
   reader.readAsBinaryString(blob);
 }

Be sure to include core.js, lib-typedarrays.js (important) and md5.js components from CryptoJS library.

Please see this fiddle for a complete example (because of origin access control it won't work on fiddle, try it on your local server).

MD5 checksum not calculated properly for files other than txt?

Response.text() reads the response stream and converts it to a string using UTF-8 encoding. Arbitrary binary data that is not valid UTF-8 (e.g. images, videos, etc.) will be corrupted in this process; see also the other answer.

This is prevented by using Response.arrayBuffer() instead, which simply stores the data unchanged in an ArrayBuffer.

Since CryptoJS works internally with WordArrays, a further conversion of the ArrayBuffer into a WordArray is necessary.

The following fix works on my machine:

(async () => {

const getMd5 = async(fileObject) => {
let md5 = "";
try {
const fileObjectUrl = URL.createObjectURL(blob);
const blobText = await fetch(fileObjectUrl)
.then((res) => res.blob())
.then((res) => new Response(res).arrayBuffer()); // Convert to ArrayBuffer
const hash = CryptoJS.MD5(CryptoJS.lib.WordArray.create(blobText)); // Import as WordArray
md5 = hash.toString(CryptoJS.enc.Hex);
} catch (err) {
console.log("Error occured getMd5:", err);
}
return md5;
};

const blob = new Blob([new Uint8Array([0x01, 0x02, 0x03, 0x7f, 0x80, 0x81, 0xfd, 0xfe, 0xff])]);
console.log(await(getMd5(blob)));

})();
<script src="https://cdnjs.cloudflare.com/ajax/libs/crypto-js/4.0.0/crypto-js.min.js"></script>

Decoding a MD5 hash based on a given formula

  • You can estimate the length of the secret email based on the provided hash (which is a sum of length(email)-1 hashes). MD5 hashes are 128 bits long, and for any given input you expect roughly half of those bits to be on and half to be off. Each MD5 value therefore lies between 0 and 2^128 - 1 (all bits on), with an expected value of about 2^127; so if the sum you were given is in the range 14*2^127 to 16*2^127 (i.e. roughly 15 hashes added together), you should expect around 16 characters of e-mail to guess.
  • It is astronomically unlikely that, for a random input, the result of following their procedure of summing up hashes-of-altered-substrings will result in the desired output. So you can simply build strings of the expected length until one hashes to the expected result. Therefore, brute force can work, because if you get the right result, you (with overwhelming probability) have the right email.
  • Any information that reduces the search-space will make the process much, much faster. In particular, since you are looking for an e-mail address, it only makes sense to look for valid addresses - greatly reducing the search-space. If you suspect that they may be using the dijitalgaraj.com domain, that's a very large chunk of search-space that you no longer need to look into (16 characters, and 17 if you include the @ sign).
  • Building an efficient brute-force cracker will be a significant hurdle; but in general, the only way to brute-force faster is either to reduce the search-space (see above) or search faster. To search faster, you should go as close to the metal as possible (for example, use SIMD instructions to calculate several hashes at the same time in the CPU) and/or use as many machines as possible computing hashes in parallel. You tag the question as JS - but I would certainly not use JS for an efficient, cluster-friendly cracker.

What if, instead of summing the hashes, you were concatenating them?

  • Then the problem becomes easy, because you can solve each round incrementally, instead of having to guess the full secret correctly all-at-once.

// Build the value to attack. A real attacker starts from this value and
// does not know the second argument.
const full_hashed_secret = full("me@home.com", "secret@somewhere.org");
console.log("will try to crack", full_hashed_secret);

// Cut the concatenated value back into one piece per round.
const rounds = unjoin(full_hashed_secret, 32);

// Recover the secret round by round, extending the known prefix each time.
let known = "";
for (const [roundIndex, roundHash] of rounds.entries()) {
  known = known + crack("me@home.com", roundHash, known);
  console.log("guessing for round", roundIndex, roundHash, "->", known);
}

// last line should have answer: eureka!

// the brute-force part: find the next character(s) of the secret by trial and error
/**
 * Brute-forces the next piece of the secret for one round.
 *
 * With an empty `prefix` the first two characters are guessed together and
 * returned as a pair; otherwise a single character is guessed and returned.
 * Returns "??" when nothing in the charset reproduces `hash_of_round`.
 */
function crack(your_email_address, hash_of_round, prefix) {
  // charset to use; whatever is valid in an e-mail (and yes, this is incomplete)
  const charset = "abcdefghijklmnopqrstuvwxyz0123456789@.";
  const reproducesRound = (candidate) =>
    round(your_email_address, candidate) === hash_of_round;

  if (prefix !== "") {
    for (const next of charset) {
      if (reproducesRound(prefix + next)) {
        return next;
      }
    }
    // no valid guesses, incomplete charset or bad prefix?
    return "??";
  }

  // no prefix, must guess 2 in a row
  for (const first of charset) {
    for (const second of charset) {
      const pair = first + second;
      if (reproducesRound(pair)) {
        return pair;
      }
    }
  }
  // no valid guesses, incomplete charset or bad prefix?
  return "??";
}

// un-join a string, splitting it into substrings of length n
/**
 * Splits `s` into consecutive substrings of length `n`; the last one may be
 * shorter.
 *
 * @param {string} s - String to split.
 * @param {number} n - Piece length; must be greater than zero.
 * @returns {string[]} The pieces, in order.
 * @throws {RangeError} If `n` is not a positive number (the loop below would
 *   otherwise never advance).
 */
function unjoin(s, n) {
  if (!(n > 0)) {
    throw new RangeError("unjoin: n must be a positive number, got " + n);
  }
  const parts = [];
  for (let i = 0; i < s.length; i += n) {
    parts.push(s.slice(i, i + n));
  }
  return parts;
}

// (increasing rounds) and then added all together as the hash
// with "added all together" understood as "concatenated together"
/**
 * Concatenates one round() result for every prefix of `secret` of length 2
 * up to the full length. Secrets shorter than 2 characters give "".
 */
function full(your_email_address, secret) {
  const roundCount = Math.max(secret.length - 1, 0);
  const hashes = Array.from({ length: roundCount }, (_, k) =>
    round(your_email_address, secret.substring(0, k + 2))
  );
  return hashes.join("");
}

// one round: md5(md5(your_email_address) + x + md5(x))
// + is understood again as concatenation
// (but it would not make much of a difference to use addition)
/** One round: md5 of (md5 of the address, then `x`, then md5 of `x`) concatenated. */
function round(your_email_address, x) {
  const addressDigest = md5(your_email_address);
  const candidateDigest = md5(x);
  return md5(addressDigest + x + candidateDigest);
}

// from https://stackoverflow.com/a/60467595/15472
// probably not the fastest, but I only want to prove a point
/**
 * Self-contained MD5: returns the digest of `inputString` as a 32-character
 * lowercase hex string. All helpers are local to this function.
 * Characters are read with charCodeAt() and packed as single bytes.
 */
function md5(inputString) {
var hc="0123456789abcdef";
// rh: formats a 32-bit word as 8 hex digits, lowest byte first.
function rh(n) {var j,s="";for(j=0;j<=3;j++) s+=hc.charAt((n>>(j*8+4))&0x0F)+hc.charAt((n>>(j*8))&0x0F);return s;}
// ad: 32-bit addition done on 16-bit halves, carrying from the low half.
function ad(x,y) {var l=(x&0xFFFF)+(y&0xFFFF);var m=(x>>16)+(y>>16)+(l>>16);return (m<<16)|(l&0xFFFF);}
// rl: rotates the 32-bit value n left by c bits.
function rl(n,c) {return (n<<c)|(n>>>(32-c));}
// cm: shared step - sum a, q, x and t, rotate left by s, then add b.
function cm(q,a,b,x,s,t) {return ad(rl(ad(ad(a,q),ad(x,t)),s),b);}
// ff/gg/hh/ii: the four step variants; each only differs in how b, c, d are mixed.
function ff(a,b,c,d,x,s,t) {return cm((b&c)|((~b)&d),a,b,x,s,t);}
function gg(a,b,c,d,x,s,t) {return cm((b&d)|(c&(~d)),a,b,x,s,t);}
function hh(a,b,c,d,x,s,t) {return cm(b^c^d,a,b,x,s,t);}
function ii(a,b,c,d,x,s,t) {return cm(c^(b|(~d)),a,b,x,s,t);}
// sb: converts the string into an array of words (16 per block), appends
// the 0x80 terminator and stores the length in bits in the second-to-last word.
function sb(x) {
var i;var nblk=((x.length+8)>>6)+1;var blks=new Array(nblk*16);for(i=0;i<nblk*16;i++) blks[i]=0;
for(i=0;i<x.length;i++) blks[i>>2]|=x.charCodeAt(i)<<((i%4)*8);
blks[i>>2]|=0x80<<((i%4)*8);blks[nblk*16-2]=x.length*8;return blks;
}
var i,x=sb(inputString),a=1732584193,b=-271733879,c=-1732584194,d=271733878,olda,oldb,oldc,oldd;
// Process 16 words per iteration; the state before each block is saved so
// it can be added back at the end of the iteration.
for(i=0;i<x.length;i+=16) {olda=a;oldb=b;oldc=c;oldd=d;
a=ff(a,b,c,d,x[i+ 0], 7, -680876936);d=ff(d,a,b,c,x[i+ 1],12, -389564586);c=ff(c,d,a,b,x[i+ 2],17, 606105819);
b=ff(b,c,d,a,x[i+ 3],22,-1044525330);a=ff(a,b,c,d,x[i+ 4], 7, -176418897);d=ff(d,a,b,c,x[i+ 5],12, 1200080426);
c=ff(c,d,a,b,x[i+ 6],17,-1473231341);b=ff(b,c,d,a,x[i+ 7],22, -45705983);a=ff(a,b,c,d,x[i+ 8], 7, 1770035416);
d=ff(d,a,b,c,x[i+ 9],12,-1958414417);c=ff(c,d,a,b,x[i+10],17, -42063);b=ff(b,c,d,a,x[i+11],22,-1990404162);
a=ff(a,b,c,d,x[i+12], 7, 1804603682);d=ff(d,a,b,c,x[i+13],12, -40341101);c=ff(c,d,a,b,x[i+14],17,-1502002290);
b=ff(b,c,d,a,x[i+15],22, 1236535329);a=gg(a,b,c,d,x[i+ 1], 5, -165796510);d=gg(d,a,b,c,x[i+ 6], 9,-1069501632);
c=gg(c,d,a,b,x[i+11],14, 643717713);b=gg(b,c,d,a,x[i+ 0],20, -373897302);a=gg(a,b,c,d,x[i+ 5], 5, -701558691);
d=gg(d,a,b,c,x[i+10], 9, 38016083);c=gg(c,d,a,b,x[i+15],14, -660478335);b=gg(b,c,d,a,x[i+ 4],20, -405537848);
a=gg(a,b,c,d,x[i+ 9], 5, 568446438);d=gg(d,a,b,c,x[i+14], 9,-1019803690);c=gg(c,d,a,b,x[i+ 3],14, -187363961);
b=gg(b,c,d,a,x[i+ 8],20, 1163531501);a=gg(a,b,c,d,x[i+13], 5,-1444681467);d=gg(d,a,b,c,x[i+ 2], 9, -51403784);
c=gg(c,d,a,b,x[i+ 7],14, 1735328473);b=gg(b,c,d,a,x[i+12],20,-1926607734);a=hh(a,b,c,d,x[i+ 5], 4, -378558);
d=hh(d,a,b,c,x[i+ 8],11,-2022574463);c=hh(c,d,a,b,x[i+11],16, 1839030562);b=hh(b,c,d,a,x[i+14],23, -35309556);
a=hh(a,b,c,d,x[i+ 1], 4,-1530992060);d=hh(d,a,b,c,x[i+ 4],11, 1272893353);c=hh(c,d,a,b,x[i+ 7],16, -155497632);
b=hh(b,c,d,a,x[i+10],23,-1094730640);a=hh(a,b,c,d,x[i+13], 4, 681279174);d=hh(d,a,b,c,x[i+ 0],11, -358537222);
c=hh(c,d,a,b,x[i+ 3],16, -722521979);b=hh(b,c,d,a,x[i+ 6],23, 76029189);a=hh(a,b,c,d,x[i+ 9], 4, -640364487);
d=hh(d,a,b,c,x[i+12],11, -421815835);c=hh(c,d,a,b,x[i+15],16, 530742520);b=hh(b,c,d,a,x[i+ 2],23, -995338651);
a=ii(a,b,c,d,x[i+ 0], 6, -198630844);d=ii(d,a,b,c,x[i+ 7],10, 1126891415);c=ii(c,d,a,b,x[i+14],15,-1416354905);
b=ii(b,c,d,a,x[i+ 5],21, -57434055);a=ii(a,b,c,d,x[i+12], 6, 1700485571);d=ii(d,a,b,c,x[i+ 3],10,-1894986606);
c=ii(c,d,a,b,x[i+10],15, -1051523);b=ii(b,c,d,a,x[i+ 1],21,-2054922799);a=ii(a,b,c,d,x[i+ 8], 6, 1873313359);
d=ii(d,a,b,c,x[i+15],10, -30611744);c=ii(c,d,a,b,x[i+ 6],15,-1560198380);b=ii(b,c,d,a,x[i+13],21, 1309151649);
a=ii(a,b,c,d,x[i+ 4], 6, -145523070);d=ii(d,a,b,c,x[i+11],10,-1120210379);c=ii(c,d,a,b,x[i+ 2],15, 718787259);
b=ii(b,c,d,a,x[i+ 9],21, -343485551);a=ad(a,olda);b=ad(b,oldb);c=ad(c,oldc);d=ad(d,oldd);
}
// Digest = the four state words, each formatted by rh, in order a, b, c, d.
return rh(a)+rh(b)+rh(c)+rh(d);
}


Related Topics



Leave a reply



Submit