Compression/Decompression String With C#

Compression/Decompression string with C#

The code to compress/decompress a string

/// <summary>
/// Pumps bytes from <paramref name="src"/> into <paramref name="dest"/> until
/// <paramref name="src"/> reports end of stream (a read of zero bytes).
/// </summary>
/// <param name="src">Stream to read from.</param>
/// <param name="dest">Stream to write to.</param>
public static void CopyTo(Stream src, Stream dest) {
    byte[] chunk = new byte[4096];

    for (;;) {
        int read = src.Read(chunk, 0, chunk.Length);
        if (read == 0) {
            // Zero bytes read signals the end of the source stream.
            break;
        }

        // Forward only the bytes actually read, not the whole chunk.
        dest.Write(chunk, 0, read);
    }
}

/// <summary>
/// Encodes <paramref name="str"/> as UTF-8 and GZip-compresses the resulting bytes.
/// </summary>
/// <param name="str">Text to compress.</param>
/// <returns>The GZip-compressed bytes.</returns>
public static byte[] Zip(string str) {
    byte[] utf8 = Encoding.UTF8.GetBytes(str);

    using (var compressed = new MemoryStream()) {
        // The GZipStream has to be disposed before the output buffer is read,
        // so the compressor is scoped to this inner block only.
        using (var plain = new MemoryStream(utf8))
        using (var gzip = new GZipStream(compressed, CompressionMode.Compress)) {
            CopyTo(plain, gzip);
        }

        return compressed.ToArray();
    }
}

/// <summary>
/// GZip-decompresses <paramref name="bytes"/> and decodes the result as UTF-8 text.
/// </summary>
/// <param name="bytes">GZip-compressed data.</param>
/// <returns>The decompressed text.</returns>
public static string Unzip(byte[] bytes) {
    using (var compressed = new MemoryStream(bytes))
    using (var plain = new MemoryStream()) {
        // Drain the decompressor completely and dispose it before decoding.
        using (var gzip = new GZipStream(compressed, CompressionMode.Decompress)) {
            CopyTo(gzip, plain);
        }

        byte[] utf8 = plain.ToArray();
        return Encoding.UTF8.GetString(utf8);
    }
}

// Demo: compress a highly repetitive sample string and decompress it again.
static void Main(string[] args) {
    const string sample = "StringStringStringStringStringStringStringStringStringStringStringStringStringString";

    byte[] compressed = Zip(sample);
    string restored = Unzip(compressed);
}

Remember that Zip returns a byte[], while Unzip returns a string. If you need the result of Zip as a string, Base64-encode it (for example with Convert.ToBase64String(r1)). The output of Zip is raw binary data: it cannot be printed to the screen or embedded directly in XML.

The version shown targets .NET 2.0; on .NET 4.0 or later, use the built-in Stream.CopyTo method instead of the custom CopyTo helper.

IMPORTANT: The compressed contents cannot be written to the output stream until the GZipStream knows that it has all of the input (i.e., to effectively compress it needs all of the data). You need to make sure that you Dispose() of the GZipStream before inspecting the output stream (e.g., mso.ToArray()). This is done with the using() { } block above. Note that the GZipStream is the innermost block and the contents are accessed outside of it. The same goes for decompressing: Dispose() of the GZipStream before attempting to access the data.

How to decompress a GZip Compressed String in C#?

The code shown works just fine, if we make reasonable assumptions about how it was compressed in the first place:

using System;
using System.IO;
using System.IO.Compression;
using System.Text;

/// <summary>
/// Round-trip demo: GZip-compresses a block of text and decompresses it again,
/// printing the sizes observed at each step.
/// </summary>
static class P
{
    // MASSIVELY TRUNCATED FOR POST!
    const string lipsum = @"Lorem ipsum dolor sit amet, ... ac dolor ac hendrerit.";

    static void Main()
    {
        // The figures in the comments are the ones reported for the full,
        // untruncated text.
        int charCount = lipsum.Length;
        int utf8ByteCount = Encoding.UTF8.GetByteCount(lipsum);
        Console.WriteLine(charCount);      // 61125 chars of lipsum (not shown)
        Console.WriteLine(utf8ByteCount);  // 61125 bytes of lipsum

        byte[] compressed = Compress(lipsum);
        Console.WriteLine(compressed.Length); // 16795 bytes compressed

        string roundTripped = Decompress(compressed);
        Console.WriteLine(roundTripped.Length);     // 61125 chars again when decompressed
        Console.WriteLine(roundTripped == lipsum);  // True - it worked fine
    }

    /// <summary>Encodes <paramref name="value"/> as UTF-8 and GZip-compresses it.</summary>
    private static byte[] Compress(string value)
    {
        byte[] utf8 = Encoding.UTF8.GetBytes(value);

        using (var buffer = new MemoryStream())
        {
            // Dispose the compressor before reading the buffer so all output is written.
            using (var gzip = new GZipStream(buffer, CompressionMode.Compress))
            {
                gzip.Write(utf8);
            }

            return buffer.ToArray();
        }
    }

    /// <summary>GZip-decompresses <paramref name="bytes"/> and decodes the result as UTF-8.</summary>
    private static string Decompress(byte[] bytes)
    {
        using (var output = new MemoryStream())
        {
            using (var input = new MemoryStream(bytes))
            using (var gzip = new GZipStream(input, CompressionMode.Decompress))
            {
                gzip.CopyTo(output);
            }

            return Encoding.UTF8.GetString(output.ToArray());
        }
    }
}

Gzip compression and decompression in C#

Here is a rewrite of your code that should work the way you want it to.

I wrote it in LINQPad and it can be tested in that.

Note that there's very little error checking here. You should add checks to verify that every read operation completes and has actually read the number of bytes it was supposed to, along with similar checks.

The output

original: 256
This is a test. This is a test. This is a test. This is a test. This is a test. This is a test. This is a test. This is a test. This is a test. This is a test. This is a test. This is a test. This is a test. This is a test. This is a test. This is a test.

compressed: 56
AAEAAB+LCAAAAAAABAALycgsVgCiRIWS1OISPYWQEcYHANU9d5YAAQAA

decompressed: 256
This is a test. This is a test. This is a test. This is a test. This is a test. This is a test. This is a test. This is a test. This is a test. This is a test. This is a test. This is a test. This is a test. This is a test. This is a test. This is a test.

The program

// LINQPad entry point: builds a 256-character test string, round-trips it
// through Compress/Decompress, and dumps all three values with their lengths.
void Main()
{
    string input = "This is a test. This is a test. ";

    // Double the 32-character seed three times: 32 -> 64 -> 128 -> 256.
    for (int doubling = 0; doubling < 3; doubling++)
    {
        input += input;
    }

    string compressed = Compress(input);
    string decompressed = Decompress(compressed);

    input.Dump($"original: {input.Length}");
    compressed.Dump($"compressed: {compressed.Length}");
    decompressed.Dump($"decompressed: {decompressed.Length}");
}

/// <summary>
/// Reverses the string overload of Compress: Base64-decodes <paramref name="input"/>,
/// decompresses the bytes, and decodes the result as UTF-8 text.
/// </summary>
/// <param name="input">Base64 text holding the compressed payload.</param>
/// <returns>The decompressed text.</returns>
public static string Decompress(string input)
{
    byte[] plain = Decompress(Convert.FromBase64String(input));
    return Encoding.UTF8.GetString(plain);
}

/// <summary>
/// Encodes <paramref name="input"/> as UTF-8, compresses the bytes, and returns
/// the compressed payload as Base64 text.
/// </summary>
/// <param name="input">Text to compress.</param>
/// <returns>Base64 representation of the compressed payload.</returns>
public static string Compress(string input)
{
    byte[] packed = Compress(Encoding.UTF8.GetBytes(input));
    return Convert.ToBase64String(packed);
}

/// <summary>
/// Decompresses a payload laid out as a 4-byte length header (as written by
/// <see cref="BitConverter.GetBytes(int)"/>) followed by a GZip stream.
/// </summary>
/// <param name="input">Length-prefixed GZip payload.</param>
/// <returns>The decompressed bytes; exactly as many as the header declares.</returns>
/// <exception cref="InvalidDataException">
/// The header is truncated or negative, or the GZip stream ends before the
/// declared number of bytes has been produced.
/// </exception>
public static byte[] Decompress(byte[] input)
{
    using (var source = new MemoryStream(input))
    {
        byte[] lengthBytes = new byte[4];
        if (ReadUntilFull(source, lengthBytes) != lengthBytes.Length)
        {
            throw new InvalidDataException("Input is too short to contain the 4-byte length header.");
        }

        var length = BitConverter.ToInt32(lengthBytes, 0);
        if (length < 0)
        {
            throw new InvalidDataException("Length header is negative.");
        }

        using (var decompressionStream = new GZipStream(source,
            CompressionMode.Decompress))
        {
            var result = new byte[length];

            // A single Read may return fewer bytes than requested, so keep
            // reading until the buffer is full or the stream is exhausted.
            if (ReadUntilFull(decompressionStream, result) != length)
            {
                throw new InvalidDataException("Compressed data ended before the declared length was reached.");
            }

            return result;
        }
    }
}

// Reads from stream until buffer is full or the stream ends; returns the number of bytes read.
private static int ReadUntilFull(Stream stream, byte[] buffer)
{
    int total = 0;
    while (total < buffer.Length)
    {
        int read = stream.Read(buffer, total, buffer.Length - total);
        if (read == 0)
        {
            break;
        }

        total += read;
    }

    return total;
}

/// <summary>
/// Produces a payload consisting of the 4-byte length of <paramref name="input"/>
/// (as written by <see cref="BitConverter.GetBytes(int)"/>) followed by the
/// GZip-compressed bytes of <paramref name="input"/>.
/// </summary>
/// <param name="input">Bytes to compress.</param>
/// <returns>The length-prefixed GZip payload.</returns>
public static byte[] Compress(byte[] input)
{
    using (var output = new MemoryStream())
    {
        // Header: uncompressed size, written ahead of the GZip data.
        byte[] header = BitConverter.GetBytes(input.Length);
        output.Write(header, 0, 4);

        using (var gzip = new GZipStream(output, CompressionMode.Compress))
        {
            gzip.Write(input, 0, input.Length);
            gzip.Flush();
        }

        // Read the buffer only after the compressor has been disposed.
        return output.ToArray();
    }
}

How to compress / decompress string with using SevenZip - 7Zip

These are wrong:

System.Text.Encoding.UTF8.GetString(compressedData)
Encoding.UTF8.GetBytes(compressedText)

Compressed data isn't UTF-8. And you shouldn't try to treat it as text. Always store compressed data in binary, as a byte[]. If you need to pass it through a text-only channel, such as e-mail, use Base64 encoding.

Fundamentally though, change your thinking. Compression is not a function string -> string. It's byte[] -> byte[]. It's also valid to consider it as string -> byte[].

Decompress string in java from compressed string in C#

My C# code to compress is

 /// <summary>
 /// Compresses <paramref name="text"/> and returns it as Base64 text. The decoded
 /// payload is a 4-byte length header (the UTF-8 byte count of the text, as
 /// written by BitConverter.GetBytes) followed by the GZip-compressed UTF-8 bytes.
 /// </summary>
 /// <param name="text">Text to compress.</param>
 /// <returns>Base64 representation of the length-prefixed GZip payload.</returns>
 private string Compress(string text)
 {
     byte[] buffer = Encoding.UTF8.GetBytes(text);

     using (MemoryStream ms = new MemoryStream())
     {
         // Write the length header straight into the output stream so the
         // final payload needs no extra copy buffers.
         ms.Write(BitConverter.GetBytes(buffer.Length), 0, 4);

         // leaveOpen keeps ms usable after the compressor is disposed; disposing
         // the GZipStream is what flushes the remaining compressed data.
         using (GZipStream zip = new GZipStream(ms, CompressionMode.Compress, true))
         {
             zip.Write(buffer, 0, buffer.Length);
         }

         return Convert.ToBase64String(ms.ToArray());
     }
 }

Java code to decompress the text is

/**
 * Decompresses text produced by the C# Compress method: Base64 text whose
 * decoded form is a 4-byte length header followed by GZip-compressed UTF-8 bytes.
 *
 * @param compressedText Base64 text holding the length-prefixed GZip payload
 * @return the decompressed text, decoded as UTF-8
 * @throws java.io.IOException if the payload is shorter than the header or the
 *         GZip data cannot be read
 */
private String Decompress(String compressedText) throws java.io.IOException
{
    final int HEADER_SIZE = 4;
    final int BUFFER_SIZE = 32;

    byte[] compressed = org.apache.commons.codec.binary.Base64.decodeBase64(
            compressedText.getBytes(java.nio.charset.StandardCharsets.UTF_8));
    if (compressed.length < HEADER_SIZE)
    {
        throw new java.io.IOException("Compressed payload is shorter than the 4-byte length header");
    }

    // Collect raw bytes first and decode once at the end: decoding each chunk
    // separately would corrupt multi-byte UTF-8 characters split across chunks.
    java.io.ByteArrayOutputStream decompressed = new java.io.ByteArrayOutputStream();

    // Skip the length header by starting the input view at HEADER_SIZE.
    try (GZIPInputStream gis = new GZIPInputStream(
            new ByteArrayInputStream(compressed, HEADER_SIZE, compressed.length - HEADER_SIZE),
            BUFFER_SIZE))
    {
        byte[] data = new byte[BUFFER_SIZE];
        int bytesRead;
        while ((bytesRead = gis.read(data)) != -1)
        {
            decompressed.write(data, 0, bytesRead);
        }
    }

    // The C# side encodes the text as UTF-8, so decode with that charset
    // explicitly rather than the platform default.
    return new String(decompressed.toByteArray(), java.nio.charset.StandardCharsets.UTF_8);
}
/**
 * Copies bytes from {@code b1}, starting at {@code srcoffset}, into {@code b2},
 * starting at {@code dstoffset}. Copies as many bytes as fit: the smaller of
 * what remains in the source and what remains in the destination.
 *
 * @param b1 source array
 * @param b2 destination array
 * @param srcoffset index in {@code b1} of the first byte to copy
 * @param dstoffset index in {@code b2} where the first byte is written
 * @return {@code b2}, for call chaining
 */
private byte[] copyForDecompression(byte[] b1,byte[] b2,int srcoffset,int dstoffset)
{
    // Bound the copy by both arrays so neither side is indexed out of range.
    int count = Math.min(b1.length - srcoffset, b2.length - dstoffset);
    if (count > 0)
    {
        System.arraycopy(b1, srcoffset, b2, dstoffset, count);
    }
    return b2;
}

This code works perfectly fine for me.

Client side decompression back to string from C# compression of string

You need to use pako.inflate in your frontend.

Additionally, in the frontend you need to strip the 4-byte size prefix that was added to the front of gzBuffer before inflating the data.

Something like this should work:





// "cookies rule the world" compressed with your c# code
const sample = "FgAAAB+LCAAAAAAABABLzs/PzkwtVigqzUlVKMlIVSjPL8pJAQBkkN7rFgAAAA==";

// Decode Base64 and convert the binary string to a Uint8Array.
const binary = atob(sample);
const bytes = Uint8Array.from(binary, c => c.charCodeAt(0));

// The C# code prepends a 4-byte length to gzBuffer, so skip those bytes
// to get at the gzip data itself.
const gzipBytes = bytes.subarray(4);

// Inflate the message.
const inflated = pako.inflate(gzipBytes);

// The C# side encodes the text as UTF-8, so decode it as UTF-8.
// String.fromCharCode.apply would treat each byte as its own character
// (garbling non-ASCII text) and can hit the argument-count limit on large payloads.
const message = new TextDecoder("utf-8").decode(inflated);

console.log(message);
<script src="https://raw.githubusercontent.com/danguer/blog-examples/master/js/base64-binary.js"></script>

<script src="https://unpkg.com/pako@1.0.10/dist/pako.min.js"></script>


Related Topics



Leave a reply



Submit