How to Split a Huge Zip File into Multiple Volumes

How to split a huge zip file into multiple volumes?

Check: http://saloon.javaranch.com/cgi-bin/ubb/ultimatebb.cgi?ubb=get_topic&f=38&t=004618

I am not aware of any public API that will help you do that.
(Although if you do not want to do it programmatically, there are utilities like WinSplitter that will do it.)

I have not tried it, but every ZipEntry handled through ZipInputStream/ZipOutputStream has a compressed size, so you can keep a rough running estimate of the zip file's size while creating it. If you need 2 MB zip files, stop writing to the current file once the cumulative size of its entries reaches 1.9 MB, leaving 0.1 MB for the manifest file and other zip-specific structures.
So, in a nutshell, you can write a wrapper over ZipOutputStream as follows:

import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.zip.ZipEntry;
import java.util.zip.ZipOutputStream;

/**
 * Distributes zip entries over a sequence of zip files ("chunks") so that the
 * estimated size of each chunk stays below a fixed limit.
 *
 * <p>Chunks are created in the directory {@code path} and are named
 * {@code <name>.part.0.zip}, {@code <name>.part.1.zip}, and so on.
 *
 * <p>The size estimate is the sum of {@link ZipEntry#getCompressedSize()} of the
 * entries added to the current chunk. That value is -1 when the compressed size
 * is not known; such entries are counted as 0 bytes, so they never trigger a
 * rollover on their own.
 *
 * <p>Instances must be closed with {@link #close()} so that the last chunk gets
 * its central directory written.
 */
public class ChunkedZippedOutputStream implements java.io.Closeable {

    /** Upper bound, in bytes, for the summed compressed entry sizes of one chunk. */
    private static final long MAX_FILE_SIZE = 16000000; // Whatever size you want
    private static final String PART_POSTFIX = ".part.";
    private static final String FILE_EXTENSION = ".zip";

    private final String path;
    private final String name;

    private ZipOutputStream zipOutputStream;
    private long currentSize;
    private int currentChunkIndex;

    /**
     * Opens the first chunk.
     *
     * @param path directory in which the chunk files are created
     * @param name base name of the chunk files
     * @throws FileNotFoundException if the first chunk file cannot be created
     */
    public ChunkedZippedOutputStream(String path, String name) throws FileNotFoundException {
        this.path = path;
        this.name = name;
        constructNewStream();
    }

    /**
     * Starts a new entry, rolling over to the next chunk first when the entry
     * would push the current chunk over the size limit.
     *
     * @param entry the entry to start
     * @throws IOException if the current chunk cannot be closed, the next chunk
     *     cannot be created, or the entry cannot be started
     */
    public void addEntry(ZipEntry entry) throws IOException {
        // getCompressedSize() is -1 when unknown; never let that shrink the running total.
        long entrySize = Math.max(0L, entry.getCompressedSize());
        // Only roll over when the current chunk already holds data; otherwise an
        // oversize entry would leave an empty chunk file behind.
        if (currentSize > 0 && currentSize + entrySize > MAX_FILE_SIZE) {
            closeStream();
            constructNewStream();
        }
        // The entry is always written, including the one that triggered the rollover.
        currentSize += entrySize;
        zipOutputStream.putNextEntry(entry);
    }

    /**
     * Writes data for the entry most recently started with {@link #addEntry(ZipEntry)}.
     *
     * @param buffer source buffer
     * @param offset index of the first byte to write
     * @param length number of bytes to write
     * @throws IOException if the underlying zip stream fails
     */
    public void write(byte[] buffer, int offset, int length) throws IOException {
        zipOutputStream.write(buffer, offset, length);
    }

    /**
     * Finishes the entry most recently started with {@link #addEntry(ZipEntry)}.
     *
     * @throws IOException if the underlying zip stream fails
     */
    public void closeEntry() throws IOException {
        zipOutputStream.closeEntry();
    }

    /**
     * Closes the chunk that is currently open.
     *
     * @throws IOException if the underlying zip stream fails to close
     */
    @Override
    public void close() throws IOException {
        closeStream();
    }

    private void closeStream() throws IOException {
        zipOutputStream.close();
    }

    private void constructNewStream() throws FileNotFoundException {
        zipOutputStream = new ZipOutputStream(
                new FileOutputStream(new File(path, constructCurrentPartName())));
        currentChunkIndex++;
        currentSize = 0;
    }

    private String constructCurrentPartName() {
        // This will give names in the form of <file_name>.part.0.zip, <file_name>.part.1.zip, etc.
        return name + PART_POSTFIX + currentChunkIndex + FILE_EXTENSION;
    }
}

The above program is just a hint of the approach and not a final solution by any means.

How do you uncompress a split volume zip in Java?

Here is the code you can start from. It extracts a single file entry from the multivolume zip archive:

package org.test.zip;

import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.SequenceInputStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;

/**
 * Extracts all entries of a zip archive that was split byte-wise into several
 * volume files, writing them below the current working directory.
 *
 * <p>The volumes are read back to back as one stream, so they must be passed in
 * their original order.
 */
public class Main {

    /** Volumes used when no file names are given on the command line. */
    private static final String[] DEFAULT_VOLUMES = {"test.zip.001", "test.zip.002", "test.zip.003"};

    private static final int BUFFER_SIZE = 8192;

    /**
     * Extracts the archive formed by the given volumes.
     *
     * @param args volume file names in order; when empty, {@code test.zip.001},
     *     {@code test.zip.002} and {@code test.zip.003} are used
     * @throws IOException if a volume cannot be read, an entry cannot be written,
     *     or an entry name resolves outside the working directory
     */
    public static void main(String[] args) throws IOException {
        String[] volumes = (args != null && args.length > 0) ? args : DEFAULT_VOLUMES;
        File destDir = new File(".").getCanonicalFile();

        ZipInputStream is = new ZipInputStream(openVolumes(volumes));
        try {
            for (ZipEntry entry = is.getNextEntry(); entry != null; entry = is.getNextEntry()) {
                File target = resolveTarget(destDir, entry);
                if (entry.isDirectory()) {
                    ensureDirectory(target);
                } else {
                    // Archives do not always carry explicit entries for parent directories.
                    ensureDirectory(target.getParentFile());
                    copyEntry(is, target);
                }
            }
        } finally {
            is.close();
        }
    }

    /** Opens all volumes as one continuous stream, closing those already opened if one fails. */
    private static InputStream openVolumes(String[] volumeNames) throws IOException {
        List<InputStream> opened = new ArrayList<InputStream>();
        try {
            for (String volumeName : volumeNames) {
                opened.add(new FileInputStream(volumeName));
            }
        } catch (IOException e) {
            for (InputStream stream : opened) {
                try {
                    stream.close();
                } catch (IOException ignored) {
                    // Best effort: the failure to open the volume is the error worth reporting.
                }
            }
            throw e;
        }
        return new SequenceInputStream(Collections.enumeration(opened));
    }

    /** Maps an entry name to a file below {@code destDir}, rejecting names that escape it. */
    private static File resolveTarget(File destDir, ZipEntry entry) throws IOException {
        File target = new File(destDir, entry.getName()).getCanonicalFile();
        String root = destDir.getPath();
        String prefix = root.endsWith(File.separator) ? root : root + File.separator;
        if (!target.equals(destDir) && !target.getPath().startsWith(prefix)) {
            throw new IOException("Zip entry is outside of the target directory: " + entry.getName());
        }
        return target;
    }

    /** Creates {@code dir} and any missing parents; does nothing for {@code null}. */
    private static void ensureDirectory(File dir) throws IOException {
        if (dir != null && !dir.mkdirs() && !dir.isDirectory()) {
            throw new IOException("Cannot create directory: " + dir);
        }
    }

    /** Copies the data of the current zip entry into {@code target}. */
    private static void copyEntry(InputStream in, File target) throws IOException {
        OutputStream os = new BufferedOutputStream(new FileOutputStream(target));
        try {
            byte[] buffer = new byte[BUFFER_SIZE];
            for (int readBytes = in.read(buffer); readBytes != -1; readBytes = in.read(buffer)) {
                os.write(buffer, 0, readBytes);
            }
        } finally {
            os.close();
        }
    }
}

Split a zip archive into multiple chunks

Here is what I use to send files to a Telegram channel through a Telegram bot.
The upload size limit is 50 MB for a Telegram bot.
The upload size limit is 1500 MB for a Telegram client, but you may add some text or other info, so 1495 is safer.

#! /usr/bin/python3
# -*- coding:utf-8 -*-
# apt-get install p7zip-full

import subprocess
import os
import math
import logzero

# Module-wide logger taken from logzero.
logger = logzero.logger

# Default upper bound for a single archive volume. The value is handed to 7z as
# "-v{}m" and compared against file sizes that are converted to MiB.
MAX_SPLIT_SIZE = 1495

def file_split_7z(file_path, split_size=MAX_SPLIT_SIZE):
    """Pack ``file_path`` into an uncompressed (``-mx0``) multi-volume 7z archive.

    The archive is created next to the input file; its volumes are named
    ``<head>_<ext>.7z.001``, ``<head>_<ext>.7z.002``, ... Volumes left over
    from an earlier run are removed first.

    An input whose extension is ``.7z`` is renamed to ``.7zo`` while 7z runs
    and is always renamed back afterwards, including when 7z fails.

    :param file_path: path of the file to pack
    :param split_size: maximum volume size, passed to 7z as ``-v<split_size>m``
    :return: paths of the volumes 7z produced, in volume order; an empty list
        when the 7z output does not contain "Everything is Ok"
    """
    # if origin file is 7z file rename it
    origin_file_path = ""
    if os.path.splitext(file_path)[1] == ".7z":
        origin_file_path = file_path
        file_path = os.path.splitext(origin_file_path)[0] + ".7zo"
        os.rename(origin_file_path, file_path)
    try:
        head, ext = os.path.splitext(os.path.abspath(file_path))
        archive_head = "".join((head, ext.replace(".", "_"))) + ".7z"
        # Remove every stale volume, not only as many as this run is expected to write.
        for stale_volume in _list_7z_volumes(archive_head):
            logger.debug("remove exists file | {}".format(stale_volume))
            os.remove(stale_volume)
        # do 7z compress
        cmd_7z = ["7z", "a", "-v{}m".format(split_size), "-y", "-mx0", archive_head, file_path]
        proc = subprocess.Popen(cmd_7z, shell=False, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        out, err = proc.communicate()
        if b"Everything is Ok" not in out:
            logger.error("7z output | {}".format(out.decode("utf-8", errors="replace")))
            logger.error("7z error | {}".format(err.decode("utf-8", errors="replace")))
            return []
        # The archive carries headers on top of the payload, so the number of
        # volumes cannot be derived reliably from the input size; report what
        # is actually on disk.
        return _list_7z_volumes(archive_head)
    finally:
        # if origin file is 7z file rename it back
        if origin_file_path:
            os.rename(file_path, origin_file_path)


def _list_7z_volumes(archive_head):
    """Return the existing ``<archive_head>.<digits>`` files ordered by volume number."""
    directory, base_name = os.path.split(archive_head)
    prefix = base_name + "."
    numbered = []
    for entry in os.listdir(directory):
        if not entry.startswith(prefix):
            continue
        suffix = entry[len(prefix):]
        full_path = os.path.join(directory, entry)
        if suffix.isdigit() and os.path.isfile(full_path):
            numbered.append((int(suffix), full_path))
    return [volume_path for _, volume_path in sorted(numbered)]

def do_file_split(file_path, split_size=MAX_SPLIT_SIZE):
    """Split ``file_path`` into 7z volumes of roughly equal size.

    The number of parts is the smallest count that keeps every part at or
    below ``split_size``; the volume size is then lowered so the parts come
    out even.

    Example: with a max split size of 1495 and a file size of 2000, the number
    of parts is ceil(2000 / 1495) = 2, so the file is split as 1000 + 1000
    rather than 1495 + 505. The larger a part, the higher the risk that its
    upload fails.

    :param file_path: path of the file to split
    :param split_size: maximum size of one part, in the unit 7z uses for ``-v<n>m``
    :return: list of volume paths as returned by ``file_split_7z``
    :raises ValueError: if ``split_size`` is not positive
    """
    if split_size <= 0:
        raise ValueError("split_size must be positive, got {}".format(split_size))
    file_size = os.path.getsize(file_path) / 2 ** 20
    # An empty file would otherwise yield zero parts and a division by zero below.
    split_part = max(1, math.ceil(file_size / split_size))
    # 7z needs a volume size of at least 1.
    new_split_size = max(1, math.ceil(file_size / split_part))
    logger.info("file size | {} | split num | {} | split size | {}".format(file_size, split_part, new_split_size))
    return file_split_7z(file_path, split_size=new_split_size)

Apache Common Compress - Split Zip file

Must you use Apache Commons Compress? If not, this answer might be interesting for you: how to create Java zip archives with a max file size limit

.Net library for split volume zip files?

DotNetZip allows you to do this. From their documentation:

The library supports zip passwords, Unicode, ZIP64, stream input and output,
AES encryption, multiple compression levels, self-extracting archives,
spanned archives, and more.

how to unzip multiple volume zip files with apache ant?

Assuming that you had zipped the some-archive.bin file and then split the some-archive.zip file into multiple volumes, you could use the concat task with binary set to true and a destfile specified in order to merge the volumes back into a single zip file. Then unzip the some-archive.zip with the unzip task.

<!-- Merges the split volumes back into some-archive.zip and extracts it into the current directory. -->
<target name="merge-and-unzip">

    <!-- A fileset has no guaranteed order, so the volumes are sorted by name before
         concatenation. This assumes the volume suffixes sort in volume order
         (for example zero-padded numbers). -->
    <concat destfile="some-archive.zip" binary="true">
        <sort>
            <fileset dir=".">
                <include name="some-archive.zip.*"/>
            </fileset>
        </sort>
    </concat>

    <unzip dest="." src="some-archive.zip"/>

</target>


Related Topics



Leave a reply



Submit