Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added ZIP64 Support to zip-stream.js #259

Open
wants to merge 8 commits into
base: master
Choose a base branch
from

Conversation

magnus-rattlehead
Copy link

Feel free to test. I used the Sintel movie and a 10GB file created by fallocate. Testing shows that the zip works. For some reason, my zip archiver (Ark from KDE) complains about a corrupted zip, but it is able to read and extract it successfully. Running `zip -T archive.zip` confirms that the archive is valid. Feel free to test it.

@magnus-rattlehead
Copy link
Author

Never mind. The archiving software doesn't complain about corrupted zips.

@FallingHazard
Copy link

FallingHazard commented Dec 20, 2021

It would be awesome if this was built into a transform stream, like in: https://github.com/transcend-io/conflux/blob/master/src/write.js

@PidgeyBE
Copy link

PidgeyBE commented Apr 18, 2024

FYI, the solution here gave me corrupt zip files.
I asked ChatGPT to rewrite the original one and did a bit of cleanup to end up with a working file...
Tested with 10.000 files which result in a total zip file of 9GB.

zip-stream64.js

/**
 * Incremental CRC-32 accumulator.
 *
 * Feed byte chunks with `append()` and read the final checksum with `get()`.
 * The lookup table is read from `this.table`, which is expected to be
 * provided on the prototype.
 */
class Crc32 {
    constructor() {
        // Running register, starting with all bits set.
        this.crc = -1
    }

    /**
     * Folds a chunk of bytes into the running checksum.
     * @param {ArrayLike<number>} data Indexable sequence of byte values.
     */
    append(data) {
        const lookup = this.table
        const count = data.length | 0
        let acc = this.crc | 0
        for (let i = 0; i < count; i++) {
            const slot = (acc ^ data[i]) & 0xFF
            acc = lookup[slot] ^ (acc >>> 8)
        }
        this.crc = acc
    }

    /**
     * @return {number} The checksum as a signed 32-bit integer (bitwise
     *     complement of the running register).
     */
    get() {
        return ~this.crc
    }
}

// 256-entry CRC-32 lookup table (reflected polynomial 0xEDB88320), shared by
// every Crc32 instance through the prototype.
Crc32.prototype.table = Array.from({ length: 256 }, (_, byte) => {
    let value = byte
    for (let bit = 0; bit < 8; bit++) {
        const shifted = value >>> 1
        value = (value & 1) ? shifted ^ 0xEDB88320 : shifted
    }
    return value
})

/**
 * Allocates a zero-filled byte buffer and exposes it two ways.
 * @param {number} byteLength Size of the buffer in bytes.
 * @return {{array: Uint8Array, view: DataView}} A byte array and a DataView
 *     that share the same underlying buffer.
 */
const getDataHelper = (byteLength) => {
    const array = new Uint8Array(byteLength)
    const view = new DataView(array.buffer)
    return { array, view }
}

/**
 * Reads one chunk from the entry's source reader and forwards it to the
 * output controller, updating the entry's CRC and byte counters. When the
 * source is exhausted, the entry's footer is written instead.
 * @param {Object} zipObj Entry state; uses `reader`, `crc`, `ctrl`,
 *     `uncompressedLength`, `compressedLength` and `writeFooter`.
 * @return {Promise} Settles once the chunk (or the footer) has been handled.
 */
const pump = (zipObj) => {
    return zipObj.reader.read().then(({ done, value }) => {
        if (done) {
            return zipObj.writeFooter()
        }
        zipObj.crc.append(value)
        // Data is passed through unchanged, so both counters grow equally.
        const size = value.length
        zipObj.uncompressedLength += size
        zipObj.compressedLength += size
        zipObj.ctrl.enqueue(value)
    })
}

/**
 * Creates a ReadableStream that emits a ZIP archive with ZIP64 records,
 * built from entries supplied through a writer object. Entry data is copied
 * through unchanged (no compression is applied here).
 *
 * The writer object passed to `underlyingSource.start` and
 * `underlyingSource.pull` exposes:
 *   - `enqueue(fileLike)`: queues an entry. `fileLike.name` is required;
 *     `fileLike.directory`, `fileLike.lastModified`, `fileLike.comment` and
 *     `fileLike.stream()` are read when present. Throws if the writer was
 *     closed or an entry with the same name already exists.
 *   - `close()`: signals that no more entries will be added. Throws if
 *     called twice.
 *
 * @param  {Object} underlyingSource Object with optional `start(zipWriter)`
 *                                   and `pull(zipWriter)` callbacks.
 * @return {ReadableStream}          Stream of Uint8Array chunks that together
 *                                   form the archive.
 */
function createWriter(underlyingSource) {
    const files = Object.create(null)
    const filenames = []
    const encoder = new TextEncoder()
    // Number of bytes emitted so far; recorded as each entry's header offset.
    let offset = 0
    let activeZipIndex = 0
    let ctrl
    let activeZipObject, closed

    // Moves on to the next queued entry, or finalizes the archive when the
    // queue is drained and close() has been requested.
    function next() {
        activeZipIndex++
        activeZipObject = files[filenames[activeZipIndex]]
        if (activeZipObject) processNextChunk()
        else if (closed) closeZip()
    }

    const zipWriter = {
        enqueue(fileLike) {
            if (closed) throw new TypeError('Cannot enqueue a chunk into a readable stream that is closed or has been requested to be closed')

            let name = fileLike.name.trim()
            const date = new Date(typeof fileLike.lastModified === 'undefined' ? Date.now() : fileLike.lastModified)

            if (fileLike.directory && !name.endsWith('/')) name += '/'
            if (files[name]) throw new Error('File already exists.')

            const nameBuf = encoder.encode(name)
            filenames.push(name)

            const zipObject = files[name] = {
                level: 0,
                ctrl,
                directory: !!fileLike.directory,
                nameBuf,
                comment: encoder.encode(fileLike.comment || ''),
                compressedLength: 0,
                uncompressedLength: 0,
                extraArray: null,

                // Emits the local file header. The 26-byte `header` buffer is
                // kept on the entry because the central directory reuses it.
                writeHeader() {
                    const header = getDataHelper(26)
                    const data = getDataHelper(30 + nameBuf.length)

                    zipObject.header = header
                    zipObject.offset = offset
                    if (zipObject.level !== 0 && !zipObject.directory) {
                        header.view.setUint16(4, 0x0800)
                    }
                    // Big-endian write: bytes 2-3 become the general purpose
                    // flags 08 08 (data descriptor follows, UTF-8 names).
                    // Bytes 0-1 are overwritten on the next line.
                    header.view.setUint32(0, 0x14000808)

                    // Version needed to extract: 4.5 (ZIP64).
                    header.view.setUint16(0, 45, true)

                    // MS-DOS time and date.
                    header.view.setUint16(6, (((date.getHours() << 6) | date.getMinutes()) << 5) | date.getSeconds() / 2, true)
                    header.view.setUint16(8, ((((date.getFullYear() - 1980) << 4) | (date.getMonth() + 1)) << 5) | date.getDate(), true)
                    header.view.setUint16(22, nameBuf.length, true)
                    data.view.setUint32(0, 0x504b0304)
                    data.array.set(header.array, 4)
                    data.array.set(nameBuf, 30)
                    offset += data.array.length
                    ctrl.enqueue(data.array)
                },

                // Emits the ZIP64 data descriptor and prepares the values the
                // central directory entry needs (CRC, size markers, ZIP64
                // extra field), then advances to the next entry.
                writeFooter() {
                    // Must be little-endian like every other header field;
                    // a big-endian write here would store 0x2D00 in the
                    // central directory instead of 45.
                    zipObject.header.view.setUint16(0, 45, true)

                    const footer = getDataHelper(24)
                    footer.view.setUint32(0, 0x504b0708)

                    if (zipObject.crc) {
                        zipObject.header.view.setUint32(10, zipObject.crc.get(), true)
                        footer.view.setUint32(4, zipObject.crc.get(), true)
                    }

                    // 32-bit sizes are set to the ZIP64 marker; the real
                    // 64-bit values live in the extra field below.
                    const zip64Extra = getDataHelper(28)
                    zipObject.header.view.setUint32(14, 0xffffffff, true)
                    zipObject.header.view.setUint32(18, 0xffffffff, true)
                    footer.view.setBigUint64(8, BigInt(zipObject.compressedLength), true)
                    footer.view.setBigUint64(16, BigInt(zipObject.uncompressedLength), true)
                    // Extra field: id 0x0001, 24 data bytes
                    // (uncompressed size, compressed size, header offset).
                    zip64Extra.view.setUint16(0, 0x0001, true)
                    zip64Extra.view.setUint16(2, 24, true)
                    zip64Extra.view.setBigUint64(4, BigInt(zipObject.uncompressedLength), true)
                    zip64Extra.view.setBigUint64(12, BigInt(zipObject.compressedLength), true)
                    zip64Extra.view.setBigUint64(20, BigInt(zipObject.offset), true)
                    zipObject.extraArray = zip64Extra.array

                    ctrl.enqueue(footer.array)
                    offset += zipObject.compressedLength + footer.array.length
                    next()
                },
                fileLike
            }

            if (!activeZipObject) {
                activeZipObject = zipObject
                processNextChunk()
            }
        },
        close() {
            if (closed) throw new TypeError('Cannot close a readable stream that has already been requested to be closed')
            if (!activeZipObject) closeZip()
            closed = true
        }
    }

    // Writes the central directory, the ZIP64 end-of-central-directory record
    // and locator, and the classic end-of-central-directory record, then
    // closes the output stream.
    function closeZip() {
        const totalEntries = filenames.length
        let length = 0
        for (const filename of filenames) {
            const file = files[filename]
            length += 46 + file.nameBuf.length + file.comment.length
            if (file.extraArray) {
                length += file.extraArray.length
            }
        }
        const cdOffset = offset

        // Central directory + ZIP64 EOCD (56) + ZIP64 locator (20) + EOCD (22).
        const data = getDataHelper(length + 56 + 20 + 22)
        let index = 0
        for (const filename of filenames) {
            const file = files[filename]
            const extraLength = file.extraArray ? file.extraArray.length : 0

            data.view.setUint32(index, 0x504b0102)
            data.view.setUint16(index + 4, 0x1400)
            // Reuse the 26 bytes shared with the local header
            // (version needed ... extra field length).
            data.array.set(file.header.array, index + 6)
            data.view.setUint16(index + 30, extraLength, true)
            data.view.setUint16(index + 32, file.comment.length, true)
            if (file.directory) {
                data.view.setUint8(index + 38, 0x10)
            }
            if (file.extraArray) {
                // The ZIP64 extra field always carries the 64-bit header
                // offset, and that field may only be present when the 32-bit
                // offset holds the 0xffffffff marker.
                data.view.setUint32(index + 42, 0xffffffff, true)
            } else {
                data.view.setUint32(index + 42, Math.min(file.offset, 0xffffffff), true)
            }

            data.array.set(file.nameBuf, index + 46)
            if (file.extraArray) {
                data.array.set(file.extraArray, index + 46 + file.nameBuf.length)
            }
            data.array.set(file.comment, index + 46 + file.nameBuf.length + extraLength)
            index += 46 + file.nameBuf.length + file.comment.length + extraLength
        }

        // Zip64 End of Central Directory record
        // 0: Signature
        data.view.setUint32(index, 0x504b0606)
        // 4: Size of zip64 EOCD (record size minus the first 12 bytes)
        data.view.setBigUint64(index + 4, BigInt(44), true)
        // 12: Version made by
        data.view.setUint16(index + 12, 45, true)
        // 14: Version needed to extract
        data.view.setUint16(index + 14, 45, true)
        // 16: number of this disk (left at 0)
        // 20: number of the disk with the start of CD (left at 0)
        // 24: total number of entries in the central directory on this disk
        data.view.setBigUint64(index + 24, BigInt(totalEntries), true)
        // 32: total number of entries in the central directory
        data.view.setBigUint64(index + 32, BigInt(totalEntries), true)
        // 40: size of the central directory
        data.view.setBigUint64(index + 40, BigInt(length), true)
        // 48: offset of start of central directory
        data.view.setBigUint64(index + 48, BigInt(cdOffset), true)
        index += 56

        // Zip64 End of Central Directory locator
        // 0: Signature
        data.view.setUint32(index, 0x504b0607)
        // 4: number of the disk with the zip64 EOCD (left at 0)
        // 8: offset of the zip64 EOCD
        data.view.setBigUint64(index + 8, BigInt(cdOffset + length), true)
        // 16: total number of disks
        data.view.setUint32(index + 16, 1, true)
        index += 20

        // Classic EOCD: entry counts and CD offset carry the 0xffff /
        // 0xffffffff markers that point readers at the ZIP64 records above.
        data.view.setUint32(index, 0x504b0506)
        data.view.setUint16(index + 8, 0xffff, true)
        data.view.setUint16(index + 10, 0xffff, true)
        data.view.setUint32(index + 12, length, true)
        data.view.setUint32(index + 16, 0xffffffff, true)
        ctrl.enqueue(data.array)
        ctrl.close()
    }

    // Drives the active entry one step: directories are written in one go,
    // entries with a reader are pumped, entries with a stream() get their
    // reader and header set up, anything else is skipped.
    function processNextChunk() {
        if (!activeZipObject) return
        if (activeZipObject.directory) return activeZipObject.writeFooter(activeZipObject.writeHeader())
        if (activeZipObject.reader) return pump(activeZipObject)
        if (activeZipObject.fileLike.stream) {
            activeZipObject.crc = new Crc32()
            activeZipObject.reader = activeZipObject.fileLike.stream().getReader()
            activeZipObject.writeHeader()
        } else next()
    }

    return new ReadableStream({
        start: c => {
            ctrl = c
            underlyingSource.start && Promise.resolve(underlyingSource.start(zipWriter))
        },
        pull() {
            // Prefer draining the active entry; only ask the caller for more
            // entries when there is nothing in progress.
            return processNextChunk() || (
                underlyingSource.pull &&
                Promise.resolve(underlyingSource.pull(zipWriter))
            )
        }
    })
}

// Expose the writer factory as the global `ZIP`. `globalThis` is the same
// object as `window` on a page, and also exists where `window` is undefined
// (e.g. web workers), so the script no longer throws when loaded there.
globalThis.ZIP = createWriter

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

Successfully merging this pull request may close these issues.

None yet

3 participants