decompress-zip.js 9.81 KB
'use strict';

// The zip file spec is at http://www.pkware.com/documents/casestudies/APPNOTE.TXT
// TODO: There is fair chunk of the spec that I have ignored. Need to add
// assertions everywhere to make sure that we are not dealing with a ZIP type
// that I haven't designed for. Things like spanning archives, non-DEFLATE
// compression, encryption, etc.
var fs = require('graceful-fs');
var Q = require('q');
var path = require('path');
var util = require('util');
var events = require('events');
var structures = require('./structures');
var signatures = require('./signatures');
var extractors = require('./extractors');
var FileDetails = require('./file-details');

var fstat = Q.denodeify(fs.fstat);
var read = Q.denodeify(fs.read);
var fopen = Q.denodeify(fs.open);

function DecompressZip(filename) {
    events.EventEmitter.call(this);

    this.filename = filename;
    this.stats = null;
    this.fd = null;
    this.chunkSize = 1024 * 1024; // Buffer up to 1Mb at a time
    this.dirCache = {};

    // When we need a resource, we should check if there is a promise for it
    // already and use that. If the promise is already fulfilled we don't do the
    // async work again and we get to queue up dependant tasks.
    this._p = {}; // _p instead of _promises because it is a lot easier to read
}

util.inherits(DecompressZip, events.EventEmitter);

DecompressZip.prototype.openFile = function () {
    return fopen(this.filename, 'r');
};

DecompressZip.prototype.closeFile = function () {
    if (this.fd) {
        fs.closeSync(this.fd);
        this.fd = null;
    }
};

DecompressZip.prototype.statFile = function (fd) {
    this.fd = fd;
    return fstat(fd);
};

DecompressZip.prototype.list = function () {
    var self = this;

    this.getFiles()
    .then(function (files) {
        var result = [];

        files.forEach(function (file) {
            result.push(file.path);
        });

        self.emit('list', result);
    })
    .fail(function (error) {
        self.emit('error', error);
    })
    .fin(self.closeFile.bind(self));

    return this;
};

DecompressZip.prototype.extract = function (options) {
    var self = this;

    options = options || {};
    options.path = options.path || process.cwd();
    options.filter = options.filter || null;
    options.follow = !!options.follow;
    options.strip = +options.strip || 0;
    options.restrict = options.restrict !== false;


    this.getFiles()
    .then(function (files) {
        var copies = [];
        if (options.restrict) {
          files = files.map(function (file) {
            var destination = path.join(options.path, file.path);
            // The destination path must not be outside options.path
            if (destination.indexOf(options.path) !== 0) {
              throw new Error('You cannot extract a file outside of the target path');
            }
            return file;
          });
        }
        if (options.filter) {
            files = files.filter(options.filter);
        }

        if (options.follow) {
            copies = files.filter(function (file) {
                return file.type === 'SymbolicLink';
            });
            files = files.filter(function (file) {
                return file.type !== 'SymbolicLink';
            });
        }

        if (options.strip) {
            files = files.map(function (file) {
                if (file.type !== 'Directory') {
                    // we don't use `path.sep` as we're using `/` in Windows too
                    var dir = file.parent.split('/');
                    var filename = file.filename;

                    if (options.strip > dir.length) {
                        throw new Error('You cannot strip more levels than there are directories');
                    } else {
                        dir = dir.slice(options.strip);
                    }

                    file.path = path.join(dir.join(path.sep), filename);
                    return file;
                }
            });
        }

        return self.extractFiles(files, options)
        .then(self.extractFiles.bind(self, copies, options));
    })
    .then(function (results) {
        self.emit('extract', results);
    })
    .fail(function (error) {
        self.emit('error', error);
    })
    .fin(self.closeFile.bind(self));

    return this;
};

// Utility methods
DecompressZip.prototype.getSearchBuffer = function (stats) {
    var size = Math.min(stats.size, this.chunkSize);
    this.stats = stats;
    return this.getBuffer(stats.size - size, stats.size);
};

DecompressZip.prototype.getBuffer = function (start, end) {
    var size = end - start;
    return read(this.fd, new Buffer(size), 0, size, start)
    .then(function (result) {
        return result[1];
    });
};

DecompressZip.prototype.findEndOfDirectory = function (buffer) {
    var index = buffer.length - 3;
    var chunk = '';

    // Apparently the ZIP spec is not very good and it is impossible to
    // guarantee that you have read a zip file correctly, or to determine
    // the location of the CD without hunting.
    // Search backwards through the buffer, as it is very likely to be near the
    // end of the file.
    while (index > Math.max(buffer.length - this.chunkSize, 0) && chunk !== signatures.END_OF_CENTRAL_DIRECTORY) {
        index--;
        chunk = buffer.readUInt32LE(index);
    }

    if (chunk !== signatures.END_OF_CENTRAL_DIRECTORY) {
        throw new Error('Could not find the End of Central Directory Record');
    }

    return buffer.slice(index);
};

// Directory here means the ZIP Central Directory, not a folder
DecompressZip.prototype.readDirectory = function (recordBuffer) {
    var record = structures.readEndRecord(recordBuffer);

    return this.getBuffer(record.directoryOffset, record.directoryOffset + record.directorySize)
    .then(structures.readDirectory.bind(null));
};

DecompressZip.prototype.getFiles = function () {
    if (!this._p.getFiles) {
        this._p.getFiles = this.openFile()
        .then(this.statFile.bind(this))
        .then(this.getSearchBuffer.bind(this))
        .then(this.findEndOfDirectory.bind(this))
        .then(this.readDirectory.bind(this))
        .then(this.readFileEntries.bind(this));
    }

    return this._p.getFiles;
};

DecompressZip.prototype.readFileEntries = function (directory) {
    var promises = [];
    var files = [];
    var self = this;

    directory.forEach(function (directoryEntry, index) {
        var start = directoryEntry.relativeOffsetOfLocalHeader;
        var end = Math.min(self.stats.size, start + structures.maxFileEntrySize);
        var fileDetails = new FileDetails(directoryEntry);

        var promise = self.getBuffer(start, end)
        .then(structures.readFileEntry.bind(null))
        .then(function (fileEntry) {
            var maxSize;

            if (fileDetails.compressedSize > 0) {
                maxSize = fileDetails.compressedSize;
            } else {
                maxSize = self.stats.size;

                if (index < directory.length - 1) {
                    maxSize = directory[index + 1].relativeOffsetOfLocalHeader;
                }

                maxSize -= start + fileEntry.entryLength;
            }

            fileDetails._offset = start + fileEntry.entryLength;
            fileDetails._maxSize = maxSize;

            self.emit('file', fileDetails);
            files[index] = fileDetails;
        });

        promises.push(promise);
    });

    return Q.all(promises)
    .then(function () {
        return files;
    });
};

DecompressZip.prototype.extractFiles = function (files, options, results) {
    var promises = [];
    var self = this;

    results = results || [];
    var fileIndex = 0;
    files.forEach(function (file) {
        var promise = self.extractFile(file, options)
        .then(function (result) {
            self.emit('progress', fileIndex++, files.length);
            results.push(result);
        });

        promises.push(promise);
    });

    return Q.all(promises)
    .then(function () {
        return results;
    });
};

DecompressZip.prototype.extractFile = function (file, options) {
    var destination = path.join(options.path, file.path);

    // Possible compression methods:
    //    0 - The file is stored (no compression)
    //    1 - The file is Shrunk
    //    2 - The file is Reduced with compression factor 1
    //    3 - The file is Reduced with compression factor 2
    //    4 - The file is Reduced with compression factor 3
    //    5 - The file is Reduced with compression factor 4
    //    6 - The file is Imploded
    //    7 - Reserved for Tokenizing compression algorithm
    //    8 - The file is Deflated
    //    9 - Enhanced Deflating using Deflate64(tm)
    //   10 - PKWARE Data Compression Library Imploding (old IBM TERSE)
    //   11 - Reserved by PKWARE
    //   12 - File is compressed using BZIP2 algorithm
    //   13 - Reserved by PKWARE
    //   14 - LZMA (EFS)
    //   15 - Reserved by PKWARE
    //   16 - Reserved by PKWARE
    //   17 - Reserved by PKWARE
    //   18 - File is compressed using IBM TERSE (new)
    //   19 - IBM LZ77 z Architecture (PFS)
    //   97 - WavPack compressed data
    //   98 - PPMd version I, Rev 1

    if (file.type === 'Directory') {
        return extractors.folder(file, destination, this);
    }

    if (file.type === 'File') {
        switch (file.compressionMethod) {
        case 0:
            return extractors.store(file, destination, this);

        case 8:
            return extractors.deflate(file, destination, this);

        default:
            throw new Error('Unsupported compression type');
        }
    }

    if (file.type === 'SymbolicLink') {
        if (options.follow) {
            return extractors.copy(file, destination, this, options.path);
        } else {
            return extractors.symlink(file, destination, this, options.path);
        }
    }

    throw new Error('Unsupported file type "' + file.type + '"');
};

module.exports = DecompressZip;