// Inflater.cs // // Copyright (C) 2001 Mike Krueger // Copyright (C) 2004 John Reilly // // This file was translated from java, it was part of the GNU Classpath // Copyright (C) 2001 Free Software Foundation, Inc. // // This program is free software; you can redistribute it and/or // modify it under the terms of the GNU General Public License // as published by the Free Software Foundation; either version 2 // of the License, or (at your option) any later version. // // This program is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with this program; if not, write to the Free Software // Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. // // Linking this library statically or dynamically with other modules is // making a combined work based on this library. Thus, the terms and // conditions of the GNU General Public License cover the whole // combination. // // As a special exception, the copyright holders of this library give you // permission to link this library with independent modules to produce an // executable, regardless of the license terms of these independent // modules, and to copy and distribute the resulting executable under // terms of your choice, provided that you also meet, for each linked // independent module, the terms and conditions of the license of that // module. An independent module is a module which is not derived from // or based on this library. If you modify this library, you may extend // this exception to your version of the library, but you are not // obligated to do so. If you do not wish to do so, delete this // exception statement from your version. 
using System;

using ICSharpCode.SharpZipLib.Checksums;
using ICSharpCode.SharpZipLib.Zip.Compression.Streams;

namespace ICSharpCode.SharpZipLib.Zip.Compression
{
	/// <summary>
	/// Inflater is used to decompress data that has been compressed according
	/// to the "deflate" standard described in rfc1951.
	///
	/// By default Zlib (rfc1950) headers and footers are expected in the input.
	/// You can use constructor <c>public Inflater(bool noHeader)</c> passing true
	/// if there is no Zlib header information.
	///
	/// The usage is as following.  First you have to set some input with
	/// <c>SetInput()</c>, then <c>Inflate()</c> it.  If inflate doesn't
	/// inflate any bytes there may be three reasons:
	/// <list type="bullet">
	/// <item><description><see cref="IsNeedingInput"/> is true because the input buffer is empty;
	/// provide more input with <c>SetInput()</c>.</description></item>
	/// <item><description><see cref="IsNeedingDictionary"/> is true;
	/// provide a preset dictionary with <c>SetDictionary()</c>.</description></item>
	/// <item><description><see cref="IsFinished"/> is true; the inflater has finished.</description></item>
	/// </list>
	/// Once the first output byte is produced, a dictionary will not be
	/// needed at a later stage.
	///
	/// author of the original java version : John Leuner, Jochen Hoenicke
	/// </summary>
	public class Inflater
	{
		#region Constants/Readonly
		/// <summary>
		/// Copy lengths for literal codes 257..285
		/// </summary>
		static readonly int[] CPLENS = {
			3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31,
			35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258
		};

		/// <summary>
		/// Extra bits for literal codes 257..285
		/// </summary>
		static readonly int[] CPLEXT = {
			0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2,
			3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 0
		};

		/// <summary>
		/// Copy offsets for distance codes 0..29
		/// </summary>
		static readonly int[] CPDIST = {
			1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193,
			257, 385, 513, 769, 1025, 1537, 2049, 3073, 4097, 6145,
			8193, 12289, 16385, 24577
		};

		/// <summary>
		/// Extra bits for distance codes
		/// </summary>
		static readonly int[] CPDEXT = {
			0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6,
			7, 7, 8, 8, 9, 9, 10, 10, 11, 11,
			12, 12, 13, 13
		};

		// These are the possible states for an inflater
		const int DECODE_HEADER = 0;
		const int DECODE_DICT = 1;
		const int DECODE_BLOCKS = 2;
		const int DECODE_STORED_LEN1 = 3;
		// LEN of a stored block has been consumed, its complement NLEN is still pending.
		const int DECODE_STORED_LEN2 = 4;
		const int DECODE_STORED = 5;
		const int DECODE_DYN_HEADER = 6;
		const int DECODE_HUFFMAN = 7;
		const int DECODE_HUFFMAN_LENBITS = 8;
		const int DECODE_HUFFMAN_DIST = 9;
		const int DECODE_HUFFMAN_DISTBITS = 10;
		const int DECODE_CHKSUM = 11;
		const int FINISHED = 12;

		/// <summary>
		/// Compression method value expected in the low nibble of the first
		/// zlib header byte (see <c>DecodeHeader</c>).
		/// </summary>
		public const int DEFLATED = 8;

		/// <summary>
		/// Written to Zip file to identify a stored block
		/// </summary>
		public const int STORED_BLOCK = 0;

		/// <summary>
		/// Identifies static tree in Zip file
		/// </summary>
		public const int STATIC_TREES = 1;

		/// <summary>
		/// Identifies dynamic tree in Zip file
		/// </summary>
		public const int DYN_TREES = 2;
		#endregion

		#region Instance Fields
		/// <summary>
		/// This variable contains the current state.
		/// </summary>
		int mode;

		/// <summary>
		/// The adler checksum of the dictionary or of the decompressed
		/// stream, as it is written in the header resp. footer of the
		/// compressed stream.
		/// Only valid if mode is DECODE_DICT or DECODE_CHKSUM.
		/// </summary>
		int readAdler;

		/// <summary>
		/// The number of bits needed to complete the current state.  This
		/// is valid, if mode is DECODE_DICT, DECODE_CHKSUM,
		/// DECODE_HUFFMAN_LENBITS or DECODE_HUFFMAN_DISTBITS.
		/// </summary>
		int neededBits;

		/// <summary>
		/// Length of the back reference currently being decoded.
		/// </summary>
		int repLength;

		/// <summary>
		/// Distance of the back reference currently being decoded.
		/// </summary>
		int repDist;

		/// <summary>
		/// Number of bytes of the current stored block that still have to be copied.
		/// </summary>
		protected int uncomprLen;

		/// <summary>
		/// True, if the last block flag was set in the last block of the
		/// inflated stream.  This means that the stream ends after the
		/// current block.
		/// </summary>
		bool isLastBlock;

		/// <summary>
		/// The total number of inflated bytes.
		/// </summary>
		long totalOut;

		/// <summary>
		/// The total number of bytes set with setInput().  This is not the
		/// value returned by the TotalIn property, since this also includes the
		/// unprocessed input.
		/// </summary>
		long totalIn;

		/// <summary>
		/// This variable stores the noHeader flag that was given to the constructor.
		/// True means, that the inflated stream doesn't contain a Zlib header or
		/// footer.
		/// </summary>
		bool noHeader;

		/// <summary>
		/// Bit-level reader over the compressed input.
		/// </summary>
		protected StreamManipulator input;

		OutputWindow outputWindow;
		InflaterDynHeader dynHeader;
		InflaterHuffmanTree litlenTree, distTree;
		Adler32 adler;
		#endregion

		#region Constructors
		/// <summary>
		/// Creates a new inflater or RFC1951 decompressor
		/// RFC1950/Zlib headers and footers will be expected in the input data
		/// </summary>
		public Inflater() : this(false)
		{
		}

		/// <summary>
		/// Creates a new inflater.
		/// </summary>
		/// <param name="noHeader">
		/// True if no RFC1950/Zlib header and footer fields are expected in the input data
		///
		/// This is used for GZIPed/Zipped input.
		///
		/// For compatibility with
		/// Sun JDK you should provide one byte of input more than needed in
		/// this case.
		/// </param>
		public Inflater(bool noHeader)
		{
			this.noHeader = noHeader;
			this.adler = new Adler32();
			input = new StreamManipulator();
			outputWindow = new OutputWindow();
			mode = noHeader ? DECODE_BLOCKS : DECODE_HEADER;
		}
		#endregion

		/// <summary>
		/// Resets the inflater so that a new stream can be decompressed.  All
		/// pending input and output will be discarded.
		/// </summary>
		public void Reset()
		{
			mode = noHeader ? DECODE_BLOCKS : DECODE_HEADER;
			totalIn = 0;
			totalOut = 0;
			input.Reset();
			outputWindow.Reset();
			dynHeader = null;
			litlenTree = null;
			distTree = null;
			isLastBlock = false;
			adler.Reset();
		}

		/// <summary>
		/// Decodes a zlib/RFC1950 header.
		/// </summary>
		/// <returns>
		/// False if more input is needed.
		/// </returns>
		/// <exception cref="SharpZipBaseException">
		/// The header is invalid.
		/// </exception>
		private bool DecodeHeader()
		{
			int header = input.PeekBits(16);
			if (header < 0) {
				return false;
			}
			input.DropBits(16);

			// The header is written in "wrong" byte order
			header = ((header << 8) | (header >> 8)) & 0xffff;
			if (header % 31 != 0) {
				throw new SharpZipBaseException("Header checksum illegal");
			}

			if ((header & 0x0f00) != (DEFLATED << 8)) {
				throw new SharpZipBaseException("Compression Method unknown");
			}

			/* Maximum size of the backwards window in bits.
			* We currently ignore this, but we could use it to make the
			* inflater window more space efficient. On the other hand the
			* full window (15 bits) is needed most times, anyway.
			int max_wbits = ((header & 0x7000) >> 12) + 8;
			*/

			if ((header & 0x0020) == 0) { // Dictionary flag?
				mode = DECODE_BLOCKS;
			} else {
				mode = DECODE_DICT;
				neededBits = 32;
			}
			return true;
		}

		/// <summary>
		/// Decodes the dictionary checksum after the deflate header.
		/// </summary>
		/// <returns>
		/// Always false: either more input is needed, or the checksum has been
		/// read completely and decoding has to pause until the caller supplies
		/// the dictionary (see <see cref="IsNeedingDictionary"/>).
		/// </returns>
		private bool DecodeDict()
		{
			while (neededBits > 0) {
				int dictByte = input.PeekBits(8);
				if (dictByte < 0) {
					return false;
				}
				input.DropBits(8);
				readAdler = (readAdler << 8) | dictByte;
				neededBits -= 8;
			}
			return false;
		}

		/// <summary>
		/// Decodes the huffman encoded symbols in the input stream.
		/// </summary>
		/// <returns>
		/// false if more input is needed, true if output window is
		/// full or the current block ends.
		/// </returns>
		/// <exception cref="SharpZipBaseException">
		/// if deflated stream is invalid.
		/// </exception>
		private bool DecodeHuffman()
		{
			int free = outputWindow.GetFreeSpace();
			// 258 is the largest entry of CPLENS, i.e. the longest possible match;
			// only continue while one more symbol is guaranteed to fit.
			while (free >= 258) {
				int symbol;
				switch (mode) {
					case DECODE_HUFFMAN:
						// This is the inner loop so it is optimized a bit
						while (((symbol = litlenTree.GetSymbol(input)) & ~0xff) == 0) {
							outputWindow.Write(symbol);
							if (--free < 258) {
								return true;
							}
						}

						if (symbol < 257) {
							if (symbol < 0) {
								return false;
							} else {
								// symbol == 256: end of block
								distTree = null;
								litlenTree = null;
								mode = DECODE_BLOCKS;
								return true;
							}
						}

						// symbol >= 257 here, so the index is never negative; codes
						// beyond the table are rejected explicitly instead of relying
						// on a caught IndexOutOfRangeException.
						int lengthIndex = symbol - 257;
						if (lengthIndex >= CPLENS.Length) {
							throw new SharpZipBaseException("Illegal rep length code");
						}
						repLength = CPLENS[lengthIndex];
						neededBits = CPLEXT[lengthIndex];
						goto case DECODE_HUFFMAN_LENBITS; // fall through

					case DECODE_HUFFMAN_LENBITS:
						if (neededBits > 0) {
							// Record the state first so decoding resumes here if input runs out.
							mode = DECODE_HUFFMAN_LENBITS;
							int lengthExtra = input.PeekBits(neededBits);
							if (lengthExtra < 0) {
								return false;
							}
							input.DropBits(neededBits);
							repLength += lengthExtra;
						}
						mode = DECODE_HUFFMAN_DIST;
						goto case DECODE_HUFFMAN_DIST; // fall through

					case DECODE_HUFFMAN_DIST:
						symbol = distTree.GetSymbol(input);
						if (symbol < 0) {
							return false;
						}

						// symbol is non-negative here; reject codes beyond the table.
						if (symbol >= CPDIST.Length) {
							throw new SharpZipBaseException("Illegal rep dist code");
						}
						repDist = CPDIST[symbol];
						neededBits = CPDEXT[symbol];
						goto case DECODE_HUFFMAN_DISTBITS; // fall through

					case DECODE_HUFFMAN_DISTBITS:
						if (neededBits > 0) {
							// Record the state first so decoding resumes here if input runs out.
							mode = DECODE_HUFFMAN_DISTBITS;
							int distExtra = input.PeekBits(neededBits);
							if (distExtra < 0) {
								return false;
							}
							input.DropBits(neededBits);
							repDist += distExtra;
						}

						outputWindow.Repeat(repLength, repDist);
						free -= repLength;
						mode = DECODE_HUFFMAN;
						break;

					default:
						throw new SharpZipBaseException("Inflater unknown mode");
				}
			}
			return true;
		}

		/// <summary>
		/// Decodes the adler checksum after the deflate stream.
		/// </summary>
		/// <returns>
		/// Always false: either more input is needed, or the checksum matched
		/// and the inflater is finished.
		/// </returns>
		/// <exception cref="SharpZipBaseException">
		/// If checksum doesn't match.
		/// </exception>
		private bool DecodeChksum()
		{
			while (neededBits > 0) {
				int chkByte = input.PeekBits(8);
				if (chkByte < 0) {
					return false;
				}
				input.DropBits(8);
				readAdler = (readAdler << 8) | chkByte;
				neededBits -= 8;
			}

			if ((int) adler.Value != readAdler) {
				throw new SharpZipBaseException("Adler chksum doesn't match: " + (int)adler.Value + " vs. " + readAdler);
			}

			mode = FINISHED;
			return false;
		}

		/// <summary>
		/// Decodes the deflated stream.
		/// </summary>
		/// <returns>
		/// false if more input is needed, or if finished.
		/// </returns>
		/// <exception cref="SharpZipBaseException">
		/// if deflated stream is invalid.
		/// </exception>
		private bool Decode()
		{
			switch (mode) {
				case DECODE_HEADER:
					return DecodeHeader();

				case DECODE_DICT:
					return DecodeDict();

				case DECODE_CHKSUM:
					return DecodeChksum();

				case DECODE_BLOCKS:
					if (isLastBlock) {
						if (noHeader) {
							mode = FINISHED;
							return false;
						} else {
							input.SkipToByteBoundary();
							neededBits = 32;
							mode = DECODE_CHKSUM;
							return true;
						}
					}

					int blockType;
					if (!ReadHeader(ref isLastBlock, out blockType)) {
						return false;
					}

					switch (blockType) {
						case STORED_BLOCK:
							input.SkipToByteBoundary();
							mode = DECODE_STORED_LEN1;
							break;
						case STATIC_TREES:
							litlenTree = InflaterHuffmanTree.defLitLenTree;
							distTree = InflaterHuffmanTree.defDistTree;
							mode = DECODE_HUFFMAN;
							break;
						case DYN_TREES:
							dynHeader = new InflaterDynHeader();
							mode = DECODE_DYN_HEADER;
							break;
						default:
							throw new SharpZipBaseException("Unknown block type " + blockType);
					}
					return true;

				case DECODE_STORED_LEN1:
				case DECODE_STORED_LEN2:
					// DECODE_STORED_LEN2 is entered by DecodeStoredLength() when it ran
					// out of input between the two length fields.
					if (!DecodeStoredLength()) {
						return false;
					}
					mode = DECODE_STORED;
					goto case DECODE_STORED; // fall through

				case DECODE_STORED:
					{
						int more = outputWindow.CopyStored(input, uncomprLen);
						uncomprLen -= more;
						if (uncomprLen == 0) {
							mode = DECODE_BLOCKS;
							return true;
						}
						return !input.IsNeedingInput;
					}

				case DECODE_DYN_HEADER:
					if (!dynHeader.Decode(input)) {
						return false;
					}

					litlenTree = dynHeader.BuildLitLenTree();
					distTree = dynHeader.BuildDistTree();
					mode = DECODE_HUFFMAN;
					goto case DECODE_HUFFMAN; // fall through

				case DECODE_HUFFMAN:
				case DECODE_HUFFMAN_LENBITS:
				case DECODE_HUFFMAN_DIST:
				case DECODE_HUFFMAN_DISTBITS:
					return DecodeHuffman();

				case FINISHED:
					return false;

				default:
					throw new SharpZipBaseException("Inflater.Decode unknown mode");
			}
		}

		/// <summary>
		/// Reads the three bit block header: the last-block flag followed by
		/// the two bit block type.
		/// </summary>
		/// <param name="isLastBlock">
		/// Set to true if the last-block flag is set; left untouched otherwise.
		/// </param>
		/// <param name="blockType">
		/// Receives the block type, or -1 if more input is needed.
		/// </param>
		/// <returns>
		/// False if more input is needed.
		/// </returns>
		protected virtual bool ReadHeader(ref bool isLastBlock, out int blockType)
		{
			int type = input.PeekBits(3);
			if (type < 0) {
				blockType = -1;
				return false;
			}
			input.DropBits(3);

			if ((type & 1) != 0) {
				isLastBlock = true;
			}
			blockType = type >> 1;
			return true;
		}

		/// <summary>
		/// Reads the 16 bit length of a stored block into <see cref="uncomprLen"/>
		/// followed by its 16 bit one's complement, and verifies that they agree.
		/// </summary>
		/// <remarks>
		/// The two fields are consumed separately.  If the input runs out after
		/// the first field has been dropped, that is recorded in the decoder
		/// state so that the next call resumes with the second field instead of
		/// re-reading the first one from the wrong position.
		/// </remarks>
		/// <returns>
		/// False if more input is needed.
		/// </returns>
		/// <exception cref="SharpZipBaseException">
		/// The length and its complement do not match.
		/// </exception>
		protected virtual bool DecodeStoredLength()
		{
			if (mode != DECODE_STORED_LEN2) {
				if ((uncomprLen = input.PeekBits(16)) < 0) {
					return false;
				}
				input.DropBits(16);
				// The length is consumed; remember that only the complement is outstanding.
				mode = DECODE_STORED_LEN2;
			}

			int nlen = input.PeekBits(16);
			if (nlen < 0) {
				return false;
			}
			input.DropBits(16);

			if (nlen != (uncomprLen ^ 0xffff)) {
				throw new SharpZipBaseException("broken uncompressed block");
			}
			return true;
		}

		/// <summary>
		/// Sets the preset dictionary.  This should only be called, if
		/// needsDictionary() returns true and it should set the same
		/// dictionary, that was used for deflating.  The getAdler()
		/// function returns the checksum of the dictionary needed.
		/// </summary>
		/// <param name="buffer">
		/// The dictionary.
		/// </param>
		/// <exception cref="System.ArgumentNullException">
		/// buffer is null.
		/// </exception>
		public void SetDictionary(byte[] buffer)
		{
			if ( buffer == null ) {
				throw new ArgumentNullException("buffer");
			}

			SetDictionary(buffer, 0, buffer.Length);
		}

		/// <summary>
		/// Sets the preset dictionary.  This should only be called, if
		/// needsDictionary() returns true and it should set the same
		/// dictionary, that was used for deflating.  The getAdler()
		/// function returns the checksum of the dictionary needed.
		/// </summary>
		/// <param name="buffer">
		/// The dictionary.
		/// </param>
		/// <param name="index">
		/// The index into buffer where the dictionary starts.
		/// </param>
		/// <param name="count">
		/// The number of bytes in the dictionary.
		/// </param>
		/// <exception cref="System.InvalidOperationException">
		/// No dictionary is needed.
		/// </exception>
		/// <exception cref="SharpZipBaseException">
		/// The adler checksum for the buffer is invalid
		/// </exception>
		public void SetDictionary(byte[] buffer, int index, int count)
		{
			if ( buffer == null ) {
				throw new ArgumentNullException("buffer");
			}

			if ( index < 0 ) {
				throw new ArgumentOutOfRangeException("index");
			}

			if ( count < 0 ) {
				throw new ArgumentOutOfRangeException("count");
			}

			if (!IsNeedingDictionary) {
				throw new InvalidOperationException("Dictionary is not needed");
			}

			adler.Update(buffer, index, count);
			int dictionaryChecksum = (int)adler.Value;

			// The running checksum must start afresh for the decompressed data, and
			// also after a rejected dictionary so that a retry with the correct
			// dictionary is not poisoned by the bytes of the wrong one.
			adler.Reset();

			if (dictionaryChecksum != readAdler) {
				throw new SharpZipBaseException("Wrong adler checksum");
			}

			outputWindow.CopyDict(buffer, index, count);
			mode = DECODE_BLOCKS;
		}

		/// <summary>
		/// Sets the input.  This should only be called, if needsInput()
		/// returns true.
		/// </summary>
		/// <param name="buffer">
		/// the input.
		/// </param>
		/// <exception cref="System.ArgumentNullException">
		/// buffer is null.
		/// </exception>
		public void SetInput(byte[] buffer)
		{
			if ( buffer == null ) {
				throw new ArgumentNullException("buffer");
			}

			SetInput(buffer, 0, buffer.Length);
		}

		/// <summary>
		/// Sets the input.  This should only be called, if needsInput()
		/// returns true.
		/// </summary>
		/// <param name="buffer">
		/// The source of input data
		/// </param>
		/// <param name="index">
		/// The index into buffer where the input starts.
		/// </param>
		/// <param name="count">
		/// The number of bytes of input to use.
		/// </param>
		/// <exception cref="System.InvalidOperationException">
		/// No input is needed.
		/// </exception>
		/// <exception cref="System.ArgumentOutOfRangeException">
		/// The index and/or count are wrong.
		/// </exception>
		public void SetInput(byte[] buffer, int index, int count)
		{
			input.SetInput(buffer, index, count);
			totalIn += (long)count;
		}

		/// <summary>
		/// Inflates the compressed stream to the output buffer.  If this
		/// returns 0, you should check, whether IsNeedingDictionary(),
		/// IsNeedingInput() or IsFinished() returns true, to determine why no
		/// further output is produced.
		/// </summary>
		/// <param name="buffer">
		/// the output buffer.
		/// </param>
		/// <returns>
		/// The number of bytes written to the buffer, 0 if no further
		/// output can be produced.
		/// </returns>
		/// <exception cref="System.ArgumentNullException">
		/// if buffer is null.
		/// </exception>
		/// <exception cref="SharpZipBaseException">
		/// if deflated stream is invalid.
		/// </exception>
		public int Inflate(byte[] buffer)
		{
			if ( buffer == null ) {
				throw new ArgumentNullException("buffer");
			}

			return Inflate(buffer, 0, buffer.Length);
		}

		/// <summary>
		/// Inflates the compressed stream to the output buffer.  If this
		/// returns 0, you should check, whether needsDictionary(),
		/// needsInput() or finished() returns true, to determine why no
		/// further output is produced.
		/// </summary>
		/// <param name="buffer">
		/// the output buffer.
		/// </param>
		/// <param name="offset">
		/// the offset in buffer where storing starts.
		/// </param>
		/// <param name="count">
		/// the maximum number of bytes to output.
		/// </param>
		/// <returns>
		/// the number of bytes written to the buffer, 0 if no further output can be produced.
		/// </returns>
		/// <exception cref="System.ArgumentOutOfRangeException">
		/// if count or offset is less than 0.
		/// </exception>
		/// <exception cref="System.ArgumentException">
		/// if offset and count describe a range outside of buffer.
		/// </exception>
		/// <exception cref="SharpZipBaseException">
		/// if deflated stream is invalid.
		/// </exception>
		public int Inflate(byte[] buffer, int offset, int count)
		{
			if ( buffer == null ) {
				throw new ArgumentNullException("buffer");
			}

			if ( count < 0 ) {
#if NETCF_1_0
				throw new ArgumentOutOfRangeException("count");
#else
				throw new ArgumentOutOfRangeException("count", "count cannot be negative");
#endif
			}

			if ( offset < 0 ) {
#if NETCF_1_0
				throw new ArgumentOutOfRangeException("offset");
#else
				throw new ArgumentOutOfRangeException("offset", "offset cannot be negative");
#endif
			}

			// Written as a subtraction because offset + count can wrap around in
			// int arithmetic and slip past the check; both operands are known to be
			// non-negative here, so the subtraction cannot overflow.
			if ( count > buffer.Length - offset ) {
				throw new ArgumentException("count exceeds buffer bounds");
			}

			// Special case: count may be zero
			if (count == 0) {
				if (!IsFinished) { // -jr- 08-Nov-2003 INFLATE_BUG fix..
					Decode();
				}
				return 0;
			}

			int bytesCopied = 0;

			do {
				if (mode != DECODE_CHKSUM) {
					/* Don't give away any output, if we are waiting for the
					* checksum in the input stream.
					*
					* With this trick we have always:
					*   IsNeedingInput() and not IsFinished()
					*   implies more output can be produced.
					*/
					int more = outputWindow.CopyOutput(buffer, offset, count);
					if ( more > 0 ) {
						adler.Update(buffer, offset, more);
						offset += more;
						bytesCopied += more;
						totalOut += (long)more;
						count -= more;
						if (count == 0) {
							return bytesCopied;
						}
					}
				}
			} while (Decode() || ((outputWindow.GetAvailable() > 0) && (mode != DECODE_CHKSUM)));
			return bytesCopied;
		}

		/// <summary>
		/// Returns true, if the input buffer is empty.
		/// You should then call setInput().
		/// NOTE: This method also returns true when the stream is finished.
		/// </summary>
		public bool IsNeedingInput {
			get {
				return input.IsNeedingInput;
			}
		}

		/// <summary>
		/// Returns true, if a preset dictionary is needed to inflate the input.
		/// </summary>
		public bool IsNeedingDictionary {
			get {
				return mode == DECODE_DICT && neededBits == 0;
			}
		}

		/// <summary>
		/// Returns true, if the inflater has finished.  This means, that no
		/// input is needed and no output can be produced.
		/// </summary>
		public bool IsFinished {
			get {
				return mode == FINISHED && outputWindow.GetAvailable() == 0;
			}
		}

		/// <summary>
		/// Gets the adler checksum.  This is either the checksum of all
		/// uncompressed bytes returned by inflate(), or if needsDictionary()
		/// returns true (and thus no output was yet produced) this is the
		/// adler checksum of the expected dictionary.
		/// </summary>
		/// <returns>
		/// the adler checksum.
		/// </returns>
		public int Adler {
			get {
				return IsNeedingDictionary ? readAdler : (int) adler.Value;
			}
		}

		/// <summary>
		/// Gets the total number of output bytes returned by Inflate().
		/// </summary>
		/// <returns>
		/// the total number of output bytes.
		/// </returns>
		public long TotalOut {
			get {
				return totalOut;
			}
		}

		/// <summary>
		/// Gets the total number of processed compressed input bytes.
		/// </summary>
		/// <returns>
		/// The total number of bytes of processed input bytes.
		/// </returns>
		public long TotalIn {
			get {
				return totalIn - (long)RemainingInput;
			}
		}

		/// <summary>
		/// Gets the number of unprocessed input bytes.  Useful, if the end of the
		/// stream is reached and you want to further process the bytes after
		/// the deflate stream.
		/// </summary>
		/// <returns>
		/// The number of bytes of the input which have not been processed.
		/// </returns>
		public int RemainingInput {
			// TODO: This should be a long?
			get {
				return input.AvailableBytes;
			}
		}
	}
}