main
Raw Download raw file
  1// Copyright 2014-2022 Ulrich Kunitz. All rights reserved.
  2// Use of this source code is governed by a BSD-style
  3// license that can be found in the LICENSE file.
  4
  5package lzma
  6
  7import (
  8	"errors"
  9	"fmt"
 10	"io"
 11)
 12
 13// decoder decodes a raw LZMA stream without any header.
 14type decoder struct {
 15	// dictionary; the rear pointer of the buffer will be used for
 16	// reading the data.
 17	Dict *decoderDict
 18	// decoder state
 19	State *state
 20	// range decoder
 21	rd *rangeDecoder
 22	// start stores the head value of the dictionary for the LZMA
 23	// stream
 24	start int64
 25	// size of uncompressed data
 26	size int64
 27	// end-of-stream encountered
 28	eos bool
 29	// EOS marker found
 30	eosMarker bool
 31}
 32
 33// newDecoder creates a new decoder instance. The parameter size provides
 34// the expected byte size of the decompressed data. If the size is
 35// unknown use a negative value. In that case the decoder will look for
 36// a terminating end-of-stream marker.
 37func newDecoder(br io.ByteReader, state *state, dict *decoderDict, size int64) (d *decoder, err error) {
 38	rd, err := newRangeDecoder(br)
 39	if err != nil {
 40		return nil, err
 41	}
 42	d = &decoder{
 43		State: state,
 44		Dict:  dict,
 45		rd:    rd,
 46		size:  size,
 47		start: dict.pos(),
 48	}
 49	return d, nil
 50}
 51
 52// Reopen restarts the decoder with a new byte reader and a new size. Reopen
 53// resets the Decompressed counter to zero.
 54func (d *decoder) Reopen(br io.ByteReader, size int64) error {
 55	var err error
 56	if d.rd, err = newRangeDecoder(br); err != nil {
 57		return err
 58	}
 59	d.start = d.Dict.pos()
 60	d.size = size
 61	d.eos = false
 62	return nil
 63}
 64
 65// decodeLiteral decodes a single literal from the LZMA stream.
 66func (d *decoder) decodeLiteral() (op operation, err error) {
 67	litState := d.State.litState(d.Dict.byteAt(1), d.Dict.head)
 68	match := d.Dict.byteAt(int(d.State.rep[0]) + 1)
 69	s, err := d.State.litCodec.Decode(d.rd, d.State.state, match, litState)
 70	if err != nil {
 71		return nil, err
 72	}
 73	return lit{s}, nil
 74}
 75
 76// errEOS indicates that an EOS marker has been found.
 77var errEOS = errors.New("EOS marker found")
 78
 79// readOp decodes the next operation from the compressed stream. It
 80// returns the operation. If an explicit end of stream marker is
 81// identified the eos error is returned.
 82func (d *decoder) readOp() (op operation, err error) {
 83	// Value of the end of stream (EOS) marker
 84	const eosDist = 1<<32 - 1
 85
 86	state, state2, posState := d.State.states(d.Dict.head)
 87
 88	b, err := d.State.isMatch[state2].Decode(d.rd)
 89	if err != nil {
 90		return nil, err
 91	}
 92	if b == 0 {
 93		// literal
 94		op, err := d.decodeLiteral()
 95		if err != nil {
 96			return nil, err
 97		}
 98		d.State.updateStateLiteral()
 99		return op, nil
100	}
101	b, err = d.State.isRep[state].Decode(d.rd)
102	if err != nil {
103		return nil, err
104	}
105	if b == 0 {
106		// simple match
107		d.State.rep[3], d.State.rep[2], d.State.rep[1] =
108			d.State.rep[2], d.State.rep[1], d.State.rep[0]
109
110		d.State.updateStateMatch()
111		// The length decoder returns the length offset.
112		n, err := d.State.lenCodec.Decode(d.rd, posState)
113		if err != nil {
114			return nil, err
115		}
116		// The dist decoder returns the distance offset. The actual
117		// distance is 1 higher.
118		d.State.rep[0], err = d.State.distCodec.Decode(d.rd, n)
119		if err != nil {
120			return nil, err
121		}
122		if d.State.rep[0] == eosDist {
123			d.eosMarker = true
124			return nil, errEOS
125		}
126		op = match{n: int(n) + minMatchLen,
127			distance: int64(d.State.rep[0]) + minDistance}
128		return op, nil
129	}
130	b, err = d.State.isRepG0[state].Decode(d.rd)
131	if err != nil {
132		return nil, err
133	}
134	dist := d.State.rep[0]
135	if b == 0 {
136		// rep match 0
137		b, err = d.State.isRepG0Long[state2].Decode(d.rd)
138		if err != nil {
139			return nil, err
140		}
141		if b == 0 {
142			d.State.updateStateShortRep()
143			op = match{n: 1, distance: int64(dist) + minDistance}
144			return op, nil
145		}
146	} else {
147		b, err = d.State.isRepG1[state].Decode(d.rd)
148		if err != nil {
149			return nil, err
150		}
151		if b == 0 {
152			dist = d.State.rep[1]
153		} else {
154			b, err = d.State.isRepG2[state].Decode(d.rd)
155			if err != nil {
156				return nil, err
157			}
158			if b == 0 {
159				dist = d.State.rep[2]
160			} else {
161				dist = d.State.rep[3]
162				d.State.rep[3] = d.State.rep[2]
163			}
164			d.State.rep[2] = d.State.rep[1]
165		}
166		d.State.rep[1] = d.State.rep[0]
167		d.State.rep[0] = dist
168	}
169	n, err := d.State.repLenCodec.Decode(d.rd, posState)
170	if err != nil {
171		return nil, err
172	}
173	d.State.updateStateRep()
174	op = match{n: int(n) + minMatchLen, distance: int64(dist) + minDistance}
175	return op, nil
176}
177
178// apply takes the operation and transforms the decoder dictionary accordingly.
179func (d *decoder) apply(op operation) error {
180	var err error
181	switch x := op.(type) {
182	case match:
183		err = d.Dict.writeMatch(x.distance, x.n)
184	case lit:
185		err = d.Dict.WriteByte(x.b)
186	default:
187		panic("op is neither a match nor a literal")
188	}
189	return err
190}
191
192// decompress fills the dictionary unless no space for new data is
193// available. If the end of the LZMA stream has been reached io.EOF will
194// be returned.
195func (d *decoder) decompress() error {
196	if d.eos {
197		return io.EOF
198	}
199	for d.Dict.Available() >= maxMatchLen {
200		op, err := d.readOp()
201		switch err {
202		case nil:
203			// break
204		case errEOS:
205			d.eos = true
206			if !d.rd.possiblyAtEnd() {
207				return errDataAfterEOS
208			}
209			if d.size >= 0 && d.size != d.Decompressed() {
210				return errSize
211			}
212			return io.EOF
213		case io.EOF:
214			d.eos = true
215			return io.ErrUnexpectedEOF
216		default:
217			return err
218		}
219		if err = d.apply(op); err != nil {
220			return err
221		}
222		if d.size >= 0 && d.Decompressed() >= d.size {
223			d.eos = true
224			if d.Decompressed() > d.size {
225				return errSize
226			}
227			if !d.rd.possiblyAtEnd() {
228				switch _, err = d.readOp(); err {
229				case nil:
230					return errSize
231				case io.EOF:
232					return io.ErrUnexpectedEOF
233				case errEOS:
234					break
235				default:
236					return err
237				}
238			}
239			return io.EOF
240		}
241	}
242	return nil
243}
244
// Errors reported by the decoder while decoding data.
var (
	// errDataAfterEOS reports compressed data following the EOS marker.
	errDataAfterEOS = errors.New("lzma: data after end of stream marker")
	// errSize reports that the amount of decompressed data does not match
	// the expected size.
	errSize = errors.New("lzma: wrong uncompressed data size")
)
250
251// Read reads data from the buffer. If no more data is available io.EOF is
252// returned.
253func (d *decoder) Read(p []byte) (n int, err error) {
254	var k int
255	for {
256		// Read of decoder dict never returns an error.
257		k, err = d.Dict.Read(p[n:])
258		if err != nil {
259			panic(fmt.Errorf("dictionary read error %s", err))
260		}
261		if k == 0 && d.eos {
262			return n, io.EOF
263		}
264		n += k
265		if n >= len(p) {
266			return n, nil
267		}
268		if err = d.decompress(); err != nil && err != io.EOF {
269			return n, err
270		}
271	}
272}
273
274// Decompressed returns the number of bytes decompressed by the decoder.
275func (d *decoder) Decompressed() int64 {
276	return d.Dict.pos() - d.start
277}