closed-social
/
gitea

package unsnap
import (	"bytes"	"encoding/binary"	"fmt"	"io"	"io/ioutil"	"os"
	"hash/crc32"
	snappy "github.com/golang/snappy"	// The C library can be used, but this makes the binary dependent
	// lots of extraneous c-libraries; it is no longer stand-alone. Yuck.
	//
	// Therefore we comment out the "dgryski/go-csnappy" path and use the
	// "github.com/golang/snappy/snappy" above instead. If you are
	// performance limited and can deal with distributing more libraries,
	// then this is easy to swap.
	//
	// If you swap, note that some of the tests won't pass
	// because snappy-go produces slightly different (but still
	// conformant) encodings on some data. Here are bindings
	// to the C-snappy:
	// snappy "github.com/dgryski/go-csnappy"
)
// SnappyFile: create a drop-in-replacement/wrapper for an *os.File that handles doing the unsnappification online as more is read from it

type SnappyFile struct {	Fname string
	Reader io.Reader	Writer io.Writer
	// allow clients to substitute us for an os.File and just switch
	// off compression if they don't want it.
	SnappyEncodeDecodeOff bool // if true, we bypass straight to Filep

	EncBuf FixedSizeRingBuf // holds any extra that isn't yet returned, encoded
	DecBuf FixedSizeRingBuf // holds any extra that isn't yet returned, decoded

	// for writing to stream-framed snappy
	HeaderChunkWritten bool
	// Sanity check: we can only read, or only write, to one SnappyFile.
	// EncBuf and DecBuf are used differently in each mode. Verify
	// that we are consistent with this flag.
	Writing bool}
var total int
// for debugging, show state of buffers
func (f *SnappyFile) Dump() {	fmt.Printf("EncBuf has length %d and contents:\n%s\n", len(f.EncBuf.Bytes()), string(f.EncBuf.Bytes()))	fmt.Printf("DecBuf has length %d and contents:\n%s\n", len(f.DecBuf.Bytes()), string(f.DecBuf.Bytes()))}
func (f *SnappyFile) Read(p []byte) (n int, err error) {
	if f.SnappyEncodeDecodeOff {		return f.Reader.Read(p)	}
	if f.Writing {		panic("Reading on a write-only SnappyFile")	}
	// before we unencrypt more, try to drain the DecBuf first
	n, _ = f.DecBuf.Read(p)	if n > 0 {		total += n		return n, nil	}
	//nEncRead, nDecAdded, err := UnsnapOneFrame(f.Filep, &f.EncBuf, &f.DecBuf, f.Fname)
	_, _, err = UnsnapOneFrame(f.Reader, &f.EncBuf, &f.DecBuf, f.Fname)	if err != nil && err != io.EOF {		panic(err)	}
	n, _ = f.DecBuf.Read(p)
	if n > 0 {		total += n		return n, nil	}	if f.DecBuf.Readable == 0 {		if f.DecBuf.Readable == 0 && f.EncBuf.Readable == 0 {			// only now (when EncBuf is empty) can we give io.EOF.
			// Any earlier, and we leave stuff un-decoded!
			return 0, io.EOF		}	}	return 0, nil}
func Open(name string) (file *SnappyFile, err error) {	fp, err := os.Open(name)	if err != nil {		return nil, err	}	// encoding in snappy can apparently go beyond the original size, so
	// we make our buffers big enough, 2*max snappy chunk => 2 * CHUNK_MAX(65536)

	snap := NewReader(fp)	snap.Fname = name	return snap, nil}
func NewReader(r io.Reader) *SnappyFile {	return &SnappyFile{		Reader:  r,		EncBuf:  *NewFixedSizeRingBuf(CHUNK_MAX * 2), // buffer of snappy encoded bytes
		DecBuf:  *NewFixedSizeRingBuf(CHUNK_MAX * 2), // buffer of snapppy decoded bytes
		Writing: false,	}}
func NewWriter(w io.Writer) *SnappyFile {	return &SnappyFile{		Writer:  w,		EncBuf:  *NewFixedSizeRingBuf(65536),     // on writing: temp for testing compression
		DecBuf:  *NewFixedSizeRingBuf(65536 * 2), // on writing: final buffer of snappy framed and encoded bytes
		Writing: true,	}}
func Create(name string) (file *SnappyFile, err error) {	fp, err := os.Create(name)	if err != nil {		return nil, err	}	snap := NewWriter(fp)	snap.Fname = name	return snap, nil}
func (f *SnappyFile) Close() error {	if f.Writing {		wc, ok := f.Writer.(io.WriteCloser)		if ok {			return wc.Close()		}		return nil	}	rc, ok := f.Reader.(io.ReadCloser)	if ok {		return rc.Close()	}	return nil}
func (f *SnappyFile) Sync() error {	file, ok := f.Writer.(*os.File)	if ok {		return file.Sync()	}	return nil}
// for an increment of a frame at a time:
// read from r into encBuf (encBuf is still encoded, thus the name), and write unsnappified frames into outDecodedBuf
//  the returned n: number of bytes read from the encrypted encBuf
func UnsnapOneFrame(r io.Reader, encBuf *FixedSizeRingBuf, outDecodedBuf *FixedSizeRingBuf, fname string) (nEnc int64, nDec int64, err error) {	//	b, err := ioutil.ReadAll(r)
	//	if err != nil {
	//		panic(err)
	//	}

	nEnc = 0	nDec = 0
	// read up to 65536 bytes from r into encBuf, at least a snappy frame
	nread, err := io.CopyN(encBuf, r, 65536) // returns nwrotebytes, err
	nEnc += nread	if err != nil {		if err == io.EOF {			if nread == 0 {				if encBuf.Readable == 0 {					return nEnc, nDec, io.EOF				}				// else we have bytes in encBuf, so decode them!
				err = nil			} else {				// continue below, processing the nread bytes
				err = nil			}		} else {			panic(err)		}	}
	// flag for printing chunk size alignment messages
	verbose := false
	const snappyStreamHeaderSz = 10	const headerSz = 4	const crc32Sz = 4	// the magic 18 bytes accounts for the snappy streaming header and the first chunks size and checksum
	// http://code.google.com/p/snappy/source/browse/trunk/framing_format.txt

	chunk := (*encBuf).Bytes()
	// however we exit, advance as
	//	defer func() { (*encBuf).Next(N) }()

	// 65536 is the max size of a snappy framed chunk. See
	// http://code.google.com/p/snappy/source/browse/trunk/framing_format.txt:91
	// buf := make([]byte, 65536)

	//	fmt.Printf("read from file, b is len:%d with value: %#v\n", len(b), b)
	//	fmt.Printf("read from file, bcut is len:%d with value: %#v\n", len(bcut), bcut)

	//fmt.Printf("raw bytes of chunksz are: %v\n", b[11:14])

	fourbytes := make([]byte, 4)	chunkCount := 0
	for nDec < 65536 {		if len(chunk) == 0 {			break		}		chunkCount++		fourbytes[3] = 0		copy(fourbytes, chunk[1:4])		chunksz := binary.LittleEndian.Uint32(fourbytes)		chunk_type := chunk[0]
		switch true {		case chunk_type == 0xff:			{ // stream identifier

				streamHeader := chunk[:snappyStreamHeaderSz]				if 0 != bytes.Compare(streamHeader, []byte{0xff, 0x06, 0x00, 0x00, 0x73, 0x4e, 0x61, 0x50, 0x70, 0x59}) {					panic("file had chunk starting with 0xff but then no magic snappy streaming protocol bytes, aborting.")				} else {					//fmt.Printf("got streaming snappy magic header just fine.\n")
				}				chunk = chunk[snappyStreamHeaderSz:]				(*encBuf).Advance(snappyStreamHeaderSz)				nEnc += snappyStreamHeaderSz				continue			}		case chunk_type == 0x00:			{ // compressed data
				if verbose {					fmt.Fprintf(os.Stderr, "chunksz is %d  while  total bytes avail are: %d\n", int(chunksz), len(chunk)-4)				}
				crc := binary.LittleEndian.Uint32(chunk[headerSz:(headerSz + crc32Sz)])				section := chunk[(headerSz + crc32Sz):(headerSz + chunksz)]
				dec, ok := snappy.Decode(nil, section)				if ok != nil {					// we've probably truncated a snappy frame at this point
					// ok=snappy: corrupt input
					// len(dec) == 0
					//
					panic(fmt.Sprintf("could not decode snappy stream: '%s' and len dec=%d and ok=%v\n", fname, len(dec), ok))
					// get back to caller with what we've got so far
					return nEnc, nDec, nil				}				//	fmt.Printf("ok, b is %#v , %#v\n", ok, dec)

				// spit out decoded text
				// n, err := w.Write(dec)
				//fmt.Printf("len(dec) = %d,   outDecodedBuf.Readable=%d\n", len(dec), outDecodedBuf.Readable)
				bnb := bytes.NewBuffer(dec)				n, err := io.Copy(outDecodedBuf, bnb)				if err != nil {					//fmt.Printf("got n=%d, err= %s ; when trying to io.Copy(outDecodedBuf: N=%d, Readable=%d)\n", n, err, outDecodedBuf.N, outDecodedBuf.Readable)
					panic(err)				}				if n != int64(len(dec)) {					panic("could not write all bytes to outDecodedBuf")				}				nDec += n
				// verify the crc32 rotated checksum
				m32 := masked_crc32c(dec)				if m32 != crc {					panic(fmt.Sprintf("crc32 masked failiure. expected: %v but got: %v", crc, m32))				} else {					//fmt.Printf("\nchecksums match: %v == %v\n", crc, m32)
				}
				// move to next header
				inc := (headerSz + int(chunksz))				chunk = chunk[inc:]				(*encBuf).Advance(inc)				nEnc += int64(inc)				continue			}		case chunk_type == 0x01:			{ // uncompressed data

				//n, err := w.Write(chunk[(headerSz+crc32Sz):(headerSz + int(chunksz))])
				n, err := io.Copy(outDecodedBuf, bytes.NewBuffer(chunk[(headerSz+crc32Sz):(headerSz+int(chunksz))]))				if verbose {					//fmt.Printf("debug: n=%d  err=%v  chunksz=%d  outDecodedBuf='%v'\n", n, err, chunksz, outDecodedBuf)
				}				if err != nil {					panic(err)				}				if n != int64(chunksz-crc32Sz) {					panic("could not write all bytes to stdout")				}				nDec += n
				inc := (headerSz + int(chunksz))				chunk = chunk[inc:]				(*encBuf).Advance(inc)				nEnc += int64(inc)				continue			}		case chunk_type == 0xfe:			fallthrough // padding, just skip it
		case chunk_type >= 0x80 && chunk_type <= 0xfd:			{ //  Reserved skippable chunks
				//fmt.Printf("\nin reserved skippable chunks, at nEnc=%v\n", nEnc)
				inc := (headerSz + int(chunksz))				chunk = chunk[inc:]				nEnc += int64(inc)				(*encBuf).Advance(inc)				continue			}
		default:			panic(fmt.Sprintf("unrecognized/unsupported chunk type %#v", chunk_type))		}
	} // end for{}

	return nEnc, nDec, err	//return int64(N), nil
}
// for whole file at once:
//
// receive on stdin a stream of bytes in the snappy-streaming framed
//  format, defined here: http://code.google.com/p/snappy/source/browse/trunk/framing_format.txt
// Grab each frame, run it through the snappy decoder, and spit out
//  each frame all joined back-to-back on stdout.
//
func Unsnappy(r io.Reader, w io.Writer) (err error) {	b, err := ioutil.ReadAll(r)	if err != nil {		panic(err)	}
	// flag for printing chunk size alignment messages
	verbose := false
	const snappyStreamHeaderSz = 10	const headerSz = 4	const crc32Sz = 4	// the magic 18 bytes accounts for the snappy streaming header and the first chunks size and checksum
	// http://code.google.com/p/snappy/source/browse/trunk/framing_format.txt

	chunk := b[:]
	// 65536 is the max size of a snappy framed chunk. See
	// http://code.google.com/p/snappy/source/browse/trunk/framing_format.txt:91
	//buf := make([]byte, 65536)

	//	fmt.Printf("read from file, b is len:%d with value: %#v\n", len(b), b)
	//	fmt.Printf("read from file, bcut is len:%d with value: %#v\n", len(bcut), bcut)

	//fmt.Printf("raw bytes of chunksz are: %v\n", b[11:14])

	fourbytes := make([]byte, 4)	chunkCount := 0
	for {		if len(chunk) == 0 {			break		}		chunkCount++		fourbytes[3] = 0		copy(fourbytes, chunk[1:4])		chunksz := binary.LittleEndian.Uint32(fourbytes)		chunk_type := chunk[0]
		switch true {		case chunk_type == 0xff:			{ // stream identifier

				streamHeader := chunk[:snappyStreamHeaderSz]				if 0 != bytes.Compare(streamHeader, []byte{0xff, 0x06, 0x00, 0x00, 0x73, 0x4e, 0x61, 0x50, 0x70, 0x59}) {					panic("file had chunk starting with 0xff but then no magic snappy streaming protocol bytes, aborting.")				} else {					//fmt.Printf("got streaming snappy magic header just fine.\n")
				}				chunk = chunk[snappyStreamHeaderSz:]				continue			}		case chunk_type == 0x00:			{ // compressed data
				if verbose {					fmt.Fprintf(os.Stderr, "chunksz is %d  while  total bytes avail are: %d\n", int(chunksz), len(chunk)-4)				}
				//crc := binary.LittleEndian.Uint32(chunk[headerSz:(headerSz + crc32Sz)])
				section := chunk[(headerSz + crc32Sz):(headerSz + chunksz)]
				dec, ok := snappy.Decode(nil, section)				if ok != nil {					panic("could not decode snappy stream")				}				//	fmt.Printf("ok, b is %#v , %#v\n", ok, dec)

				// spit out decoded text
				n, err := w.Write(dec)				if err != nil {					panic(err)				}				if n != len(dec) {					panic("could not write all bytes to stdout")				}
				// TODO: verify the crc32 rotated checksum?

				// move to next header
				chunk = chunk[(headerSz + int(chunksz)):]				continue			}		case chunk_type == 0x01:			{ // uncompressed data

				//crc := binary.LittleEndian.Uint32(chunk[headerSz:(headerSz + crc32Sz)])
				section := chunk[(headerSz + crc32Sz):(headerSz + chunksz)]
				n, err := w.Write(section)				if err != nil {					panic(err)				}				if n != int(chunksz-crc32Sz) {					panic("could not write all bytes to stdout")				}
				chunk = chunk[(headerSz + int(chunksz)):]				continue			}		case chunk_type == 0xfe:			fallthrough // padding, just skip it
		case chunk_type >= 0x80 && chunk_type <= 0xfd:			{ //  Reserved skippable chunks
				chunk = chunk[(headerSz + int(chunksz)):]				continue			}
		default:			panic(fmt.Sprintf("unrecognized/unsupported chunk type %#v", chunk_type))		}
	} // end for{}

	return nil}
// 0xff 0x06 0x00 0x00 sNaPpY
var SnappyStreamHeaderMagic = []byte{0xff, 0x06, 0x00, 0x00, 0x73, 0x4e, 0x61, 0x50, 0x70, 0x59}
const CHUNK_MAX = 65536const _STREAM_TO_STREAM_BLOCK_SIZE = CHUNK_MAXconst _STREAM_IDENTIFIER = `sNaPpY`const _COMPRESSED_CHUNK = 0x00const _UNCOMPRESSED_CHUNK = 0x01const _IDENTIFIER_CHUNK = 0xffconst _RESERVED_UNSKIPPABLE0 = 0x02 // chunk ranges are [inclusive, exclusive)
const _RESERVED_UNSKIPPABLE1 = 0x80const _RESERVED_SKIPPABLE0 = 0x80const _RESERVED_SKIPPABLE1 = 0xff
// the minimum percent of bytes compression must save to be enabled in automatic
// mode
const _COMPRESSION_THRESHOLD = .125
var crctab *crc32.Table
func init() {	crctab = crc32.MakeTable(crc32.Castagnoli) // this is correct table, matches the crc32c.c code used by python
}
func masked_crc32c(data []byte) uint32 {
	// see the framing format specification, http://code.google.com/p/snappy/source/browse/trunk/framing_format.txt
	var crc uint32 = crc32.Checksum(data, crctab)	return (uint32((crc>>15)|(crc<<17)) + 0xa282ead8)}
func ReadSnappyStreamCompressedFile(filename string) ([]byte, error) {
	snappyFile, err := Open(filename)	if err != nil {		return []byte{}, err	}
	var bb bytes.Buffer	_, err = bb.ReadFrom(snappyFile)	if err == io.EOF {		err = nil	}	if err != nil {		panic(err)	}
	return bb.Bytes(), err}