#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include <ctype.h>
#include <time.h>

/* Pu-crunch 1997 by Pasi 'Albert' Ojala, albert@cs.tut.fi */



/* #define BIG */
/*
    Define BIG for >64k files.
    It will use even more *huge* amounts of memory.

    Note:
    Although this version uses memory proportionally to the file length,
    it is possible to use fixed-size buffers. The LZ77 history buffer
    (and backSkip) needs to be as long as is needed, the other buffers
    minimally need to be about three times the length of the maximum
    LZ77 match. Writing the compressor this way would probably make it a
    little slower, and automatic selection of e.g. escape bits might not be
    practical.

    Adjusting the number of escape bits to adapt to local
    changes in the data would be worth investigating.

    Also, the memory needed for rle/elr tables could probably be reduced
    by using a sparse table implementation. Because of the RLE property
    only the starting and ending points (or lengths) need be saved. The
    speed should not decrease too much, because the tables are used in
    LZ77 string match also.... Well, actually no, as the RLE/LZ77 optimize
    needs to change the RLE lengths inside RLE's...

    The elr array can be reduced to half by storing only the byte that
    is before a run of bytes if we have the full backSkip table..

    Because the lzlen maximum value is 256, we could reduce the table
    from unsigned short to unsigned char by encoding 0->0, 2->1, .. 256->255.
    lzlen of the value 1 is never used anyway..

 */

/*#define ENABLE_VERBOSE*/  /* -v outputs the lz77/rle data to stdout */
#define HASH_STAT	    /* gives statistics about the hash compares */
#define BACKSKIP_FULL       /* full backSkip table - enables RESCAN. If */
                            /* not defined, backSkip only uses max 128kB */
#define RESCAN		    /* rescans LZ77 matches for a closer match. */
/*#define COND_PROB*/	    /* shows conditional probabilities for the next byte */
/*#define ESC_STATS*/
/*#define SHOW_GREEDY*/     /* compare to a greedy selection */
/*#define HASH_COMPARE*/    /* Use a 3-byte-to-1-byte hash to skip impossible matches */
/* takes "inbytes" bytes, reduces string compares from 16% to 8% */

const char version[] = "\0$VER: pucrunch 1.3 18-Sep-97\n";


/*
    26.2.1997
    One byte-pair history buffer gives >30% shorter time
    compared to a single-byte history buffer. (28->17)

    28.2.1997
    Calculate hash values (byte) for each threes of
    bytes for faster search, and use the 2-byte history to
    locate the last occurrence of the 2 bytes to get the
    minimal LZ sequence. -> 50% shorter time (17 -> 8)

    'Reworded' some of the code to help the compiler generate
    better code, although it still is not quite 'optimal'..
    Progress reports halved.
    Checks the hash value at old maxval before checking the
    actual bytes. -> 20% shorter time (8 -> 6.5)

    77% shorter time total (28->6.5)

    1.3.1997
    Made all internal functions static. Now uses register
    parameters for calls. Removed "lastPair" init, because
    calloc() handles it (saves 65536 byte-writes).
    7% shorter time (6.5->6)

    Removed the hash buffer. Added a table which extends the
    lastPair functionality. The table backSkip chains the
    positions with the same char pairs.
    80% shorter time (6 min -> 1min 10s)

    I put the hash buffer back. However, I don't know the
    effect, because I also added counters for the byte
    and hash value compares to see how many are actually
    done. The counter updates of course slow down the
    compression a bit.


    5.3.1997
    Tried reverse LZ, i.e. mirrored history buffer. Gained
    some bytes, but not really worth it, i.e. the compress
    time increases hugely.. (Works best with sin/cos tables
    and other symmetrical data.)

    6.3.1997
    Tried to have a code to use the last LZ copy position
    (offset added to the lastly used LZ copy position).
    On bs.run I gained 57 bytes, but in fact the net gain
    was only 2 bytes (uncruncher becomes ~25 bytes longer,
    and the lengthening of the 0-rle/long rle codes takes
    away the rest 30).

    10.3.1997
    Discovered that my representation of integers 1-63 is
    in fact an Elias Gamma Code. Thought of using Fibonacci
    code instead, but it would be much worse (~500 bytes on
    bs.run, ~300 bytes on delenn.run) without even counting
    the expansion of the uncrunch code.

    11.3.1997
    Replaced the linear code in 0-rle with the same 'Gamma'
    code that is used elsewhere. The optimizer now picks the
    codings differently and gains ~15 bytes on bs.run, but
    only 4 on delenn.run.

    12.3.1997
    'huffman' coded RLE byte -> ~70 bytes gain for bs.run.
    -> no separate 0-rle. The RLE bytes used are ranked, and
    top 15 are put into a table, which is indexed by the
    Elias Gamma Code. Other RLE bytes get a prefix "1111".

    15.3.1997
    The number of escape bits used is again selectable.
    Using only one escape bit for delenn.run gains ~150 bytes.
    If #-option is not selected, automatically selects the
    number of escape bits (is a bit slow).

    16.3.1997
    Changed some arrays to short. 17 x inlen + 64kB memory
    used. OptimizeEscape() only needs two 16-element arrays now
    and is slightly faster. The RLE byte code was changed
    to contain the first 31 top bytes instead of 15.

    31.3.1997
    Tried to use BASIC ROM as a 'codebook', but the results
    were not so good. For mostly-graphics files there are no
    long matches -> no net gain, for mostly-code files the
    file itself gives a better codebook.. Not to mention that
    using the BASIC ROM as a codebook is not 100% compatible.

    1.4.1997
    Tried maxlen 128, but it only gained 17 bytes on ivanova.run,
    and lost ~15 byte on bs.run. This also increased the LZPOS
    maximum value from ~16k to ~32k, but that also had little
    effect.

    2.4.1997
    Changed the coding so that LZ77 has the priority when RLE
    and LZ77 are equally 'good'. 2-byte LZ matches are coded
    in a special way without big loss in efficiency, and codes
    also RLE/Escape.
	bs.run		85 bytes (26584 -> 26499)
	delenn.run	-6 bytes (19977 -> 19983)
	sheridan.run	94 bytes (12778 -> 12684)
	ivanova.run	77 bytes (10054 -> 9977)

    5.4.1997
    Tried 'histogram normalization' on LZLEN, but it really
    did not gain much of anything, not even counting the mapping
    table from index to value that is needed.

    11.4.1997
    8..14 bit LZPOS base part. 'Automatic' selection. Some
    more bytes are gained if the proper selection is done
    before the LZ/RLELEN optimization. However, it can't
    really be done automatically before that, because it
    is a recursive process and the original LZ/RLE lengths
    are lost in the first optimization..

    11.4.1997
    Tried again maxlen 128, but it still worked the same
    as before -> negligible gain/loss.

    14.4.1997
    LZ77 searched for only locations that have RLE < 8
    (instead of < 64): -> time 21 min->7 min, size 9924 -> 10048
    for RLE < 16: -> 21 min->11 min, 9924 -> 9977
    for RLE < 32: -> 21 min->18 min, 9924 -> 9943
    ==> Use the original 'optimal' system..

    22.4.1997
    Found a way to speed up the 'almost pathological'
    cases by using the RLE table to skip the matching
    beginnings.

    2.5.1997
    Switched to maximum length of 128 mainly to get better
    results on the Calgary Corpus test suite. Only loses a
    dozen bytes on some of my 'real' test files (bs/delenn),
    and gains a dozen bytes on the others..

    25.5.1997
    Made the maximum length adjustable. %5, %6, and %7
    select 64, 128 and 256 respectively. The decompression
    code now allows escape bits from 0 to 8.

    1.6.1997
    Optimized the escape optimization routine. It now takes
    almost no time at all. It used a whole lot of time on
    large escape bit values before. The speedup came from
    a couple of generic data structure optimizations and
    loop removals by 'informal' deductions.

    3.6.1997
    Figured out another, better way to speed up the
    'pathological' cases. Reduced the run time to a fraction
    of the original time. 'All' 64k files are compressed
    under one minute on my 25 MHz 68030. Compression of
    ivanova.run (one of my 'problem' cases) was reduced from
    about 15 minutes (the first version used 1 hours) to 47
    seconds. The compression of bs.run has been reduced from
    28 minutes (the first version) to 24 seconds. An excellent
    example of how changes at the algorithm level give the
    most impressive speedups.

    18 x inlen bytes of memory is needed during compression.

    6.6.1997
    Changed the command line switches to use the 'standard'
    approach.

    11.6.1997
    Now determines the number of bytes needed for temporary
    data expansion (i.e. escaped bytes). Warns if there is not
    enough memory to allow successful decompression on a C64.

    Also, now it's possible to decompress the files compressed
    with the program (must be the same version). (-u)

    15.6.1997
    Some cleaning up, small improvement in OptimizeLength().
    AmigaOS version string added.

    17.6.1997
    Only checks the lengths that are power of two's in
    OptimizeLength(), because it does not seem to be any (much)
    worse than checking every length. (Smaller than found maximum
    lengths are checked because they may result in a shorter file.)
    This version (compiled with optimizations on) only 'wastes'
    27 seconds on ivanova.run.

    18.6.1997
    Removed 2 bytes from the decrunch code (begins to be quite
    tight now unless some features are removed) and simultaneously
    removed a not-yet-occurred hidden bug.

    19.6.1997
    Another 2 bytes removed from the decrunch code.

    23.6.1997
    Checked the theoretical gain from using the lastly outputted
    byte (conditional probabilities) to set the probabilities for
    normal/LZ77/RLE selection. The number of bits needed to code
    the selection is from 0.0 to 1.58, but even using arithmetic
    code to encode it, the original escape system is only 82 bits
    worse (ivanova.run), 7881/7963 bits total. The former figure is
    calculated from the entropy, the latter includes LZ77/RLE/escape
    select bits and actual escapes.

    14.7.1997
    One byte removed from the decrunch code.

    18.7.1997
    In LZ77 match we now check if a longer match (further away)
    really gains more bits. Increase in match length can make
    the code 2 bits longer. Increase in match offset can make
    the code even longer (2 bits for each magnitude). Also, if
    LZPOS low part is longer than 8, the extra bits make the code
    longer if the length becomes longer than two.
    ivanova -5 bytes, sheridan -14, delenn -26, bs -29

    When generating the output rescans for the LZ77 matches.
    This is because the optimization can shorten the matches
    and a shorter match may be found much nearer than the
    original longer match. Because longer offsets usually use
    more bits than shorter ones, we get some bits off for each
    match of this kind. Actually, the rescan should be done
    in OptimizeLength() to get the most out of it, but it is
    too much work right now. (and would make the optimize even
    slower)

    29.8.1997
    4 bytes removed from the decrunch code. I have to thank
    Tim Rogers <timr@eurodltd.co.uk> for helping with 2 of them.

    12.9.1997
    Because SuperCPU doesn't work correctly with inc/dec $d030,
    I made the 2 MHz user-selectable and off by default. (-f)

    The compression of bs.run (compiled with optimizations on)
    has been reduced from 28 minutes (the first version) to
    12 seconds (14000% speed increase!).

    13.9.1997
    Today I found out that most of my fast string matching algorithm
    matches the one developed by [Fenwick and Gutmann, 1994]*.
    It's quite frustrating to see that you are not a genius
    after all and someone else has had the same idea :-)
    However, using the RLE table to help still seems to be an
    original idea, which helps immensely on the worst cases.
    I still haven't read their paper on this, so I'll just
    have to get it and see..

      *	[Fenwick and Gutmann, 1994]. P.M. Fenwick and P.C. Gutmann,
	"Fast LZ77 String Matching", Dept of Computer Science,
	The University of Auckland, Tech Report 102, Sep 1994

    14.9.1997
    The new decompression code can decompress files from
    $258 to $ffff (or actually all the way up to $1002d :-).
    The drawback is: the decompression code became 17 bytes
    longer. However, the old decompression code is used if
    the wrap option is not needed.

    16.9.1997
    The backSkip table can now be fixed size (64 kWord) instead of
    growing enormous for "BIG" files. Unfortunately, if the fixed-size
    table is used, the LZ77 rescan is impractical (well, just a little
    slow, as we would need to recreate the backSkip table again).
    On the other hand the rescan did not gain so many bytes in the
    first place (percentage). The define BACKSKIP_FULL enables
    the old behavior (default). Note also, that for smaller files
    than 64kB (the primary target files) the default consumes less
    memory.

    The hash value compare that is used to discard impossible matches
    does not help much. Although it halves the number of strings to
    consider (compared to a direct one-byte compare), speedwise the
    difference is negligible. I suppose a mismatch is found very
    quickly when the strings are compared starting from the third
    character (the two first characters are equal, because we have
    a 'full hash table'). According to one test file, on average
    3.8 byte-compares are done for each potential match. A define
    HASH_COMPARE enables the hash version of the compare, in which
    case "inlen" bytes more memory is used.

    After removing the hash compare my algorithm quite closely
    follows the [Fenwick and Gutmann, 1994] fast string matching
    algorithm (except the RLE trick). (Although I still haven't
    read it.)

    14 x inlen + 256 kB of memory is used (with no HASH_COMPARE
    and without BACKSKIP_FULL).

    18.9.1997
    One byte removed from the decompression code (both versions).


 */


/*
 * Coding parameters shared by the bit-stream writers and the code that
 * patches the decompressor stubs.
 *
 *  maxGamma        longest run of 1-prefix bits PutValue() emits before it
 *                  drops the terminating 0; also fixes the short RLE limit
 *                  (1<<maxGamma) and the EOF code ((2<<maxGamma)-1).
 *  reservedBytes   NOTE(review): not referenced in this part of the file.
 *  escBits/escMask number of escape bits and the mask applied to a data
 *                  byte when it is compared with the current escape value
 *                  (defaults 2 and 0xc0; presumably always changed together).
 *  extraLZPosBits  number of LZ77 position bits sent in addition to the
 *                  8-bit low part.
 *  rleUsed         number of RLE table slots written with the stub; the
 *                  stub is shortened by 31-rleUsed bytes when saved.
 */
static int maxGamma = 6, reservedBytes = 8;
static int escBits = 2, escMask = 0xc0;
static int extraLZPosBits = 0, rleUsed = 31;

/*
-------->
    z..zx.....x							normal (zz != ee)
    e..e	value(LEN)	value(POSHI+1)	8+b(POSLO)	LZ77
    e..e	0 (2)		0 (2-256)	8b(POSLO)	LZ77
    e..e	100 (3)		111111 111111			END of FILE (LEN=3)

    e..e010	n..ne.....e					escape + new escape
    e..e011	value(LEN)	bytecode			Short RLE 2..
    e..e011	111..111 8b(LENLO) value(LENHI+1) bytecode	Long RLE
		(values 64.. not used (may not be available) in bytecode)

e..e011 0 0			RLE=2, rank 1 (saves 11.. bit)
e..e011 0 10 x			RLE=2, rank 2-3 (saves 9.. bit)
e..e011 0 11 0xx		RLE=2, rank 4-7 (saves 7.. bit)
e..e011 0 11 10xxx		RLE=2, rank 8-15 (saves 5.. bit)
e..e011 0 11 110xxxx		RLE=2, rank 16-31 (saves 3..2..1 bit)
e..e011 0 11 111xxxxx xxx	RLE=2, not ranked (loses 1..10 bits)
e..e011 0 11 1111xxxxxx		RLE=2, not used (not available for -m5)

LZ77, len=2 (pos<=256) saves 4 bits (2-bit escape)
LZ77, len=3 saves 10..1 bits (pos 2..15616)
LZ77, len=4 saves 18..9 bits
LZ77, len=5 saves 24..15 bits

RLE, len=2 saves 9..1(..-5) bits (bytecode rank 1..not ranked)
RLE, len=3 saves 15..2 bits
RLE, len=4 saves 23..10 bits
RLE, len=5 saves 29..16 bits

*/

/*
Value:

Elias Gamma Code rediscovered, just the prefix bits are reversed, plus
there is a length limit (1 bit gained for each value in the last group)
; 0000000	not possible
; 0000001	0		1			-6 bits
; 000001x	10	x	2-3			-4 bits
; 00001xx	110 	xx	4-7			-2 bits
; 0001xxx	1110 	xxx	8-15			+0 bits
; 001xxxx	11110	xxxx	16-31			+2 bits
; 01xxxxx	111110	xxxxx	32-63			+4 bits
; 1xxxxxx	111111	xxxxxx	64-127			+5 bits

*/


/*
 * Stub that SavePackNoWrap() writes in front of the packed data.
 * The first two bytes (0x01,0x08) are the load address $0801, which is why
 * SavePackNoWrap() indexes the array with "C64 address - 0x7ff".
 * The 0xAA bytes and several other operands are placeholders that
 * SavePackNoWrap() overwrites before saving; the last 31 bytes (all zero
 * here) receive the RLE byte table.
 * NOTE(review): the remaining bytes are presumably the 6502 decompression
 * routine for files that do not wrap past $ffff — not verifiable from the C
 * source alone.
 */
static unsigned char headerUncrunchNoWrap[] =
{
0x01,0x08,0x0B,0x08,0xEF,0x00,0x9E,0x32,
0x30,0x36,0x31,0x00,0x00,0x00,0x78,0xEE,
0x30,0xD0,0xE6,0x01,0xA2,0x53,0xBD,0xF2,
0x08,0x9D,0xFF,0x01,0xCA,0xD0,0xF7,0xA2,
0xB3,0xBD,0x40,0x08,0x9D,0xF6,0x00,0xCA,
0xD0,0xF7,0xA0,0xAA,0xCA,0xBD,0xAA,0xAA,
0x9D,0x00,0xFF,0x8A,0xD0,0xF6,0xCE,0x31,
0x08,0xCE,0x2E,0x08,0x88,0xD0,0xED,0x4C,
0x15,0x01,0x80,0x00,0x8D,0xAA,0xAA,0xE6,
0xFA,0xD0,0x02,0xE6,0xFB,0x60,0xA4,0xF8,
0xA2,0x02,0x20,0x2F,0x02,0x85,0xF8,0x98,
0xA2,0x06,0x20,0x2F,0x02,0x20,0xF9,0x00,
0xA0,0x00,0x98,0xA2,0x02,0x20,0x2F,0x02,
0xC5,0xF8,0xD0,0xEC,0x20,0x11,0x02,0x85,
0xC3,0xC9,0x01,0xD0,0x3F,0x98,0x20,0x25,
0x02,0x4A,0x90,0x46,0x20,0x25,0x02,0x4A,
0x90,0xCC,0xC8,0x20,0x11,0x02,0x85,0xC3,
0xC9,0x40,0x90,0x0B,0xA2,0x02,0x20,0x26,
0x02,0x85,0xC3,0x20,0x11,0x02,0xA8,0x20,
0x11,0x02,0xAA,0xBD,0x32,0x02,0xE0,0x20,
0x90,0x04,0x8A,0x20,0x23,0x02,0xA6,0xC3,
0xE8,0x20,0xF9,0x00,0xCA,0xD0,0xFA,0x88,
0xF0,0xAE,0xD0,0xF5,0x20,0x11,0x02,0xC9,
0x7F,0xF0,0x26,0xE9,0x00,0xA2,0x00,0x20,
0x2F,0x02,0x85,0x2E,0xA2,0x08,0x20,0x26,
0x02,0x65,0xFA,0x85,0x2D,0xA5,0xFB,0xE5,
0x2E,0x85,0x2E,0xA6,0xC3,0xE8,0xB1,0x2D,
0x20,0xF9,0x00,0xC8,0xCA,0xD0,0xF7,0xF0,
0xCF,0xA9,0x37,0x85,0x01,0xCE,0x30,0xD0,
0xA5,0xFA,0x85,0x2D,0xA5,0xFB,0x85,0x2E,
0x58,0x4C,0xAA,0xAA,0x48,0xAD,0xAA,0xAA,
0xEE,0x02,0x02,0xD0,0x03,0xEE,0x03,0x02,
0x2A,0x85,0xF7,0x68,0x60,0xE8,0x8A,0x06,
0xF7,0xD0,0x03,0x20,0x00,0x02,0x90,0x12,
0xE8,0xE0,0x07,0xD0,0xF2,0xF0,0x0B,0xA2,
0x02,0xE8,0x06,0xF7,0xD0,0x03,0x20,0x00,
0x02,0x2A,0xCA,0xD0,0xF5,0x18,0x60,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00
};


/*
 * Write a self-extracting file: the headerUncrunchNoWrap stub, patched
 * with the parameters of this particular packed file, followed by the
 * packed data itself.
 *
 *  data, size      packed data and its length in bytes
 *  target          output file name, or NULL to write to stdout
 *  start           address stored as OUTPOS in the stub
 *  exec            address stored in the stub's final jump
 *  escape          initial escape value (only its top escBits bits are used)
 *  rleValues       RLE byte table; entries 1..31 are copied into the stub
 *  endAddr         end address used for the stub's copy target and INPOS
 *  extraLZPosBits  value stored in the stub's LZ position code
 *  enable2MHz      when zero, the two $d030 accesses are turned into
 *                  BIT instructions (opcode 0x2c)
 *
 * Returns 0 on success and 10 on any failure (no data, file too large,
 * file cannot be opened, or a write error).
 *
 * Note: the stub array is patched in place, so the changes persist across
 * calls.
 */
int SavePackNoWrap(unsigned char *data, int size, char *target,
		   int start, int exec, int escape, unsigned char *rleValues,
		   int endAddr, int extraLZPosBits, int enable2MHz)
{
    FILE *fp = NULL;
    unsigned char *header = headerUncrunchNoWrap;
    /* Only the used part of the trailing 31-entry RLE table is saved. */
    size_t headerLen = sizeof(headerUncrunchNoWrap) + rleUsed - 31;
    size_t copySrc;
    int i, failed;

    if(!data || size < 0)
        return 10;

    if(!target)
        fp = stdout;

#ifndef BIG
    if(0x801 + sizeof(headerUncrunchNoWrap) - 2 + size > 0xfe00)
    {
        /* The limit is a size_t expression: convert it to match %04x. */
        fprintf(stderr, "Packed file's max size is 0x%04x (0x%04x)!\n",
                (unsigned)(0xfe00-0x801-(sizeof(headerUncrunchNoWrap)-2)),
                (unsigned)size);
        return 10;
    }
#endif /* BIG */

    /*
     * Stub offsets are written as "C64 address - 0x7ff": the code loads at
     * $0801 and the array starts with the 2-byte load address.
     */
    if(!enable2MHz)
    {
        header[0x80e -0x7ff] = 0x2c; /* bit $d030 */
        header[0x8e4 -0x7ff] = 0x2c; /* bit $d030 */
    }

    /* Address one page below the end of the packed data in the loaded file. */
    copySrc = 0x801 + (headerLen - 2) + size - 0x100;

    header[0x82a -0x7ff] = (size>>8) + 1;
    header[0x82d -0x7ff] = copySrc & 0xff;
    header[0x82e -0x7ff] = (copySrc>>8);
    header[0x830 -0x7ff] = (endAddr - 0x100) & 0xff;
    header[0x831 -0x7ff] = ((endAddr - 0x100) >> 8);

    header[0x842 -0x7ff] = (escape>>(8-escBits));

    header[0x844 -0x7ff] = (start & 0xff);	/* OUTPOS */
    header[0x845 -0x7ff] = (start >> 8);

    header[0x850 -0x7ff] = escBits;
    header[0x858 -0x7ff] = 8-escBits;

    header[0x863 -0x7ff] = escBits;

    header[0x888 -0x7ff] = (1<<maxGamma); /* Short/Long RLE */

    header[0x88c -0x7ff] = (8-maxGamma); /* Long RLE */

    header[0x8b7 -0x7ff] = (2<<maxGamma)-1; /* EOF (maxGammaValue) */
    header[0x8bd -0x7ff] = extraLZPosBits;

    /*header[0x8e1 -0x7ff] = memconfig; */
    /*header[0x8ef -0x7ff] = $58/$78 cli/sei; */
    header[0x8f1 -0x7ff] = (exec & 0xff);
    header[0x8f2 -0x7ff] = (exec >> 8);

    header[0x8f5 -0x7ff] = (endAddr - size) & 0xff;	/* INPOS */
    header[0x8f6 -0x7ff] = ((endAddr - size) >> 8);
    header[0x911 -0x7ff] = maxGamma + 1;

    /* RLE table occupies the last 31 bytes of the stub; entry 0 is unused. */
    for(i=1;i<32;i++)
        header[sizeof(headerUncrunchNoWrap) - 32 + i] = rleValues[i];

    if(!fp && !(fp = fopen(target, "wb")))
    {
        fprintf(stderr, "Could not open %s for writing\n", target);
        return 10;
    }

    /* A short write would leave a truncated, unusable program: report it. */
    failed = fwrite(header, 1, headerLen, fp) != headerLen ||
             fwrite(data, 1, (size_t)size, fp) != (size_t)size;
    if(fp != stdout && fclose(fp) != 0)
        failed = 1;

    if(failed)
    {
        fprintf(stderr, "Error writing %s\n", target ? target : "stdout");
        return 10;
    }
    return 0;
}


/*
 * Stub that SavePack() writes in front of the packed data when the
 * unpacked data ends above $10000 (otherwise SavePack() delegates to
 * SavePackNoWrap() and this array is not used).
 * Includes loading address: the first two bytes (0x01,0x08) are $0801,
 * which is why SavePack() indexes the array with "C64 address - 0x7ff".
 * The 0xAA bytes and several other operands are placeholders that
 * SavePack() overwrites before saving; the last 31 bytes (all zero here)
 * receive the RLE byte table.
 * NOTE(review): the remaining bytes are presumably the 6502 decompression
 * routine with wrap-around support — not verifiable from the C source alone.
 */
static unsigned char headerUncrunch[] =
{
0x01,0x08,0x0B,0x08,0xEF,0x00,0x9E,0x32,
0x30,0x36,0x31,0x00,0x00,0x00,0x78,0xEE,
0x30,0xD0,0xE6,0x01,0xA2,0x00,0xBD,0xAA,
0xAA,0x95,0x4B,0xCA,0x10,0xF8,0xA2,0x58,
0xBD,0xFE,0x08,0x9D,0xFF,0x01,0xCA,0xD0,
0xF7,0xA2,0xB5,0xBD,0x4A,0x08,0x9D,0xF6,
0x00,0xCA,0xD0,0xF7,0xA0,0xAA,0xCA,0xBD,
0xAA,0xAA,0x9D,0x00,0xFF,0x8A,0xD0,0xF6,
0xCE,0x3B,0x08,0xCE,0x38,0x08,0x88,0xD0,
0xED,0x4C,0x15,0x01,0x80,0x00,0x8D,0xAA,
0xAA,0xE6,0xFA,0xD0,0x02,0xE6,0xFB,0x60,
0xA4,0xF8,0xA2,0x02,0x20,0x34,0x02,0x85,
0xF8,0x98,0xA2,0x06,0x20,0x34,0x02,0x20,
0xF9,0x00,0xA0,0x00,0x98,0xA2,0x02,0x20,
0x34,0x02,0xC5,0xF8,0xD0,0xEC,0x20,0x18,
0x02,0x85,0xC3,0xC9,0x01,0xD0,0x41,0x98,
0x20,0x2A,0x02,0x4A,0x90,0x48,0x20,0x2A,
0x02,0x4A,0x90,0xCC,0xC8,0x20,0x18,0x02,
0x85,0xC3,0xC9,0x40,0x90,0x0B,0xA2,0x02,
0x20,0x2B,0x02,0x85,0xC3,0x20,0x18,0x02,
0xA8,0x20,0x18,0x02,0xAA,0xBD,0x37,0x02,
0xE0,0x20,0x90,0x06,0x8A,0xA2,0x03,0x20,
0x2B,0x02,0xA6,0xC3,0xE8,0x20,0xF9,0x00,
0xCA,0xD0,0xFA,0x88,0xF0,0xAC,0xD0,0xF5,
0x20,0x18,0x02,0xC9,0x7F,0xF0,0x26,0xE9,
0x00,0xA2,0x00,0x20,0x34,0x02,0x85,0x2E,
0xA2,0x08,0x20,0x2B,0x02,0x65,0xFA,0x85,
0x2D,0xA5,0xFB,0xE5,0x2E,0x85,0x2E,0xA6,
0xC3,0xE8,0xB1,0x2D,0x20,0xF9,0x00,0xC8,
0xCA,0xD0,0xF7,0xF0,0xCF,0xA9,0x37,0x85,
0x01,0xCE,0x30,0xD0,0xA5,0xFA,0x85,0x2D,
0xA5,0xFB,0x85,0x2E,0x58,0x4C,0xAA,0xAA,
0x48,0xAD,0xAA,0xAA,0x2A,0x85,0xF7,0xEE,
0x02,0x02,0xD0,0x0A,0xEE,0x03,0x02,0xD0,
0x05,0xA9,0x4B,0x8D,0x02,0x02,0x68,0x60,
0xE8,0x8A,0x06,0xF7,0xD0,0x03,0x20,0x00,
0x02,0x90,0x10,0xE8,0xE0,0x07,0xD0,0xF2,
0xF0,0x09,0xE8,0x06,0xF7,0xD0,0x03,0x20,
0x00,0x02,0x2A,0xCA,0xD0,0xF5,0x18,0x60,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x00,0x00,0x00,0x00,0x00,0x00,0x00

};


/*
 * Write a self-extracting file.  Unless BIG is defined, files whose
 * endAddr does not exceed $10000 are handed to SavePackNoWrap(); the rest
 * of this function handles the wrap-around case with the headerUncrunch
 * stub.  (With BIG defined the delegation is unconditional.)
 *
 *  data, size      packed data and its length in bytes
 *  target          output file name, or NULL to write to stdout
 *  start           address stored as OUTPOS in the stub
 *  exec            address stored in the stub's final jump
 *  escape          initial escape value (only its top escBits bits are used)
 *  rleValues       RLE byte table; entries 1..31 are copied into the stub
 *  endAddr         end address of the data; the part above $10000 is the
 *                  "overlap" that the stub relocates
 *  extraLZPosBits  value stored in the stub's LZ position code
 *  enable2MHz      when zero, the two $d030 accesses are turned into
 *                  BIT instructions (opcode 0x2c)
 *
 * Returns 0 on success and 10 on any failure (no data, file too large,
 * file cannot be opened, or a write error).
 *
 * Note: the stub array is patched in place, so the changes persist across
 * calls.
 */
int SavePack(unsigned char *data, int size, char *target,
	     int start, int exec, int escape, unsigned char *rleValues,
	     int endAddr, int extraLZPosBits, int enable2MHz)
{
    FILE *fp = NULL;
    unsigned char *header = headerUncrunch;
    /* Only the used part of the trailing 31-entry RLE table is saved. */
    size_t headerLen = sizeof(headerUncrunch) + rleUsed - 31;
    size_t packedEnd;
    int i, overlap = 0, failed;

#ifndef BIG
    if(endAddr <= 0x10000)
#endif /* BIG */
        return SavePackNoWrap(data, size, target, start, exec, escape,
                              rleValues, endAddr, extraLZPosBits,
                              enable2MHz);
    if(!data || size < 0)
        return 10;

    if(!target)
        fp = stdout;

#ifndef BIG
    if(0x801 + sizeof(headerUncrunch) - 2 + size > 0xfe00)
    {
        /* The limit is a size_t expression: convert it to match %04x. */
        fprintf(stderr, "Packed file's max size is 0x%04x (0x%04x)!\n",
                (unsigned)(0xfe00-0x801-(sizeof(headerUncrunch)-2)),
                (unsigned)size);
        return 10;
    }
#endif /* BIG */

    /*
     * Stub offsets are written as "C64 address - 0x7ff": the code loads at
     * $0801 and the array starts with the 2-byte load address.
     */
    if(!enable2MHz)
    {
        header[0x80e -0x7ff] = 0x2c; /* bit $d030 */
        header[0x8f0 -0x7ff] = 0x2c; /* bit $d030 */
    }

    if(endAddr > 0x10000)
    {
        overlap = endAddr - 0x10000;
        endAddr = 0x10000;

        /*
            Make the decrunch code wrap from $ffff to $004b.
            The decrunch code first copies the data that would exceed
            $ffff to $004b and then copy the rest of it to end at $ffff.
         */

        if(overlap > 22)
        {
            fprintf(stderr,
                    "Warning: data overlap is %d, but only 22 supported!\n",
                    overlap);
            fprintf(stderr, "The data from $61 to $%02x is overwritten.\n",
                    overlap - 22 + 0x61);
        }
    }

    /* Address where the non-overlapping part of the packed data ends. */
    packedEnd = 0x801 + (headerLen - 2) + size - overlap;

    header[0x814 -0x7ff] = overlap?(overlap-1):0;
    header[0x816 -0x7ff] = packedEnd & 0xff;
    header[0x817 -0x7ff] = (packedEnd>>8);

    header[0x834 -0x7ff] = (size>>8) + 1;
    header[0x837 -0x7ff] = (packedEnd - 0x100) & 0xff;
    header[0x838 -0x7ff] = ((packedEnd - 0x100)>>8);
    header[0x83a -0x7ff] = (endAddr - 0x100) & 0xff;
    header[0x83b -0x7ff] = ((endAddr - 0x100) >> 8);

    header[0x84c -0x7ff] = (escape>>(8-escBits));

    header[0x84e -0x7ff] = (start & 0xff);	/* OUTPOS */
    header[0x84f -0x7ff] = (start >> 8);

    header[0x85a -0x7ff] = escBits;
    header[0x862 -0x7ff] = 8-escBits;

    header[0x86d -0x7ff] = escBits;

    header[0x892 -0x7ff] = (1<<maxGamma); /* Short/Long RLE */

    header[0x896 -0x7ff] = (8-maxGamma); /* Long RLE */

    header[0x8c3 -0x7ff] = (2<<maxGamma)-1; /* EOF (maxGammaValue) */
    header[0x8c9 -0x7ff] = extraLZPosBits;

    /*header[0x8ed -0x7ff] = memconfig; */
    /*header[0x8fb -0x7ff] = $58/$78 cli/sei; */
    header[0x8fd -0x7ff] = (exec & 0xff);
    header[0x8fe -0x7ff] = (exec >> 8);

    header[0x901 -0x7ff] = (endAddr + overlap - size) & 0xff;	/* INPOS */
    header[0x902 -0x7ff] = ((endAddr + overlap - size) >> 8);
    header[0x924 -0x7ff] = maxGamma + 1;

    /* RLE table occupies the last 31 bytes of the stub; entry 0 is unused. */
    for(i=1;i<32;i++)
        header[sizeof(headerUncrunch) - 32 + i] = rleValues[i];

    if(!fp && !(fp = fopen(target, "wb")))
    {
        fprintf(stderr, "Could not open %s for writing\n", target);
        return 10;
    }

    /* A short write would leave a truncated, unusable program: report it. */
    failed = fwrite(header, 1, headerLen, fp) != headerLen ||
             fwrite(data, 1, (size_t)size, fp) != (size_t)size;
    if(fp != stdout && fclose(fp) != 0)
        failed = 1;

    if(failed)
    {
        fprintf(stderr, "Error writing %s\n", target ? target : "stdout");
        return 10;
    }
    return 0;
}


/*
 * Single-bit flag values, each on its own bit so they can be OR-ed together.
 * NOTE(review): no use of these flags is visible in this part of the file;
 * their exact meaning has to be confirmed where they are tested.
 */
#ifdef ENABLE_VERBOSE
#define F_VERBOSE (1<<0)	/* exists only when ENABLE_VERBOSE is defined */
#endif
#define F_STATS   (1<<1)
#define F_AUTO    (1<<2)
#define F_NOOPT   (1<<3)
#define F_AUTOEX  (1<<4)
#define F_SKIP    (1<<5)
#define F_2MHZ    (1<<6)

#define F_UNPACK  (1<<14)
#define F_ERROR   (1<<15)

/*
 * Smaller of two values.  Both arguments are fully parenthesized so that
 * expressions with low-precedence operators (e.g. min(a|b, c)) compare and
 * yield the intended operands.  As with any such macro, the selected
 * argument is evaluated twice — do not pass expressions with side effects.
 */
#ifndef min
#define min(a,b) (((a)<(b))?(a):(b))
#endif


/*
 * Limits derived from maxGamma.  These expand to expressions, not
 * constants: every use reads the value maxGamma has at that moment.
 */
#define LRANGE		(((2<<maxGamma)-3)*256)	/* 0..125, 126 -> 1..127 */
#define MAXLZLEN	(2<<maxGamma)
#define MAXRLELEN	(((2<<maxGamma)-2)*256)	/* 0..126 -> 1..127 */
#define DEFAULT_LZLEN	LRANGE

/*
 * NOTE(review): presumably run-time copies of LRANGE, MAXLZLEN and
 * MAXRLELEN; they are assigned outside this part of the file.  maxlzlen is
 * read by LenLz()/OutputLz() and maxrlelen by LenRle()/OutputRle().
 */
static int lrange, maxlzlen, maxrlelen;



/* Capacity of the packed-output buffer in bytes. */
#ifdef BIG
#define OUT_SIZE 800000
#else
#define OUT_SIZE 65536
#endif /* BIG */
/* Packed bit stream (starts zeroed) and the index of the byte being filled. */
static unsigned char outBuffer[OUT_SIZE];
static int outPointer = 0;

/*
 * Append one bit to the output stream, most significant bit first.
 *
 *  bit    non-zero writes a 1, zero leaves the (already zero) bit as is
 *  flush  non-zero turns the call into a no-op when the stream is already
 *         at a byte boundary, so repeated flush calls pad the current byte
 *         and then stop
 *
 * Bits that would land beyond OUT_SIZE are dropped, but outPointer keeps
 * advancing.
 */
void PutBit(int bit, int flush)
{
    static int bitMask = 0x80;	/* position of the next bit in the byte */

    if(bitMask == 0x80 && flush)
        return;

    if(outPointer < OUT_SIZE && bit != 0)
        outBuffer[outPointer] |= bitMask;

    if(bitMask == 0x01)
    {
        /* Byte complete: start the next one at its top bit. */
        bitMask = 0x80;
        outPointer++;
    }
    else
    {
        bitMask >>= 1;
    }
}


/*
 * Write a positive integer in the length-limited gamma code described in
 * the "Value:" comment near the top of the file: one 1-bit per significant
 * bit below the top one, a terminating 0 (left out once maxGamma 1-bits
 * have been sent), then those lower bits, most significant first.
 * Values below 2 produce just the terminating 0.
 */
void PutValue(int value)
{
    int reversed = 0;	/* low bits of value, collected in reverse order */
    int nbits = 0;
    int i;

    for(; value > 1; value >>= 1)
    {
        reversed = (reversed << 1) | (value & 1);
        nbits++;
        PutBit(1, 0);
    }

    if(nbits < maxGamma)
        PutBit(0, 0);

    /* Popping from the reversed copy restores the original bit order. */
    for(i = 0; i < nbits; i++)
    {
        PutBit(reversed & 1, 0);
        reversed >>= 1;
    }
}

#if 0
/*
 * Disabled (inside "#if 0") loop-based estimate of the number of bits
 * PutValue() needs, kept for reference.
 * It adds 2 per halving and then compares that doubled count with
 * maxGamma, whereas the compiled RealLenValue() compares the undoubled
 * magnitude, so the two do not give the same results as written.
 */
int LenValue(int value)
{
    int count = 0;

    while(value>1)
    {
	value >>= 1;
	count += 2;
    }
    if(count<maxGamma)
	return count + 1;
    return count;
}
#else
/*
 * Number of bits PutValue() emits for a value: twice the magnitude
 * (position of the highest set bit), plus one for the terminating 0 unless
 * the magnitude has reached maxGamma.
 * Only 1..255 are classified; any other value is treated like 1.
 */
int RealLenValue(int value)
{
    int magnitude = 0;

    if(value >= 2 && value < 256)
    {
        int rest;

        for(rest = value; rest > 1; rest >>= 1)
            magnitude++;
    }

    return (magnitude < maxGamma) ? (2*magnitude + 1) : (2*magnitude);
}
/* Cached RealLenValue() results; entry 0 is never filled and stays 0. */
static int lenValue[256];
void InitValueLen(void);
/*
 * Fill lenValue[1..255].  The results depend on maxGamma, so this has to
 * run again whenever maxGamma changes.
 */
void InitValueLen()
{
    int value = 1;

    while(value < 256)
    {
        lenValue[value] = RealLenValue(value);
        value++;
    }
}
/* Table lookup; the argument must be in 0..255. */
#define LenValue(a) (lenValue[a])

#endif


/*
 * Write the lowest "bits" bits of "byte" to the output stream, most
 * significant of them first.  Nothing is written when bits is 0.
 */
void PutNBits(int byte, int bits)
{
    for(; bits != 0; )
    {
        --bits;
        PutBit(byte & (1 << bits), 0);
    }
}


/*
 * Statistics gathered by the Output*() and PutRleByte() routines.
 * gainedEscaped accumulates the escBits+3 extra bits spent on every escaped
 * literal; the other gained* counters accumulate bits saved relative to
 * 8 bits per input byte (gainedSRle/gainedLRle split gainedRle into short
 * and long RLE).
 */
static int gainedEscaped = 0;
static int gainedRle = 0, gainedSRle = 0, gainedLRle = 0;
static int gainedLz = 0, gainedRlecode = 0;

/* Number of times each kind of code was emitted. */
static int timesEscaped = 0, timesNormal = 0;
static int timesRle = 0, timesSRle = 0, timesLRle = 0;
static int timesLz = 0;

/*
 * Histogram indexed as lenStat[size class][kind].  The size class is a
 * power-of-two bucket; kind is 0 = LZ77 offset high part and 1 = LZ77
 * length (both from OutputLz), 2 = short RLE length (OutputRle),
 * 3 = RLE byte rank (PutRleByte).
 */
static int lenStat[8][4];


/*
 * Emit one literal byte.
 *
 * A byte whose top bits (under escMask) differ from the current escape
 * value is written as 8 plain bits.  Otherwise it has to be escaped: the
 * current escape bits, the selector "010", the new escape value and the
 * remaining low bits of the byte are written, and *esc becomes newesc.
 *
 * Returns 1 if the byte was escaped, 0 if it was written as is.
 */
int OutputNormal(int *esc, unsigned char *data, int newesc)
{
    const int lowBits = 8 - escBits;

    timesNormal++;

    if((data[0] & escMask) != *esc)
    {
        PutNBits(data[0], 8);
        return 0;
    }

    PutNBits(*esc >> lowBits, escBits);	/* escBits>=0 */
    PutBit(0, 0);
    PutBit(1, 0);
    PutBit(0, 0);

    *esc = newesc;
    PutNBits(*esc >> lowBits, escBits);	/* escBits>=0 */
    PutNBits(data[0], lowBits);

    timesEscaped++;
    gainedEscaped += escBits + 3;
    return 1;
}



void OutputEof(int *esc);
/*
 * Write the end-of-file code (current escape bits, length code 3, then the
 * largest gamma value (2<<maxGamma)-1) and pad the last byte with 1-bits.
 */
void OutputEof(int *esc)
{
    int pad = 7;

    /* EOF marker */
    PutNBits(*esc >> (8 - escBits), escBits);	/* escBits>=0 */
    PutValue(3);	/* >2 */
    PutValue((2 << maxGamma) - 1);

    /* Flush: PutBit() ignores these once a byte boundary is reached. */
    while(pad-- > 0)
        PutBit(1, 1);
}


/*
 * RLE byte rank table.  PutRleByte() and InitRleLen() look only at entries
 * 1..31 (rank = index); entry 0 is not read by the code in this part of
 * the file.
 */
static unsigned char rleValues[32] = {1,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
				0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0};
/*
 * NOTE(review): presumably a per-byte-value histogram used to build the
 * rank table; it is not referenced in this part of the file.
 */
static int rleHist[256];

/*
 * Write the byte value of an RLE code.
 *
 * If the byte is found in rleValues[1..31] (first match wins), its rank is
 * written as a gamma code.  Otherwise the value 32 + (top five bits of the
 * byte) is written as a gamma code, followed by the low three bits.
 * Updates the rank histogram (lenStat[..][3]) and gainedRlecode.
 */
void PutRleByte(unsigned char data)
{
    int rank = 1;

    while(rank < 32 && rleValues[rank] != data)
        rank++;

    if(rank < 32)
    {
        /* Size class of the rank: 1 -> 0, 2-3 -> 1, 4-7 -> 2, 8-15 -> 3, 16-31 -> 4. */
        int sizeClass = 0;
        int rest;

        for(rest = rank; rest > 1; rest >>= 1)
            sizeClass++;
        lenStat[sizeClass][3]++;

        gainedRlecode += 8 - LenValue(rank);

        PutValue(rank);
        return;
    }

    /* Unranked byte. */
    PutValue(32 + (data>>3));

    gainedRlecode -= LenValue(32+(data>>3)) + 3;

    PutNBits(data, 3);

    lenStat[5][3]++;
    /* Note: values 64..127 are not used if maxGamma>5 */
}


#if 0
/*
 * Disabled (inside "#if 0") search-based version of the RLE byte cost,
 * kept for reference: returns the gamma code length of the first rank in
 * rleValues[1..31] that holds the byte, or the cost of an unranked byte
 * (code length of 32 plus 3 literal bits) if there is none.
 */
int LenRleByte(unsigned char data)
{
    int index;

    for(index = 1; index < 32; index++)
    {
	if(data == rleValues[index])
	{
	    return LenValue(index);
	}
    }
    return LenValue(32 + 0) + 3;
}
#else
/* Cost in bits of coding each byte value as an RLE byte (see PutRleByte). */
static unsigned char rleLen[256];
void InitRleLen(void);
/*
 * Rebuild rleLen[] from rleValues[] and the current LenValue() table.
 *
 * Every byte starts with the cost of an unranked byte (gamma code of 32
 * plus 3 literal bits); ranked bytes then get the gamma code length of
 * their rank.
 *
 * The ranks are filled from 31 down to 1 so that, when the same byte value
 * appears in more than one slot (for example zero-filled unused slots),
 * the LOWEST rank determines the cost.  That is the rank PutRleByte()
 * actually emits, because it stops at the first match.  Filling in
 * ascending order left the cost of the highest duplicate instead.
 */
void InitRleLen(void)
{
    int i;

    for(i=0;i<256;i++)
        rleLen[i] = LenValue(32 + 0) + 3;
    for(i=31;i>=1;i--)
        rleLen[rleValues[i]] = LenValue(i);
}
/* Table lookup; the argument must be a byte value (0..255). */
#define LenRleByte(d) (rleLen[d])
#endif


/*
 * Estimate the number of bits needed to code a run of "len" copies of
 * "data".
 *
 *  - a run of 1 is priced as an escaped literal (escBits + 3 + 8),
 *  - runs up to 1<<maxGamma use the short RLE form,
 *  - longer runs use the long RLE form, in pieces of at most maxrlelen
 *    bytes; whatever is left after a piece is priced by the same rules.
 */
int LenRle(int len, unsigned char data)
{
    int bits = 0;
    int remaining = len;

    for(;;)
    {
        int piece;

        if(remaining == 1)
        {
            bits += escBits + 3 + 8;
            break;
        }
        if(remaining <= (1<<maxGamma))
        {
            bits += escBits + 3 + LenValue(remaining-1) + LenRleByte(data);
            break;
        }

        piece = (remaining < maxrlelen) ? remaining : maxrlelen;
        bits += escBits + 3 + maxGamma + 8 +
                LenValue(((piece-1)>>8)+1) + LenRleByte(data);

        remaining -= piece;
        if(remaining == 0)
            break;
    }
    return bits;
}


/*
 * Emit a run of "rlelen" copies of *data.
 *
 *  - 2..(1<<maxGamma) bytes: one short RLE code,
 *  - fewer than 3 bytes otherwise: plain literals via OutputNormal(),
 *  - anything longer: long RLE codes of at most maxrlelen bytes each; the
 *    remainder after a full-size piece is coded by the same rules.
 *
 * Updates the RLE statistics counters.  Always returns 0.
 *
 * Changes from the earlier version: runs of 129..256 bytes are now counted
 * in lenStat[7][2] (they were added to the 65..128 bucket, unlike the
 * matching code in OutputLz), and the two identical long-RLE code paths
 * are merged into one.
 */
int OutputRle(int *esc, unsigned char *data, int rlelen)
{
    int len = rlelen, tmp;

    while(len)
    {
        int piece;

        if(len >= 2 && len <= (1<<maxGamma))
        {
            /* Short RLE */
            if(len==2)
                lenStat[0][2]++;
            else if(len<=4)
                lenStat[1][2]++;
            else if(len<=8)
                lenStat[2][2]++;
            else if(len<=16)
                lenStat[3][2]++;
            else if(len<=32)
                lenStat[4][2]++;
            else if(len<=64)
                lenStat[5][2]++;
            else if(len<=128)
                lenStat[6][2]++;
            else if(len<=256)
                lenStat[7][2]++;

            PutNBits((*esc>>(8-escBits)), escBits);	/* escBits>=0 */
            PutBit(0, 0);
            PutBit(1, 0);
            PutBit(1, 0);
            PutValue(len-1);
            PutRleByte(*data);

            tmp = 8*len -escBits -3 -LenValue(len-1) -LenRleByte(*data);
            gainedRle += tmp;
            gainedSRle += tmp;

            timesRle++;
            timesSRle++;
            return 0;
        }
        if(len<3)
        {
            while(len--)
                OutputNormal(esc, data, *esc);
            return 0;
        }

        /* Long RLE: code as much as one code can hold. */
        piece = (len <= maxrlelen) ? len : maxrlelen;

        PutNBits((*esc>>(8-escBits)), escBits);	/* escBits>=0 */

        PutBit(0, 0);
        PutBit(1, 0);
        PutBit(1, 0);

        /*
         * The low byte of piece-1 is split: its top maxGamma bits ride in
         * the gamma code that marks a long RLE, the rest follow as plain
         * bits (PutNBits only looks at the low 8-maxGamma bits).
         */
        PutValue((1<<maxGamma) + (((piece-1)&0xff)>>(8-maxGamma)));

        PutNBits((piece-1), 8-maxGamma);
        PutValue(((piece-1)>>8) + 1);
        PutRleByte(*data);

        tmp = 8*piece -escBits -3 -maxGamma -8 -LenValue(((piece-1)>>8)+1) -LenRleByte(*data);
        gainedRle += tmp;
        gainedLRle += tmp;

        timesRle++;
        timesLRle++;

        len -= piece;
        data += piece;
    }
    return 0;
}


/*
 * Cost of coding an LZ77 match of length lzlen at backward offset lzpos
 * with the current escBits / extraLZPosBits settings.
 *
 * Two-byte matches have a dedicated short form that only reaches offsets
 * up to 256; anything farther gets the penalty value 100000 so that it is
 * never selected. Lengths above maxlzlen are reported on stderr and also
 * get the penalty value.
 */
int LenLz(int lzlen, int lzpos)
{
    int posHigh;

    if(lzlen == 2)
        return (lzpos > 256) ? 100000 : (escBits + 2 + 8);

    if(lzlen > maxlzlen)
    {
        fprintf(stderr, "Error: lzlen too short/long (%d)\n", lzlen);
        return 100000;
    }

    /* High part of the offset is sent through PutValue, hence the +1 */
    posHigh = ((lzpos-1)>>(8+extraLZPosBits))+1;

    return escBits + 8 + extraLZPosBits +
            LenValue(posHigh) +
            LenValue(lzlen-1);
}



/*
 * Emit one LZ77 match to the output bit stream and update statistics.
 *
 *  esc    - current escape; its top escBits bits are written as the prefix.
 *  lzlen  - match length, must be 2..maxlzlen.
 *  lzpos  - backward offset of the match (1-based, i.e. lzpos-1 is coded).
 *  data   - unused here.
 *  curpos - current input position, only used in the diagnostic message.
 *
 * Layout: escape, PutValue(lzlen-1), then either
 *  - lzlen == 2: a 0 bit followed by the inverted low 8 offset bits
 *    (offsets above 256 cannot be represented and are reported), or
 *  - otherwise: PutValue(high offset part + 1), extraLZPosBits middle bits,
 *    and the inverted low 8 offset bits.
 *
 * Returns 3 on success; on an out-of-range lzlen prints an error, emits
 * nothing and returns lzlen.
 */
int OutputLz(int *esc, int lzlen, int lzpos, char *data, int curpos)
{
    /* Match length histogram (column 1 of lenStat) */
    if(lzlen==2)
        lenStat[0][1]++;
    else if(lzlen<=4)
        lenStat[1][1]++;
    else if(lzlen<=8)
        lenStat[2][1]++;
    else if(lzlen<=16)
        lenStat[3][1]++;
    else if(lzlen<=32)
        lenStat[4][1]++;
    else if(lzlen<=64)
        lenStat[5][1]++;
    else if(lzlen<=128)
        lenStat[6][1]++;
    else if(lzlen<=256)
        lenStat[7][1]++;

    if(lzlen >= 2 && lzlen <= maxlzlen)
    {
        int tmp;

        PutNBits((*esc>>(8-escBits)), escBits);	/* escBits>=0 */

        /* High offset part histogram (column 0 of lenStat) */
        tmp = ((lzpos-1)>>(8+extraLZPosBits))+2;
        if(tmp==2)
            lenStat[0][0]++;
        else if(tmp<=4)
            lenStat[1][0]++;
        else if(tmp<=8)
            lenStat[2][0]++;
        else if(tmp<=16)
            lenStat[3][0]++;
        else if(tmp<=32)
            lenStat[4][0]++;
        else if(tmp<=64)
            lenStat[5][0]++;
        else if(tmp<=128)
            lenStat[6][0]++;
        else if(tmp<=256)
            lenStat[7][0]++;    /* was [6]: 129..256 has its own bucket,
                                   matching the length histogram above */

        if(lzlen==2)
        {
            PutValue(lzlen-1);
            PutBit(0, 0);
            if(lzpos > 256)
                fprintf(stderr, "Error at %d: lzpos too long (%d) for lzlen==2\n", curpos, lzpos);
        }
        else
        {
            PutValue(lzlen-1);
            PutValue( ((lzpos-1) >> (8+extraLZPosBits)) +1);
            PutNBits( ((lzpos-1) >> 8), extraLZPosBits);
        }
        /* Low offset byte is stored inverted; the decoder XORs with 0xff */
        PutNBits(((lzpos-1) & 0xff) ^ 0xff, 8);

        gainedLz += 8*lzlen -LenLz(lzlen, lzpos);
        timesLz++;
        return 3;
    }
    fprintf(stderr, "Error: lzlen too short/long (%d)\n", lzlen);
    return lzlen;
}



/*
 * Per-input-position work tables, indexed by position 0..inlen-1
 * (allocated in PackLz77()):
 *   rle[p]    - length of the run of equal bytes starting at p (0 = none);
 *               inside a run it holds the remaining run length.
 *   elr[p]    - distance from p back to the first byte of its run.
 *   lzlen[p]  - length of the LZ77 match found for p (0 = none).
 *   lzpos[p]  - backward offset of that match (source is at p - lzpos[p]).
 */
static unsigned short *rle, *elr, *lzlen, *lzpos;
/*
 *   length[p] - cost from p to the end of input as computed by
 *               OptimizeLength(); needs inlen+1 entries.
 *   inlen     - number of input positions.
 */
static int *length, inlen;
/*
 *   indata[p] - the input bytes.
 *   mode[p]   - coding chosen for p: 0 normal byte, 1 LZ77, 2 RLE;
 *               3 is a temporary mark used by OptimizeEscape().
 *   newesc[p] - escape value to switch to at p, set by OptimizeEscape().
 */
static unsigned char *indata, *mode, *newesc;


/* Accumulated saving from shortening RLE/LZ77 lengths in OptimizeLength() */
static int lzopt = 0;
/* Non-recursive version */
/* NOTE! IMPORTANT! the "length" array length must be inlen+1 */

/*
 * Select, for every input position, the cheapest way to code the rest of
 * the input, scanning from the last position back to the first.
 *
 * For position i three candidates are compared:
 *   r1 - a normal byte:  8 + length[i+1]
 *   r2 - RLE:            LenRle(rle[i], ..) + length[i + rle[i]]
 *   r3 - LZ77:           LenLz(lzlen[i], lzpos[i]) + length[i + lzlen[i]]
 * The smallest one is stored in length[i] and the choice in mode[i]
 * (0 normal, 1 LZ77, 2 RLE). On ties RLE wins over LZ77, and a packed form
 * wins over a normal byte.
 *
 * If `optimize` is non-zero, shorter RLE/LZ77 lengths are also tried; when
 * one is cheaper, rle[i] / lzlen[i] are overwritten with it and the saving
 * is added to lzopt.
 *
 * Returns length[0], the total cost of the selected path.
 */
int OptimizeLength(int optimize)
{
    int i;

    length[inlen] = 0;		/* one off the end, our 'target' */
    for(i=inlen-1;i>=0;i--)
    {
    	int r1 = 8 + length[i+1], r2, r3;

	/* Nothing but a normal byte is possible here */
	if(!lzlen[i] && !rle[i])
	{
	    length[i] = r1;
	    mode[i] = 0;
	    continue;
	}

	/* If rle>maxlzlen, skip to the start of the rle-maxlzlen.. */
	if(rle[i] > maxlzlen && elr[i] > 1)
	{
	    int z = elr[i];

	    /* Jump to the first byte of the run; the positions skipped
	       over are filled in below. */
	    i -= elr[i];

	    r2 = LenRle(rle[i], indata[i]) + length[i+ rle[i]];
	    if(optimize)
	    {
		int ii, mini = rle[i], minv = r2;

		/* Try every shorter run length down to
		   rle[i] - (1<<maxGamma), but not below 2 */
		int bot = rle[i] - (1<<maxGamma);
		if(bot < 2)
		    bot = 2;

		for(ii=mini-1;ii>=bot;ii--)
		{
		    int v = LenRle(ii, indata[i]) + length[i + ii];
		    if(v < minv)
		    {
			minv = v;
			mini = ii;
		    }
		}
		if(minv != r2)
		{
		    lzopt += r2 - minv;
		    rle[i] = mini;
		    r2 = minv;
		}
	    }
	    length[i] = r2;
	    mode[i] = 2;

	    /* All skipped positions i..i+z get the same cost and mode */
	    for(;z>=0;z--)
	    {
		length[i+z] = r2;
		mode[i+z] = 2;
	    }
	    continue;
	}
	r3 = r2 = r1 + 1000; /* r3 >= r2 > r1 */

	if(rle[i])
	{
	    r2 = LenRle(rle[i], indata[i]) + length[i+ rle[i]];

	    if(optimize)
	    {
		int ii, mini = rle[i], minv = r2;

#if 0
		int bot = rle[i] - (1<<maxGamma);
		if(bot < 2)
		    bot = 2;

		for(ii=mini-1;ii>=bot;ii--)
		{
		    int v = LenRle(ii, indata[i]) + length[i + ii];
		    if(v < minv)
		    {
			minv = v;
			mini = ii;
		    }
		}
#else
		/* Does not really miss many 'minimums' this way,
		   at least not globally..
		   Makes the assumption that the Elias Gamma Code is
		   used, i.e. values of the form 2^n are 'optimal' */
		/* Only the candidate lengths 2, 4, 8, .. below rle[i]
		   are evaluated */
		ii = 2;
		while(rle[i] > ii)
		{
		    int v = LenRle(ii, indata[i]) + length[i + ii];
		    if(v < minv)
		    {
			minv = v;
			mini = ii;
		    }
		    ii <<= 1;
		}
#endif
		if(minv != r2)
		{
/*printf("%05d RL %d %d\n", i, rle[i], mini);*/
		    lzopt += r2 - minv;
		    rle[i] = mini;
		    r2 = minv;
		}
	    }
	}
	if(lzlen[i])
	{
	    r3 = LenLz(lzlen[i], lzpos[i]) + length[i + lzlen[i]];

	    if(optimize && lzlen[i]>2)
	    {
		/* topLen is the LZ cost without the length field, so that
		   other lengths can be priced with LenValue() alone */
		int ii, mini = lzlen[i], minv = r3,
			topLen = LenLz(lzlen[i], lzpos[i]) - LenValue(lzlen[i]-1);

#if 0
		int bot = 3;
		if(lzpos[i] <= 256)
		    bot = 2;

		for(ii=mini-1;ii>=bot;ii--)
		{
		    int v = topLen + LenValue(ii-1) + length[i + ii];
		    if(v < minv)
		    {
			minv = v;
			mini = ii;
		    }
		}
#else
		/* Does not really miss many 'minimums' this way,
		   at least not globally..
		   Makes the assumption that the Elias Gamma Code is
		   used, i.e. values of the form 2^n are 'optimal' */
		/* Length 2 is only tried for offsets up to 256 */
		ii = 2;
		if(lzpos[i]>256)
		    ii = 4;
		while(lzlen[i] > ii)
		{
		    int v = topLen + LenValue(ii-1) + length[i + ii];
		    if(v < minv)
		    {
			minv = v;
			mini = ii;
		    }
		    ii <<= 1;
		}
#endif
		/* Shorten the match only if that also beats the RLE choice */
		if(minv != r3 && minv < r2)
		{
/*printf("@%05d LZ %d %4x -> %d %4x\n", i, lzlen[i], lzpos[i], mini, lzpos[i]);*/
		    lzopt += r3 - minv;
		    lzlen[i] = mini;
		    r3 = minv;
		}
	    }
	}

	/* Pick the cheapest of normal (r1), RLE (r2) and LZ77 (r3) */
	if(r2 <= r1)
	{
	    if(r2 <= r3)
	    {
		length[i] = r2;
		mode[i] = 2;
	    }
	    else
	    {
		length[i] = r3;
		mode[i] = 1;
	    }
	}
	else
	{
	    if(r3 <= r1)
	    {
		length[i] = r3;
		mode[i] = 1;
	    }
	    else
	    {
		length[i] = r1;
		mode[i] = 0;
	    }
	}
    }
    return length[0];
}


/*
    The algorithm in the OptimizeEscape() works as follows:
    1) Only unpacked bytes are processed, they are marked
       with mode 3. We proceed from the end to the beginning.
       Variable A (old/new length) is updated.
    2) At each unpacked byte, one and only one possible
       escape matches. A new escape code must be selected
       for this case. The optimal selection is the one which
       provides the shortest number of escapes to the end
       of the file,
	i.e. A[esc] = 1+min(A[0], A[1], .. A[states-1]).
       For other states A[esc] = A[esc];
       If we change escape in this byte, the new escape is
       the one with the smallest value in A.
    3) The starting escape is selected from the possibilities
       and mode 0 is restored to all mode 3 locations.

 */

/*
 * Choose the escape code to switch to at every normal (unpacked) byte and
 * the escape code to start with.
 *
 *  startEscape - if not NULL, receives the selected starting escape
 *                (state number shifted into the top escBits bits).
 *  nonNormal   - if not NULL, receives the number of LZ77/RLE codes on the
 *                current path.
 *
 * newesc[] is filled in for every normal byte. mode[] is used for
 * temporary marks (value 3) but is restored to 0 before returning.
 *
 * Returns the escaped-byte counter (b[]) of the selected starting state,
 * or 0 if escBits would need more than 256 states.
 */
int OptimizeEscape(int *startEscape, int *nonNormal)
{
    int i, j, states = (1<<escBits), minp = 0, minv = 0, other = 0;
    int best = 0;   /* state selected as the starting escape */
    int a[256]; /* needs int */
    int b[256]; /* Remembers the # of escaped for each */

    for(i=0;i<256;i++)
        b[i] = a[i] = -1;

    if(states>256)
    {
        fprintf(stderr, "Escape optimize: only 256 states (%d)!\n",
                states);
        return 0;
    }

    /* Mark those bytes that are actually outputted */
    for(i=0;i<inlen;)
    {
        switch(mode[i])
        {
        case 1:
            other++;
            i += lzlen[i];
            break;

        case 2:
            other++;
            i += rle[i];
            break;

        case 0:
        default:
            mode[i++] = 3; /* mark it used so we can identify it */
            break;
        }
    }

    /*
        Walk backwards over the marked bytes. Start at the last valid
        index: the marking loop leaves i at inlen or beyond, which is
        outside mode[], indata[] and newesc[].
     */
    for(i=inlen-1;i>=0;i--)
    {
        /* Using a table to skip non-normal bytes does not help.. */
        if(mode[i]==3)
        {
            int k;

            /* Change the tag values back to normal */
            mode[i] = 0;

            /*
                k are the matching bytes,
                minv is the minimum value,
                minp is the minimum index
             */

            k = (indata[i] >> (8-escBits));
            newesc[i] = (minp << (8-escBits));
            a[k] = 1 + minv;
            b[indata[i]>>(8-escBits)] = b[minp] + 1;
            if(k==minp)
            {
                /* Minimum changed -> need to find a new minimum */
                /* a[k] may still be the minimum */
                minv++;
                for(k=0;k<states;k++)
                {
                    if(a[k] < minv)
                    {
                        minv = a[k];
                        minp = k;
                        /*
                            There may be others, but the first one that
                            is smaller than the old minimum is equal to
                            any other new minimum.
                         */
                        break;
                    }
                }
            }
        }
    }

    /*
        Select the best value for the initial escape. The choice is kept
        in a local so the return value does not depend on the optional
        startEscape pointer.
     */
    i = inlen;	/* make it big enough */
    for(j=0;j<states;j++)
    {
        if(a[j] <= i)
        {
            best = j;
            i = a[j];
        }
    }
    if(startEscape)
        *startEscape = (best<<(8-escBits));
    if(nonNormal)
        *nonNormal = other;
    return b[best];
}


/*
 * Build the RLE byte code table from the run counts gathered so far:
 * ranks 1..31 of rleValues[] get the byte values with the highest counts
 * in rleHist[], most frequent first. A ranked entry is marked by setting
 * its rleHist[] slot to -1. Stops early when no byte with a positive
 * count is left, then refreshes the byte code lengths via InitRleLen().
 * The flags argument is not used.
 */
void InitRle(int);
void InitRle(int flags)
{
    int rank;

    for(rank = 1; rank < 32; rank++)
    {
        int value, bestValue = -1, bestCount = 0;

        /* First byte value with the largest remaining count */
        for(value = 0; value < 256; value++)
        {
            if(rleHist[value] > bestCount)
            {
                bestCount = rleHist[value];
                bestValue = value;
            }
        }

        if(bestCount <= 0)
            break;

        rleValues[rank] = bestValue;
        rleHist[bestValue] = -1;
    }
    InitRleLen();
}


/*
 * Rebuild the RLE byte code table from the runs that are actually on the
 * selected path: rleHist[] is cleared, every position coded as RLE
 * (mode 2) counts its byte value once, and ranks 1..31 of rleValues[] are
 * assigned in order of decreasing count. rleUsed is set to the number of
 * ranks assigned. With F_STATS set in flags the ranking is printed to
 * stderr. Finishes by refreshing the byte code lengths via InitRleLen().
 */
void OptimizeRle(int);
void OptimizeRle(int flags)
{
    int pos, rank, value;
    int showStats = (flags & F_STATS);

    if(showStats)
        fprintf(stderr, "RLE Byte Code Re-Tune, RLE Ranks:\n");

    for(value = 0; value < 256; value++)
        rleHist[value] = 0;

    /* Follow the selected path and count the RLE byte values */
    pos = 0;
    while(pos < inlen)
    {
        if(mode[pos] == 1)          /* lz */
        {
            pos += lzlen[pos];
        }
        else if(mode[pos] == 2)     /* rle */
        {
            rleHist[indata[pos]]++;
            pos += rle[pos];
        }
        else                        /* normal byte, mark or anything else */
        {
            pos++;
        }
    }

    for(rank = 1; rank < 32; rank++)
    {
        int bestValue = -1, bestCount = 0;

        for(value = 0; value < 256; value++)
        {
            if(rleHist[value] > bestCount)
            {
                bestCount = rleHist[value];
                bestValue = value;
            }
        }

        if(bestCount <= 0)
            break;

        rleValues[rank] = bestValue;
        if(showStats)
        {
            fprintf(stderr, " %2d.0x%02x %-3d ", rank, bestValue, bestCount);
            if(((rank - 1) % 6)==0)
                fprintf(stderr, "\n");
        }
        rleHist[bestValue] = -1;
    }
    rleUsed = rank-1;

    /* Terminate the last statistics line unless it was just ended */
    if(showStats && ((rank - 1) % 6)!=1)
        fprintf(stderr, "\n");

    InitRleLen();
}


/* Bit reader state used by the decompressor */
static const unsigned char *up_Data;    /* byte currently being read */
static int up_Mask;                     /* mask of the next bit in *up_Data */
static int up_Byte;                     /* number of whole bytes consumed */

/*
 * Point the bit reader at `data`: reading starts from the most
 * significant bit of the first byte and the byte counter is reset.
 */
void up_SetInput(const unsigned char *data)
{
    up_Byte = 0;
    up_Mask = 0x80;
    up_Data = data;
}
/*
 * Read `bits` bits from the input set with up_SetInput(), most
 * significant bit first, and return them as an integer. Reading zero
 * bits returns 0. up_Byte is incremented each time a byte is used up.
 */
int up_GetBits(int bits)
{
    int result = 0;

    for(; bits--; )
    {
        int bit = (*up_Data & up_Mask) ? 1 : 0;

        result = (result << 1) | bit;

        /* Advance to the next bit, moving on to the next byte when the
           mask has been shifted out */
        up_Mask >>= 1;
        if(up_Mask == 0)
        {
            up_Mask = 0x80;
            up_Data++;
            up_Byte++;
        }
    }
    return result;
}
/*
 * Read one variable-length value from the bit stream: a unary prefix of
 * 1 bits (ended by a 0 bit, or implicitly after maxGamma ones) gives the
 * number of extra bits n, and the result is (1<<n) plus the next n bits.
 */
int up_GetValue(void)
{
    int ones;

    for(ones = 0; ones < maxGamma; ones++)
    {
        if(!up_GetBits(1))
            break;
    }
    return (1<<ones) | up_GetBits(ones);
}


/*
 * Decompress a file produced by this program.
 *
 *  loadAddr - load address of the packed file; must be 0x801.
 *  data     - file contents following the two load address bytes.
 *  file     - output file name, or NULL to write to stdout.
 *  flags    - F_STATS prints the parameters found in the header.
 *
 * The decompressor header embedded in the file is compared against
 * headerUncrunch and, failing that, headerUncrunchNoWrap (up to 22
 * differing bytes are tolerated); the packing parameters are then read
 * from fixed offsets inside it. The result goes to outBuffer and is
 * written out preceded by the two-byte start address.
 *
 * Returns 20 on a fatal error (wrong load address, unknown or broken
 * header, broken data, output file cannot be opened), otherwise the
 * number of non-fatal errors counted while decoding (0 = success).
 */
int UnPack(int loadAddr, const unsigned char *data, const char *file, int flags)
{
    long size, startEsc, endAddr, execAddr, headerSize, startAddr, error = 0;
    FILE *fp;
    /* Note: this local `mode` (header variant: 0 normal, 1 no-wrap)
       hides the file-level mode[] table inside this function. */
    int i, cli, memConf, overlap = 0, mode = 0;
    long timeused = clock();
    /* Table of the ranked RLE byte values stored at the end of the header;
       unsigned so that values >= 0x80 are not sign-extended. */
    const unsigned char *byteCodeVec = NULL;
    unsigned char tmp[2];

    if(loadAddr != 0x801)
    {
	fprintf(stderr, "Error: Loading address not 0x0801 (0x%04x)\n",
		loadAddr);
	return 20;
    }
    /* Identify the header variant by counting differing bytes */
    for(i=2;i<sizeof(headerUncrunch)-31;i++)
    {
	if(headerUncrunch[i] != data[i-2])
	    error++;
    }
    if(error > 22)
    {
	error = 0;
	for(i=2;i<sizeof(headerUncrunchNoWrap)-31;i++)
	{
	    if(headerUncrunchNoWrap[i] != data[i-2])
		error++;
	}
	if(error > 22)
	{
	    fprintf(stderr, "Error: The file is not compressed with this program.\n");
	    return 20;
	}
	mode = 1;
    }
    error = 0;

    /* Pick the packing parameters out of the decompressor code */
    if(mode==1)
    {
	maxGamma = data[0x911 -0x801] - 1;
	if(maxGamma < 5 || maxGamma > 7)
	{
	    fprintf(stderr, "Error: Broken archive, maxGamma %d.\n",
		    maxGamma);
	    return 20;
	}

	lrange = LRANGE;
	maxlzlen = MAXLZLEN;
	maxrlelen = MAXRLELEN;

	startEsc = data[0x842 -0x801];
	startAddr = data[0x844 -0x801] | (data[0x845 -0x801]<<8);
	escBits = data[0x850 -0x801];
	if(escBits < 0 || escBits > 8)
	{
	    fprintf(stderr, "Error: Broken archive, escBits %d.\n",
		    escBits);
	    return 20;
	}
	extraLZPosBits = data[0x8bd -0x801];
	if(extraLZPosBits < 0 || extraLZPosBits > 4)
	{
	    fprintf(stderr, "Error: Broken archive, extraLZPosBits %d.\n",
		    extraLZPosBits);
	    return 20;
	}
	endAddr = 0x100 + (data[0x830 -0x801] | (data[0x831 -0x801]<<8));
	size    = endAddr - (data[0x8f5 -0x801] | (data[0x8f6 -0x801]<<8));
	headerSize = ((data[0x82d -0x801] | (data[0x82e -0x801]<<8))
			+ 0x100 - size - 0x801) & 0xffff;
	execAddr = data[0x8f1 -0x801] | (data[0x8f2 -0x801]<<8);

	memConf = data[0x8e1 -0x801];
	cli = data[0x8ef -0x801];

	byteCodeVec = &data[sizeof(headerUncrunchNoWrap) - 32 -2];
    }
    else
    {
	overlap = data[0x814 -0x801];
	maxGamma = data[0x924 -0x801] - 1;
	if(maxGamma < 5 || maxGamma > 7)
	{
	    fprintf(stderr, "Error: Broken archive, maxGamma %d.\n",
		    maxGamma);
	    return 20;
	}
	/* The same parameter is patched into several places; they must agree */
	if(data[0x892 -0x801] != (1<<maxGamma) ||
	   data[0x896 -0x801] != (8-maxGamma) ||
	   data[0x8c3 -0x801] != (2<<maxGamma)-1)
	{
	    fprintf(stderr, "Error: Broken archive, maxGamma (%d) mismatch.\n",
		    maxGamma);
	    return 20;
	}

	lrange = LRANGE;
	maxlzlen = MAXLZLEN;
	maxrlelen = MAXRLELEN;

	startEsc = data[0x84c -0x801];
	startAddr = data[0x84e -0x801] | (data[0x84f -0x801]<<8);
	escBits = data[0x85a -0x801];
	if(escBits < 0 || escBits > 8)
	{
	    fprintf(stderr, "Error: Broken archive, escBits %d.\n",
		    escBits);
	    return 20;
	}
	if(data[0x862 -0x801] != 8-escBits ||
	   data[0x86d -0x801] != escBits)
	{
	    fprintf(stderr, "Error: Broken archive, escBits (%d) mismatch.\n",
		    escBits);
	    return 20;
	}

	extraLZPosBits = data[0x8c9 -0x801];
	if(extraLZPosBits < 0 || extraLZPosBits > 4)
	{
	    fprintf(stderr, "Error: Broken archive, extraLZPosBits %d.\n",
		    extraLZPosBits);
	    return 20;
	}
	endAddr = 0x100 + (data[0x83a -0x801] | (data[0x83b -0x801]<<8));
	size    = endAddr - (data[0x901 -0x801] | (data[0x902 -0x801]<<8));
	headerSize = ((data[0x837 -0x801] | (data[0x838 -0x801]<<8))
			+ 0x100 - size - 0x801) & 0xffff;
	execAddr = data[0x8fd -0x801] | (data[0x8fe -0x801]<<8);

	memConf = data[0x8ed -0x801];
	cli = data[0x8fb -0x801];
	byteCodeVec = &data[sizeof(headerUncrunch) - 32 -2];
    }

    if((flags & F_STATS))
    {
	fprintf(stderr, "Start 0x%04lx, exec 0x%04lx, %s, $01=$%02x\n",
		startAddr, execAddr, (cli==0x58)?"cli":"sei", memConf);
	fprintf(stderr, "Escape bits %d, starting escape 0x%02lx\n",
		escBits, (startEsc<<(8-escBits)));
	fprintf(stderr, "Decompressor size %ld, max length %d, LZPOS LO bits %d\n",
		headerSize+2, 2<<maxGamma, extraLZPosBits+8);
    }


    outPointer = 0;
    up_SetInput(data + headerSize);
    while(1)
    {
	int sel;

#ifndef BIG
	if(startAddr + outPointer >= up_Byte + endAddr - size)
	{
	    if(!error)
		fprintf(stderr, "Error: Target %5ld exceeds source %5ld..\n",
			startAddr + outPointer, up_Byte + endAddr - size);
	    error++;
	}
	if(up_Byte > size + overlap)
	{
	    /* size + overlap is a long, so it needs %ld */
	    fprintf(stderr, "Error: No EOF symbol found (%d > %ld).\n",
		    up_Byte, size + overlap);
	    error++;
	}
#endif /* BIG */

	/* With escape bits the top bits of the next byte select between a
	   normal byte and a packed code; without them everything is a code */
	if(escBits)
	    sel = up_GetBits(escBits);
	else
	    sel = startEsc;
	if(sel == startEsc)
	{
	    int lzPos, lzLen = up_GetValue(), i;

	    if(lzLen != 1)
	    {
		int lzPosHi = up_GetValue()-1, lzPosLo;

		if(lzPosHi == (2<<maxGamma)-2)
		    break; /* EOF */

		lzPosHi =
		    (lzPosHi<<extraLZPosBits) | up_GetBits(extraLZPosBits);
		lzPosLo = up_GetBits(8) ^ 0xff;
		lzPos = (lzPosHi<<8) | lzPosLo;
	    }
	    else
	    {
		if(up_GetBits(1))
		{
		    int rleLen, byteCode, byte;

		    if(!up_GetBits(1))
		    {
			/* Escaped normal byte: a new escape follows, then
			   the low bits of the byte */
			int newEsc = up_GetBits(escBits);

			outBuffer[outPointer++] =
			    (startEsc<<(8-escBits)) | up_GetBits(8-escBits);
			startEsc = newEsc;
			if(outPointer >= OUT_SIZE)
			{
			    fprintf(stderr, "Error: Broken archive, "
				    "output buffer overrun at %d.\n",
				    outPointer);
			    return 20;
			}
			continue;
		    }
		    /* RLE: length (short or long form), then the byte code */
		    rleLen = up_GetValue();
		    if(rleLen >= (1<<maxGamma))
		    {
			rleLen = ((rleLen-(1<<maxGamma))<<(8-maxGamma)) |
			    up_GetBits(8-maxGamma);
			rleLen |= ((up_GetValue()-1)<<8);
		    }
		    byteCode = up_GetValue();
		    if(byteCode < 32)
			byte = byteCodeVec[byteCode];
		    else
		    {
			byte = ((byteCode-32)<<3) | up_GetBits(3);
		    }
/*
fprintf(stdout, "%5ld %5ld RLE %5d 0x%02x\n", outPointer, up_Byte, rleLen+1,
	byte);*/
		    for(i=0;i<=rleLen;i++)
		    {
			outBuffer[outPointer++] = byte;
			if(outPointer >= OUT_SIZE)
			{
			    fprintf(stderr, "Error: Broken archive, "
				    "output buffer overrun at %d.\n",
				    outPointer);
			    return 20;
			}
		    }
		    continue;
		}
		/* Two-byte LZ77 match: only the low offset byte is stored */
		lzPos = up_GetBits(8) ^ 0xff;
	    }
/*fprintf(stdout, "%5ld %5ld LZ %3d 0x%04x\n",
	outPointer, up_Byte, lzLen+1, lzPos+1);*/
	    /* Copy lzLen+1 bytes from lzPos+1 bytes back */
	    for(i=0;i<=lzLen;i++)
	    {
		if(outPointer - lzPos -1 < 0)
		{
		    fprintf(stderr, "Error: Broken archive, "
			    "LZ copy position underrun at %d (%d). "
			    "lzLen %d.\n",
			    outPointer, lzPos+1, lzLen+1);
		    return 20;
		}
		outBuffer[outPointer] = outBuffer[outPointer - lzPos - 1];
		outPointer++;
		if(outPointer >= OUT_SIZE)
		{
		    fprintf(stderr, "Error: Broken archive, "
			    "output buffer overrun at %d.\n",
			    outPointer);
		    return 20;
		}
	    }
	}
	else
	{
	    /* Normal byte: the selector bits are its top bits */
	    int byte = (sel<<(8-escBits)) | up_GetBits(8-escBits);
	    outBuffer[outPointer++] = byte;
	    if(outPointer >= OUT_SIZE)
	    {
		fprintf(stderr, "Error: Broken archive, "
			"output buffer overrun at %d.\n",
			outPointer);
		return 20;
	    }
	}
    }
    if(error)
	fprintf(stderr, "Error: Target exceeded source %5ld times.\n",
		error);

    /* Write to the named file, or to stdout only when no name was given.
       A file that cannot be opened is an error, not a reason to dump
       binary data on stdout. */
    if(file)
    {
	fp = fopen(file, "wb");
	if(!fp)
	{
	    fprintf(stderr, "Could not open file \"%s\" for writing.\n", file);
	    return 20;
	}
    }
    else
	fp = stdout;

    tmp[0] = startAddr & 0xff;
    tmp[1] = (startAddr >> 8);

    fwrite(tmp, 2, 1, fp);
    fwrite(outBuffer, outPointer, 1, fp);
    if(fp != stdout)
	fclose(fp);

    timeused = clock() - timeused;
    if(!timeused)
	timeused++;	/* avoid dividing by zero below */
    fprintf(stderr, "Decompressed %d bytes in %4.2f seconds (%4.2f kB/s)\n",
	    outPointer,
	    (double)timeused/CLOCKS_PER_SEC,
	    (double)CLOCKS_PER_SEC*outPointer/timeused/1024.0);

    return error;
}



int PackLz77(int lzsz, int flags, int *startEscape, int endAddr)
{
    int i, j, outlen, p, rleCur = 0, headerSize;
    int escape;
#ifdef HASH_COMPARE
    unsigned char *hashValue;
    unsigned char *a;
    int k;
#endif /* HASH_COMPARE */
    unsigned short *backSkip;

#ifdef BIG
    unsigned int *lastPair;
#else
    unsigned short *lastPair;
#endif /* BIG */

#ifdef BACKSKIP_FULL
#ifdef RESCAN
    int rescan = 0;
#endif /* RESCAN */
#endif /* BACKSKIP_FULL */

#ifdef HASH_STAT
    unsigned long compares = 0, hashChecks = 0, hashEqual = 0;
#endif /* HASH_STAT */

#ifdef COND_PROB
    static unsigned short P[3][256] = {0};
#endif /* COND_PROB */

#ifdef ESC_STATS
    static unsigned short EP[256][256] = {0};
#endif /* ESC_STAT */

    if(lzsz < 0 || lzsz > lrange)
    {
	fprintf(stderr, "LZ range must be from 0 to %d (was %d). Set to %d.\n",
		lrange, lzsz, lrange);
	lzsz = lrange;
    }
    if(lzsz > 65535)
    {
	fprintf(stderr,
		"LZ range must be from 0 to 65535 (was %d). Set to 65535.\n",
		lzsz);
	lzsz = 65535;
    }
    if(!lzsz)
	fprintf(stderr, "Warning: zero LZ range. Only RLE packing used.\n");

    InitRleLen();
    length = (int *)calloc(sizeof(int), inlen + 1);
    mode   = (unsigned char *)calloc(sizeof(unsigned char), inlen);
    rle    = (unsigned short *)calloc(sizeof(unsigned short), inlen);
    elr    = (unsigned short *)calloc(sizeof(unsigned short), inlen);
    lzlen  = (unsigned short *)calloc(sizeof(unsigned short), inlen);
    lzpos  = (unsigned short *)calloc(sizeof(unsigned short), inlen);
    newesc = (unsigned char *)calloc(sizeof(unsigned char), inlen);
#ifdef BACKSKIP_FULL
    backSkip  = (unsigned short *)calloc(sizeof(unsigned short), inlen);
#else
    backSkip  = (unsigned short *)calloc(sizeof(unsigned short), 65536);
#endif /* BACKSKIP_FULL */
#ifdef HASH_COMPARE
    hashValue = (unsigned char *)calloc(sizeof(unsigned char), inlen);
#endif /* HASH_COMPARE */
#ifdef BIG
    lastPair  = (unsigned int *)calloc(sizeof(unsigned int), 256*256);
#else
    lastPair  = (unsigned short *)calloc(sizeof(unsigned short), 256*256);
#endif /* BIG */


    /* error checking */
    if(!length || !mode || !rle || !elr || !lzlen || !lzpos || !newesc ||
	!lastPair || !backSkip
#ifdef HASH_COMPARE
	|| !hashValue
#endif /* HASH_COMPARE */
	)
    {
	fprintf(stderr, "Memory allocation failed!\n");
	goto errorexit;
    }

#ifdef HASH_COMPARE
    i = 0;
    j = 0;
    a = indata + inlen;
    for(p=inlen-1;p>=0;p--)
    {
	k = j;
	j = i;
	i = *--a;	/* Only one read per position */

	/* Without hash: 18.56%, end+middle: 12.68% */
	/* hashValue[p] = i*2 ^ j*3 ^ k*5; */ /* 8.56% */
	/* hashValue[p] = i ^ j*2 ^ k*3; */   /* 8.85% */
	/* hashValue[p] = i + j + k; */       /* 9.33% */
	/* hashValue[p] = i + j*2 + k*3; */   /* 8.25% */
	/* hashValue[p] = i*2 + j*3 + k*5; */ /* 8.29% */
	/* hashValue[p] = i*3 + j*5 + k*7; */ /* 7.95% */
	hashValue[p] = i*3 + j*5 + k*7; /* 7.95 % */
    }
#endif /* HASH_COMPARE */

    /* Detect all RLE and LZ77 jump possibilities */
    for(p=0;p<inlen;p++,rleCur--)
    {
#ifndef BIG
	if(!(p&511))
	{
	    fprintf(stderr, "\r%d ", p);
	    fflush(stderr);	/* for SAS/C */
	}
#endif /* BIG */
	/* check run-length code - must be done, LZ77 search needs it! */
	if(rleCur <= 0)
	{
	    /*
		There are so few RLE's and especially so few
		long RLE's that byte-by-byte is good enough.
	     */
	    unsigned char *a = indata + p;
	    int val = *a++; /* if this were uchar, it would go to stack..*/
	    int top = inlen - p;
	    int rlelen = 1;

	    /* Loop for the whole RLE */
	    while(rlelen<top && *a++ == (unsigned char)val
#ifdef BIG
		  && rlelen < 65535
#endif /* BIG */
		 )
	    {
		rlelen++;
	    }
#ifdef HASH_STAT
	    compares += rlelen;
#endif /* HASH_STAT */

	    if(rlelen>=2)
	    {
		rleHist[indata[p]]++;

		for(i=0;i<rlelen-1;i++)
		{
		    rle[p+i] = rlelen-i;
		    elr[p+i] = i;	/* For RLE backward skipping */
		}
		rleCur = rlelen;
		if(rlelen>maxlzlen)
		{
		    /* Jump over some unnecessary memory references */
		    p += rlelen - maxlzlen - 1;
		    rleCur -= rlelen - maxlzlen - 1;
		    continue;
		}
	    }
	}

	/* check LZ77 code */
	if(rle[p]<maxlzlen && p+rle[p]+1<inlen)
	{
	    int bot = p - lzsz, maxval, maxpos, rlep = rleCur;
#ifdef HASH_COMPARE
	    unsigned char hashCompare = hashValue[p];
#else
	    unsigned char valueCompare = indata[p+2];
#endif /* HASH_COMPARE */

	    /*
		There's always 1 equal byte, although it may
		not be marked as RLE.
	     */
	    if(rlep <= 0)
		rlep = 1;
	    if(bot < 0)
		bot = 0;
	    bot += (rlep-1);

	    /*
		First get the shortest possible match (if any).
		If there is no 2-byte match, don't look further,
		because there can't be a longer match.
	     */
	    i = (int)lastPair[ (indata[p]<<8) | indata[p+1] ] -1;
	    if(i>=0 && i>=bot)
	    {
		/* Got a 2-byte match at least */
		maxval = 2;
		maxpos = p-i;

		/*
		    A..AB	rlep # of A's, B is something else..

		    Search for bytes that are in p + (rlep-1), i.e.
		    the last rle byte ('A') and the non-matching one
		    ('B'). When found, check if the rle in the compare
		    position (i) is long enough (i.e. the same number
		    of A's at p and i-rlep+1).

		    There are dramatically less matches for AB than for
		    AA, so we get a huge speedup with this approach.
		    We are still guaranteed to find the most recent
		    longest match there is.
		 */

		i = (int)lastPair[ (indata[p+(rlep-1)]<<8) | indata[p+rlep]] -1;
		while(i>=bot && i>=rlep-1)
		{
		    /* Equal number of A's ? */
		    if(!(rlep-1) || rle[i+1 - rlep]==rlep)	/* 'head' matches */
		    {   /* rlep==1  (rlep-1)==0 */
			/* ivanova.run: 443517 rlep==1,
			   709846 rle[i+1-rlep]==rlep */

			/*
			    Check the hash values corresponding to the last
			    two bytes of the currently longest match and
			    the first new matching(?) byte. If the hash
			    values don't match, don't bother to check the
			    data itself.
			 */
#ifdef HASH_STAT
			hashChecks++;
#endif /* HASH_STAT */
			if(
#ifdef HASH_COMPARE
			   hashValue[i+maxval-rlep-1] == hashCompare
#else
			   indata[i+maxval-rlep-1+2] == valueCompare
#endif /* HASH_COMPARE */
			  )
			{
			    unsigned char *a = indata + i+2;	/* match  */
			    unsigned char *b = indata + p+rlep-1+2;	/* curpos */
			    unsigned char *c = indata + inlen;	/* memtop */
#ifdef HASH_STAT
			    hashEqual++;
#endif /* HASH_STAT */
			    /* the 2 first bytes ARE the same.. */
			    j = 2;
			    while(b!=c && *a++==*b++)
			    {
				j++;
			    }

#ifdef HASH_STAT
			    compares += j - 1;
#endif /* HASH_STAT */
			    if(j + rlep-1 > maxval)
			    {
				int tmplen = j+rlep-1, tmppos = p-i+rlep-1;
				if(tmplen > maxlzlen)
				    tmplen = maxlzlen;

				if(tmplen*8 - LenLz(tmplen, tmppos) >=
				   maxval*8 - LenLz(maxval, maxpos))
				{
				    maxval = tmplen;
				    maxpos = tmppos;
#ifdef HASH_COMPARE
				    hashCompare = hashValue[p+maxval-2];
#else
				    valueCompare = indata[p+maxval];
#endif /* HASH_COMPARE */
				}
#if 0
				else
				{
				    printf("@%5d %d*8 - LzLen(%d, %4x) == %d < ",
					   p, tmplen, tmplen, tmppos,
					   tmplen*8 - LenLz(tmplen, tmppos));
				    printf("%d*8 - LzLen(%d, %4x) == %d\n",
					   maxval, maxval, maxpos,
					   maxval*8 - LenLz(maxval, maxpos));
				}
#endif
				if(maxval == maxlzlen)
				    break;
			    }
			}
		    }
#ifdef BACKSKIP_FULL
		    if(backSkip[i] == 0)
			break; /* No previous occurrances (near enough) */
		    i -= (int)backSkip[i];
#else
		    if(backSkip[i & 0xffff] == 0)
			break; /* No previous occurrances (near enough) */
		    i -= (int)backSkip[i & 0xffff];
#endif /* BACKSKIP_FULL */
		}

		/*
		    If there is 'A' in the previous position also,
		    RLE-like LZ77 is possible, although rarely
		    shorter than real RLE.
		 */
		if(p && rle[p-1]-1 >= maxval)
		{
		    maxval = rle[p-1]-1;
		    maxpos = 1;
		}
		/*
		    Last, try to find as long as possible match
		    for the RLE part only.
		 */
		if(rlep > maxval)
		{
		    bot = p - lzsz;
		    if(bot < 0)
			bot = 0;

		    /* Note: indata[p] == indata[p+1] */
		    i = (int)lastPair[indata[p]*257] -1;
		    while(i>= 0 /*rlep-2*/ && i>=bot)
		    {
			if(elr[i] + 2 > maxval)
			{
			    maxval = min(elr[i] + 2, rlep);
			    maxpos = p - i + (maxval-2);
			    if(maxval == rlep)
				break; /* Got enough */
			}
			i -= elr[i];
#ifdef BACKSKIP_FULL
			if(backSkip[i] == 0)
			    break; /* No previous occurrances (near enough) */
			i -= (int)backSkip[i];
#else
			if(backSkip[i & 0xffff] == 0)
			    break; /* No previous occurrances (near enough) */
			i -= (int)backSkip[i & 0xffff];
#endif /* BACKSKIP_FULL */
		    }
		}
		if(p+maxval > inlen)
		{
		    fprintf(stderr,
			    "Error @ %d, lzlen %d, pos %d - exceeds inlen\n",
			    p, maxval, maxpos);
		    maxval = inlen - p;
		}
		if(maxval > 2 || (maxval==2 && maxpos<=256))
		{
		    if(maxpos < 0)
			fprintf(stderr, "Error @ %d, lzlen %d, pos %d\n",
				p, maxval, maxpos);
		    lzlen[p] = (maxval<maxlzlen)?maxval:maxlzlen;
		    lzpos[p] = maxpos;
		}
	    }
	}

	/* Update the two-byte history ('hash table') &
	   backSkip ('linked list') */
	if(p+1<inlen)
	{
	    int index = (indata[p]<<8) | indata[p+1];

#ifdef BACKSKIP_FULL
	    backSkip[p] = 0;
#else
	    backSkip[p & 0xffff] = 0;
#endif /* BACKSKIP_FULL */
	    if(lastPair[index] > 0 &&
	       p - (lastPair[index]-1) < 65536)
	    {
#ifdef BACKSKIP_FULL
		backSkip[p] = p - (lastPair[index]-1);
#else
		backSkip[p & 0xffff] = p - (lastPair[index]-1);
#endif /* BACKSKIP_FULL */
	    }
	    lastPair[index] = p+1;
	}
    }
    fprintf(stderr, "\rChecked: %d \n", p);
    fflush(stderr);	/* for SAS/C */


    /* Initialize the RLE selections */
    InitRle(flags);

    /* Check the normal bytes / all ratio */
    if((flags & F_AUTO))
    {
	int mb, mv;

	fprintf(stderr, "Selecting the number of escape bits.. ");
	fflush(stderr);	/* for SAS/C */

	/*
	    Absolute maximum number of escaped bytes with
	    the escape optimize is 2^-n, where n is the
	    number of escape bits used.

	    This worst case happens only on equal-
	    distributed normal bytes (01230123..).
	    This is why the typical values are so much smaller.
	 */

	mb = 0;
	mv = 8*OUT_SIZE;
	for(escBits=1;escBits<9;escBits++)
	{
	    int escaped, other = 0, c;

	    escMask = (0xff00>>escBits) & 0xff;

	    /* Find the optimum path for selected escape bits (no optimize) */
	    OptimizeLength(0);

	    /* Optimize the escape selections for this path & escBits */
	    escaped = OptimizeEscape(&escape, &other);

	    /* Compare value: bits lost for escaping -- bits lost for prefix */
	    c = (escBits+3)*escaped + other*escBits;
	    if((flags & F_STATS))
	    {
		fprintf(stderr, " %d:%d", escBits, c);
		fflush(stderr);	/* for SAS/C */
	    }
	    if(c < mv)
	    {
		mb = escBits;
		mv = c;
	    }
	    else
	    {
		/* minimum found */
		break;
	    }
	    if(escBits==4 && (flags & F_STATS))
		fprintf(stderr, "\n");
	}
	if(mb==1)	/* Minimum was 1, check 0 */
	{
	    int escaped;

	    escBits = 0;
	    escMask = 0;

	    /* Find the optimum path for selected escape bits (no optimize) */
	    OptimizeLength(0);
	    /* Optimize the escape selections for this path & escBits */
	    escaped = OptimizeEscape(&escape, NULL);

	    if((flags & F_STATS))
	    {
		fprintf(stderr, " %d:%d", escBits, 3*escaped);
		fflush(stderr);	/* for SAS/C */
	    }
	    if(3*escaped < mv)
	    {
		mb = 0;
		/* mv = 3*escaped; */
	    }
	}
	if((flags & F_STATS))
	    fprintf(stderr, "\n");

	fprintf(stderr, "Selected %d-bit escapes\n", mb);
	escBits = mb;
	escMask = (0xff00>>escBits) & 0xff;
    }

    if(!(flags & F_NOOPT))
    {
	fprintf(stderr, "Optimizing LZ77 and RLE lengths...");
 	fflush(stderr);	/* for SAS/C */
    }

    OptimizeLength((flags & F_NOOPT)?0:1);	/* Find the optimum path (optimize) */
    if((flags & F_STATS))
    {
	if(!(flags & F_NOOPT))
	    fprintf(stderr, " gained %d units.\n", lzopt/8);
    }
    else
	fprintf(stderr, "\n");

    if(1 || (flags & F_AUTOEX))
    {
	long lzstat[5] = {0,0,0,0,0}, i, cur = 0, old = extraLZPosBits;

	fprintf(stderr, "Selecting LZPOS LO length.. ");
	fflush(stderr);	/* for SAS/C */

	for(p=0;p<inlen;)
	{
	    switch(mode[p])
	    {
	    case 1: /* lz */
		extraLZPosBits = 0;
		lzstat[0] += LenLz(lzlen[p], lzpos[p]);
		extraLZPosBits = 1;
		lzstat[1] += LenLz(lzlen[p], lzpos[p]);
		extraLZPosBits = 2;
		lzstat[2] += LenLz(lzlen[p], lzpos[p]);
		extraLZPosBits = 3;
		lzstat[3] += LenLz(lzlen[p], lzpos[p]);
		extraLZPosBits = 4;
		lzstat[4] += LenLz(lzlen[p], lzpos[p]);
		p += lzlen[p];
		break;

	    case 2: /* rle */
		p += rle[p];
		break;

	    default: /* normal */
		p++;
		break;
	    }
	}
	for(i=0;i<5;i++)
	{
	    if((flags & F_STATS))
		fprintf(stderr, " %ld:%ld", i + 8, lzstat[i]);

	    if(lzstat[i] < lzstat[cur])	/* first time around (lzstat[0] < lzstat[0]) */
		cur = i;
	}
	extraLZPosBits = (flags & F_AUTOEX)?cur:old;

	if((flags & F_STATS))
	    fprintf(stderr, "\n");

	fprintf(stderr, "Selected %d-bit LZPOS LO part\n",
		extraLZPosBits + 8);
	if(cur != old)
	{
	    fprintf(stderr,
		    "Note: Using option -p%ld you may get better results.\n",
		    cur);
	}
	if(extraLZPosBits != old)
	    OptimizeLength((flags & F_NOOPT)?0:1);	/* Find the optimum path (optimize) */
    }
    if(1)
    {
	long stat[3] = {0,0,0};

	for(p=0;p<inlen;)
	{
	    switch(mode[p])
	    {
	    case 1: /* lz */
		if(lzlen[p] > (1<<maxGamma))
		    stat[0]++;
		p += lzlen[p];
		break;

	    case 2: /* rle */
		if(rle[p] > (1<<(maxGamma-1)))
		{
		    if(rle[p] <= (1<<maxGamma))
			stat[1]++;
		    else
			stat[2]++;
		}
		p += rle[p];
		break;

	    default: /* normal */
		p++;
		break;
	    }
	}
	/* TODO: better formula.. */
	if(maxGamma < 7 && stat[0] + stat[1] > 10)
	{
	    fprintf(stderr,
		    "Note: Using option -m%ld you may get better results.\n",
		    maxGamma+1);
	}
	if(maxGamma > 5 && stat[0] + stat[1] < 5)
	{
	    fprintf(stderr,
		    "Note: Using option -m%ld you may get better results.\n",
		    maxGamma-1);
	}
    }

    /* Optimize the escape selections */
    OptimizeEscape(&escape, NULL);
    if(startEscape)
	*startEscape = escape;
    OptimizeRle(flags);	/* Retune the RLE selections */

#ifdef ENABLE_VERBOSE
    if((flags & F_VERBOSE))
    {
	int oldEscape = escape;
	printf("normal RLE  LZLEN LZPOS(absolute)\n\n");

	for(p=0;p<inlen;)
	{
	    switch(mode[p])
	    {
	    case 0:
	    case 3:
		p++;
		break;
	    case 1:
		mode[p - lzpos[p]] |= 4; /* Was referred to by lz77 */
		p += lzlen[p];
		break;
	    case 2:
		p += rle[p];
	    default:
		p++;
		break;
	    }
	}

	for(j=0,p=0;p<inlen;p++)
	{
	    switch(mode[p])
	    {
	    case 4:
	    case 7:
	    case 0:
	    case 3:
		if(j==p)
		{
		    if((indata[p] & escMask) == escape)
		    {
			escape = newesc[p];
			printf("");
			/*printf("%+03d", -escBits);*/
		    }
		    else
		    {
			printf(">");
			/*printf(">%+03d", 0);*/
		    }
		    j += 1;
		}
		else
		    printf(" ");
		printf("*001*  %03d   %03d  %04x(%04x)  %02x %s %02x\n",
			rle[p], lzlen[p], lzpos[p], p-lzpos[p], indata[p],
			(mode[p]&4)?"#":" ", newesc[p]);
		break;
	    case 5:
	    case 1:
		if(j==p)
		{
		    printf(">");
		    /*printf(">%+03d", 8*lzlen[p]-LenLz(lzlen[p], lzpos[p]));*/
		    j += lzlen[p];
		}
		else
		    printf(" ");
		printf(" 001   %03d  *%03d* %04x(%04x)  %02x %s\n",
			rle[p], lzlen[p], lzpos[p], p-lzpos[p], indata[p],
			(mode[p]&4)?"#":" ");
		break;
	    case 6:
	    case 2:
		if(j==p)
		{
		    printf(">");
		    /*printf(">%+03d", 8*rle[p]-LenRle(rle[p], indata[p]));*/
		    j += rle[p];
		}
		else
		    printf(" ");
		printf(" 001  *%03d*  %03d  %04x(%04x)  %02x %s\n",
			rle[p], lzlen[p], lzpos[p], p-lzpos[p], indata[p],
			(mode[p]&4)?"#":" ");
		break;
	    default:
		j++;
		break;
	    }
	    mode[p] &= 3;
	}
	escape = oldEscape;
    }
#endif /* ENABLE_VERBOSE */

#ifdef SHOW_GREEDY
    if((flags & F_STATS))
    {
	int oldEscape = escape;

	outlen = 0;
	for(p=0;p<inlen;)
	{
	    if(rle[p] > lzlen[p])
	    {
		if(rle[p] <= (1<<maxGamma))
		    timesSRle++;
		else
		    timesLRle++;
		timesRle++;
		outlen += LenRle(rle[p], indata[p]);
		p += rle[p];
	    }
	    else if(lzlen[p])
	    {
		timesLz++;
		outlen += LenLz(lzlen[p], lzpos[p]);
		p += lzlen[p];
	    }
	    else
	    {
		if((indata[p] & escMask)==escape)
		{
		    timesEscaped++;
		    escape = newesc[p];
		    outlen += escBits + 5;
		}
		else
		    timesNormal++;
		outlen += 8;
		p++;
	    }
	}
	outlen += 24;
	outlen = (outlen+7)/8 + (sizeof(headerUncrunchNoWrap)+rleUsed-31);
	fprintf(stderr, "Greedy packing (no optimization)\n");
	fprintf(stderr, "In: %d, out: %d, ratio: %f%%, gained: %f%%, %d escape bit%s\n",
		inlen, outlen, (double)outlen*100.0/(double)inlen,
		100.0 - (double)outlen*100.0/(double)inlen, escBits, (escBits==1)?"":"s" );
	fprintf(stderr, "Times  RLE: %d (%d+%d), LZ: %d, Esc: %d (normal: %d)\n",
		timesRle, timesSRle, timesLRle,
		timesLz, timesEscaped, timesNormal);
	timesRle = timesSRle = timesLRle = timesLz = timesEscaped = timesNormal = 0;
	escape = oldEscape;
    }
#endif /* SHOW_GREEDY */

    for(p=0;p<inlen;)
    {
	switch(mode[p])
	{
	case 0: /* normal */
	case 3:
#ifdef COND_PROB
	    if(p)
		P[0][indata[p-1]]++;
#endif /* COND_PROB */
	    length[p] = outPointer;

#ifdef ESC_STATS
	    if((indata[p] & escMask) == escape)
		EP[escape>>(8-escBits)][newesc[p]>>(8-escBits)]++;
#endif /* ESC_STATS */

	    OutputNormal(&escape, indata+p, newesc[p]);
	    p++;
	    break;

	case 1: /* lz77 */

#ifdef BACKSKIP_FULL
	    /* Not possible for smaller backSkip table
	       (the table is overwritten during previous use) */
#ifdef RESCAN
	    /* Re-search matches to get the closest one */
	    if(lzlen[p] > 2 && lzlen[p] > rle[p])
	    {
		int bot = p - lzpos[p] + 1, i;
		unsigned short rlep = rle[p];

		if(!rlep)
		    rlep = 1;
		if(bot < 0)
		    bot = 0;
		bot += (rlep-1);

		i = p - (int)backSkip[p];
		while(i>=bot && i>=rlep-1)
		{
		    /* Equal number of A's ? */
		    if(rlep==1 || rle[i-rlep+1]==rlep)	/* 'head' matches */
		    {
			unsigned char *a = indata + i+1;	/* match  */
			unsigned char *b = indata + p+rlep-1+1;	/* curpos */
			unsigned char *c = indata + inlen;	/* memtop */

			j = 1;
			while(b!=c && *a++==*b++)
			{
			    j++;
			}
			if(j + rlep-1 >= lzlen[p])
			{
			    int tmppos = p-i+rlep-1;

			    rescan +=
				LenLz(lzlen[p], lzpos[p]) -
				LenLz(lzlen[p], tmppos);
#if 0
			    printf("@%d, lzlen %d, pos %04x -> %04x\n",
				    p, lzlen[p], lzpos[p], tmppos);
			    for(i=-1;i<=lzlen[p];i++)
			    {
				printf("%02x %02x %02x  ", indata[p+i], indata[p-lzpos[p]+i],
					indata[p-tmppos+i]);
			    }
			    printf("\n");
#endif
			    lzpos[p] = tmppos;
			    break;
			}
		    }
		    if(backSkip[i] == 0)
			break; /* No previous occurrances (near enough) */
		    i -= (int)backSkip[i];
		}
	    }
#endif /* RESCAN */
#endif /* BACKSKIP_FULL */

#ifdef COND_PROB
	    if(p)
		P[1][indata[p-1]]++;
#endif /* COND_PROB */
	    for(i=0;i<lzlen[p];i++)
		length[p+i] = outPointer;
	    OutputLz(&escape, lzlen[p], lzpos[p], indata+p-lzpos[p], p);
	    p += lzlen[p];
	    break;

	case 2: /* rle */
#ifdef COND_PROB
	    if(p)
		P[2][indata[p-1]]++;
#endif /* COND_PROB */
	    for(i=0;i<rle[p];i++)
		length[p+i] = outPointer;
	    OutputRle(&escape, indata+p, rle[p]);
	    p += rle[p];
	    break;

	default: /* Error Flynn :-) */
	    p++;
	    fprintf(stderr, "Internal error: mode %d\n", mode[p]);
	    break;
	}
    }
    OutputEof(&escape);

    /* xxxxxxxxxxxxxxxxxxx uncompressed */
    /*   yyyyyyyyyyyyyyyyy compressed */
    /* zzzz                */

    i = inlen;
    for(p=0;p<inlen;p++)
    {
	int pos = (inlen - outPointer) + (int)length[p] - p;
	i = min(i, pos);
    }
    if(i<0)
	reservedBytes = -i + 2;
    else
	reservedBytes = 0;
    
#ifndef BIG
    if(endAddr + reservedBytes + 3 > 0x10000)
	headerSize = sizeof(headerUncrunch) + rleUsed - 31;
    else
#endif /* BIG */
	headerSize = sizeof(headerUncrunchNoWrap) + rleUsed - 31;

    outlen = outPointer + headerSize;	/* unpack code */
    fprintf(stderr, "In: %d, out: %d, ratio: %5.2f%% (%4.2f b/B), gained: %5.2f%%, %d escape bit%s\n",
	    inlen, outlen, (double)outlen*100.0/(double)inlen + 0.005,
	    8.0*(double)outlen/(double)inlen + 0.005,
	    100.0 - (double)outlen*100.0/(double)inlen + 0.005,
	    escBits, (escBits==1)?"":"s" );

    fprintf(stderr, "Gained RLE: %d (S+L:%d+%d), LZ: %d, Esc: %d, Decompressor: %d\n",
	    gainedRle/8, gainedSRle/8, gainedLRle/8,
	    gainedLz/8, -gainedEscaped/8, -headerSize);

    fprintf(stderr, "Times  RLE: %d (%d+%d), LZ: %d, Esc: %d (normal: %d)\n",
	    timesRle, timesSRle, timesLRle,
	    timesLz, timesEscaped, timesNormal);

    if((flags & F_STATS))
    {
	const char *ll[] = {"2", "3-4", "5-8", "9-16", "17-32", "33-64", "65-128",
			    "129-256"};

	fprintf(stderr, "(Gained by RLE Code: %d, LZPOS LO Bits %d, maxLen: %d, tag bit/prim. %4.2f)\n",
		gainedRlecode/8 - rleUsed, extraLZPosBits + 8, (2<<maxGamma),
		(double)((timesRle+timesLz)*escBits +
			 timesEscaped*(escBits + 3))/
		(double)(timesRle+timesLz+timesNormal) + 0.0049);

	fprintf(stderr, "   LZPOS HI+2 LZLEN S-RLE RLEcode\n");
	fprintf(stderr, "   ------------------------------\n");
	for(i=0;i<=maxGamma;i++)
	{
	    fprintf(stderr, "%-7s %5d %5d", ll[i],
		    lenStat[i][0], lenStat[i][1]);
	    if(i<maxGamma)
		fprintf(stderr, " %5d", lenStat[i][2]);
	    else
		fprintf(stderr, "     -");

	    if(i<6)
		fprintf(stderr, "   %5d%s\n", lenStat[i][3], (i==5)?"*":"");
	    else
		fprintf(stderr, "       -\n");
	}
#ifdef BACKSKIP_FULL
#ifdef RESCAN
	fprintf(stderr, "LZ77 rescan gained %d bytes\n", rescan/8);
#endif /* RESCAN */
#endif /* BACKSKIP_FULL */


#ifdef HASH_STAT
#ifdef HASH_COMPARE
	fprintf(stderr, "Hash Checks: %ld (%ld, %4.2f%% equal), RLE/LZ compares: %ld\n",
		hashChecks, hashEqual, 100.0*(double)hashEqual/(double)hashChecks,
		compares);
#else
	fprintf(stderr, "Value Checks: %ld (%ld, %4.2f%% equal), RLE/LZ compares: %ld\n",
		hashChecks, hashEqual, 100.0*(double)hashEqual/(double)hashChecks,
		compares);
#endif /* HASH_COMPARE */
#endif /* HASH_STAT */

#ifdef COND_PROB
	j = 0;
	for(i=0;i<256;i++)
	{
	    int z = P[0][i] + P[1][i] + P[2][i];
	    double p0 = (double)P[0][i]/(double)z;
	    double p1 = (double)P[1][i]/(double)z;
	    double p2 = (double)P[2][i]/(double)z;
	    double q = -log(p0)*p0/log(2.0) -log(p1)*p1/log(2.0) -log(p2)*p2/log(2.0);

	    if(z)
	    {
		printf("%3d %0.5f %0.5f %0.5f  %0.5f %6d %5.0f\n",
			i, p0, p1, p2, q, z, (double)q*z);
		j += (int)((double)q*z);
	    }
	}
	printf("%d bits\n", j);
#endif /* COND_PROB */
#ifdef ESC_STATS
	printf("CurEsc      newEsc");
	for(i=(1<<escBits)-1;i>=0;i--)
	{
	    printf("%3d      ", i);
	}
	printf("\n");
	for(p=(1<<escBits)-1;p>=0;p--)
	{
	    j = 0;
	    for(i=(1<<escBits)-1;i>=0;i--)
	    {
		j += EP[p][i];
	    }
	    printf("%3d: %6d  ", p, j);
	    for(i=(1<<escBits)-1;i>=0;i--)
	    {
		double p0 = 0.0;

		if(j)
		{
		    p0 = 100.0*(double)EP[p][i]/(double)j;
		}
		printf("  %6.2f%%", p0);
	    }
	    printf("\n");
	}
#endif /* ESC_STATS */
    }

errorexit:
    if(rle)
	free(rle);
    if(elr)
	free(elr);
    if(lzlen)
	free(lzlen);
    if(lzpos)
	free(lzpos);
    if(length)
	free(length);
    if(mode)
	free(mode);
    if(newesc)
	free(newesc);
    if(lastPair)
	free(lastPair);
    if(backSkip)
	free(backSkip);
#ifdef HASH_COMPARE
    if(hashValue)
	free(hashValue);
#endif /* HASH_COMPARE */
    return 0;
}



/*
 * Program entry point.
 *
 * Parses the command line, reads the input (a named file or stdin) into
 * the global indata/inlen buffer, and then either unpacks it with UnPack()
 * (option -u) or compresses it with PackLz77() and writes the result with
 * SavePack().
 *
 * Flags may be bundled in one argument ("-sf").  Options that take a value
 * (r, x, m, e, p, l) accept it either attached ("-m5") or as the following
 * argument ("-m 5"); the value is parsed with strtol() base 0, so decimal,
 * octal and 0x-prefixed hex are accepted.  Up to two non-option arguments
 * are taken as <infile> and <outfile>, in that order.
 *
 * Returns EXIT_FAILURE on usage or input errors, 20 if memory runs out
 * while reading the input, and otherwise the status returned by UnPack()
 * or PackLz77().
 */
int main(int argc, char *argv[])
{
    int n, execAddr = -1, ea = -1, newlen, startAddr = 0x258, startEscape;
    int flags = 0, lzlen = -1, buflen;
    char *fileIn = NULL, *fileOut = NULL;
    FILE *infp;
    unsigned char tmp[2];
    unsigned long timeused = clock();

    lrange = LRANGE;
    maxlzlen = MAXLZLEN;
    maxrlelen = MAXRLELEN;
    InitValueLen();

    /* Automatic selection is the default; -e and -p switch it off */
    flags |= (F_AUTO | F_AUTOEX);
    for(n=1;n<argc;n++)
    {
	if(argv[n][0]=='-')
	{
	    int i = 1;
	    char *val, *end, c;
	    long tmpval;

	    while(argv[n][i])
	    {
		switch(argv[n][i])
		{
		case 'u':
		    flags |= F_UNPACK;
		    break;

		case 'd':	/* Raw - no loading address */
		    flags |= F_SKIP;
		    break;

		case 'n':	/* noopt, no rle/lzlen optimization */
		    flags |= F_NOOPT;
		    break;

		case 's':
		    flags |= F_STATS;
		    break;

#ifdef ENABLE_VERBOSE
		case 'v':
		    flags |= F_VERBOSE;
		    break;
#endif /* ENABLE_VERBOSE */

		case 'f':
		    flags |= F_2MHZ;
		    break;

		case 'h':
		case '?':
		    flags |= F_ERROR;
		    break;

		case 'r':
		case 'x':
		case 'm':
		case 'e':
		case 'p':
		case 'l':
		    c = argv[n][i]; /* Remember the option */
		    if(argv[n][i+1])
		    {
			/* Value is attached to the option letter */
			val = argv[n]+i+1;
		    }
		    else if(n+1 < argc)
		    {
			/* Value is the next argument */
			val = argv[n+1];
			n++;
		    }
		    else
		    {
			flags |= F_ERROR;
			break;
		    }

		    /* The value consumes the rest of this argument: park i
		       on the last character so that the i++ below ends the
		       scan (i becomes -1 for an empty argument). */
		    i = (int)strlen(argv[n])-1;
		    tmpval = strtol(val, &end, 0);
		    if(end == val || *end)
		    {
			/* No digits at all, or trailing garbage */
			fprintf(stderr,
				"Error: invalid number: \"%s\"\n", val);
			flags |= F_ERROR;
			break;
		    }

		    switch(c)
		    {
		    case 'r':
			lzlen = tmpval;
			break;
		    case 'x':
			ea = tmpval;
			break;
		    case 'm':
			maxGamma = tmpval;
			if(maxGamma < 5 || maxGamma > 7)
			{
			    fprintf(stderr, "Max length must be 5..7!\n");
			    flags |= F_ERROR;
			    maxGamma = 6;
			}
			/* Re-derive the limits after changing maxGamma
			   (presumably these macros depend on it - confirm
			   against their definitions) */
			lrange = LRANGE;
			maxlzlen = MAXLZLEN;
			maxrlelen = MAXRLELEN;

			InitValueLen();
			break;
		    case 'e':
			escBits = tmpval;
			if(escBits < 0 || escBits > 8)
			{
			    fprintf(stderr, "Escape bits must be 0..8!\n");
			    flags |= F_ERROR;
			}
			else
			{
			    flags &= ~F_AUTO;
			    /* Only shift by a validated count: an
			       out-of-range shift is undefined */
			    escMask = (0xff00>>escBits) & 0xff;
			}
			break;
		    case 'p':
			extraLZPosBits = tmpval;
			if(extraLZPosBits < 0 || extraLZPosBits > 4)
			{
			    fprintf(stderr, "Extra LZ-pos bits must be 0..4!\n");
			    flags |= F_ERROR;
			}
			else
			    flags &= ~F_AUTOEX;
			break;
		    case 'l':
			startAddr = tmpval;
			if(startAddr < 0 || startAddr > 0xffff)
			{
			    fprintf(stderr, "Load address must be 0..0xffff!\n");
			    flags |= F_ERROR;
			}
			break;
		    }
		    break;

		default:
		    fprintf(stderr, "Error: Unknown option \"%c\"\n",
			    argv[n][i]);
		    flags |= F_ERROR;
		    break;
		}
		i++;
	    }
	}
	else
	{
	    if(!fileIn)
	    {
		fileIn = argv[n];
	    }
	    else if(!fileOut)
	    {
		fileOut = argv[n];
	    }
	    else
	    {
		fprintf(stderr, "Only two filenames wanted!\n");
		flags |= F_ERROR;
	    }
	}
    }

    if((flags & F_ERROR))
    {
	fprintf(stderr, "Usage: %s [-<flags>] [<infile> [<outfile>]]\n",
		argv[0]);
	fprintf(stderr,
		"\t d         data (no loading address)\n"
		"\t l<val>    set load address\n"
		"\t x<val>    execution address\n"
		"\t e<val>    escape bits\n"
		"\t r<val>    lz search range\n"
		"\t f         enable fast mode for C128 (2 MHz)\n"
		"\t n         no optimization\n"
		"\t s         stats\n"
#ifdef ENABLE_VERBOSE
		"\t v         verbose\n"
#endif /* ENABLE_VERBOSE */
		"\t p<val>    extralzposbits\n"
		"\t m<val>    max len 5..7 (2*2^5..2*2^7)\n"
		"\t u         unpack\n");
	return EXIT_FAILURE;
    }

    if(lzlen == -1)
	lzlen = DEFAULT_LZLEN;

    if(fileIn)
    {
	if(!(infp = fopen(fileIn, "rb")))
	{
	    fprintf(stderr, "Could not open %s for reading!\n", fileIn);
	    return EXIT_FAILURE;
	}
    }
    else
    {
	fprintf(stderr, "Reading from stdin\n");
	fflush(stderr);	/* for SAS/C */
	infp = stdin;
    }

    /* Unless -d was given, the file starts with a 2-byte little-endian
       load address that overrides any -l value */
    if(!(flags & F_SKIP))
    {
	if(fread(tmp, 1, 2, infp) != 2)
	{
	    fprintf(stderr, "Error: could not read the load address from %s\n",
		    fileIn?fileIn:"stdin");
	    if(infp != stdin)
		fclose(infp);
	    return EXIT_FAILURE;
	}
	startAddr = tmp[0] + 256*tmp[1];
    }

    /* Read in the data, growing the buffer lrange bytes at a time so that
       there is always room for one more full-sized read */
    inlen = 0;
    buflen = 0;
    indata = NULL;
    while( 1 )
    {
	if(buflen < inlen + lrange)
	{
	    unsigned char *grown = realloc(indata, buflen + lrange);

	    if(!grown)
	    {
		fprintf(stderr, "Error: out of memory while reading input\n");
		free(indata);
		if(infp != stdin)
		    fclose(infp);
		return 20;
	    }
	    indata = grown;
	    buflen += lrange;
	}
	newlen = fread(indata + inlen, 1, lrange, infp);
	if(newlen <= 0)
	    break;
	inlen += newlen;
    }

    if(infp != stdin)
	fclose(infp);

    if(inlen <= 0)
    {
	fprintf(stderr, "Error: no input data\n");
	free(indata);
	return EXIT_FAILURE;
    }

    if((flags & F_UNPACK))
    {
	n = UnPack(startAddr, indata, fileOut, flags);

	free(indata);
	return n;
    }

    if(startAddr < 0x258
#ifndef BIG
	|| startAddr + inlen -1 > 0xffff
#endif /* BIG */
      )
    {
	fprintf(stderr,
		"Only programs from 0x0258 to 0xffff can be compressed\n");
	fprintf(stderr, "(the input file is from 0x%04x to 0x%04x)\n",
		startAddr, startAddr+inlen-1);
	free(indata);
	return EXIT_FAILURE;
    }

    /* Try to find the execution address from a SYS statement located
       within 60 bytes of address $0801 */
    if(startAddr<=0x801)
    {
	/* indata[0] is the byte at startAddr, so $0801 is at this offset */
	int basic = 0x801 - startAddr;
	int limit = basic + 60;

	if(limit > inlen)
	    limit = inlen;
	for(n=basic;n<limit;n++)
	{
	    if(indata[n]==0x9e) /* SYS token */
	    {
		execAddr = 0;
		n++;
		/* Skip spaces and parens */
		while(n<inlen && (indata[n]=='(' || indata[n]==' '))
		   n++;

		/* Stop once the value no longer fits in 16 bits; this keeps
		   the accumulation from overflowing on junk digits */
		while(n<inlen && indata[n]>='0' && indata[n]<='9' &&
		      execAddr <= 0xffff)
		{
		    execAddr = execAddr * 10 + indata[n++] - '0';
		}
		break;
	    }
	}
    }

    if(ea!=-1)
    {
	/* An explicit -x always wins over the detected address */
	if(execAddr!=-1 && ea!=execAddr)
	    fprintf(stderr, "Discarding execution address 0x%04x=%d\n",
		    execAddr, execAddr);
	execAddr = ea;
    }
    else if(execAddr < startAddr || execAddr >= startAddr+inlen)
    {
	fprintf(stderr, "Note! The execution address was not detected correctly!\n");
	fprintf(stderr, "      Use the -x option to set the execution address.\n");
    }
    fprintf(stderr, "Load address 0x%04x=%d, Last byte 0x%04x=%d\n",
	    startAddr, startAddr, startAddr+inlen-1, startAddr+inlen-1);
    fprintf(stderr, "Exec address 0x%04x=%d\n", execAddr, execAddr);

    n = PackLz77(lzlen, flags, &startEscape, startAddr + inlen);
    if(!n)
    {
	int endAddr = startAddr + inlen;
	/* <outfile> is optional; never hand a NULL pointer to %s */
	const char *outName = fileOut?fileOut:"(unnamed output)";

	/* Move the end address for files that got expanded */
	if(0x801 + sizeof(headerUncrunch) + outPointer > endAddr)
	{
	    endAddr = 0x801 + sizeof(headerUncrunch) + outPointer;
	}
	/* 3 bytes reserved for EOF */
	/* bytes reserved for temporary data expansion (escaped chars) */
	endAddr += 3 + reservedBytes;

#ifdef BIG
	endAddr = 0x10000;
#endif /* BIG */
	if((flags & F_STATS))
	{
	    if(endAddr > 0x10000)
	    {
		fprintf(stderr, "%s uses the memory $4b-$%02x, $c3/$c4, $f7-$1aa, "
			"$200-$253 and $801-$%04x.\n%d bytes reserved.\n",
			outName, endAddr - 0x10000 + 0x4b,
			endAddr-1, reservedBytes);
	    }
	    else
	    {
		fprintf(stderr, "%s uses the memory $4b, $c3/$c4, $f7-$1aa, "
			"$200-$253 and $801-$%04x.\n%d bytes reserved.\n",
			outName, endAddr-1, reservedBytes);
	    }
	}

	SavePack(outBuffer, outPointer, fileOut,
		 startAddr, execAddr, startEscape, rleValues,
		 endAddr, extraLZPosBits, (flags & F_2MHZ)?1:0);

	timeused = clock()-timeused;
	if(!timeused)
	    timeused++;	/* avoid dividing by zero in the rate below */
	fprintf(stderr, "Compressed %d bytes in %4.2f seconds (%4.2f kB/sec)\n",
		inlen,
		(double)timeused/CLOCKS_PER_SEC,
		(double)CLOCKS_PER_SEC*inlen/timeused/1024.0);
    }

    free(indata);
    return n;
}


