added lzsa

6 years ago · 9799120899
95 changed files with 17933 additions and 1 deletions
--- a/Tools/unix/Makefile
+++ b/Tools/unix/Makefile
@ -9,7 +9,7 @@ ifeq ($(UNAME), Darwin)
 	SUFFIX=osx
 endif
 SUBDIRS= bst uz80as zx cpmtools bin2asm
 SUBDIRS= bst uz80as zx cpmtools bin2asm lzsa
 all:
 	@for i in $(SUBDIRS) ; do \
--- a/Tools/unix/lzsa/BlockFormat_LZSA1.md
+++ b/Tools/unix/lzsa/BlockFormat_LZSA1.md
@ -0,0 +1,65 @@
 # Block data format (LZSA1)
 Blocks encoded as LZSA1 are composed from consecutive commands. Each command follows this format:
 * token: <O|LLL|MMMM>
 * optional extra literal length
 * literal values
 * match offset low
 * optional match offset high
 * optional extra encoded match length
 **token**
 The token byte is broken down into three parts:
    7 6 5 4 3 2 1 0
    O L L L M M M M
 * L: 3-bit literals length (0-6, or 7 if extended). If the number of literals for this command is 0 to 6, the length is encoded in the token and no extra bytes are required. Otherwise, a value of 7 is encoded and extra bytes follow as 'optional extra literal length'
 * M: 4-bit encoded match length (0-14, or 15 if extended). Likewise, if the encoded match length for this command is 0 to 14, it is directly stored, otherwise 15 is stored and extra bytes follow as 'optional extra encoded match length'. Except for the last command in a block, a command always contains a match, so the encoded match length is the actual match length offset by the minimum, which is 3 bytes. For instance, an actual match length of 10 bytes to be copied, is encoded as 7.
 * O: set for a 2-bytes match offset, clear for a 1-byte match offset
 **optional extra literal length**
 If the literals length is 7 or more, the 'L' bits in the token form the value 7, and an extra byte follows here, with three possible types of value:
 * 0-248: the value is added to the 7 stored in the token, to compose the final literals length. For instance a length of 206 will be stored as 7 in the token + a single byte with the value of 199, as 7 + 199 = 206.
 * 250: a second byte follows. The final literals value is 256 + the second byte. For instance, a literals length of 499 is encoded as 7 in the token, a byte with the value of 250, and a final byte with the value of 243, as 256 + 243 = 499.
 * 249: a second and third byte follow, forming a little-endian 16-bit value. The final literals value is that 16-bit value. For instance, a literals length of 1024 is stored as 7 in the token, then byte values of 249, 0 and 4, as (4 * 256) = 1024.
 The extension byte values are chosen so that all three cases can be detected on 8-bit CPUs with a simple addition and overflow check.
 **literal values**
 Literal bytes, whose number is specified by the literals length, follow here. There can be zero literals in a command.
 Important note: for blocks that are part of a stream, the last command in a block ends here, as it always contains literals only. For raw blocks, the last command does contain the match offset and match length, see the note below for EOD detection.
 **match offset low**
 The low 8 bits of the match offset follow.
 **optional match offset high**
 If the 'O' bit (bit 7) is set in the token, the high 8 bits of the match offset follow, otherwise they are understood to be all set to 1. For instance, a short offset of 0x70 is interpreted as 0xff70.
 **important note regarding match offsets: stored as negative values**
 Note that the match offset is negative: it is added to the current decompressed location and not subtracted, in order to locate the back-reference to copy.
 **optional extra encoded match length**
 If the encoded match length is 15 or more, the 'M' bits in the token form the value 15, and an extra byte follows here, with three possible types of value.
 * 0-237: the value is added to the 15 stored in the token. The final value is 3 + 15 + this byte.
 * 239: a second byte follows. The final match length is 256 + the second byte.
 * 238: a second and third byte follow, forming a little-endian 16-bit value. The final encoded match length is that 16-bit value.
 Again, the extension byte values are chosen so that all cases can be detected with a simple addition and overflow check on 8-bit CPUs.
 # End Of Data detection for raw blocks
 When the LZSA1 block is part of a stream (see StreamFormat.md), as previously mentioned, the block ends after the literal values of the last command, without a match offset or match length.
 However, in a raw LZSA1 block, the last command does include a 1-byte match offset (set to zero) and a match length. The match length is encoded as a long zero: the 'M' bits in the token form the value 15, then an extra match length byte is present, with the value 238 ("two match length bytes follow"). Finally, a two-byte zero match length follows, indicating the end of the block. EOD is the only time a zero match length (which normally would indicate a copy of 3 bytes) is encoded as a large 2-byte match value. This allows the EOD test to exist in a rarely used code branch.
--- a/Tools/unix/lzsa/BlockFormat_LZSA2.md
+++ b/Tools/unix/lzsa/BlockFormat_LZSA2.md
@ -0,0 +1,89 @@
 # Block data format (LZSA2)
 Blocks encoded as LZSA2 are composed from consecutive commands. Each command follows this format:
 * token: <XYZ|LL|MMM>
 * optional extra literal length
 * literal values
 * match offset
 * optional extra encoded match length
 **token**
 The token byte is broken down into three parts:
    7 6 5 4 3 2 1 0
    X Y Z L L M M M
 * L: 2-bit literals length (0-2, or 3 if extended). If the number of literals for this command is 0 to 2, the length is encoded in the token and no extra bytes are required. Otherwise, a value of 3 is encoded and extra nibbles or bytes follow as 'optional extra literal length'
 * M: 3-bit encoded match length (0-6, or 7 if extended). Likewise, if the encoded match length for this command is 0 to 6, it is directly stored, otherwise 7 is stored and extra nibbles or bytes follow as 'optional extra encoded match length'. Except for the last command in a block, a command always contains a match, so the encoded match length is the actual match length offset by the minimum, which is 2 bytes. For instance, an actual match length of 5 bytes to be copied, is encoded as 3.
 * XYZ: 3-bit value that indicates how to decode the match offset
 **optional extra literal length**
 If the literals length is 3 or more, the 'L' bits in the token form the value 3, and an extra nibble is read:
 * 0-14: the value is added to the 3 stored in the token, to compose the final literals length.
 * 15: an extra byte follows
 If an extra byte follows, it can have two possible types of value:
 * 0-237: 18 is added to the value (3 from the token + 15 from the nibble), to compose the final literals length. For instance a length of 206 will be stored as 3 in the token + a nibble with the value of 15 + a single byte with the value of 188.
 * 239: a second and third byte follow, forming a little-endian 16-bit value. The final literals value is that 16-bit value. For instance, a literals length of 1027 is stored as 3 in the token, a nibble with the value of 15, then byte values of 239, 3 and 4, as 3 + (4 * 256) = 1027.
 **literal values**
 Literal bytes, whose number is specified by the literals length, follow here. There can be zero literals in a command.
 Important note: for blocks that are part of a stream, the last command in a block ends here, as it always contains literals only. For raw blocks, the last command does contain the match offset and match length, see the note below for EOD detection.
 **match offset**
 The match offset is decoded according to the XYZ bits in the token
    XYZ
    00Z 5-bit offset: read a nibble for offset bits 1-4 and use the inverted bit Z of the token as bit 0 of the offset. set bits 5-15 of the offset to 1.
    01Z 9-bit offset: read a byte for offset bits 0-7 and use the inverted bit Z for bit 8 of the offset. set bits 9-15 of the offset to 1.
    10Z 13-bit offset: read a nibble for offset bits 9-12 and use the inverted bit Z for bit 8 of the offset, then read a byte for offset bits 0-7. set bits 13-15 of the offset to 1.
    110 16-bit offset: read a byte for offset bits 8-15, then another byte for offset bits 0-7.
    111 repeat offset: reuse the offset value of the previous match command.
 The bit ordering and inversion helps optimize the decoder for size and speed on 8-bit CPUs.
 **important note regarding match offsets: stored as negative values**
 Note that the match offset is negative: it is added to the current decompressed location and not subtracted, in order to locate the back-reference to copy. For this reason, as already indicated, unexpressed offset bits are set to 1 instead of 0.
 **optional extra encoded match length**
 If the encoded match length is 7 or more, the 'M' bits in the token form the value 7, and an extra nibble is read:
 * 0-14: the value is added to the 7 stored in the token, and then the minmatch of 2 is added, to compose the final match length.
 * 15: an extra byte follows
 If an extra byte follows here, it can have two possible types of value:
 * 0-231: 24 is added to the value (7 from the token + 15 from the nibble + minmatch of 2), to compose the final match length. For instance a length of 150 will be stored as 7 in the token + a nibble with the value of 15 + a single byte with the value of 126.
 * 233: a second and third byte follow, forming a little-endian 16-bit value. The final encoded match length is that 16-bit value.
 # End Of Data detection for raw blocks
 When the LZSA2 block is part of a stream (see StreamFormat.md), as previously mentioned, the block ends after the literal values of the last command, without a match offset or match length.
 However, in a raw LZSA2 block, the last command does include a 9-bit match offset (set to zero, to be ignored) and an EOD marker as the match length. The EOD match length marker is encoded as such: the 'M' bits in the token form the value 7, then a nibble with the value of 15 is present, then a single extra match length byte with the value of 232, indicating the end of the block. This allows the EOD test to exist in a rarely used code branch.
 The EOD condition can be easily checked as part of the tri-state condition when handling long matches. When 24 is added to the match byte value:
 - If the byte doesn't overflow, the final match length is ready
 - If the byte overflows and equals zero, the EOD marker has been hit
 - Otherwise, if the byte overflows and doesn't equal zero, a 16-bit match length must be read.
 This tri-state test translates to only an addition and two branches on 8-bit CPUs.
 The equivalent EOD condition in literal lengths (which would be byte 238, that would overflow to exactly 0 when adding 18) is never emitted, so for size-optimized decompressors, the same code can be used to read both types of lengths.
 # Reading nibbles
 When the specification indicates that a nibble (4 bit value) must be read:
 * If there are no nibbles ready, read a byte immediately. Return the high 4 bits (bits 4-7) as the nibble and store the low 4 bits for later. Flag that a nibble is ready for next time.
 * If a nibble is ready, return the previously stored low 4 bits (bits 0-3) and flag that no nibble is ready for next time.
--- a/Tools/unix/lzsa/LICENSE
+++ b/Tools/unix/lzsa/LICENSE
@ -0,0 +1,3 @@
 The LZSA code is available under the Zlib license, except for src/matchfinder.c which is placed under the Creative Commons CC0 license.
 Please consult LICENSE.zlib.md and LICENSE.cc0.md for more information.
--- a/Tools/unix/lzsa/LICENSE.cc0.md
+++ b/Tools/unix/lzsa/LICENSE.cc0.md
@ -0,0 +1,43 @@
 ## creative commons
 # CC0 1.0 Universal
 CREATIVE COMMONS CORPORATION IS NOT A LAW FIRM AND DOES NOT PROVIDE LEGAL SERVICES. DISTRIBUTION OF THIS DOCUMENT DOES NOT CREATE AN ATTORNEY-CLIENT RELATIONSHIP. CREATIVE COMMONS PROVIDES THIS INFORMATION ON AN "AS-IS" BASIS. CREATIVE COMMONS MAKES NO WARRANTIES REGARDING THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS PROVIDED HEREUNDER, AND DISCLAIMS LIABILITY FOR DAMAGES RESULTING FROM THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS PROVIDED HEREUNDER.
 ### Statement of Purpose
 The laws of most jurisdictions throughout the world automatically confer exclusive Copyright and Related Rights (defined below) upon the creator and subsequent owner(s) (each and all, an "owner") of an original work of authorship and/or a database (each, a "Work").
 Certain owners wish to permanently relinquish those rights to a Work for the purpose of contributing to a commons of creative, cultural and scientific works ("Commons") that the public can reliably and without fear of later claims of infringement build upon, modify, incorporate in other works, reuse and redistribute as freely as possible in any form whatsoever and for any purposes, including without limitation commercial purposes. These owners may contribute to the Commons to promote the ideal of a free culture and the further production of creative, cultural and scientific works, or to gain reputation or greater distribution for their Work in part through the use and efforts of others.
 For these and/or other purposes and motivations, and without any expectation of additional consideration or compensation, the person associating CC0 with a Work (the "Affirmer"), to the extent that he or she is an owner of Copyright and Related Rights in the Work, voluntarily elects to apply CC0 to the Work and publicly distribute the Work under its terms, with knowledge of his or her Copyright and Related Rights in the Work and the meaning and intended legal effect of CC0 on those rights.
 1. __Copyright and Related Rights.__ A Work made available under CC0 may be protected by copyright and related or neighboring rights ("Copyright and Related Rights"). Copyright and Related Rights include, but are not limited to, the following:
    i. the right to reproduce, adapt, distribute, perform, display, communicate, and translate a Work;
    ii. moral rights retained by the original author(s) and/or performer(s);
    iii. publicity and privacy rights pertaining to a person's image or likeness depicted in a Work;
    iv. rights protecting against unfair competition in regards to a Work, subject to the limitations in paragraph 4(a), below;
    v. rights protecting the extraction, dissemination, use and reuse of data in a Work;
    vi. database rights (such as those arising under Directive 96/9/EC of the European Parliament and of the Council of 11 March 1996 on the legal protection of databases, and under any national implementation thereof, including any amended or successor version of such directive); and
    vii. other similar, equivalent or corresponding rights throughout the world based on applicable law or treaty, and any national implementations thereof.
 2. __Waiver.__ To the greatest extent permitted by, but not in contravention of, applicable law, Affirmer hereby overtly, fully, permanently, irrevocably and unconditionally waives, abandons, and surrenders all of Affirmer's Copyright and Related Rights and associated claims and causes of action, whether now known or unknown (including existing as well as future claims and causes of action), in the Work (i) in all territories worldwide, (ii) for the maximum duration provided by applicable law or treaty (including future time extensions), (iii) in any current or future medium and for any number of copies, and (iv) for any purpose whatsoever, including without limitation commercial, advertising or promotional purposes (the "Waiver"). Affirmer makes the Waiver for the benefit of each member of the public at large and to the detriment of Affirmer's heirs and successors, fully intending that such Waiver shall not be subject to revocation, rescission, cancellation, termination, or any other legal or equitable action to disrupt the quiet enjoyment of the Work by the public as contemplated by Affirmer's express Statement of Purpose.
 3. __Public License Fallback.__ Should any part of the Waiver for any reason be judged legally invalid or ineffective under applicable law, then the Waiver shall be preserved to the maximum extent permitted taking into account Affirmer's express Statement of Purpose. In addition, to the extent the Waiver is so judged Affirmer hereby grants to each affected person a royalty-free, non transferable, non sublicensable, non exclusive, irrevocable and unconditional license to exercise Affirmer's Copyright and Related Rights in the Work (i) in all territories worldwide, (ii) for the maximum duration provided by applicable law or treaty (including future time extensions), (iii) in any current or future medium and for any number of copies, and (iv) for any purpose whatsoever, including without limitation commercial, advertising or promotional purposes (the "License"). The License shall be deemed effective as of the date CC0 was applied by Affirmer to the Work. Should any part of the License for any reason be judged legally invalid or ineffective under applicable law, such partial invalidity or ineffectiveness shall not invalidate the remainder of the License, and in such case Affirmer hereby affirms that he or she will not (i) exercise any of his or her remaining Copyright and Related Rights in the Work or (ii) assert any associated claims and causes of action with respect to the Work, in either case contrary to Affirmer's express Statement of Purpose.
 4. __Limitations and Disclaimers.__
    a. No trademark or patent rights held by Affirmer are waived, abandoned, surrendered, licensed or otherwise affected by this document.
    b. Affirmer offers the Work as-is and makes no representations or warranties of any kind concerning the Work, express, implied, statutory or otherwise, including without limitation warranties of title, merchantability, fitness for a particular purpose, non infringement, or the absence of latent or other defects, accuracy, or the present or absence of errors, whether or not discoverable, all to the greatest extent permissible under applicable law.
    c. Affirmer disclaims responsibility for clearing rights of other persons that may apply to the Work or any use thereof, including without limitation any person's Copyright and Related Rights in the Work. Further, Affirmer disclaims responsibility for obtaining any necessary consents, permissions or other rights required for any use of the Work.
    d. Affirmer understands and acknowledges that Creative Commons is not a party to this document and has no duty or obligation with respect to this CC0 or use of the Work.
--- a/Tools/unix/lzsa/LICENSE.zlib.md
+++ b/Tools/unix/lzsa/LICENSE.zlib.md
@ -0,0 +1,19 @@
 Copyright (c) 2019 Emmanuel Marty
 This software is provided 'as-is', without any express or implied warranty. In
 no event will the authors be held liable for any damages arising from the use of
 this software.
 Permission is granted to anyone to use this software for any purpose, including
 commercial applications, and to alter it and redistribute it freely, subject to
 the following restrictions:
 1.  The origin of this software must not be misrepresented; you must not claim
    that you wrote the original software. If you use this software in a product,
    an acknowledgment in the product documentation would be appreciated but is
    not required.
 2.  Altered source versions must be plainly marked as such, and must not be
    misrepresented as being the original software.
 3.  This notice may not be removed or altered from any source distribution.
--- a/Tools/unix/lzsa/Makefile
+++ b/Tools/unix/lzsa/Makefile
@ -0,0 +1,45 @@
 CC=gcc
 CFLAGS=-O3 -fomit-frame-pointer -Isrc/libdivsufsort/include -Isrc
 OBJDIR=obj
 LDFLAGS=
 STRIP=strip
 $(OBJDIR)/%.o: src/../%.c
 	@mkdir -p '$(@D)'
 	$(CC) $(CFLAGS) -c $< -o $@
 APP := lzsa
 OBJS += $(OBJDIR)/src/lzsa.o
 OBJS += $(OBJDIR)/src/dictionary.o
 OBJS += $(OBJDIR)/src/expand_block_v1.o
 OBJS += $(OBJDIR)/src/expand_block_v2.o
 OBJS += $(OBJDIR)/src/expand_context.o
 OBJS += $(OBJDIR)/src/expand_inmem.o
 OBJS += $(OBJDIR)/src/expand_streaming.o
 OBJS += $(OBJDIR)/src/frame.o
 OBJS += $(OBJDIR)/src/matchfinder.o
 OBJS += $(OBJDIR)/src/shrink_block_v1.o
 OBJS += $(OBJDIR)/src/shrink_block_v2.o
 OBJS += $(OBJDIR)/src/shrink_context.o
 OBJS += $(OBJDIR)/src/shrink_inmem.o
 OBJS += $(OBJDIR)/src/shrink_streaming.o
 OBJS += $(OBJDIR)/src/stream.o
 OBJS += $(OBJDIR)/src/libdivsufsort/lib/divsufsort.o
 OBJS += $(OBJDIR)/src/libdivsufsort/lib/divsufsort_utils.o
 OBJS += $(OBJDIR)/src/libdivsufsort/lib/sssort.o
 OBJS += $(OBJDIR)/src/libdivsufsort/lib/trsort.o
 UNAME := $(shell uname)
 all: $(APP)
 	cp $(APP) ../../$(UNAME)
 $(APP): $(OBJS)
 	@mkdir -p ../../bin/posix
 	$(CC) $^ $(LDFLAGS) -o $(APP)
 	$(STRIP) $(APP)
 clean:
 	@rm -rf $(APP) $(OBJDIR)
--- a/Tools/unix/lzsa/README.md
+++ b/Tools/unix/lzsa/README.md
@ -0,0 +1,84 @@
 LZSA is a collection of byte-aligned compression formats that are specifically engineered for very fast decompression on 8-bit systems. It can compress files of any size by using blocks of a maximum size of 64 Kb with block-interdependent compression and up to 64 Kb of back-references for matches.
 ![Pareto frontier](pareto_graph.png)
 <sup>*ZX Spectrum</sup>
 Check out [The Hollow](https://www.pouet.net/prod.php?which=81909) by Darklite and Offense, winner of the Solskogen 2019 wild compo, that uses LZSA on Z80.
 [Gabba](https://www.pouet.net/prod.php?which=83539) by Stardust ranked 2nd in the ZX Spectrum demo compo at CAFe demoparty 2019 and also used LZSA on Z80. 
 The LZSA compression tool uses an aggressive optimal packing strategy to try to find the sequence of commands that gives the smallest packed file that decompresses to the original while maintaining the maximum possible decompression speed.
 The compression formats give the user choices that range from decompressing faster than LZ4 on 8-bit systems with better compression, to compressing as well as ZX7 with much better decompression speed. LZSA1 is designed to replace LZ4 and LZSA2 to replace ZX7, in 8-bit scenarios.
 Compression ratio comparison between LZSA and other optimal packers, for a workload composed of ZX Spectrum and C64 files:
                         Bytes            Ratio            Decompression speed vs. LZ4
    LZSA2                676681           52,49% <------   75%   
    MegaLZ 4.89          679041           52,68%           Not measured
    ZX7                  687133           53,30%           47,73%
    LZ5 1.4.1            727107           56,40%           75%
    LZSA1                735785           57,08% <------   90%
    Lizard -29           776122           60,21%           Not measured
    LZ4_HC -19 -B4 -BD   781049           60,59%           100%
    Uncompressed         1289127          100%             N/A
 Performance over well-known compression corpus files:
                         Uncompressed     LZ4_HC -19 -B4 -BD    LZSA1                LZSA2
    Canterbury           2810784          935827 (33,29%)       850792 (30,27%)      770877 (27,43%)
    Silesia              211938580        77299725 (36,47%)     73706340 (34,78%)    68928564 (32,52%)
    Calgary              3251493          1248780 (38,40%)      1192123 (36,67%)     1110290 (34,15%)
    Large                11159482         3771025 (33,79%)      3648393 (32,69%)     3519480 (31,54%)
    enwik9               1000000000       371841591 (37,18%)    355360043 (35,54%)   334900611 (33,49%)
 As an example of LZSA1's simplicity, a size-optimized decompressor on Z80 has been implemented in 67 bytes.
 The compressor is approximately 2X slower than LZ4_HC but compresses better while maintaining similar decompression speeds and decompressor simplicity.
 The main differences between LZSA1 and the LZ4 compression format are:
 * The use of short (8-bit) match offsets where possible. The match-finder and optimizer cooperate to try and use the shortest match offsets possible.
 * Shorter encoding of lengths. As blocks are maximum 64 Kb in size, lengths can only be up to 64 Kb.
 * As a result of the smaller commands due to the possibly shorter match offsets, a minimum match size of 3 bytes instead of 4. The use of small matches is driven by the optimizer, and used where they provide gains.
 As for LZSA2:
 * 5-bit, 9-bit, 13-bit and 16-bit match offsets, using nibble encoding
 * Rep-matches
 * Shorter encoding of lengths, also using nibbles
 * A minmatch of 2 bytes
 * No (slow) bit-packing. LZSA2 uses byte alignment in the hot path, and nibbles.
 Inspirations:
 * [LZ4](https://github.com/lz4/lz4) by Yann Collet.
 * [LZ5/Lizard](https://github.com/inikep/lizard) by Przemyslaw Skibinski and Yann Collet.
 * The suffix array intervals in [Wimlib](https://wimlib.net/git/?p=wimlib;a=tree) by Eric Biggers.
 * ZX7 by Einar Saukas
 * [apc](https://github.com/svendahl/cap) by Sven-Åke Dahl
 * [Charles Bloom](http://cbloomrants.blogspot.com/)'s compression blog
 License:
 * The LZSA code is available under the Zlib license.
 * The match finder (matchfinder.c) is available under the CC0 license due to using portions of code from Eric Biggers' Wimlib in the suffix array-based matchfinder.
 8-bit assembly code:
 * Z80 decompressors (size- and speed-optimized) written by [introspec](https://github.com/specke)
 * 6502 and 8088 size-optimized improvements by [Peter Ferrie](https://github.com/peterferrie)
 * 8088 speed-optimized decompressor by [Jim Leonard](https://github.com/mobygamer)
 External links:
 * [i8080 decompressors](https://gitlab.com/ivagor/lzsa8080/tree/master) by Ivan Gorodetsky
 * [PDP-11 decompressors](https://gitlab.com/ivagor/lzsa8080/tree/master/PDP11) also by Ivan Gorodetsky
 * LZSA's page on [Pouet](https://www.pouet.net/prod.php?which=81573)
 # Compressed format
 Decompression code is provided for common 8-bit CPUs such as Z80 and 6502. However, if you would like to write your own, or understand the encoding, LZSA compresses data to a format that is fast and simple to decompress on 8-bit CPUs. It is encoded in either a stream of blocks, or as a single raw block, depending on command-line settings. The encoding is deliberately designed to avoid complicated operations on 8-bit CPUs (such as 16-bit math).
 * [Stream format](https://github.com/emmanuel-marty/lzsa/blob/master/StreamFormat.md)
 * [Block encoding for LZSA1](https://github.com/emmanuel-marty/lzsa/blob/master/BlockFormat_LZSA1.md)
 * [Block encoding for LZSA2](https://github.com/emmanuel-marty/lzsa/blob/master/BlockFormat_LZSA2.md)
--- a/Tools/unix/lzsa/StreamFormat.md
+++ b/Tools/unix/lzsa/StreamFormat.md
@ -0,0 +1,39 @@
 # Stream format
 The stream format is composed of:
 * a header
 * one or more frames
 * a footer
 # Header format
 The 3-bytes LZSA header contains a signature and a traits byte:
    0    1                2
    0x7b 0x9e             7 6 5 4 3 2 1 0
                          V V V Z Z Z Z Z
    <--- signature --->   <- traits ->
 Trait bits:
 * V: 3 bit code that indicates which block data encoding is used. 0 is LZSA1 and 2 is LZSA2.
 * Z: these bits in the traits are set to 0 for LZSA1 and LZSA2.
 # Frame format
 Each frame contains a 3-bytes length followed by block data that expands to up to 64 Kb of decompressed data. The block data is encoded either as LZSA1 or LZSA2 depending on the V bits of the traits byte in the header.
    0    1    2
    DSZ0 DSZ1 U|DSZ2
 * DSZ0 (length byte 0) contains bits 0-7 of the block data size
 * DSZ1 (length byte 1) contains bits 8-15 of the block data size
 * DSZ2 (bit 0 of length byte 2) contains bit 16 of the block data size
 * U (bit 7 of length byte 2) is set if the block data is uncompressed, and clear if the block data is compressed.
 * Bits 1..6 of length byte 2 are currently undefined and must be set to 0.
 # Footer format
 The stream ends with the EOD frame: the 3 length bytes are set to 0x00, 0x00, 0x00, and no block data follows.
--- a/Tools/unix/lzsa/VS2017/lzsa.sln
+++ b/Tools/unix/lzsa/VS2017/lzsa.sln
@ -0,0 +1,31 @@
 Microsoft Visual Studio Solution File, Format Version 12.00
 # Visual Studio 15
 VisualStudioVersion = 15.0.28307.489
 MinimumVisualStudioVersion = 10.0.40219.1
 Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "lzsa", "lzsa.vcxproj", "{3F30FEE8-63C5-4D39-A175-EDD7EA93E9B8}"
 EndProject
 Global
 	GlobalSection(SolutionConfigurationPlatforms) = preSolution
 		Debug|x64 = Debug|x64
 		Debug|x86 = Debug|x86
 		Release|x64 = Release|x64
 		Release|x86 = Release|x86
 	EndGlobalSection
 	GlobalSection(ProjectConfigurationPlatforms) = postSolution
 		{3F30FEE8-63C5-4D39-A175-EDD7EA93E9B8}.Debug|x64.ActiveCfg = Debug|x64
 		{3F30FEE8-63C5-4D39-A175-EDD7EA93E9B8}.Debug|x64.Build.0 = Debug|x64
 		{3F30FEE8-63C5-4D39-A175-EDD7EA93E9B8}.Debug|x86.ActiveCfg = Debug|Win32
 		{3F30FEE8-63C5-4D39-A175-EDD7EA93E9B8}.Debug|x86.Build.0 = Debug|Win32
 		{3F30FEE8-63C5-4D39-A175-EDD7EA93E9B8}.Release|x64.ActiveCfg = Release|x64
 		{3F30FEE8-63C5-4D39-A175-EDD7EA93E9B8}.Release|x64.Build.0 = Release|x64
 		{3F30FEE8-63C5-4D39-A175-EDD7EA93E9B8}.Release|x86.ActiveCfg = Release|Win32
 		{3F30FEE8-63C5-4D39-A175-EDD7EA93E9B8}.Release|x86.Build.0 = Release|Win32
 	EndGlobalSection
 	GlobalSection(SolutionProperties) = preSolution
 		HideSolutionNode = FALSE
 	EndGlobalSection
 	GlobalSection(ExtensibilityGlobals) = postSolution
 		SolutionGuid = {A1E1655C-AA9F-41F0-80C9-18DD0B859D7C}
 	EndGlobalSection
 EndGlobal
--- a/Tools/unix/lzsa/VS2017/lzsa.vcxproj
+++ b/Tools/unix/lzsa/VS2017/lzsa.vcxproj
@ -0,0 +1,225 @@
 <?xml version="1.0" encoding="utf-8"?>
 <Project DefaultTargets="Build" ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
  <ItemGroup Label="ProjectConfigurations">
    <ProjectConfiguration Include="Debug|Win32">
      <Configuration>Debug</Configuration>
      <Platform>Win32</Platform>
    </ProjectConfiguration>
    <ProjectConfiguration Include="Release|Win32">
      <Configuration>Release</Configuration>
      <Platform>Win32</Platform>
    </ProjectConfiguration>
    <ProjectConfiguration Include="Debug|x64">
      <Configuration>Debug</Configuration>
      <Platform>x64</Platform>
    </ProjectConfiguration>
    <ProjectConfiguration Include="Release|x64">
      <Configuration>Release</Configuration>
      <Platform>x64</Platform>
    </ProjectConfiguration>
  </ItemGroup>
  <PropertyGroup Label="Globals">
    <VCProjectVersion>15.0</VCProjectVersion>
    <ProjectGuid>{3F30FEE8-63C5-4D39-A175-EDD7EA93E9B8}</ProjectGuid>
    <Keyword>Win32Proj</Keyword>
    <RootNamespace>lzsa</RootNamespace>
    <WindowsTargetPlatformVersion>8.1</WindowsTargetPlatformVersion>
  </PropertyGroup>
  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
    <ConfigurationType>Application</ConfigurationType>
    <UseDebugLibraries>true</UseDebugLibraries>
    <PlatformToolset>v141</PlatformToolset>
    <CharacterSet>Unicode</CharacterSet>
  </PropertyGroup>
  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
    <ConfigurationType>Application</ConfigurationType>
    <UseDebugLibraries>false</UseDebugLibraries>
    <PlatformToolset>v141</PlatformToolset>
    <WholeProgramOptimization>true</WholeProgramOptimization>
    <CharacterSet>Unicode</CharacterSet>
  </PropertyGroup>
  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
    <ConfigurationType>Application</ConfigurationType>
    <UseDebugLibraries>true</UseDebugLibraries>
    <PlatformToolset>v141</PlatformToolset>
    <CharacterSet>Unicode</CharacterSet>
  </PropertyGroup>
  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
    <ConfigurationType>Application</ConfigurationType>
    <UseDebugLibraries>false</UseDebugLibraries>
    <PlatformToolset>v141</PlatformToolset>
    <WholeProgramOptimization>true</WholeProgramOptimization>
    <CharacterSet>Unicode</CharacterSet>
  </PropertyGroup>
  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
  <ImportGroup Label="ExtensionSettings">
  </ImportGroup>
  <ImportGroup Label="Shared">
  </ImportGroup>
  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
  </ImportGroup>
  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
  </ImportGroup>
  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
  </ImportGroup>
  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
  </ImportGroup>
  <PropertyGroup Label="UserMacros" />
  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
    <LinkIncremental>true</LinkIncremental>
    <OutDir>$(ProjectDir)bin\</OutDir>
    <TargetName>$(ProjectName)_debug</TargetName>
  </PropertyGroup>
  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
    <LinkIncremental>true</LinkIncremental>
    <OutDir>$(ProjectDir)bin\</OutDir>
    <TargetName>$(ProjectName)_debug</TargetName>
  </PropertyGroup>
  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
    <LinkIncremental>false</LinkIncremental>
    <OutDir>$(ProjectDir)bin\</OutDir>
  </PropertyGroup>
  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
    <LinkIncremental>false</LinkIncremental>
    <OutDir>$(ProjectDir)bin\</OutDir>
  </PropertyGroup>
  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
    <ClCompile>
      <PrecompiledHeader>NotUsing</PrecompiledHeader>
      <WarningLevel>Level3</WarningLevel>
      <Optimization>Disabled</Optimization>
      <SDLCheck>true</SDLCheck>
      <PreprocessorDefinitions>_CRT_SECURE_NO_WARNINGS;WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
      <ConformanceMode>true</ConformanceMode>
      <PrecompiledHeaderFile>
      </PrecompiledHeaderFile>
      <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
      <AdditionalIncludeDirectories>..\src\libdivsufsort\include;..\;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
    </ClCompile>
    <Link>
      <SubSystem>Console</SubSystem>
      <GenerateDebugInformation>true</GenerateDebugInformation>
      <OutputFile>$(ProjectDir)bin\$(TargetName)$(TargetExt)</OutputFile>
    </Link>
  </ItemDefinitionGroup>
  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
    <ClCompile>
      <PrecompiledHeader>NotUsing</PrecompiledHeader>
      <WarningLevel>Level3</WarningLevel>
      <Optimization>Disabled</Optimization>
      <SDLCheck>true</SDLCheck>
      <PreprocessorDefinitions>_CRT_SECURE_NO_WARNINGS;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
      <ConformanceMode>true</ConformanceMode>
      <PrecompiledHeaderFile>
      </PrecompiledHeaderFile>
      <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
      <AdditionalIncludeDirectories>..\src\libdivsufsort\include;..\;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
    </ClCompile>
    <Link>
      <SubSystem>Console</SubSystem>
      <GenerateDebugInformation>true</GenerateDebugInformation>
      <OutputFile>$(ProjectDir)bin\$(TargetName)$(TargetExt)</OutputFile>
    </Link>
  </ItemDefinitionGroup>
  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
    <ClCompile>
      <PrecompiledHeader>NotUsing</PrecompiledHeader>
      <WarningLevel>Level3</WarningLevel>
      <Optimization>MaxSpeed</Optimization>
      <FunctionLevelLinking>true</FunctionLevelLinking>
      <IntrinsicFunctions>true</IntrinsicFunctions>
      <SDLCheck>true</SDLCheck>
      <PreprocessorDefinitions>_CRT_SECURE_NO_WARNINGS;WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
      <ConformanceMode>true</ConformanceMode>
      <PrecompiledHeaderFile>
      </PrecompiledHeaderFile>
      <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
      <FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
      <AdditionalIncludeDirectories>..\src\libdivsufsort\include;..\;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
      <OmitFramePointers>true</OmitFramePointers>
    </ClCompile>
    <Link>
      <SubSystem>Console</SubSystem>
      <EnableCOMDATFolding>true</EnableCOMDATFolding>
      <OptimizeReferences>true</OptimizeReferences>
      <GenerateDebugInformation>true</GenerateDebugInformation>
      <OutputFile>$(ProjectDir)bin\$(TargetName)$(TargetExt)</OutputFile>
    </Link>
  </ItemDefinitionGroup>
  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
    <ClCompile>
      <PrecompiledHeader>NotUsing</PrecompiledHeader>
      <WarningLevel>Level3</WarningLevel>
      <Optimization>MaxSpeed</Optimization>
      <FunctionLevelLinking>true</FunctionLevelLinking>
      <IntrinsicFunctions>true</IntrinsicFunctions>
      <SDLCheck>true</SDLCheck>
      <PreprocessorDefinitions>_CRT_SECURE_NO_WARNINGS;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
      <ConformanceMode>true</ConformanceMode>
      <PrecompiledHeaderFile>
      </PrecompiledHeaderFile>
      <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
      <FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
      <AdditionalIncludeDirectories>..\src\libdivsufsort\include;..\;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
      <OmitFramePointers>true</OmitFramePointers>
    </ClCompile>
    <Link>
      <SubSystem>Console</SubSystem>
      <EnableCOMDATFolding>true</EnableCOMDATFolding>
      <OptimizeReferences>true</OptimizeReferences>
      <GenerateDebugInformation>true</GenerateDebugInformation>
      <OutputFile>$(ProjectDir)bin\$(TargetName)$(TargetExt)</OutputFile>
    </Link>
  </ItemDefinitionGroup>
  <ItemGroup>
    <ClInclude Include="..\src\dictionary.h" />
    <ClInclude Include="..\src\expand_context.h" />
    <ClInclude Include="..\src\expand_streaming.h" />
    <ClInclude Include="..\src\expand_block_v1.h" />
    <ClInclude Include="..\src\expand_block_v2.h" />
    <ClInclude Include="..\src\format.h" />
    <ClInclude Include="..\src\frame.h" />
    <ClInclude Include="..\src\expand_inmem.h" />
    <ClInclude Include="..\src\lib.h" />
    <ClInclude Include="..\src\libdivsufsort\include\divsufsort_config.h" />
    <ClInclude Include="..\src\libdivsufsort\include\divsufsort.h" />
    <ClInclude Include="..\src\libdivsufsort\include\divsufsort_private.h" />
    <ClInclude Include="..\src\matchfinder.h" />
    <ClInclude Include="..\src\shrink_context.h" />
    <ClInclude Include="..\src\shrink_inmem.h" />
    <ClInclude Include="..\src\shrink_streaming.h" />
    <ClInclude Include="..\src\shrink_block_v1.h" />
    <ClInclude Include="..\src\shrink_block_v2.h" />
    <ClInclude Include="..\src\stream.h" />
    <ClInclude Include="pch.h" />
  </ItemGroup>
  <ItemGroup>
    <ClCompile Include="..\src\dictionary.c" />
    <ClCompile Include="..\src\expand_context.c" />
    <ClCompile Include="..\src\expand_streaming.c" />
    <ClCompile Include="..\src\expand_block_v1.c" />
    <ClCompile Include="..\src\expand_block_v2.c" />
    <ClCompile Include="..\src\frame.c" />
    <ClCompile Include="..\src\expand_inmem.c" />
    <ClCompile Include="..\src\libdivsufsort\lib\divsufsort.c" />
    <ClCompile Include="..\src\libdivsufsort\lib\sssort.c" />
    <ClCompile Include="..\src\libdivsufsort\lib\trsort.c" />
    <ClCompile Include="..\src\libdivsufsort\lib\divsufsort_utils.c" />
    <ClCompile Include="..\src\lzsa.c" />
    <ClCompile Include="..\src\matchfinder.c" />
    <ClCompile Include="..\src\shrink_context.c" />
    <ClCompile Include="..\src\shrink_inmem.c" />
    <ClCompile Include="..\src\shrink_streaming.c" />
    <ClCompile Include="..\src\shrink_block_v1.c" />
    <ClCompile Include="..\src\shrink_block_v2.c" />
    <ClCompile Include="..\src\stream.c" />
  </ItemGroup>
  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
  <ImportGroup Label="ExtensionTargets">
  </ImportGroup>
 </Project>
--- a/Tools/unix/lzsa/VS2017/lzsa.vcxproj.filters
+++ b/Tools/unix/lzsa/VS2017/lzsa.vcxproj.filters
@ -0,0 +1,147 @@
 <?xml version="1.0" encoding="utf-8"?>
 <Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
  <ItemGroup>
    <Filter Include="Fichiers sources">
      <UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
      <Extensions>cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
    </Filter>
    <Filter Include="Fichiers d%27en-tête">
      <UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
      <Extensions>h;hh;hpp;hxx;hm;inl;inc;ipp;xsd</Extensions>
    </Filter>
    <Filter Include="Fichiers de ressources">
      <UniqueIdentifier>{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}</UniqueIdentifier>
      <Extensions>rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms</Extensions>
    </Filter>
    <Filter Include="Fichiers sources\libdivsufsort">
      <UniqueIdentifier>{5ec09c0d-19f7-4a6f-b524-f405fb99e48c}</UniqueIdentifier>
    </Filter>
    <Filter Include="Fichiers sources\libdivsufsort\lib">
      <UniqueIdentifier>{a922f475-1322-496d-8a6d-7f1c6b92423d}</UniqueIdentifier>
    </Filter>
    <Filter Include="Fichiers sources\libdivsufsort\include">
      <UniqueIdentifier>{bd05c6e8-af92-4ab8-8916-0424cd8d186b}</UniqueIdentifier>
    </Filter>
  </ItemGroup>
  <ItemGroup>
    <ClInclude Include="pch.h">
      <Filter>Fichiers d%27en-tête</Filter>
    </ClInclude>
    <ClInclude Include="..\src\format.h">
      <Filter>Fichiers sources</Filter>
    </ClInclude>
    <ClInclude Include="..\src\libdivsufsort\include\divsufsort.h">
      <Filter>Fichiers sources\libdivsufsort\include</Filter>
    </ClInclude>
    <ClInclude Include="..\src\libdivsufsort\include\divsufsort_private.h">
      <Filter>Fichiers sources\libdivsufsort\include</Filter>
    </ClInclude>
    <ClInclude Include="..\src\frame.h">
      <Filter>Fichiers sources</Filter>
    </ClInclude>
    <ClInclude Include="..\src\matchfinder.h">
      <Filter>Fichiers sources</Filter>
    </ClInclude>
    <ClInclude Include="..\src\lib.h">
      <Filter>Fichiers sources</Filter>
    </ClInclude>
    <ClInclude Include="..\src\stream.h">
      <Filter>Fichiers sources</Filter>
    </ClInclude>
    <ClInclude Include="..\src\expand_streaming.h">
      <Filter>Fichiers sources</Filter>
    </ClInclude>
    <ClInclude Include="..\src\expand_inmem.h">
      <Filter>Fichiers sources</Filter>
    </ClInclude>
    <ClInclude Include="..\src\dictionary.h">
      <Filter>Fichiers sources</Filter>
    </ClInclude>
    <ClInclude Include="..\src\shrink_context.h">
      <Filter>Fichiers sources</Filter>
    </ClInclude>
    <ClInclude Include="..\src\shrink_streaming.h">
      <Filter>Fichiers sources</Filter>
    </ClInclude>
    <ClInclude Include="..\src\expand_context.h">
      <Filter>Fichiers sources</Filter>
    </ClInclude>
    <ClInclude Include="..\src\expand_block_v1.h">
      <Filter>Fichiers sources</Filter>
    </ClInclude>
    <ClInclude Include="..\src\expand_block_v2.h">
      <Filter>Fichiers sources</Filter>
    </ClInclude>
    <ClInclude Include="..\src\shrink_block_v1.h">
      <Filter>Fichiers sources</Filter>
    </ClInclude>
    <ClInclude Include="..\src\shrink_block_v2.h">
      <Filter>Fichiers sources</Filter>
    </ClInclude>
    <ClInclude Include="..\src\shrink_inmem.h">
      <Filter>Fichiers sources</Filter>
    </ClInclude>
    <ClInclude Include="..\src\libdivsufsort\include\divsufsort_config.h">
      <Filter>Fichiers sources\libdivsufsort\include</Filter>
    </ClInclude>
  </ItemGroup>
  <ItemGroup>
    <ClCompile Include="..\src\libdivsufsort\lib\divsufsort.c">
      <Filter>Fichiers sources\libdivsufsort\lib</Filter>
    </ClCompile>
    <ClCompile Include="..\src\libdivsufsort\lib\sssort.c">
      <Filter>Fichiers sources\libdivsufsort\lib</Filter>
    </ClCompile>
    <ClCompile Include="..\src\libdivsufsort\lib\trsort.c">
      <Filter>Fichiers sources\libdivsufsort\lib</Filter>
    </ClCompile>
    <ClCompile Include="..\src\frame.c">
      <Filter>Fichiers sources</Filter>
    </ClCompile>
    <ClCompile Include="..\src\matchfinder.c">
      <Filter>Fichiers sources</Filter>
    </ClCompile>
    <ClCompile Include="..\src\lzsa.c">
      <Filter>Fichiers sources</Filter>
    </ClCompile>
    <ClCompile Include="..\src\stream.c">
      <Filter>Fichiers sources</Filter>
    </ClCompile>
    <ClCompile Include="..\src\expand_streaming.c">
      <Filter>Fichiers sources</Filter>
    </ClCompile>
    <ClCompile Include="..\src\expand_inmem.c">
      <Filter>Fichiers sources</Filter>
    </ClCompile>
    <ClCompile Include="..\src\dictionary.c">
      <Filter>Fichiers sources</Filter>
    </ClCompile>
    <ClCompile Include="..\src\shrink_context.c">
      <Filter>Fichiers sources</Filter>
    </ClCompile>
    <ClCompile Include="..\src\shrink_streaming.c">
      <Filter>Fichiers sources</Filter>
    </ClCompile>
    <ClCompile Include="..\src\expand_context.c">
      <Filter>Fichiers sources</Filter>
    </ClCompile>
    <ClCompile Include="..\src\expand_block_v1.c">
      <Filter>Fichiers sources</Filter>
    </ClCompile>
    <ClCompile Include="..\src\expand_block_v2.c">
      <Filter>Fichiers sources</Filter>
    </ClCompile>
    <ClCompile Include="..\src\shrink_block_v1.c">
      <Filter>Fichiers sources</Filter>
    </ClCompile>
    <ClCompile Include="..\src\shrink_block_v2.c">
      <Filter>Fichiers sources</Filter>
    </ClCompile>
    <ClCompile Include="..\src\shrink_inmem.c">
      <Filter>Fichiers sources</Filter>
    </ClCompile>
    <ClCompile Include="..\src\libdivsufsort\lib\divsufsort_utils.c">
      <Filter>Fichiers sources\libdivsufsort\lib</Filter>
    </ClCompile>
  </ItemGroup>
 </Project>
--- a/Tools/unix/lzsa/VS2017/lzsa.vcxproj.user
+++ b/Tools/unix/lzsa/VS2017/lzsa.vcxproj.user
@ -0,0 +1,27 @@
 <?xml version="1.0" encoding="utf-8"?>
 <Project ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
    <LocalDebuggerCommand>$(TargetPath)</LocalDebuggerCommand>
    <LocalDebuggerCommandArguments>-f2 -c -v corpus/zxspectrum/graphics/bfox-dont_go_away_(2010).mg1 bfox.lzs</LocalDebuggerCommandArguments>
    <DebuggerFlavor>WindowsLocalDebugger</DebuggerFlavor>
    <LocalDebuggerWorkingDirectory>$(ProjectDir)..\</LocalDebuggerWorkingDirectory>
  </PropertyGroup>
  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
    <LocalDebuggerCommand>$(TargetPath)</LocalDebuggerCommand>
    <LocalDebuggerCommandArguments>-f2 -c -v corpus/zxspectrum/graphics/bfox-dont_go_away_(2010).mg1 bfox.lzs</LocalDebuggerCommandArguments>
    <DebuggerFlavor>WindowsLocalDebugger</DebuggerFlavor>
    <LocalDebuggerWorkingDirectory>$(ProjectDir)..\</LocalDebuggerWorkingDirectory>
  </PropertyGroup>
  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
    <LocalDebuggerCommand>$(TargetPath)</LocalDebuggerCommand>
    <LocalDebuggerCommandArguments>-f2 -c -v corpus/zxspectrum/graphics/bfox-dont_go_away_(2010).mg1 bfox.lzs</LocalDebuggerCommandArguments>
    <DebuggerFlavor>WindowsLocalDebugger</DebuggerFlavor>
    <LocalDebuggerWorkingDirectory>$(ProjectDir)..\</LocalDebuggerWorkingDirectory>
  </PropertyGroup>
  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
    <LocalDebuggerCommand>$(TargetPath)</LocalDebuggerCommand>
    <LocalDebuggerCommandArguments>-f2 -c -v corpus/zxspectrum/graphics/bfox-dont_go_away_(2010).mg1 bfox.lzs</LocalDebuggerCommandArguments>
    <DebuggerFlavor>WindowsLocalDebugger</DebuggerFlavor>
    <LocalDebuggerWorkingDirectory>$(ProjectDir)..\</LocalDebuggerWorkingDirectory>
  </PropertyGroup>
 </Project>
--- a/Tools/unix/lzsa/Xcode/lzsa.xcodeproj/project.pbxproj
+++ b/Tools/unix/lzsa/Xcode/lzsa.xcodeproj/project.pbxproj
@ -0,0 +1,429 @@
 // !$*UTF8*$!
 {
 	archiveVersion = 1;
 	classes = {
 	};
 	objectVersion = 50;
 	objects = {
 /* Begin PBXBuildFile section */
 		0CADC63122AAD8EB003E9821 /* shrink_inmem.c in Sources */ = {isa = PBXBuildFile; fileRef = 0CADC5EE22AAD8EA003E9821 /* shrink_inmem.c */; };
 		0CADC63222AAD8EB003E9821 /* frame.c in Sources */ = {isa = PBXBuildFile; fileRef = 0CADC5F322AAD8EB003E9821 /* frame.c */; };
 		0CADC63322AAD8EB003E9821 /* matchfinder.c in Sources */ = {isa = PBXBuildFile; fileRef = 0CADC5F422AAD8EB003E9821 /* matchfinder.c */; };
 		0CADC63422AAD8EB003E9821 /* shrink_block_v1.c in Sources */ = {isa = PBXBuildFile; fileRef = 0CADC5FA22AAD8EB003E9821 /* shrink_block_v1.c */; };
 		0CADC63A22AAD8EB003E9821 /* trsort.c in Sources */ = {isa = PBXBuildFile; fileRef = 0CADC61622AAD8EB003E9821 /* trsort.c */; };
 		0CADC63B22AAD8EB003E9821 /* divsufsort.c in Sources */ = {isa = PBXBuildFile; fileRef = 0CADC61722AAD8EB003E9821 /* divsufsort.c */; };
 		0CADC63D22AAD8EB003E9821 /* sssort.c in Sources */ = {isa = PBXBuildFile; fileRef = 0CADC61922AAD8EB003E9821 /* sssort.c */; };
 		0CADC63E22AAD8EB003E9821 /* expand_block_v1.c in Sources */ = {isa = PBXBuildFile; fileRef = 0CADC62122AAD8EB003E9821 /* expand_block_v1.c */; };
 		0CADC63F22AAD8EB003E9821 /* lzsa.c in Sources */ = {isa = PBXBuildFile; fileRef = 0CADC62222AAD8EB003E9821 /* lzsa.c */; };
 		0CADC64022AAD8EB003E9821 /* shrink_streaming.c in Sources */ = {isa = PBXBuildFile; fileRef = 0CADC62322AAD8EB003E9821 /* shrink_streaming.c */; };
 		0CADC64122AAD8EB003E9821 /* expand_inmem.c in Sources */ = {isa = PBXBuildFile; fileRef = 0CADC62522AAD8EB003E9821 /* expand_inmem.c */; };
 		0CADC64222AAD8EB003E9821 /* stream.c in Sources */ = {isa = PBXBuildFile; fileRef = 0CADC62922AAD8EB003E9821 /* stream.c */; };
 		0CADC64322AAD8EB003E9821 /* expand_block_v2.c in Sources */ = {isa = PBXBuildFile; fileRef = 0CADC62A22AAD8EB003E9821 /* expand_block_v2.c */; };
 		0CADC64422AAD8EB003E9821 /* shrink_context.c in Sources */ = {isa = PBXBuildFile; fileRef = 0CADC62B22AAD8EB003E9821 /* shrink_context.c */; };
 		0CADC64522AAD8EB003E9821 /* expand_streaming.c in Sources */ = {isa = PBXBuildFile; fileRef = 0CADC62D22AAD8EB003E9821 /* expand_streaming.c */; };
 		0CADC64622AAD8EB003E9821 /* dictionary.c in Sources */ = {isa = PBXBuildFile; fileRef = 0CADC62E22AAD8EB003E9821 /* dictionary.c */; };
 		0CADC64722AAD8EB003E9821 /* expand_context.c in Sources */ = {isa = PBXBuildFile; fileRef = 0CADC62F22AAD8EB003E9821 /* expand_context.c */; };
 		0CADC64822AAD8EB003E9821 /* shrink_block_v2.c in Sources */ = {isa = PBXBuildFile; fileRef = 0CADC63022AAD8EB003E9821 /* shrink_block_v2.c */; };
 		0CADC64A22AB8DAD003E9821 /* divsufsort_utils.c in Sources */ = {isa = PBXBuildFile; fileRef = 0CADC64922AB8DAD003E9821 /* divsufsort_utils.c */; };
 /* End PBXBuildFile section */
 /* Begin PBXCopyFilesBuildPhase section */
 		0CADC57622A65EA4003E9821 /* CopyFiles */ = {
 			isa = PBXCopyFilesBuildPhase;
 			buildActionMask = 2147483647;
 			dstPath = /usr/share/man/man1/;
 			dstSubfolderSpec = 0;
 			files = (
 			);
 			runOnlyForDeploymentPostprocessing = 1;
 		};
 /* End PBXCopyFilesBuildPhase section */
 /* Begin PBXFileReference section */
 		0CADC57822A65EA5003E9821 /* lzsa */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = lzsa; sourceTree = BUILT_PRODUCTS_DIR; };
 		0CADC5ED22AAD8EA003E9821 /* expand_streaming.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = expand_streaming.h; path = ../../src/expand_streaming.h; sourceTree = "<group>"; };
 		0CADC5EE22AAD8EA003E9821 /* shrink_inmem.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = shrink_inmem.c; path = ../../src/shrink_inmem.c; sourceTree = "<group>"; };
 		0CADC5EF22AAD8EB003E9821 /* stream.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = stream.h; path = ../../src/stream.h; sourceTree = "<group>"; };
 		0CADC5F022AAD8EB003E9821 /* expand_block_v1.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = expand_block_v1.h; path = ../../src/expand_block_v1.h; sourceTree = "<group>"; };
 		0CADC5F122AAD8EB003E9821 /* shrink_block_v1.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = shrink_block_v1.h; path = ../../src/shrink_block_v1.h; sourceTree = "<group>"; };
 		0CADC5F222AAD8EB003E9821 /* lib.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = lib.h; path = ../../src/lib.h; sourceTree = "<group>"; };
 		0CADC5F322AAD8EB003E9821 /* frame.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = frame.c; path = ../../src/frame.c; sourceTree = "<group>"; };
 		0CADC5F422AAD8EB003E9821 /* matchfinder.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = matchfinder.c; path = ../../src/matchfinder.c; sourceTree = "<group>"; };
 		0CADC5F522AAD8EB003E9821 /* matchfinder.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = matchfinder.h; path = ../../src/matchfinder.h; sourceTree = "<group>"; };
 		0CADC5F622AAD8EB003E9821 /* dictionary.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = dictionary.h; path = ../../src/dictionary.h; sourceTree = "<group>"; };
 		0CADC5F722AAD8EB003E9821 /* shrink_context.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = shrink_context.h; path = ../../src/shrink_context.h; sourceTree = "<group>"; };
 		0CADC5F822AAD8EB003E9821 /* shrink_inmem.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = shrink_inmem.h; path = ../../src/shrink_inmem.h; sourceTree = "<group>"; };
 		0CADC5F922AAD8EB003E9821 /* expand_block_v2.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = expand_block_v2.h; path = ../../src/expand_block_v2.h; sourceTree = "<group>"; };
 		0CADC5FA22AAD8EB003E9821 /* shrink_block_v1.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = shrink_block_v1.c; path = ../../src/shrink_block_v1.c; sourceTree = "<group>"; };
 		0CADC5FB22AAD8EB003E9821 /* expand_context.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = expand_context.h; path = ../../src/expand_context.h; sourceTree = "<group>"; };
 		0CADC60922AAD8EB003E9821 /* divsufsort_private.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = divsufsort_private.h; sourceTree = "<group>"; };
 		0CADC60A22AAD8EB003E9821 /* divsufsort.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = divsufsort.h; sourceTree = "<group>"; };
 		0CADC61622AAD8EB003E9821 /* trsort.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = trsort.c; sourceTree = "<group>"; };
 		0CADC61722AAD8EB003E9821 /* divsufsort.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = divsufsort.c; sourceTree = "<group>"; };
 		0CADC61922AAD8EB003E9821 /* sssort.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = sssort.c; sourceTree = "<group>"; };
 		0CADC62122AAD8EB003E9821 /* expand_block_v1.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = expand_block_v1.c; path = ../../src/expand_block_v1.c; sourceTree = "<group>"; };
 		0CADC62222AAD8EB003E9821 /* lzsa.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = lzsa.c; path = ../../src/lzsa.c; sourceTree = "<group>"; };
 		0CADC62322AAD8EB003E9821 /* shrink_streaming.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = shrink_streaming.c; path = ../../src/shrink_streaming.c; sourceTree = "<group>"; };
 		0CADC62422AAD8EB003E9821 /* format.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = format.h; path = ../../src/format.h; sourceTree = "<group>"; };
 		0CADC62522AAD8EB003E9821 /* expand_inmem.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = expand_inmem.c; path = ../../src/expand_inmem.c; sourceTree = "<group>"; };
 		0CADC62622AAD8EB003E9821 /* shrink_block_v2.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = shrink_block_v2.h; path = ../../src/shrink_block_v2.h; sourceTree = "<group>"; };
 		0CADC62722AAD8EB003E9821 /* expand_inmem.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = expand_inmem.h; path = ../../src/expand_inmem.h; sourceTree = "<group>"; };
 		0CADC62822AAD8EB003E9821 /* shrink_streaming.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = shrink_streaming.h; path = ../../src/shrink_streaming.h; sourceTree = "<group>"; };
 		0CADC62922AAD8EB003E9821 /* stream.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = stream.c; path = ../../src/stream.c; sourceTree = "<group>"; };
 		0CADC62A22AAD8EB003E9821 /* expand_block_v2.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = expand_block_v2.c; path = ../../src/expand_block_v2.c; sourceTree = "<group>"; };
 		0CADC62B22AAD8EB003E9821 /* shrink_context.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = shrink_context.c; path = ../../src/shrink_context.c; sourceTree = "<group>"; };
 		0CADC62C22AAD8EB003E9821 /* frame.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = frame.h; path = ../../src/frame.h; sourceTree = "<group>"; };
 		0CADC62D22AAD8EB003E9821 /* expand_streaming.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = expand_streaming.c; path = ../../src/expand_streaming.c; sourceTree = "<group>"; };
 		0CADC62E22AAD8EB003E9821 /* dictionary.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = dictionary.c; path = ../../src/dictionary.c; sourceTree = "<group>"; };
 		0CADC62F22AAD8EB003E9821 /* expand_context.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = expand_context.c; path = ../../src/expand_context.c; sourceTree = "<group>"; };
 		0CADC63022AAD8EB003E9821 /* shrink_block_v2.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = shrink_block_v2.c; path = ../../src/shrink_block_v2.c; sourceTree = "<group>"; };
 		0CADC64922AB8DAD003E9821 /* divsufsort_utils.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = divsufsort_utils.c; sourceTree = "<group>"; };
 		0CADC64B22AB8DC3003E9821 /* divsufsort_config.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = divsufsort_config.h; sourceTree = "<group>"; };
 /* End PBXFileReference section */
 /* Begin PBXFrameworksBuildPhase section */
 		0CADC57522A65EA4003E9821 /* Frameworks */ = {
 			isa = PBXFrameworksBuildPhase;
 			buildActionMask = 2147483647;
 			files = (
 			);
 			runOnlyForDeploymentPostprocessing = 0;
 		};
 /* End PBXFrameworksBuildPhase section */
 /* Begin PBXGroup section */
 		0CADC56F22A65EA4003E9821 = {
 			isa = PBXGroup;
 			children = (
 				0CADC57A22A65EA5003E9821 /* lzsa */,
 				0CADC57922A65EA5003E9821 /* Products */,
 			);
 			sourceTree = "<group>";
 		};
 		0CADC57922A65EA5003E9821 /* Products */ = {
 			isa = PBXGroup;
 			children = (
 				0CADC57822A65EA5003E9821 /* lzsa */,
 			);
 			name = Products;
 			sourceTree = "<group>";
 		};
 		0CADC57A22A65EA5003E9821 /* lzsa */ = {
 			isa = PBXGroup;
 			children = (
 				0CADC62E22AAD8EB003E9821 /* dictionary.c */,
 				0CADC5F622AAD8EB003E9821 /* dictionary.h */,
 				0CADC62122AAD8EB003E9821 /* expand_block_v1.c */,
 				0CADC5F022AAD8EB003E9821 /* expand_block_v1.h */,
 				0CADC62A22AAD8EB003E9821 /* expand_block_v2.c */,
 				0CADC5F922AAD8EB003E9821 /* expand_block_v2.h */,
 				0CADC62F22AAD8EB003E9821 /* expand_context.c */,
 				0CADC5FB22AAD8EB003E9821 /* expand_context.h */,
 				0CADC62522AAD8EB003E9821 /* expand_inmem.c */,
 				0CADC62722AAD8EB003E9821 /* expand_inmem.h */,
 				0CADC62D22AAD8EB003E9821 /* expand_streaming.c */,
 				0CADC5ED22AAD8EA003E9821 /* expand_streaming.h */,
 				0CADC62422AAD8EB003E9821 /* format.h */,
 				0CADC5F322AAD8EB003E9821 /* frame.c */,
 				0CADC62C22AAD8EB003E9821 /* frame.h */,
 				0CADC5F222AAD8EB003E9821 /* lib.h */,
 				0CADC5FC22AAD8EB003E9821 /* libdivsufsort */,
 				0CADC62222AAD8EB003E9821 /* lzsa.c */,
 				0CADC5F422AAD8EB003E9821 /* matchfinder.c */,
 				0CADC5F522AAD8EB003E9821 /* matchfinder.h */,
 				0CADC5FA22AAD8EB003E9821 /* shrink_block_v1.c */,
 				0CADC5F122AAD8EB003E9821 /* shrink_block_v1.h */,
 				0CADC63022AAD8EB003E9821 /* shrink_block_v2.c */,
 				0CADC62622AAD8EB003E9821 /* shrink_block_v2.h */,
 				0CADC62B22AAD8EB003E9821 /* shrink_context.c */,
 				0CADC5F722AAD8EB003E9821 /* shrink_context.h */,
 				0CADC5EE22AAD8EA003E9821 /* shrink_inmem.c */,
 				0CADC5F822AAD8EB003E9821 /* shrink_inmem.h */,
 				0CADC62322AAD8EB003E9821 /* shrink_streaming.c */,
 				0CADC62822AAD8EB003E9821 /* shrink_streaming.h */,
 				0CADC62922AAD8EB003E9821 /* stream.c */,
 				0CADC5EF22AAD8EB003E9821 /* stream.h */,
 			);
 			path = lzsa;
 			sourceTree = "<group>";
 		};
 		0CADC5FC22AAD8EB003E9821 /* libdivsufsort */ = {
 			isa = PBXGroup;
 			children = (
 				0CADC60322AAD8EB003E9821 /* include */,
 				0CADC61422AAD8EB003E9821 /* lib */,
 			);
 			name = libdivsufsort;
 			path = ../../src/libdivsufsort;
 			sourceTree = "<group>";
 		};
 		0CADC60322AAD8EB003E9821 /* include */ = {
 			isa = PBXGroup;
 			children = (
 				0CADC64B22AB8DC3003E9821 /* divsufsort_config.h */,
 				0CADC60922AAD8EB003E9821 /* divsufsort_private.h */,
 				0CADC60A22AAD8EB003E9821 /* divsufsort.h */,
 			);
 			path = include;
 			sourceTree = "<group>";
 		};
 		0CADC61422AAD8EB003E9821 /* lib */ = {
 			isa = PBXGroup;
 			children = (
 				0CADC64922AB8DAD003E9821 /* divsufsort_utils.c */,
 				0CADC61622AAD8EB003E9821 /* trsort.c */,
 				0CADC61722AAD8EB003E9821 /* divsufsort.c */,
 				0CADC61922AAD8EB003E9821 /* sssort.c */,
 			);
 			path = lib;
 			sourceTree = "<group>";
 		};
 /* End PBXGroup section */
 /* Begin PBXNativeTarget section */
 		0CADC57722A65EA4003E9821 /* lzsa */ = {
 			isa = PBXNativeTarget;
 			buildConfigurationList = 0CADC57F22A65EA5003E9821 /* Build configuration list for PBXNativeTarget "lzsa" */;
 			buildPhases = (
 				0CADC57422A65EA4003E9821 /* Sources */,
 				0CADC57522A65EA4003E9821 /* Frameworks */,
 				0CADC57622A65EA4003E9821 /* CopyFiles */,
 			);
 			buildRules = (
 			);
 			dependencies = (
 			);
 			name = lzsa;
 			productName = lzsa;
 			productReference = 0CADC57822A65EA5003E9821 /* lzsa */;
 			productType = "com.apple.product-type.tool";
 		};
 /* End PBXNativeTarget section */
 /* Begin PBXProject section */
 		0CADC57022A65EA4003E9821 /* Project object */ = {
 			isa = PBXProject;
 			attributes = {
 				LastUpgradeCheck = 1020;
 				ORGANIZATIONNAME = Emmanuel;
 				TargetAttributes = {
 					0CADC57722A65EA4003E9821 = {
 						CreatedOnToolsVersion = 10.2.1;
 					};
 				};
 			};
 			buildConfigurationList = 0CADC57322A65EA4003E9821 /* Build configuration list for PBXProject "lzsa" */;
 			compatibilityVersion = "Xcode 9.3";
 			developmentRegion = en;
 			hasScannedForEncodings = 0;
 			knownRegions = (
 				en,
 			);
 			mainGroup = 0CADC56F22A65EA4003E9821;
 			productRefGroup = 0CADC57922A65EA5003E9821 /* Products */;
 			projectDirPath = "";
 			projectRoot = "";
 			targets = (
 				0CADC57722A65EA4003E9821 /* lzsa */,
 			);
 		};
 /* End PBXProject section */
 /* Begin PBXSourcesBuildPhase section */
 		0CADC57422A65EA4003E9821 /* Sources */ = {
 			isa = PBXSourcesBuildPhase;
 			buildActionMask = 2147483647;
 			files = (
 				0CADC64822AAD8EB003E9821 /* shrink_block_v2.c in Sources */,
 				0CADC63D22AAD8EB003E9821 /* sssort.c in Sources */,
 				0CADC64322AAD8EB003E9821 /* expand_block_v2.c in Sources */,
 				0CADC63F22AAD8EB003E9821 /* lzsa.c in Sources */,
 				0CADC64422AAD8EB003E9821 /* shrink_context.c in Sources */,
 				0CADC64522AAD8EB003E9821 /* expand_streaming.c in Sources */,
 				0CADC63E22AAD8EB003E9821 /* expand_block_v1.c in Sources */,
 				0CADC63122AAD8EB003E9821 /* shrink_inmem.c in Sources */,
 				0CADC63B22AAD8EB003E9821 /* divsufsort.c in Sources */,
 				0CADC64622AAD8EB003E9821 /* dictionary.c in Sources */,
 				0CADC63422AAD8EB003E9821 /* shrink_block_v1.c in Sources */,
 				0CADC64A22AB8DAD003E9821 /* divsufsort_utils.c in Sources */,
 				0CADC64222AAD8EB003E9821 /* stream.c in Sources */,
 				0CADC64022AAD8EB003E9821 /* shrink_streaming.c in Sources */,
 				0CADC63A22AAD8EB003E9821 /* trsort.c in Sources */,
 				0CADC64122AAD8EB003E9821 /* expand_inmem.c in Sources */,
 				0CADC63322AAD8EB003E9821 /* matchfinder.c in Sources */,
 				0CADC64722AAD8EB003E9821 /* expand_context.c in Sources */,
 				0CADC63222AAD8EB003E9821 /* frame.c in Sources */,
 			);
 			runOnlyForDeploymentPostprocessing = 0;
 		};
 /* End PBXSourcesBuildPhase section */
 /* Begin XCBuildConfiguration section */
 		0CADC57D22A65EA5003E9821 /* Debug */ = {
 			isa = XCBuildConfiguration;
 			buildSettings = {
 				ALWAYS_SEARCH_USER_PATHS = NO;
 				CLANG_ANALYZER_NONNULL = YES;
 				CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE;
 				CLANG_CXX_LANGUAGE_STANDARD = "gnu++14";
 				CLANG_CXX_LIBRARY = "libc++";
 				CLANG_ENABLE_MODULES = YES;
 				CLANG_ENABLE_OBJC_ARC = YES;
 				CLANG_ENABLE_OBJC_WEAK = YES;
 				CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES;
 				CLANG_WARN_BOOL_CONVERSION = YES;
 				CLANG_WARN_COMMA = YES;
 				CLANG_WARN_CONSTANT_CONVERSION = YES;
 				CLANG_WARN_DEPRECATED_OBJC_IMPLEMENTATIONS = YES;
 				CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR;
 				CLANG_WARN_DOCUMENTATION_COMMENTS = YES;
 				CLANG_WARN_EMPTY_BODY = YES;
 				CLANG_WARN_ENUM_CONVERSION = YES;
 				CLANG_WARN_INFINITE_RECURSION = YES;
 				CLANG_WARN_INT_CONVERSION = YES;
 				CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES;
 				CLANG_WARN_OBJC_IMPLICIT_RETAIN_SELF = YES;
 				CLANG_WARN_OBJC_LITERAL_CONVERSION = YES;
 				CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR;
 				CLANG_WARN_RANGE_LOOP_ANALYSIS = YES;
 				CLANG_WARN_STRICT_PROTOTYPES = YES;
 				CLANG_WARN_SUSPICIOUS_MOVE = YES;
 				CLANG_WARN_UNGUARDED_AVAILABILITY = YES_AGGRESSIVE;
 				CLANG_WARN_UNREACHABLE_CODE = YES;
 				CLANG_WARN__DUPLICATE_METHOD_MATCH = YES;
 				CODE_SIGN_IDENTITY = "-";
 				COPY_PHASE_STRIP = NO;
 				DEBUG_INFORMATION_FORMAT = dwarf;
 				ENABLE_STRICT_OBJC_MSGSEND = YES;
 				ENABLE_TESTABILITY = YES;
 				GCC_C_LANGUAGE_STANDARD = c99;
 				GCC_DYNAMIC_NO_PIC = NO;
 				GCC_NO_COMMON_BLOCKS = YES;
 				GCC_OPTIMIZATION_LEVEL = 0;
 				GCC_PREPROCESSOR_DEFINITIONS = (
 					"DEBUG=1",
 					"$(inherited)",
 				);
 				GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
 				GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR;
 				GCC_WARN_UNDECLARED_SELECTOR = YES;
 				GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE;
 				GCC_WARN_UNUSED_FUNCTION = YES;
 				GCC_WARN_UNUSED_VARIABLE = YES;
 				HEADER_SEARCH_PATHS = (
 					../src/libdivsufsort/include,
 					../src/xxhash,
 					../src,
 				);
 				LLVM_LTO = YES;
 				MACOSX_DEPLOYMENT_TARGET = 10.8;
 				MTL_ENABLE_DEBUG_INFO = INCLUDE_SOURCE;
 				MTL_FAST_MATH = YES;
 				ONLY_ACTIVE_ARCH = YES;
 				OTHER_CFLAGS = "";
 				SDKROOT = macosx;
 			};
 			name = Debug;
 		};
 		0CADC57E22A65EA5003E9821 /* Release */ = {
 			isa = XCBuildConfiguration;
 			buildSettings = {
 				ALWAYS_SEARCH_USER_PATHS = NO;
 				CLANG_ANALYZER_NONNULL = YES;
 				CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE;
 				CLANG_CXX_LANGUAGE_STANDARD = "gnu++14";
 				CLANG_CXX_LIBRARY = "libc++";
 				CLANG_ENABLE_MODULES = YES;
 				CLANG_ENABLE_OBJC_ARC = YES;
 				CLANG_ENABLE_OBJC_WEAK = YES;
 				CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES;
 				CLANG_WARN_BOOL_CONVERSION = YES;
 				CLANG_WARN_COMMA = YES;
 				CLANG_WARN_CONSTANT_CONVERSION = YES;
 				CLANG_WARN_DEPRECATED_OBJC_IMPLEMENTATIONS = YES;
 				CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR;
 				CLANG_WARN_DOCUMENTATION_COMMENTS = YES;
 				CLANG_WARN_EMPTY_BODY = YES;
 				CLANG_WARN_ENUM_CONVERSION = YES;
 				CLANG_WARN_INFINITE_RECURSION = YES;
 				CLANG_WARN_INT_CONVERSION = YES;
 				CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES;
 				CLANG_WARN_OBJC_IMPLICIT_RETAIN_SELF = YES;
 				CLANG_WARN_OBJC_LITERAL_CONVERSION = YES;
 				CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR;
 				CLANG_WARN_RANGE_LOOP_ANALYSIS = YES;
 				CLANG_WARN_STRICT_PROTOTYPES = YES;
 				CLANG_WARN_SUSPICIOUS_MOVE = YES;
 				CLANG_WARN_UNGUARDED_AVAILABILITY = YES_AGGRESSIVE;
 				CLANG_WARN_UNREACHABLE_CODE = YES;
 				CLANG_WARN__DUPLICATE_METHOD_MATCH = YES;
 				CODE_SIGN_IDENTITY = "-";
 				COPY_PHASE_STRIP = NO;
 				DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym";
 				ENABLE_NS_ASSERTIONS = NO;
 				ENABLE_STRICT_OBJC_MSGSEND = YES;
 				GCC_C_LANGUAGE_STANDARD = c99;
 				GCC_NO_COMMON_BLOCKS = YES;
 				GCC_OPTIMIZATION_LEVEL = 3;
 				GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
 				GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR;
 				GCC_WARN_UNDECLARED_SELECTOR = YES;
 				GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE;
 				GCC_WARN_UNUSED_FUNCTION = YES;
 				GCC_WARN_UNUSED_VARIABLE = YES;
 				HEADER_SEARCH_PATHS = (
 					../src/libdivsufsort/include,
 					../src/xxhash,
 					../src,
 				);
 				LLVM_LTO = YES;
 				MACOSX_DEPLOYMENT_TARGET = 10.8;
 				MTL_ENABLE_DEBUG_INFO = NO;
 				MTL_FAST_MATH = YES;
 				OTHER_CFLAGS = "";
 				SDKROOT = macosx;
 			};
 			name = Release;
 		};
 		0CADC58022A65EA5003E9821 /* Debug */ = {
 			isa = XCBuildConfiguration;
 			buildSettings = {
 				CODE_SIGN_STYLE = Automatic;
 				PRODUCT_NAME = "$(TARGET_NAME)";
 			};
 			name = Debug;
 		};
 		0CADC58122A65EA5003E9821 /* Release */ = {
 			isa = XCBuildConfiguration;
 			buildSettings = {
 				CODE_SIGN_STYLE = Automatic;
 				PRODUCT_NAME = "$(TARGET_NAME)";
 			};
 			name = Release;
 		};
 /* End XCBuildConfiguration section */
 /* Begin XCConfigurationList section */
 		0CADC57322A65EA4003E9821 /* Build configuration list for PBXProject "lzsa" */ = {
 			isa = XCConfigurationList;
 			buildConfigurations = (
 				0CADC57D22A65EA5003E9821 /* Debug */,
 				0CADC57E22A65EA5003E9821 /* Release */,
 			);
 			defaultConfigurationIsVisible = 0;
 			defaultConfigurationName = Release;
 		};
 		0CADC57F22A65EA5003E9821 /* Build configuration list for PBXNativeTarget "lzsa" */ = {
 			isa = XCConfigurationList;
 			buildConfigurations = (
 				0CADC58022A65EA5003E9821 /* Debug */,
 				0CADC58122A65EA5003E9821 /* Release */,
 			);
 			defaultConfigurationIsVisible = 0;
 			defaultConfigurationName = Release;
 		};
 /* End XCConfigurationList section */
 	};
 	rootObject = 0CADC57022A65EA4003E9821 /* Project object */;
 }
--- a/Tools/unix/lzsa/asm/6502/decompress_fast_v1.asm
+++ b/Tools/unix/lzsa/asm/6502/decompress_fast_v1.asm
@ -0,0 +1,305 @@
 ; -----------------------------------------------------------------------------
 ; Decompress raw LZSA1 block. Create one with lzsa -r <original_file> <compressed_file>
 ;
 ; in:
 ; * LZSA_SRC_LO and LZSA_SRC_HI contain the compressed raw block address
 ; * LZSA_DST_LO and LZSA_DST_HI contain the destination buffer address
 ;
 ; out:
 ; * LZSA_DST_LO and LZSA_DST_HI contain the last decompressed byte address, +1
 ;
 ; -----------------------------------------------------------------------------
 ; Backward decompression is also supported, use lzsa -r -b <original_file> <compressed_file>
 ; To use it, also define BACKWARD_DECOMPRESS=1 before including this code!
 ;
 ; in:
 ; * LZSA_SRC_LO/LZSA_SRC_HI must contain the address of the last byte of compressed data
 ; * LZSA_DST_LO/LZSA_DST_HI must contain the address of the last byte of the destination buffer
 ;
 ; out:
 ; * LZSA_DST_LO/LZSA_DST_HI contain the last decompressed byte address, -1
 ;
 ; -----------------------------------------------------------------------------
 ;
 ;  Copyright (C) 2019 Emmanuel Marty, Peter Ferrie
 ;
 ;  This software is provided 'as-is', without any express or implied
 ;  warranty.  In no event will the authors be held liable for any damages
 ;  arising from the use of this software.
 ;
 ;  Permission is granted to anyone to use this software for any purpose,
 ;  including commercial applications, and to alter it and redistribute it
 ;  freely, subject to the following restrictions:
 ;
 ;  1. The origin of this software must not be misrepresented; you must not
 ;     claim that you wrote the original software. If you use this software
 ;     in a product, an acknowledgment in the product documentation would be
 ;     appreciated but is not required.
 ;  2. Altered source versions must be plainly marked as such, and must not be
 ;     misrepresented as being the original software.
 ;  3. This notice may not be removed or altered from any source distribution.
 ; -----------------------------------------------------------------------------
 ; Entry point. Y is cleared here and is back at zero whenever the match copy
 ; loop branches to DECODE_TOKEN; the copy loops use X as the low byte and Y as
 ; the high byte of their byte counter.
 DECOMPRESS_LZSA1_FAST
   LDY #$00
 ; Start of one command: fetch the token and decode its literals count.
 DECODE_TOKEN
   JSR GETSRC                           ; read token byte: O|LLL|MMMM
   PHA                                  ; preserve token on stack
   AND #$70                             ; isolate literals count
   BEQ NO_LITERALS                      ; skip if no literals to copy
   CMP #$70                             ; LITERALS_RUN_LEN?
   BNE PREPARE_COPY_LITERALS            ; if not, count is directly embedded in token
   JSR GETSRC                           ; get extra byte of variable literals count
                                        ; the carry is always set by the CMP above
                                        ; GETSRC doesn't change it
   SBC #$F9                             ; (LITERALS_RUN_LEN) with carry set this is
                                        ; the same as adding 7 modulo 256
   BCC PREPARE_COPY_LITERALS_DIRECT     ; borrow: extra byte was below $F9 and
                                        ; A now holds the count (extra byte + 7)
   BEQ LARGE_VARLEN_LITERALS            ; if adding up to zero, go grab 16-bit count
   JSR GETSRC                           ; get single extended byte of variable literals count
   INY                                  ; add 256 to literals count
   BCS PREPARE_COPY_LITERALS_DIRECT     ; (*like JMP PREPARE_COPY_LITERALS_DIRECT but shorter)
                                        ; carry is still set from the SBC above
 LARGE_VARLEN_LITERALS                   ; handle 16-bit literals count
                                        ; literals count = directly these 16 bits
   JSR GETLARGESRC                      ; grab low 8 bits in X, high 8 bits in A
   TAY                                  ; put high 8 bits in Y
   TXA                                  ; low 8 bits back in A; sets Z for the BEQ
                                        ; at PREPARE_COPY_LARGE_LITERALS
   JMP PREPARE_COPY_LARGE_LITERALS
 PREPARE_COPY_LITERALS
   TAX                                  ; X = token AND $70, a non-zero multiple of $10
   LDA SHIFT_TABLE-1,X                  ; shift literals length into place
                                        ; -1 because position 00 is reserved
 PREPARE_COPY_LITERALS_DIRECT
   TAX                                  ; X = low 8 bits of literals count
 PREPARE_COPY_LARGE_LITERALS
   BEQ COPY_LITERALS                    ; low byte zero: Y alone counts the 256-byte passes
   INY                                  ; otherwise one more pass for the partial run of X bytes
 COPY_LITERALS
   JSR GETPUT                           ; copy one byte of literals
   DEX
   BNE COPY_LITERALS
   DEY
   BNE COPY_LITERALS                    ; falls through with X = 0 and Y = 0
 ; Literals are done (or absent): use the token's O bit (bit 7) to pick the
 ; size of the match offset that follows in the stream.
 NO_LITERALS
   PLA                                  ; retrieve token from stack
   PHA                                  ; preserve token again
   BMI GET_LONG_OFFSET                  ; $80: 16 bit offset
   JSR GETSRC                           ; get 8 bit offset from stream in A
   TAX                                  ; save for later
   LDA #$FF                             ; high 8 bits
   BNE GOT_OFFSET                       ; go prepare match
                                        ; (*like JMP GOT_OFFSET but shorter)
                                        ; always taken: A is $FF, so Z is clear
 ; Match length needed one extra byte: length = 256 + that byte.
 SHORT_VARLEN_MATCHLEN
   JSR GETSRC                           ; get single extended byte of variable match len
   INY                                  ; add 256 to match length
 ; Here A = low 8 bits of the match length, Y = high 8 bits.
 PREPARE_COPY_MATCH
   TAX
 ; Here X = low 8 bits of the match length, Y = high 8 bits.
 PREPARE_COPY_MATCH_Y
   TXA                                  ; set Z from the low byte
   BEQ COPY_MATCH_LOOP                  ; low byte zero: Y alone counts the 256-byte passes
   INY                                  ; otherwise one more pass for the partial run of X bytes
 ; The operand of the LDA below is a placeholder: the code at GOT_OFFSET stores
 ; the back reference address into COPY_MATCH_LOOP+1/+2, and this loop then
 ; steps that operand in place after every byte copied.
 COPY_MATCH_LOOP
   LDA $AAAA                            ; get one byte of backreference
   JSR PUTDST                           ; copy to destination
 !ifdef BACKWARD_DECOMPRESS {
   ; Backward decompression -- put backreference bytes backward
   LDA COPY_MATCH_LOOP+1                ; low byte zero means the decrement will borrow,
   BEQ GETMATCH_ADJ_HI                  ; so decrement the high byte first
 GETMATCH_DONE
   DEC COPY_MATCH_LOOP+1
 } else {
   ; Forward decompression -- put backreference bytes forward
   INC COPY_MATCH_LOOP+1
   BEQ GETMATCH_ADJ_HI                  ; low byte wrapped to zero: carry into the high byte
 GETMATCH_DONE
 }
   DEX
   BNE COPY_MATCH_LOOP
   DEY
   BNE COPY_MATCH_LOOP
   BEQ DECODE_TOKEN                     ; (*like JMP DECODE_TOKEN but shorter)
                                        ; always taken: DEY just reached zero
 ; Out-of-line high byte adjustment for the back reference address.
 !ifdef BACKWARD_DECOMPRESS {
 GETMATCH_ADJ_HI
   DEC COPY_MATCH_LOOP+2
   JMP GETMATCH_DONE
 } else {
 GETMATCH_ADJ_HI
   INC COPY_MATCH_LOOP+2
   JMP GETMATCH_DONE
 }
 GET_LONG_OFFSET                         ; handle 16 bit offset:
   JSR GETLARGESRC                      ; grab low 8 bits in X, high 8 bits in A
 ; Here X = low 8 bits of the match offset, A = high 8 bits, and the token is
 ; still on the stack. Compute the back reference address from the current
 ; destination pointer (the operand of the STA at PUTDST) and patch it into
 ; the operand of the LDA at COPY_MATCH_LOOP.
 GOT_OFFSET
 !ifdef BACKWARD_DECOMPRESS {
   ; Backward decompression - subtract match offset
   STA OFFSHI                           ; store high 8 bits of offset
   STX OFFSLO                           ; store low 8 bits of offset
   SEC                                  ; subtract dest - match offset
   LDA PUTDST+1
   ; OFFSLO names the operand byte of the SBC that follows (patched by the STX above)
 OFFSLO = *+1
   SBC #$AA                             ; low 8 bits
   STA COPY_MATCH_LOOP+1                ; store back reference address
   LDA PUTDST+2
   ; OFFSHI names the operand byte of the SBC that follows (patched by the STA above)
 OFFSHI = *+1
   SBC #$AA                             ; high 8 bits
   STA COPY_MATCH_LOOP+2                ; store high 8 bits of address
   SEC                                  ; force carry set for the ADC #$02 below
 } else {
   ; Forward decompression - add match offset
   STA OFFSHI                           ; store high 8 bits of offset
   TXA
   CLC                                  ; add dest + match offset
   ADC PUTDST+1                         ; low 8 bits
   STA COPY_MATCH_LOOP+1                ; store back reference address
   ; OFFSHI names the operand byte of the LDA that follows (patched by the STA above)
 OFFSHI = *+1
   LDA #$AA                             ; high 8 bits
   ADC PUTDST+2
   STA COPY_MATCH_LOOP+2                ; store high 8 bits of address
 }
   PLA                                  ; retrieve token from stack again
   AND #$0F                             ; isolate match len (MMMM)
   ADC #$02                             ; +2 plus carry = +3; the carry comes from the
                                        ; SEC in the backward variant and from the
                                        ; high ADC in the forward variant, which this
                                        ; code relies on always setting it
   CMP #$12                             ; MATCH_RUN_LEN? ($0F + 3)
   BCC PREPARE_COPY_MATCH               ; if not, count is directly embedded in token
   JSR GETSRC                           ; get extra byte of variable match length
                                        ; the carry is always set by the CMP above
                                        ; GETSRC doesn't change it
   SBC #$EE                             ; add MATCH_RUN_LEN and MIN_MATCH_SIZE to match length
                                        ; (with carry set this adds $12 modulo 256)
   BCC PREPARE_COPY_MATCH               ; borrow: extra byte was below $EE, A holds the length
   BNE SHORT_VARLEN_MATCHLEN            ; non-zero: extra byte was above $EE, one more byte follows
                                        ; Handle 16-bit match length
   JSR GETLARGESRC                      ; grab low 8 bits in X, high 8 bits in A
   TAY                                  ; put high 8 bits in Y
                                        ; large match length with zero high byte?
   BNE PREPARE_COPY_MATCH_Y             ; if not, continue
 ; A 16-bit match length whose high byte is zero ends decompression.
 DECOMPRESSION_DONE
   RTS
 ; Lookup table that replaces four right shifts: read as SHIFT_TABLE-1,X it
 ; returns the high nibble of X for X = $01..$7F. Because index $00 is never
 ; looked up, the first row holds 15 entries instead of 16.
 ; PREPARE_COPY_LITERALS only reaches it with X = $10..$60 (in steps of $10),
 ; giving a literals count of 1..6.
 SHIFT_TABLE
   !BYTE     $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00
   !BYTE $01,$01,$01,$01,$01,$01,$01,$01,$01,$01,$01,$01,$01,$01,$01,$01
   !BYTE $02,$02,$02,$02,$02,$02,$02,$02,$02,$02,$02,$02,$02,$02,$02,$02
   !BYTE $03,$03,$03,$03,$03,$03,$03,$03,$03,$03,$03,$03,$03,$03,$03,$03
   !BYTE $04,$04,$04,$04,$04,$04,$04,$04,$04,$04,$04,$04,$04,$04,$04,$04
   !BYTE $05,$05,$05,$05,$05,$05,$05,$05,$05,$05,$05,$05,$05,$05,$05,$05
   !BYTE $06,$06,$06,$06,$06,$06,$06,$06,$06,$06,$06,$06,$06,$06,$06,$06
   !BYTE $07,$07,$07,$07,$07,$07,$07,$07,$07,$07,$07,$07,$07,$07,$07,$07
 ; Byte I/O helpers. The source and destination pointers are the operand bytes
 ; of the LDA at GETSRC and the STA at PUTDST: LZSA_SRC_LO/HI and
 ; LZSA_DST_LO/HI are defined as the addresses of those operand bytes, and the
 ; helpers step them in place after every byte.
 ;   GETPUT      - read one source byte and store it at the destination
 ;   PUTDST      - store A at the destination
 ;   GETLARGESRC - read two source bytes: first into X, second into A
 ;   GETSRC      - read one source byte into A
 !ifdef BACKWARD_DECOMPRESS {
   ; Backward decompression -- get and put bytes backward
 GETPUT
   JSR GETSRC                           ; then fall through to store the byte
 PUTDST
 LZSA_DST_LO = *+1
 LZSA_DST_HI = *+2
   STA $AAAA                            ; operand is the live destination pointer
   LDA PUTDST+1                         ; note: overwrites A in this variant
   BEQ PUTDST_ADJ_HI                    ; low byte zero: decrement must borrow
   DEC PUTDST+1
   RTS
 PUTDST_ADJ_HI
   DEC PUTDST+2
   DEC PUTDST+1
   RTS
 GETLARGESRC
   JSR GETSRC                           ; grab low 8 bits
   TAX                                  ; move to X
                                        ; fall through grab high 8 bits
 GETSRC
 LZSA_SRC_LO = *+1
 LZSA_SRC_HI = *+2
   LDA $AAAA                            ; operand is the live source pointer
   PHA                                  ; keep the fetched byte while A tests the pointer
   LDA GETSRC+1
   BEQ GETSRC_ADJ_HI                    ; low byte zero: decrement must borrow
   DEC GETSRC+1
   PLA                                  ; restore the fetched byte
   RTS
 GETSRC_ADJ_HI
   DEC GETSRC+2
   DEC GETSRC+1
   PLA                                  ; restore the fetched byte
   RTS
 } else {
   ; Forward decompression -- get and put bytes forward
 GETPUT
   JSR GETSRC                           ; then fall through to store the byte
 PUTDST
 LZSA_DST_LO = *+1
 LZSA_DST_HI = *+2
   STA $AAAA                            ; operand is the live destination pointer
   INC PUTDST+1
   BEQ PUTDST_ADJ_HI                    ; low byte wrapped to zero: carry into high byte
   RTS
 PUTDST_ADJ_HI
   INC PUTDST+2
   RTS
 GETLARGESRC
   JSR GETSRC                           ; grab low 8 bits
   TAX                                  ; move to X
                                        ; fall through grab high 8 bits
 GETSRC
 LZSA_SRC_LO = *+1
 LZSA_SRC_HI = *+2
   LDA $AAAA                            ; operand is the live source pointer
   INC GETSRC+1                         ; flags now reflect the pointer, not the byte in A
   BEQ GETSRC_ADJ_HI                    ; low byte wrapped to zero: carry into high byte
   RTS
 GETSRC_ADJ_HI
   INC GETSRC+2
   RTS
 }
--- a/Tools/unix/lzsa/asm/6502/decompress_fast_v2.asm
+++ b/Tools/unix/lzsa/asm/6502/decompress_fast_v2.asm
@ -0,0 +1,363 @@
 ; -----------------------------------------------------------------------------
 ; Decompress raw LZSA2 block.
 ; Create one with lzsa -r -f2 <original_file> <compressed_file>
 ;
 ; in:
 ; * LZSA_SRC_LO and LZSA_SRC_HI contain the compressed raw block address
 ; * LZSA_DST_LO and LZSA_DST_HI contain the destination buffer address
 ;
 ; out:
 ; * LZSA_DST_LO and LZSA_DST_HI contain the last decompressed byte address, +1
 ;
 ; -----------------------------------------------------------------------------
 ; Backward decompression is also supported, use lzsa -r -b -f2 <original_file> <compressed_file>
 ; To use it, also define BACKWARD_DECOMPRESS=1 before including this code!
 ;
 ; in:
 ; * LZSA_SRC_LO/LZSA_SRC_HI must contain the address of the last byte of compressed data
 ; * LZSA_DST_LO/LZSA_DST_HI must contain the address of the last byte of the destination buffer
 ;
 ; out:
 ; * LZSA_DST_LO/LZSA_DST_HI contain the last decompressed byte address, -1
 ;
 ; -----------------------------------------------------------------------------
 ;
 ;  Copyright (C) 2019 Emmanuel Marty, Peter Ferrie
 ;
 ;  This software is provided 'as-is', without any express or implied
 ;  warranty.  In no event will the authors be held liable for any damages
 ;  arising from the use of this software.
 ;
 ;  Permission is granted to anyone to use this software for any purpose,
 ;  including commercial applications, and to alter it and redistribute it
 ;  freely, subject to the following restrictions:
 ;
 ;  1. The origin of this software must not be misrepresented; you must not
 ;     claim that you wrote the original software. If you use this software
 ;     in a product, an acknowledgment in the product documentation would be
 ;     appreciated but is not required.
 ;  2. Altered source versions must be plainly marked as such, and must not be
 ;     misrepresented as being the original software.
 ;  3. This notice may not be removed or altered from any source distribution.
 ; -----------------------------------------------------------------------------
 NIBCOUNT = $FC                          ; zero-page flag: 1 while a low nibble is still buffered in NIBBLES, else 0
 ; Entry point. Y is cleared because it serves as the high byte of the copy
 ; counters below, and NIBCOUNT is cleared so that the first GETNIBBLE call
 ; fetches a fresh byte from the source.
 DECOMPRESS_LZSA2_FAST
   LDY #$00
   STY NIBCOUNT
 ; Main loop: fetch the next token and copy its literals, if any.
 ; The token is kept on the stack until the match length has been decoded.
 ; The literals count is built in Y:X (Y = high byte, X = low byte).
 DECODE_TOKEN
   JSR GETSRC                           ; read token byte: XYZ|LL|MMM
   PHA                                  ; preserve token on stack
   AND #$18                             ; isolate literals count (LL)
   BEQ NO_LITERALS                      ; skip if no literals to copy
   CMP #$18                             ; LITERALS_RUN_LEN_V2?
   BCC PREPARE_COPY_LITERALS            ; if less, count is directly embedded in token
   JSR GETNIBBLE                        ; get extra literals length nibble
                                        ; (GETNIBBLE returns with the carry set on both of its paths)
                                        ; add nibble to len from token
   ADC #$02                             ; (LITERALS_RUN_LEN_V2) minus carry
   CMP #$12                             ; LITERALS_RUN_LEN_V2 + 15 ?
   BCC PREPARE_COPY_LITERALS_DIRECT     ; if less, literals count is complete
   JSR GETSRC                           ; get extra byte of variable literals count
                                        ; the carry is always set by the CMP above
                                        ; GETSRC doesn't change it
   SBC #$EE                             ; same as adding $12 modulo 256; carry stays set when byte >= $EE
   JMP PREPARE_COPY_LITERALS_DIRECT     ; carry set there means a 16-bit count follows
 PREPARE_COPY_LITERALS_LARGE
                                        ; handle 16 bits literals count
                                        ; literals count = directly these 16 bits
   JSR GETLARGESRC                      ; grab low 8 bits in X, high 8 bits in A
   TAY                                  ; put high 8 bits in Y
   BCS PREPARE_COPY_LITERALS_HIGH       ; (*same as JMP PREPARE_COPY_LITERALS_HIGH but shorter)
                                        ; carry is still set: it was set on entry and GETSRC/TAX/TAY leave it alone
 PREPARE_COPY_LITERALS
   LSR                                  ; shift literals count into place
   LSR
   LSR                                  ; the last bit shifted out is 0, so the carry ends up clear
 PREPARE_COPY_LITERALS_DIRECT
   TAX                                  ; low byte of count in X
   BCS PREPARE_COPY_LITERALS_LARGE      ; if so, literals count is large
 PREPARE_COPY_LITERALS_HIGH
   TXA
   BEQ COPY_LITERALS                    ; low byte is zero: only whole 256-byte passes remain
   INY                                  ; otherwise one extra (partial) pass of the outer loop is needed
 COPY_LITERALS
   JSR GETPUT                           ; copy one byte of literals
   DEX
   BNE COPY_LITERALS
   DEY
   BNE COPY_LITERALS                    ; Y is back to zero when the loop ends
 ; Decode the match offset selected by the token's XYZ bits into X (high byte)
 ; and A (low byte), then patch it into OFFSHI/OFFSLO. A rep-match (XYZ=111)
 ; skips the stores and reuses the previously patched offset.
 NO_LITERALS
   PLA                                  ; retrieve token from stack
   PHA                                  ; preserve token again
   ASL                                  ; X bit -> carry
   BCS REPMATCH_OR_LARGE_OFFSET         ; 1YZ: rep-match or 13/16 bit offset
   ASL                                  ; 0YZ: 5 or 9 bit offset
   BCS OFFSET_9_BIT                     ; Y bit set: 9 bit offset
                                        ; 00Z: 5 bit offset
   LDX #$FF                             ; set offset bits 15-8 to 1
   JSR GETCOMBINEDBITS                  ; rotate inverted Z bit into bit 0, read nibble for bits 4-1
   ORA #$E0                             ; set bits 7-5 to 1
   BNE GOT_OFFSET_LO                    ; go store low byte of match offset and prepare match
                                        ; (always taken: A is non-zero after the ORA)
 OFFSET_9_BIT                            ; 01Z: 9 bit offset
   ;;ASL                                  ; shift Z (offset bit 8) in place
   ROL                                  ; Z (in bit 7) -> carry
   ROL                                  ; carry (Z) -> bit 0
   AND #$01                             ; keep only Z
   EOR #$FF                             ; set offset bits 15-9 to 1, bit 8 becomes inverted Z
   BNE GOT_OFFSET_HI                    ; go store high byte, read low byte of match offset and prepare match
                                        ; (*same as JMP GOT_OFFSET_HI but shorter)
 REPMATCH_OR_LARGE_OFFSET
   ASL                                  ; 13 bit offset?
   BCS REPMATCH_OR_16_BIT               ; handle rep-match or 16-bit offset if not
                                        ; 10Z: 13 bit offset
   JSR GETCOMBINEDBITS                  ; rotate inverted Z bit into bit 8, read nibble for bits 12-9
                                        ; (returns with the carry clear, so the ADC adds exactly $DE)
   ADC #$DE                             ; set bits 15-13 to 1 and subtract 2 (to subtract 512)
   BNE GOT_OFFSET_HI                    ; go store high byte, read low byte of match offset and prepare match
                                        ; (*same as JMP GOT_OFFSET_HI but shorter)
 REPMATCH_OR_16_BIT                      ; rep-match or 16 bit offset
   ;;ASL                                  ; XYZ=111?
   BMI REP_MATCH                        ; reuse previous offset if so (rep-match)
                                        ; N still reflects the last ASL: bit 7 of A is the Z bit
                                        ; 110: handle 16 bit offset
   JSR GETSRC                           ; grab high 8 bits
 GOT_OFFSET_HI
   TAX                                  ; high byte of offset in X
   JSR GETSRC                           ; grab low 8 bits
 GOT_OFFSET_LO
   STA OFFSLO                           ; store low byte of match offset
   STX OFFSHI                           ; store high byte of match offset
 ; Compute the back-reference address from the current destination pointer
 ; (the operand of PUTDST) and the match offset, and patch it into the operand
 ; of the LDA at COPY_MATCH_LOOP. OFFSLO/OFFSHI name the immediate operand
 ; bytes below, so the offset persists here for later rep-matches.
 REP_MATCH
 !ifdef BACKWARD_DECOMPRESS {
   ; Backward decompression - subtract match offset
   SEC                                  ; dest - match offset (no borrow going in)
   LDA PUTDST+1                         ; low 8 bits
 OFFSLO = *+1
   SBC #$AA                             ; operand is patched with the offset low byte
   STA COPY_MATCH_LOOP+1                ; store back reference address
   LDA PUTDST+2
 OFFSHI = *+1
   SBC #$AA                             ; high 8 bits
   STA COPY_MATCH_LOOP+2                ; store high 8 bits of address
   SEC                                  ; force carry for the ADC #$01 that follows this block
 } else {
   ; Forward decompression - add match offset
   CLC                                  ; add dest + match offset
   LDA PUTDST+1                         ; low 8 bits
 OFFSLO = *+1
   ADC #$AA                             ; operand is patched with the offset low byte
   STA COPY_MATCH_LOOP+1                ; store back reference address
 OFFSHI = *+1
   LDA #$AA                             ; high 8 bits
   ADC PUTDST+2
   STA COPY_MATCH_LOOP+2                ; store high 8 bits of address
                                        ; NOTE(review): no SEC here; the following ADC #$01 relies on the
                                        ; carry left by this addition -- presumably always set for a valid
                                        ; (negative) offset; confirm against the format
 }
   ; Decode the match length into Y:X (Y = high byte, X = low byte), or leave
   ; through DECOMPRESSION_DONE when the end-of-data code is found.
   PLA                                  ; retrieve token from stack again
   AND #$07                             ; isolate match len (MMM)
   ADC #$01                             ; add MIN_MATCH_SIZE_V2 and carry
   CMP #$09                             ; MIN_MATCH_SIZE_V2 + MATCH_RUN_LEN_V2?
   BCC PREPARE_COPY_MATCH               ; if less, length is directly embedded in token
   JSR GETNIBBLE                        ; get extra match length nibble
                                        ; (GETNIBBLE returns with the carry set on both of its paths)
                                        ; add nibble to len from token
   ADC #$08                             ; (MIN_MATCH_SIZE_V2 + MATCH_RUN_LEN_V2) minus carry
   CMP #$18                             ; MIN_MATCH_SIZE_V2 + MATCH_RUN_LEN_V2 + 15?
   BCC PREPARE_COPY_MATCH               ; if less, match length is complete
   JSR GETSRC                           ; get extra byte of variable match length
                                        ; the carry is always set by the CMP above
                                        ; GETSRC doesn't change it
   SBC #$E8                             ; same as adding $18 modulo 256; carry stays set when byte >= $E8
 PREPARE_COPY_MATCH
   TAX                                  ; low byte of length in X (also sets Z for the BEQ below)
   BCC PREPARE_COPY_MATCH_Y             ; if not, the match length is complete
   BEQ DECOMPRESSION_DONE               ; if EOD code, bail
                                        ; Handle 16 bits match length
   JSR GETLARGESRC                      ; grab low 8 bits in X, high 8 bits in A
   TAY                                  ; put high 8 bits in Y
 ; Copy Y:X bytes from the back-reference address to the destination, then
 ; return to DECODE_TOKEN. The source address is the operand of the LDA at
 ; COPY_MATCH_LOOP and is stepped by patching that operand after every byte.
 PREPARE_COPY_MATCH_Y
   TXA
   BEQ COPY_MATCH_LOOP                  ; low byte is zero: only whole 256-byte passes remain
   INY                                  ; otherwise one extra (partial) pass of the outer loop is needed
 COPY_MATCH_LOOP
   LDA $AAAA                            ; get one byte of backreference
   JSR PUTDST                           ; copy to destination
 !ifdef BACKWARD_DECOMPRESS {
   ; Backward decompression -- put backreference bytes backward
   LDA COPY_MATCH_LOOP+1
   BEQ GETMATCH_ADJ_HI                  ; low byte is zero: borrow from the high byte first
 GETMATCH_DONE
   DEC COPY_MATCH_LOOP+1
 } else {
   ; Forward decompression -- put backreference bytes forward
   INC COPY_MATCH_LOOP+1
   BEQ GETMATCH_ADJ_HI                  ; low byte wrapped to zero: carry into the high byte
 GETMATCH_DONE
 }
   DEX
   BNE COPY_MATCH_LOOP
   DEY
   BNE COPY_MATCH_LOOP                  ; Y is back to zero when the loop ends
   JMP DECODE_TOKEN
 !ifdef BACKWARD_DECOMPRESS {
 GETMATCH_ADJ_HI
   DEC COPY_MATCH_LOOP+2                ; step the high byte, then resume to decrement the low byte
   JMP GETMATCH_DONE
 } else {
 GETMATCH_ADJ_HI
   INC COPY_MATCH_LOOP+2                ; step the high byte, then resume the loop
   JMP GETMATCH_DONE
 }
 ; In:  A bit 7 = the token's Z bit.
 ; Out: A = (nibble << 1) | inverted Z, carry clear.
 ; The inverted Z bit is parked in the saved carry flag while GETNIBBLE runs.
 GETCOMBINEDBITS
   EOR #$80                             ; invert the Z bit
   ASL                                  ; inverted Z -> carry
   PHP                                  ; save it across the nibble read
   JSR GETNIBBLE                        ; get nibble into bits 0-3 (for offset bits 1-4)
   PLP                                  ; merge Z bit as the carry bit (for offset bit 0)
 COMBINEDBITZ
   ROL                                  ; nibble -> bits 1-4; carry(!Z bit) -> bit 0 ; carry cleared
 DECOMPRESSION_DONE                      ; shared exit: also the target of the end-of-data branch
   RTS
 ; Return the next 4-bit value in A (bits 3-0), with the carry set.
 ; Nibbles are consumed from a source byte high nibble first: the byte is
 ; buffered in NIBBLES (the immediate operand of the LDA below) and NIBCOUNT
 ; records whether its low nibble is still pending.
 GETNIBBLE
 NIBBLES = *+1
   LDA #$AA                             ; operand is patched with the last byte of nibbles read
   LSR NIBCOUNT                         ; pending flag -> carry, flag cleared
   BCC NEED_NIBBLES                     ; nothing buffered: read a new byte
   AND #$0F                             ; isolate low 4 bits of nibble
   RTS                                  ; carry is set here (shifted out of NIBCOUNT)
 NEED_NIBBLES
   INC NIBCOUNT                         ; mark the low nibble of the new byte as pending
   JSR GETSRC                           ; get 2 nibbles
   STA NIBBLES                          ; buffer the byte for the next call
   LSR                                  ; move the high nibble down into bits 3-0
   LSR
   LSR
   LSR
   SEC                                  ; return with carry set, like the buffered path
   RTS
 ; Byte I/O helpers. The source and destination pointers are the operands of
 ; the LDA/STA absolute instructions below ($AAAA is a placeholder), exposed
 ; as LZSA_SRC_LO/HI and LZSA_DST_LO/HI; each helper steps its pointer by
 ; patching those operand bytes. None of the instructions used here modifies
 ; the carry flag, which the decoder relies on.
 !ifdef BACKWARD_DECOMPRESS {
   ; Backward decompression -- get and put bytes backward
 GETPUT                                  ; copy one byte from source to destination
   JSR GETSRC
 PUTDST                                  ; store A at the destination pointer, then decrement the pointer
 LZSA_DST_LO = *+1
 LZSA_DST_HI = *+2
   STA $AAAA
   LDA PUTDST+1                         ; test the low pointer byte (this overwrites A)
   BEQ PUTDST_ADJ_HI                    ; low byte is zero: the decrement must borrow
   DEC PUTDST+1
   RTS
 PUTDST_ADJ_HI
   DEC PUTDST+2                         ; borrow from the high pointer byte
   DEC PUTDST+1                         ; low byte wraps from $00 to $FF
   RTS
 GETLARGESRC                             ; read two bytes: the first into X, the second into A
   JSR GETSRC                           ; grab low 8 bits
   TAX                                  ; move to X
                                        ; fall through grab high 8 bits
 GETSRC                                  ; read one byte at the source pointer into A, then decrement the pointer
 LZSA_SRC_LO = *+1
 LZSA_SRC_HI = *+2
   LDA $AAAA
   PHA                                  ; keep the fetched byte while A is used for the pointer test
   LDA GETSRC+1
   BEQ GETSRC_ADJ_HI                    ; low byte is zero: the decrement must borrow
   DEC GETSRC+1
   PLA                                  ; restore the fetched byte
   RTS
 GETSRC_ADJ_HI
   DEC GETSRC+2                         ; borrow from the high pointer byte
   DEC GETSRC+1                         ; low byte wraps from $00 to $FF
   PLA                                  ; restore the fetched byte
   RTS
 } else {
   ; Forward decompression -- get and put bytes forward
 GETPUT                                  ; copy one byte from source to destination
   JSR GETSRC
 PUTDST                                  ; store A at the destination pointer, then increment the pointer
 LZSA_DST_LO = *+1
 LZSA_DST_HI = *+2
   STA $AAAA
   INC PUTDST+1
   BEQ PUTDST_ADJ_HI                    ; low byte wrapped to zero: carry into the high byte
   RTS
 PUTDST_ADJ_HI
   INC PUTDST+2
   RTS
 GETLARGESRC                             ; read two bytes: the first into X, the second into A
   JSR GETSRC                           ; grab low 8 bits
   TAX                                  ; move to X
                                        ; fall through grab high 8 bits
 GETSRC                                  ; read one byte at the source pointer into A, then increment the pointer
 LZSA_SRC_LO = *+1
 LZSA_SRC_HI = *+2
   LDA $AAAA
   INC GETSRC+1
   BEQ GETSRC_ADJ_HI                    ; low byte wrapped to zero: carry into the high byte
   RTS
 GETSRC_ADJ_HI
   INC GETSRC+2
   RTS
 }
--- a/Tools/unix/lzsa/asm/6502/decompress_faster_v1.asm
+++ b/Tools/unix/lzsa/asm/6502/decompress_faster_v1.asm
@ -0,0 +1,392 @@
 ; ***************************************************************************
 ; ***************************************************************************
 ;
 ; lzsa1_6502.s
 ;
 ; NMOS 6502 decompressor for data stored in Emmanuel Marty's LZSA1 format.
 ;
 ; This code is written for the ACME assembler.
 ;
 ; Optional code is presented for one minor 6502 optimization that breaks
 ; compatibility with the current LZSA1 format standard.
 ;
 ; The code is 168 bytes for the small version, and 205 bytes for the normal.
 ;
 ; Copyright John Brandwood 2019.
 ;
 ; Distributed under the Boost Software License, Version 1.0.
 ; (See accompanying file LICENSE_1_0.txt or copy at
 ;  http://www.boost.org/LICENSE_1_0.txt)
 ;
 ; ***************************************************************************
 ; ***************************************************************************
 ; ***************************************************************************
 ; ***************************************************************************
 ;
 ; Decompression Options & Macros
 ;
                ;
                ; Save 6 bytes of code and 21 cycles by swapping the order
                ; of bytes in the 16-bit length encoding?
                ;
                ; N.B. Setting this breaks compatibility with LZSA v1.2
                ;
 LZSA_SWAP_LEN16 =       0
                ;
                ; Choose size over speed (within sane limits)?
                ;
                ; Setting this turns on all three of the space-saving
                ; options that follow.
                ;
 LZSA_SMALL_SIZE =       0
                ;
                ; Remove code inlining to save space?
                ;
                ; This saves 15 bytes of code at the cost of 7% speed.
                ;
                !if     LZSA_SMALL_SIZE {
 LZSA_NO_INLINE  =       1
                } else {
 LZSA_NO_INLINE  =       0
                }
                ;
                ; Use smaller code for copying literals?
                ;
                ; This saves 11 bytes of code at the cost of 15% speed.
                ;
                !if     LZSA_SMALL_SIZE {
 LZSA_SHORT_CP   =       1
                } else {
 LZSA_SHORT_CP   =       0
                }
                ;
                ; Use smaller code for copying matches from the window?
                ;
                ; This saves 11 bytes of code at the cost of 30% speed.
                ;
                !if     LZSA_SMALL_SIZE {
 LZSA_SHORT_LZ   =       1
                } else {
 LZSA_SHORT_LZ   =       0
                }
                ;
                ; Assume that we're decompressing from a large multi-bank
                ; compressed data file, and that the next bank may need to
                ; be paged in when a page-boundary is crossed.
                ;
 LZSA_FROM_BANK  =       0
                ;
                ; Macro to increment the source pointer to the next page.
                ;
                ; With LZSA_FROM_BANK set, this calls lzsa1_next_page so
                ; that it can determine if a bank has been crossed, and
                ; a new bank should be paged in. Otherwise it is a plain
                ; increment of the source pointer's hi-byte.
                ;
                !if     LZSA_FROM_BANK {
                        !macro  LZSA_INC_PAGE {
                        jsr     lzsa1_next_page
                        }
                } else {
                        !macro  LZSA_INC_PAGE {
                        inc     <lzsa_srcptr + 1
                        }
                }
                ;
                ; Macro to read a byte from the compressed source data.
                ;
                ; Returns the byte in A and advances lzsa_srcptr by one.
                ; With LZSA_NO_INLINE set it calls lzsa1_get_byte, else
                ; the same load-and-increment sequence is expanded inline.
                ; The inline form indexes with Y, which the caller keeps
                ; at zero.
                ;
                !if     LZSA_NO_INLINE {
                        !macro LZSA_GET_SRC {
                        jsr     lzsa1_get_byte
                        }
                } else {
                        !macro LZSA_GET_SRC {
                        lda     (lzsa_srcptr),y
                        inc     <lzsa_srcptr + 0
                        bne     .skip           ; No page crossing.
                        +LZSA_INC_PAGE
 .skip:
                        }
                }
 ; ***************************************************************************
 ; ***************************************************************************
 ;
 ; Data usage is last 8 bytes of zero-page.
 ;
 ; lzsa_length is only defined (and only used) when one of the short copy
 ; loops is selected. The LZSA_SRC_* / LZSA_DST_* names are aliases for the
 ; individual bytes of lzsa_srcptr and lzsa_dstptr.
 ;
                !if     (LZSA_SHORT_CP | LZSA_SHORT_LZ) {
 lzsa_length     =       $F8                     ; 1 byte.
                }
 lzsa_cmdbuf     =       $F9                     ; 1 byte.
 lzsa_winptr     =       $FA                     ; 1 word.
 lzsa_srcptr     =       $FC                     ; 1 word.
 lzsa_dstptr     =       $FE                     ; 1 word.
 LZSA_SRC_LO     =       $FC
 LZSA_SRC_HI     =       $FD
 LZSA_DST_LO     =       $FE
 LZSA_DST_HI     =       $FF
 ; ***************************************************************************
 ; ***************************************************************************
 ;
 ; lzsa1_unpack - Decompress data stored in Emmanuel Marty's LZSA1 format.
 ;
 ; Args: lzsa_srcptr = ptr to compressed data
 ; Args: lzsa_dstptr = ptr to output buffer
 ; Uses: lots!
 ;
 ; If compiled with LZSA_FROM_BANK, then lzsa_srcptr should be within the bank
 ; window range.
 ;
 ; Both pointers are advanced as data is consumed and produced. The routine
 ; returns through .finished, reached from .get_length at end of data.
 ;
 DECOMPRESS_LZSA1_FAST:
 lzsa1_unpack:   ldy     #0                      ; Initialize source index.
                ldx     #0                      ; Initialize hi-byte of length.
                ;
                ; Copy bytes from compressed source data.
                ;
                ; N.B. X=0 is expected and guaranteed when we get here.
                ;
 .cp_length:     +LZSA_GET_SRC
                sta     <lzsa_cmdbuf            ; Preserve this for later.
                and     #$70                    ; Extract literal length.
                beq     .lz_offset              ; Skip directly to match?
                lsr                             ; Get 3-bit literal length.
                lsr
                lsr
                lsr
                cmp     #$07                    ; Extended length?
                bne     .got_cp_len
                jsr     .get_length             ; X=0 in, 16-bit length out in X:A.
                !if     LZSA_SHORT_CP {
                ;
                ; Short version: copy a byte at a time, stepping both
                ; pointers, with X counting bytes and lzsa_length pages.
                ;
 .got_cp_len:    cmp     #0                      ; Check the lo-byte of length.
                beq     .put_cp_len
                inx                             ; Increment # of pages to copy.
 .put_cp_len:    stx     <lzsa_length
                tax
 .cp_page:       lda     (lzsa_srcptr),y
                sta     (lzsa_dstptr),y
                inc     <lzsa_srcptr + 0
                bne     .skip1
                inc     <lzsa_srcptr + 1
 .skip1:         inc     <lzsa_dstptr + 0
                bne     .skip2
                inc     <lzsa_dstptr + 1
 .skip2:         dex
                bne     .cp_page
                dec     <lzsa_length            ; Any full pages left to copy?
                bne     .cp_page
                } else {
                ;
                ; Fast version: for the partial page, move both pointers
                ; to (ptr + length - 256) and index with Y = -length, so
                ; the inner loop is just "iny / bne" and ends with Y=0.
                ;
 .got_cp_len:    tay                             ; Check the lo-byte of length.
                beq     .cp_page
                inx                             ; Increment # of pages to copy.
 .get_cp_src:    clc                             ; Calc address of partial page.
                adc     <lzsa_srcptr + 0
                sta     <lzsa_srcptr + 0
                bcs     .get_cp_dst
                dec     <lzsa_srcptr + 1        ; No carry, so apply the -256.
 .get_cp_dst:    tya
                clc                             ; Calc address of partial page.
                adc     <lzsa_dstptr + 0
                sta     <lzsa_dstptr + 0
                bcs     .get_cp_idx
                dec     <lzsa_dstptr + 1        ; No carry, so apply the -256.
 .get_cp_idx:    tya                             ; Negate the lo-byte of length.
                eor     #$FF
                tay
                iny
 .cp_page:       lda     (lzsa_srcptr),y
                sta     (lzsa_dstptr),y
                iny
                bne     .cp_page
                inc     <lzsa_srcptr + 1        ; Step both pointers to the
                inc     <lzsa_dstptr + 1        ; next page.
                dex                             ; Any full pages left to copy?
                bne     .cp_page
                }
                ;
                ; Copy bytes from decompressed window.
                ;
                ; N.B. X=0 is expected and guaranteed when we get here.
                ;
                ; The offset read from the stream is added to lzsa_dstptr
                ; to form lzsa_winptr. When bit 7 of the command byte is
                ; clear, no hi-byte is read and $FF is used instead.
                ;
 .lz_offset:     +LZSA_GET_SRC
                clc
                adc     <lzsa_dstptr + 0
                sta     <lzsa_winptr + 0
                lda     #$FF                    ; Default hi-byte of offset.
                bit     <lzsa_cmdbuf            ; N = bit 7 of the command.
                bpl     .hi_offset
                +LZSA_GET_SRC                   ; Leaves the carry untouched.
 .hi_offset:     adc     <lzsa_dstptr + 1
                sta     <lzsa_winptr + 1
 .lz_length:     lda     <lzsa_cmdbuf            ; X=0 from previous loop.
                and     #$0F
                adc     #$03 - 1                ; CS from previous ADC.
                cmp     #$12                    ; Extended length?
                bne     .got_lz_len
                jsr     .get_length             ; X=0 in, 16-bit length out in X:A.
                !if     LZSA_SHORT_LZ {
                ;
                ; Short version: copy a byte at a time, stepping both
                ; pointers, with X counting bytes and lzsa_length pages.
                ;
 .got_lz_len:    cmp     #0                      ; Check the lo-byte of length.
                beq     .put_lz_len
                inx                             ; Increment # of pages to copy.
 .put_lz_len:    stx     <lzsa_length
                tax
 .lz_page:       lda     (lzsa_winptr),y
                sta     (lzsa_dstptr),y
                inc     <lzsa_winptr + 0
                bne     .skip3
                inc     <lzsa_winptr + 1
 .skip3:         inc     <lzsa_dstptr + 0
                bne     .skip4
                inc     <lzsa_dstptr + 1
 .skip4:         dex
                bne     .lz_page
                dec     <lzsa_length            ; Any full pages left to copy?
                bne     .lz_page
                jmp     .cp_length              ; Loop around to the beginning.
                } else {
                ;
                ; Fast version: for the partial page, move both pointers
                ; to (ptr + length - 256) and index with Y = -length, so
                ; the inner loop is just "iny / bne" and ends with Y=0.
                ;
 .got_lz_len:    tay                             ; Check the lo-byte of length.
                beq     .lz_page
                inx                             ; Increment # of pages to copy.
 .get_lz_win:    clc                             ; Calc address of partial page.
                adc     <lzsa_winptr + 0
                sta     <lzsa_winptr + 0
                bcs     .get_lz_dst
                dec     <lzsa_winptr + 1        ; No carry, so apply the -256.
 .get_lz_dst:    tya
                clc                             ; Calc address of partial page.
                adc     <lzsa_dstptr + 0
                sta     <lzsa_dstptr + 0
                bcs     .get_lz_idx
                dec     <lzsa_dstptr + 1        ; No carry, so apply the -256.
 .get_lz_idx:    tya                             ; Negate the lo-byte of length.
                eor     #$FF
                tay
                iny
 .lz_page:       lda     (lzsa_winptr),y
                sta     (lzsa_dstptr),y
                iny
                bne     .lz_page
                inc     <lzsa_winptr + 1        ; Step both pointers to the
                inc     <lzsa_dstptr + 1        ; next page.
                dex                             ; Any full pages left to copy?
                bne     .lz_page
                jmp     .cp_length              ; Loop around to the beginning.
                }
                ;
                ; Get 16-bit length in X:A register pair.
                ;
                ; N.B. X=0 is expected and guaranteed when we get here.
                ;
                ; In:  A = length so far (the value that flagged extension).
                ; Out: X:A = final length.
                ;
                ; The next source byte is added to A:
                ;   no carry        -> done, the length fits in 8 bits
                ;   carry, sum != 0 -> one more byte follows; X=1, A=byte
                ;   carry, sum == 0 -> a 16-bit length follows; a zero
                ;                      hi-byte marks end of data
                ;
 .get_length:    clc                             ; Add on the next byte to get
                adc     (lzsa_srcptr),y         ; the length.
                inc     <lzsa_srcptr + 0
                bne     .skip_inc
                +LZSA_INC_PAGE
 .skip_inc:      bcc     .got_length             ; No overflow means done.
                cmp     #$00                    ; Overflow to 256 or 257?
                beq     .extra_word
 .extra_byte:    inx
                jmp     lzsa1_get_byte          ; So rare, this can be slow!
                !if     LZSA_SWAP_LEN16 {
                ;
                ; Swapped encoding: hi-byte first, then fall through into
                ; lzsa1_get_byte to read the lo-byte and return.
                ;
 .extra_word:    jsr     lzsa1_get_byte          ; So rare, this can be slow!
                tax
                beq     .finished               ; Length-hi == 0 at EOF.
                } else {
                ;
                ; Standard encoding: lo-byte first, kept on the stack
                ; while the hi-byte is read.
                ;
 .extra_word:    jsr     lzsa1_get_byte          ; So rare, this can be slow!
                pha
                jsr     lzsa1_get_byte          ; So rare, this can be slow!
                tax
                beq     .finished               ; Length-hi == 0 at EOF.
                pla                             ; Length-lo.
                rts
                }
 ;
 ; lzsa1_get_byte - Read the byte at lzsa_srcptr into A and advance the
 ; pointer by one, falling into lzsa1_next_page when the lo-byte wraps.
 ; Y must be zero on entry. The carry flag is left untouched.
 ;
 lzsa1_get_byte:
                lda     (lzsa_srcptr),y         ; Subroutine version for when
                inc     <lzsa_srcptr + 0        ; inlining isn't advantageous.
                beq     lzsa1_next_page
 .got_length:    rts                             ; Also the exit for .get_length.
 ;
 ; lzsa1_next_page - Advance the source pointer to the next page.
 ; NOTE(review): lzsa1_next_bank is not defined in the visible code; it
 ; presumably has to be supplied per target when LZSA_FROM_BANK is set.
 ;
 lzsa1_next_page:
                inc     <lzsa_srcptr + 1        ; Inc & test for bank overflow.
                !if     LZSA_FROM_BANK {
                bmi     lzsa1_next_bank         ; Change for target hardware!
                }
                rts
 ;
 ; .finished - End of data: unwind the stack and return to the caller of
 ; lzsa1_unpack.
 ;
 ; Reached from .extra_word, which runs inside the "jsr .get_length" call,
 ; so that call's return address (2 bytes) is still on the stack and must be
 ; discarded before the final rts.
 ;
 ; In the standard encoding .extra_word has also pushed the length lo-byte,
 ; so three bytes are popped. With LZSA_SWAP_LEN16 set nothing was pushed;
 ; popping a third byte there would eat one byte of the caller's return
 ; address, so the first pla is only assembled for the standard encoding.
 ;
 .finished:
                !if     LZSA_SWAP_LEN16 = 0 {
                pla                             ; Length-lo.
                }
                pla                             ; Decompression completed, pop
                pla                             ; return address.
                rts
--- a/Tools/unix/lzsa/asm/6502/decompress_faster_v2.asm
+++ b/Tools/unix/lzsa/asm/6502/decompress_faster_v2.asm
@ -0,0 +1,637 @@
 ; ***************************************************************************
 ; ***************************************************************************
 ;
 ; lzsa2_6502.s
 ;
 ; NMOS 6502 decompressor for data stored in Emmanuel Marty's LZSA2 format.
 ;
 ; This code is written for the ACME assembler.
 ;
 ; Optional code is presented for two minor 6502 optimizations that break
 ; compatibility with the current LZSA2 format standard.
 ;
 ; The code is 241 bytes for the small version, and 267 bytes for the normal.
 ;
 ; Copyright John Brandwood 2019.
 ;
 ; Distributed under the Boost Software License, Version 1.0.
 ; (See accompanying file LICENSE_1_0.txt or copy at
 ;  http://www.boost.org/LICENSE_1_0.txt)
 ;
 ; ***************************************************************************
 ; ***************************************************************************
 ; ***************************************************************************
 ; ***************************************************************************
 ;
 ; Decompression Options & Macros
 ;
                ;
                ; Save 7 bytes of code, and 21 cycles every time that a
                ; 16-bit length is decoded?
                ;
                ; N.B. Setting this breaks compatibility with LZSA v1.2
                ;
 LZSA_SWAP_LEN16 =       0
                ;
                ; Save 3 bytes of code, and 4 or 8 cycles when decoding
                ; an offset?
                ;
                ; N.B. Setting this breaks compatibility with LZSA v1.2
                ;
 LZSA_SWAP_XZY   =       0
                ;
                ; Choose size over speed (within sane limits)?
                ;
                ; Setting this turns on both of the space-saving options
                ; that follow.
                ;
 LZSA_SMALL_SIZE =       0
                ;
                ; Remove code inlining to save space?
                ;
                ; This saves 15 bytes of code at the cost of 7% speed.
                ;
                !if      LZSA_SMALL_SIZE {
 LZSA_NO_INLINE  =       1
                } else {
 LZSA_NO_INLINE  =       0
                }
                ;
                ; Use smaller code for copying literals?
                ;
                ; This saves 11 bytes of code at the cost of 5% speed.
                ;
                !if      LZSA_SMALL_SIZE {
 LZSA_SHORT_CP   =       1
                } else {
 LZSA_SHORT_CP   =       0
                }
                ;
                ; Assume that we're decompressing from a large multi-bank
                ; compressed data file, and that the next bank may need to
                ; be paged in when a page-boundary is crossed.
                ;
 LZSA_FROM_BANK  =       0
                ;
                ; We will read from or write to $FFFF.  This prevents the
                ; use of the "INC ptrhi / BNE" trick and reduces speed.
                ;
                ; (The trick treats the BNE after incrementing a pointer's
                ; hi-byte as always taken, which only holds while that
                ; hi-byte never wraps around to zero.)
                ;
 LZSA_USE_FFFF  =        0
                ;
                ; Macro to increment the source pointer to the next page.
                ;
                ; With LZSA_FROM_BANK set, this calls lzsa2_next_page
                ; instead of incrementing the hi-byte inline.
                ; NOTE(review): lzsa2_next_page is outside the visible
                ; code; presumably it also handles the bank switch.
                ;
                !if     LZSA_FROM_BANK {
                        !macro  LZSA_INC_PAGE {
                        jsr     lzsa2_next_page
                        }
                } else {
                        !macro LZSA_INC_PAGE {
                        inc     <lzsa_srcptr + 1
                        }
                }
                ;
                ; Macro to read a byte from the compressed source data.
                ;
                ; Returns the byte in A and advances lzsa_srcptr by one.
                ; With LZSA_NO_INLINE set it calls lzsa2_get_byte, else
                ; the load-and-increment sequence is expanded inline.
                ; The inline form indexes with Y, which the caller keeps
                ; at zero.
                ;
                !if     LZSA_NO_INLINE {
                        !macro  LZSA_GET_SRC {
                        jsr     lzsa2_get_byte
                        }
                } else {
                        !macro  LZSA_GET_SRC {
                        lda     (lzsa_srcptr),y
                        inc     <lzsa_srcptr + 0
                        bne     .skip           ; No page crossing.
                        +LZSA_INC_PAGE
 .skip:
                        }
                }
                ;
                ; Macro to speed up reading 50% of nibbles.
                ;
                ; This seems to save very few cycles compared to the
                ; increase in code size, and it isn't recommended.
                ;
                ; The function-call form is used when either
                ; LZSA_SLOW_NIBL or LZSA_SMALL_SIZE is set. The inline
                ; form tests lzsa_nibflg itself and only calls out when
                ; no nibble is waiting; it leaves the nibble in A with
                ; the top four bits forced to 1.
                ; NOTE(review): lzsa2_get_nibble and lzsa2_new_nibble are
                ; outside the visible code; presumably the function-call
                ; form returns the nibble in the same format -- confirm.
                ;
 LZSA_SLOW_NIBL  =       1
                !if     (LZSA_SLOW_NIBL + LZSA_SMALL_SIZE) {
                        !macro  LZSA_GET_NIBL {
                        jsr     lzsa2_get_nibble        ; Always call a function.
                        }
                } else {
                        !macro  LZSA_GET_NIBL {
                        lsr     <lzsa_nibflg            ; Is there a nibble waiting?
                        lda     <lzsa_nibble            ; Extract the lo-nibble.
                        bcs     .skip
                        jsr     lzsa2_new_nibble        ; Extract the hi-nibble.
 .skip:                  ora     #$F0
                        }
                }
 ; ***************************************************************************
 ; ***************************************************************************
 ;
 ; Data usage is last 11 bytes of zero-page.
 ;
 ; lzsa_length shares its storage with lzsa_winptr. The LZSA_SRC_* and
 ; LZSA_DST_* names are aliases for the individual bytes of lzsa_srcptr and
 ; lzsa_dstptr.
 ;
 lzsa_cmdbuf     =       $F5                     ; 1 byte.
 lzsa_nibflg     =       $F6                     ; 1 byte.
 lzsa_nibble     =       $F7                     ; 1 byte.
 lzsa_offset     =       $F8                     ; 1 word.
 lzsa_winptr     =       $FA                     ; 1 word.
 lzsa_srcptr     =       $FC                     ; 1 word.
 lzsa_dstptr     =       $FE                     ; 1 word.
 lzsa_length     =       lzsa_winptr             ; 1 word.
 LZSA_SRC_LO     =       $FC
 LZSA_SRC_HI     =       $FD
 LZSA_DST_LO     =       $FE
 LZSA_DST_HI     =       $FF
 ; ***************************************************************************
 ; ***************************************************************************
 ;
 ; lzsa2_unpack - Decompress data stored in Emmanuel Marty's LZSA2 format.
 ;
 ; Args: lzsa_srcptr = ptr to compressed data
 ; Args: lzsa_dstptr = ptr to output buffer
 ; Uses: lots!
 ;
 ; If compiled with LZSA_FROM_BANK, then lzsa_srcptr should be within the bank
 ; window range.
 ;
 DECOMPRESS_LZSA2_FAST:
 lzsa2_unpack:   ldy     #0                      ; Initialize source index.
                sty     <lzsa_nibflg            ; Initialize nibble buffer.
                !if     (LZSA_FROM_BANK | LZSA_NO_INLINE | LZSA_USE_FFFF) = 0 {
                beq     .cp_length              ; always taken
 .incsrc1:
                inc     <lzsa_srcptr + 1
                bne     .resume_src1            ; always taken
                !if     LZSA_SHORT_CP {
 .incsrc2:
                inc     <lzsa_srcptr + 1
                bne     .resume_src2            ; always taken
 .incdst:
                inc     <lzsa_dstptr + 1
                bne     .resume_dst             ; always taken
                }
                }
                ;
                ; Copy bytes from compressed source data.
                ;
                ; The command byte is kept in lzsa_cmdbuf. Bits 3-4 (mask $18)
                ; hold the literal count; the value 3 means an extended
                ; length follows and is fetched by .get_length.
                ; Both copy variants below count X down to zero before
                ; falling through to the offset decoding.
                ;
 .cp_length:     ldx     #$00                    ; Hi-byte of length or offset.
                !if     (LZSA_FROM_BANK | LZSA_NO_INLINE | LZSA_USE_FFFF) {
                +LZSA_GET_SRC
                } else {
                lda     (lzsa_srcptr),y
                inc     <lzsa_srcptr + 0
                beq     .incsrc1
                }
 .resume_src1:
                sta     <lzsa_cmdbuf            ; Preserve this for later.
                and     #$18                    ; Extract literal length.
                beq     .lz_offset              ; Skip directly to match?
                lsr                             ; Get 2-bit literal length.
                lsr
                lsr
                cmp     #$03                    ; Extended length?
                bne     .got_cp_len
                jsr     .get_length             ; X=0 table index for literals.
                !if     LZSA_SHORT_CP {
                ; Byte-at-a-time copy. X counts the lo-byte of the length,
                ; lzsa_length counts pages (hi-byte, plus one when the
                ; lo-byte is non-zero). Y stays at 0 throughout.
 .got_cp_len:    cmp     #0                      ; Check the lo-byte of length.
                beq     .put_cp_len
                inx                             ; Increment # of pages to copy.
 .put_cp_len:    stx     <lzsa_length
                tax
 .cp_page:       lda     (lzsa_srcptr),y
                sta     (lzsa_dstptr),y
                inc     <lzsa_srcptr + 0
                !if     (LZSA_FROM_BANK | LZSA_NO_INLINE | LZSA_USE_FFFF) {
                bne     .skip1
                inc     <lzsa_srcptr + 1
 .skip1:         inc     <lzsa_dstptr + 0
                bne     .skip2
                inc     <lzsa_dstptr + 1
 .skip2:
                } else {
                beq     .incsrc2
 .resume_src2:
                inc     <lzsa_dstptr + 0
                beq     .incdst
 .resume_dst:
                }
                dex
                bne     .cp_page
                dec     <lzsa_length            ; Any full pages left to copy?
                bne     .cp_page
                } else {
                ; Page-at-a-time copy. Each pointer is moved to
                ; (ptr + lo-byte - 256) and Y is set to -lo-byte, so
                ; (ptr),y starts at the original address and the inner
                ; loop ends when Y wraps to zero. X counts pages.
 .got_cp_len:    tay                             ; Check the lo-byte of length.
                beq     .cp_page
                inx                             ; Increment # of pages to copy.
 .get_cp_src:    clc                             ; Calc address of partial page.
                adc     <lzsa_srcptr + 0
                sta     <lzsa_srcptr + 0
                bcs     .get_cp_dst             ; Carry: +256-256 cancels out.
                dec     <lzsa_srcptr + 1
 .get_cp_dst:    tya
                clc                             ; Calc address of partial page.
                adc     <lzsa_dstptr + 0
                sta     <lzsa_dstptr + 0
                bcs     .get_cp_idx             ; Carry: +256-256 cancels out.
                dec     <lzsa_dstptr + 1
 .get_cp_idx:    tya                             ; Negate the lo-byte of length.
                eor     #$FF
                tay
                iny
 .cp_page:       lda     (lzsa_srcptr),y
                sta     (lzsa_dstptr),y
                iny
                bne     .cp_page
                inc     <lzsa_srcptr + 1
                inc     <lzsa_dstptr + 1
                dex                             ; Any full pages left to copy?
                bne     .cp_page
                }
                !if     LZSA_SWAP_XZY {
                ;
                ; Shorter and faster path with NEW order of bits.
                ;
                ; STD  NEW
                ; ================================
                ; xyz  xzy
                ; 00z  0z0  5-bit offset
                ; 01z  0z1  9-bit offset
                ; 10z  1z0  13-bit offset
                ; 110  101  16-bit offset
                ; 111  111  repeat offset
                ;      NVZ  for a BIT instruction
                ;
                ; N.B. Saves 3 bytes in code length.
                ;      get5 and get13 are 8 cycles faster.
                ;      get9, get16, and rep are 4 cycles faster.
                ;
                ; BIT copies bit 7 of lzsa_cmdbuf to N and bit 6 to V, and
                ; sets Z from (A AND lzsa_cmdbuf), i.e. from bit 5 here.
                ;
 .lz_offset:     lda     #$20                    ; Y bit in lzsa_cmdbuf.
                bit     <lzsa_cmdbuf
                bmi     .get_13_16_rep
                bne     .get_9_bits
 .get_5_bits:    dex                             ; X=$FF
 .get_13_bits:   +LZSA_GET_NIBL                  ; Always returns with CS.
                bvc     .get_5_skip
                clc
 .get_5_skip:    rol                             ; Shift into position, set C.
                cpx     #$00                    ; X=$FF for a 5-bit offset.
                bne     .set_offset
                sbc     #2                      ; Subtract 512 because 13-bit
                tax                             ; offset starts at $FE00.
                bne     .get_low8               ; Always NZ from previous TAX.
 .get_9_bits:    dex                             ; X=$FF if VC, X=$FE if VS.
                bvc     .get_low8
                dex
                bvs     .get_low8               ; Always VS from previous BIT.
 .get_13_16_rep: beq     .get_13_bits            ; Shares code with 5-bit path.
 .get_16_rep:    bvs     .lz_length              ; Repeat previous offset.
                } else {
                ;
                ; Slower and longer path with STD order of bits.
                ;
                ; STD  NEW
                ; ================================
                ; xyz  xzy
                ; 00z  0z0  5-bit offset
                ; 01z  0z1  9-bit offset
                ; 10z  1z0  13-bit offset
                ; 110  101  16-bit offset
                ; 111  111  repeat offset
                ;      NVZ  for a BIT instruction
                ;
                ; N.B. Costs 3 bytes in code length.
                ;      get5 and get13 are 8 cycles slower.
                ;      get9, get16, and rep are 4 cycles slower.
                ;
                ; Here the x, y and z bits are shifted out of A one at a
                ; time with ASL and tested through the carry flag.
                ;
 .lz_offset:     lda     <lzsa_cmdbuf
                asl                             ; C = x bit.
                bcs     .get_13_16_rep
                asl                             ; C = y bit.
                bcs     .get_9_bits
 .get_5_bits:    dex                             ; X=$FF
 .get_13_bits:   asl                             ; C = z bit.
                php                             ; Keep C across the nibble fetch.
                +LZSA_GET_NIBL                  ; Always returns with CS.
                plp
                rol                             ; Shift into position, set C.
                eor     #$01                    ; Invert the z bit just rotated in.
                cpx     #$00                    ; X=$FF for a 5-bit offset.
                bne     .set_offset
                sbc     #2                      ; Subtract 512 because 13-bit
                                                ; offset starts at $FE00.
                bne     .get_low8x              ; Always NZ from previous SBC.
 .get_9_bits:    dex                             ; X=$FF if CC, X=$FE if CS.
                asl                             ; C = z bit.
                bcc     .get_low8
                dex
                bcs     .get_low8               ; Always CS from previous ASL.
 .get_13_16_rep: asl                             ; C = y bit, N = z bit.
                bcc     .get_13_bits            ; Shares code with 5-bit path.
 .get_16_rep:    bmi     .lz_length              ; Repeat previous offset.
                }
                ;
                ; Copy bytes from decompressed window.
                ;
                ; N.B. X=0 is expected and guaranteed when we get here.
                ;
                ; The offset is stored as a negative 16-bit value, so the
                ; match address is simply lzsa_dstptr + lzsa_offset.
                ;
 .get_16_bits:   jsr     lzsa2_get_byte          ; Get hi-byte of offset.
 .get_low8x:     tax
 .get_low8:
                !if     (LZSA_FROM_BANK | LZSA_NO_INLINE | LZSA_USE_FFFF) {
                +LZSA_GET_SRC                   ; Get lo-byte of offset.
                } else {
                lda     (lzsa_srcptr),y
                inc     <lzsa_srcptr + 0
                beq     .incsrc3
 .resume_src3:
                }
 .set_offset:    stx     <lzsa_offset + 1        ; Save new offset.
                sta     <lzsa_offset + 0
 .lz_length:     ldx     #$00                    ; Hi-byte of length.
                lda     <lzsa_cmdbuf
                and     #$07                    ; Low 3 bits = encoded match length.
                clc
                adc     #$02                    ; Minimum match length is 2.
                cmp     #$09                    ; Extended length?
                bne     .got_lz_len
                inx
                jsr     .get_length             ; X=1 table index for match.
 .got_lz_len:    eor     #$FF                    ; Negate the lo-byte of length
                tay                             ; and check for zero.
                iny
                beq     .calc_lz_addr           ; Whole pages only: Y=0, X=pages.
                eor     #$FF                    ; Restore the lo-byte of length.
                inx                             ; Increment # of pages to copy.
                clc                             ; Calc destination for partial
                adc     <lzsa_dstptr + 0        ; page.
                sta     <lzsa_dstptr + 0
                bcs     .calc_lz_addr           ; Carry: +256-256 cancels out.
                dec     <lzsa_dstptr + 1
 .calc_lz_addr:  clc                             ; Calc address of match.
                lda     <lzsa_dstptr + 0        ; N.B. Offset is negative!
                adc     <lzsa_offset + 0
                sta     <lzsa_winptr + 0
                lda     <lzsa_dstptr + 1
                adc     <lzsa_offset + 1
                sta     <lzsa_winptr + 1
 .lz_page:       lda     (lzsa_winptr),y         ; Y runs from -len up to zero.
                sta     (lzsa_dstptr),y
                iny
                bne     .lz_page
                inc     <lzsa_winptr + 1
                inc     <lzsa_dstptr + 1
                dex                             ; Any full pages left to copy?
                bne     .lz_page
                jmp     .cp_length              ; Loop around to the beginning.
                !if     (LZSA_FROM_BANK | LZSA_NO_INLINE | LZSA_USE_FFFF) = 0 {
                ; Out-of-line hi-byte increment for the inlined read above.
 .incsrc3:
                inc     <lzsa_srcptr + 1
                bne     .resume_src3            ; always taken
                }
                ;
                ; Lookup tables to differentiate literal and match lengths.
                ;
                ; Indexed by X: 0 = literal length, 1 = match length.
                ; NOTE(review): the + $10 presumably cancels the $F0 that the
                ; nibble fetch leaves in the top bits (the code below tests
                ; the nibble against $FF) -- confirm against LZSA_GET_NIBL.
                ;
 .nibl_len_tbl:  !byte   3 + $10                 ; 0+3 (for literal).
                !byte   9 + $10                 ; 2+7 (for match).
 .byte_len_tbl:  !byte   18 - 1                  ; 0+3+15 - CS (for literal).
                !byte   24 - 1                  ; 2+7+15 - CS (for match).
                ;
                ; Get 16-bit length in X:A register pair.
                ;
                ; In:  X = table index (0 literal, 1 match).
                ; Out: A = lo-byte, X = hi-byte of the length.
                ; Does not return to its caller on the end-of-data code;
                ; it branches to .finished instead.
                ;
 .get_length:    +LZSA_GET_NIBL
                cmp     #$FF                    ; Extended length?
                bcs     .byte_length
                adc     .nibl_len_tbl,x         ; Always CC from previous CMP.
 .got_length:    ldx     #$00                    ; Set hi-byte of 4 & 8 bit
                rts                             ; lengths.
 .byte_length:   jsr     lzsa2_get_byte          ; So rare, this can be slow!
                adc     .byte_len_tbl,x         ; Always CS from previous CMP.
                bcc     .got_length             ; No overflow: 8-bit length.
                beq     .finished               ; Sum wrapped to exactly 0.
                !if      LZSA_SWAP_LEN16 {
                ; Hi-byte comes first in this variant; execution then falls
                ; through into lzsa2_get_byte, which reads the lo-byte into
                ; A and returns to .get_length's caller.
 .word_length:   jsr     lzsa2_get_byte          ; So rare, this can be slow!
                tax
                } else {
                ; Lo-byte first, then hi-byte.
 .word_length:   jsr     lzsa2_get_byte          ; So rare, this can be slow!
                pha
                jsr     lzsa2_get_byte          ; So rare, this can be slow!
                tax
                pla
                rts
                }
 ; lzsa2_get_byte: read one byte from (lzsa_srcptr),y into A and advance
 ; lzsa_srcptr by one. The instructions used here (LDA/INC/branches/RTS) do
 ; not modify the carry flag, which callers that ADC right after the call
 ; depend on.
 lzsa2_get_byte: 
                lda     (lzsa_srcptr),y         ; Subroutine version for when
                inc     <lzsa_srcptr + 0        ; inlining isn't advantageous.
                beq     lzsa2_next_page
                rts
 lzsa2_next_page:
                inc     <lzsa_srcptr + 1        ; Inc & test for bank overflow.
                !if     LZSA_FROM_BANK {
                ; lzsa2_next_bank is not defined in this part of the file.
                bmi     lzsa2_next_bank         ; Change for target hardware!
                }
                rts
 ; .finished is branched to from inside the .get_length subroutine, so the
 ; two PLAs discard that JSR's return address and the RTS returns from the
 ; decompressor itself.
 .finished:      pla                             ; Decompression completed, pop
                pla                             ; return address.
                rts
                ;
                ; Get a nibble value from compressed data in A.
                ;
                ; lzsa_nibflg bit 0 is set while the lo-nibble of the byte
                ; saved in lzsa_nibble has not been consumed yet.
                ;
                !if     (LZSA_SLOW_NIBL | LZSA_SMALL_SIZE) {
                ; Full subroutine: returns the nibble in A with the top
                ; four bits forced to 1 (ORA #$F0).
 lzsa2_get_nibble:
                lsr     <lzsa_nibflg            ; Is there a nibble waiting?
                lda     <lzsa_nibble            ; Extract the lo-nibble.
                bcs     .got_nibble
                inc     <lzsa_nibflg            ; Reset the flag.
                !if     (LZSA_FROM_BANK | LZSA_NO_INLINE | LZSA_USE_FFFF) {
                +LZSA_GET_SRC
                } else {
                lda     (lzsa_srcptr),y
                inc     <lzsa_srcptr + 0
                beq     .incsrc4
 .resume_src4:
                }
                sta     <lzsa_nibble            ; Preserve for next time.
                lsr                             ; Extract the hi-nibble.
                lsr
                lsr
                lsr
                !if     LZSA_SWAP_XZY {
                sec                             ; Offset code relies on CS.
                }
 .got_nibble:    ora     #$F0
                rts
                } else {
                ; Refill-only subroutine: always reads a new byte and
                ; returns its hi-nibble in the low four bits of A.
                ; NOTE(review): the "nibble waiting" test and any masking
                ; are presumably done by the LZSA_GET_NIBL macro, which is
                ; not visible here -- confirm.
 lzsa2_new_nibble:
                inc     <lzsa_nibflg            ; Reset the flag.
                !if     (LZSA_FROM_BANK | LZSA_NO_INLINE | LZSA_USE_FFFF) {
                +LZSA_GET_SRC
                } else {
                lda     (lzsa_srcptr),y
                inc     <lzsa_srcptr + 0
                beq     .incsrc4
 .resume_src4:
                }
                sta     <lzsa_nibble            ; Preserve for next time.
                lsr                             ; Extract the hi-nibble.
                lsr
                lsr
                lsr
                !if     LZSA_SWAP_XZY {
                sec                             ; Offset code relies on CS.
                }
                rts
                }
                !if     (LZSA_FROM_BANK | LZSA_NO_INLINE | LZSA_USE_FFFF) = 0 {
                ; Out-of-line hi-byte increment for the inlined read above.
 .incsrc4:
                inc     <lzsa_srcptr + 1
                bne     .resume_src4            ; always taken
                }
--- a/Tools/unix/lzsa/asm/6502/decompress_small_v1.asm
+++ b/Tools/unix/lzsa/asm/6502/decompress_small_v1.asm
@ -0,0 +1,270 @@
 ; -----------------------------------------------------------------------------
 ; Decompress raw LZSA1 block. Create one with lzsa -r <original_file> <compressed_file>
 ;
 ; in:
 ; * LZSA_SRC_LO and LZSA_SRC_HI contain the compressed raw block address
 ; * LZSA_DST_LO and LZSA_DST_HI contain the destination buffer address
 ;
 ; out:
 ; * LZSA_DST_LO and LZSA_DST_HI contain the last decompressed byte address, +1
 ;
 ; -----------------------------------------------------------------------------
 ; Backward decompression is also supported, use lzsa -r -b <original_file> <compressed_file>
 ; To use it, also define BACKWARD_DECOMPRESS=1 before including this code!
 ;
 ; in:
 ; * LZSA_SRC_LO/LZSA_SRC_HI must contain the address of the last byte of compressed data
 ; * LZSA_DST_LO/LZSA_DST_HI must contain the address of the last byte of the destination buffer
 ;
 ; out:
 ; * LZSA_DST_LO/LZSA_DST_HI contain the last decompressed byte address, -1
 ;
 ; -----------------------------------------------------------------------------
 ;
 ;  Copyright (C) 2019 Emmanuel Marty
 ;
 ;  This software is provided 'as-is', without any express or implied
 ;  warranty.  In no event will the authors be held liable for any damages
 ;  arising from the use of this software.
 ;
 ;  Permission is granted to anyone to use this software for any purpose,
 ;  including commercial applications, and to alter it and redistribute it
 ;  freely, subject to the following restrictions:
 ;
 ;  1. The origin of this software must not be misrepresented; you must not
 ;     claim that you wrote the original software. If you use this software
 ;     in a product, an acknowledgment in the product documentation would be
 ;     appreciated but is not required.
 ;  2. Altered source versions must be plainly marked as such, and must not be
 ;     misrepresented as being the original software.
 ;  3. This notice may not be removed or altered from any source distribution.
 ; -----------------------------------------------------------------------------
 ; DECOMPRESS_LZSA1: entry point of the LZSA1 decompressor.
 ; Register use in the copy loops: X = low byte of the remaining count,
 ; Y = number of 256-byte passes (high byte, plus one when the low byte is
 ; non-zero). Y is zero every time DECODE_TOKEN is reached.
 DECOMPRESS_LZSA1
   LDY #$00
 DECODE_TOKEN
   JSR GETSRC                           ; read token byte: O|LLL|MMMM
   PHA                                  ; preserve token on stack
   AND #$70                             ; isolate literals count
   BEQ NO_LITERALS                      ; skip if no literals to copy
   LSR                                  ; shift literals count into place
   LSR
   LSR
   LSR
   CMP #$07                             ; LITERALS_RUN_LEN?
   BCC PREPARE_COPY_LITERALS            ; if not, count is directly embedded in token
   JSR GETSRC                           ; get extra byte of variable literals count
                                        ; the carry is always set by the CMP above
                                        ; GETSRC doesn't change it
   SBC #$F9                             ; (LITERALS_RUN_LEN)
                                        ; with carry set this is A + 7 - 256
   BCC PREPARE_COPY_LITERALS            ; no wrap: A = extra byte + 7
   BEQ LARGE_VARLEN_LITERALS            ; if adding up to zero, go grab 16-bit count
   JSR GETSRC                           ; get single extended byte of variable literals count
   INY                                  ; add 256 to literals count
   BCS PREPARE_COPY_LITERALS            ; (*like JMP PREPARE_COPY_LITERALS but shorter)
                                        ; carry is still set from the SBC above
 LARGE_VARLEN_LITERALS                   ; handle 16 bits literals count
                                        ; literals count = directly these 16 bits
   JSR GETLARGESRC                      ; grab low 8 bits in X, high 8 bits in A
   TAY                                  ; put high 8 bits in Y
   TXA
 PREPARE_COPY_LITERALS
   TAX                                  ; X = low byte of count
   BEQ COPY_LITERALS                    ; low byte zero: Y already counts full passes
   INY                                  ; otherwise one extra (partial) pass
 COPY_LITERALS
   JSR GETPUT                           ; copy one byte of literals
   DEX
   BNE COPY_LITERALS
   DEY
   BNE COPY_LITERALS
 ; Match offset. Token bit 7 (O) selects a 2-byte offset; otherwise a single
 ; byte is read and the high byte is forced to $FF.
 ; Reaches GOT_OFFSET with the low byte in X and the high byte in A.
 NO_LITERALS
   PLA                                  ; retrieve token from stack
   PHA                                  ; preserve token again
   BMI GET_LONG_OFFSET                  ; $80: 16 bit offset
   JSR GETSRC                           ; get 8 bit offset from stream in A
   TAX                                  ; save for later
   LDA #$FF                             ; high 8 bits
   BNE GOT_OFFSET                       ; go prepare match
                                        ; (*like JMP GOT_OFFSET but shorter)
 ; Match copy. On entry to PREPARE_COPY_MATCH, A = low byte of the match
 ; length and Y = high byte. The operand of the LDA at COPY_MATCH_LOOP is the
 ; back-reference address; it is rewritten in place (self-modifying code) and
 ; stepped after every byte.
 SHORT_VARLEN_MATCHLEN
   JSR GETSRC                           ; get single extended byte of variable match len
   INY                                  ; add 256 to match length
 PREPARE_COPY_MATCH
   TAX
 PREPARE_COPY_MATCH_Y
   TXA                                  ; set Z from the low byte of the count
   BEQ COPY_MATCH_LOOP                  ; low byte zero: Y already counts full passes
   INY                                  ; otherwise one extra (partial) pass
 COPY_MATCH_LOOP
   LDA $AAAA                            ; get one byte of backreference
   JSR PUTDST                           ; copy to destination
 !ifdef BACKWARD_DECOMPRESS {
   ; Backward decompression -- put backreference bytes backward
   LDA COPY_MATCH_LOOP+1                ; low byte zero? then borrow from high byte
   BNE GETMATCH_DONE
   DEC COPY_MATCH_LOOP+2
 GETMATCH_DONE
   DEC COPY_MATCH_LOOP+1
 } else {
   ; Forward decompression -- put backreference bytes forward
   INC COPY_MATCH_LOOP+1
   BNE GETMATCH_DONE
   INC COPY_MATCH_LOOP+2
 GETMATCH_DONE
 }
   DEX
   BNE COPY_MATCH_LOOP
   DEY
   BNE COPY_MATCH_LOOP
   BEQ DECODE_TOKEN                     ; (*like JMP DECODE_TOKEN but shorter)
 ; Compute the back-reference address from the destination pointer and the
 ; match offset (X = low byte, A = high byte at GOT_OFFSET), store it into
 ; the operand of COPY_MATCH_LOOP, then decode the match length from the
 ; token. OFFSLO/OFFSHI are immediate operands patched at run time.
 GET_LONG_OFFSET                         ; handle 16 bit offset:
   JSR GETLARGESRC                      ; grab low 8 bits in X, high 8 bits in A
 GOT_OFFSET
 !ifdef BACKWARD_DECOMPRESS {
   ; Backward decompression - subtract match offset
   STA OFFSHI                           ; store high 8 bits of offset
   STX OFFSLO
   SEC                                  ; subtract dest - match offset
   LDA PUTDST+1
 OFFSLO = *+1
   SBC #$AA                             ; low 8 bits
   STA COPY_MATCH_LOOP+1                ; store back reference address
   LDA PUTDST+2
 OFFSHI = *+1
   SBC #$AA                             ; high 8 bits
   STA COPY_MATCH_LOOP+2                ; store high 8 bits of address
   SEC                                  ; the ADC #$02 below expects carry set
 } else {
   ; Forward decompression - add match offset
   STA OFFSHI                           ; store high 8 bits of offset
   TXA
   CLC                                  ; add dest + match offset
   ADC PUTDST+1                         ; low 8 bits
   STA COPY_MATCH_LOOP+1                ; store back reference address
 OFFSHI = *+1
   LDA #$AA                             ; high 8 bits
   ADC PUTDST+2
   STA COPY_MATCH_LOOP+2                ; store high 8 bits of address
 }
   PLA                                  ; retrieve token from stack again
   AND #$0F                             ; isolate match len (MMMM)
   ADC #$02                             ; plus carry which is always set by the high ADC
   CMP #$12                             ; MATCH_RUN_LEN?
   BCC PREPARE_COPY_MATCH               ; if not, count is directly embedded in token
   JSR GETSRC                           ; get extra byte of variable match length
                                        ; the carry is always set by the CMP above
                                        ; GETSRC doesn't change it
   SBC #$EE                             ; add MATCH_RUN_LEN and MIN_MATCH_SIZE to match length
                                        ; with carry set this is A + 18 - 256
   BCC PREPARE_COPY_MATCH               ; no wrap: A = extra byte + 18
   BNE SHORT_VARLEN_MATCHLEN            ; non-zero after wrap: 256 + one more byte
                                        ; Handle 16 bits match length
   JSR GETLARGESRC                      ; grab low 8 bits in X, high 8 bits in A
   TAY                                  ; put high 8 bits in Y
                                        ; large match length with zero high byte?
   BNE PREPARE_COPY_MATCH_Y             ; if not, continue
 DECOMPRESSION_DONE                      ; zero high byte ends decompression
   RTS
 ; Byte I/O helpers. The source and destination addresses live in the
 ; operands of the LDA/STA instructions below: LZSA_SRC_LO/HI and
 ; LZSA_DST_LO/HI are defined as the addresses of those operand bytes.
 ;   GETPUT      - copy one byte from source to destination
 ;   PUTDST      - store A at the destination and step the destination address
 ;   GETLARGESRC - read two bytes: first into X, second into A
 ;   GETSRC      - read one byte into A and step the source address
 ; None of the instructions used here modify the carry flag.
 !ifdef BACKWARD_DECOMPRESS {
   ; Backward decompression -- get and put bytes backward
 GETPUT
   JSR GETSRC
 PUTDST
 LZSA_DST_LO = *+1
 LZSA_DST_HI = *+2
   STA $AAAA
   LDA PUTDST+1                         ; low byte zero? then borrow from high byte
   BNE PUTDST_DONE
   DEC PUTDST+2
 PUTDST_DONE
   DEC PUTDST+1
   RTS
 GETLARGESRC
   JSR GETSRC                           ; grab low 8 bits
   TAX                                  ; move to X
                                        ; fall through grab high 8 bits
 GETSRC
 LZSA_SRC_LO = *+1
 LZSA_SRC_HI = *+2
   LDA $AAAA
   PHA                                  ; keep the fetched byte while A tests the pointer
   LDA GETSRC+1                         ; low byte zero? then borrow from high byte
   BNE GETSRC_DONE
   DEC GETSRC+2
 GETSRC_DONE
   DEC GETSRC+1
   PLA                                  ; return the fetched byte in A
   RTS
 } else {
   ; Forward decompression -- get and put bytes forward
 GETPUT
   JSR GETSRC
 PUTDST
 LZSA_DST_LO = *+1
 LZSA_DST_HI = *+2
   STA $AAAA
   INC PUTDST+1
   BNE PUTDST_DONE
   INC PUTDST+2
 PUTDST_DONE
   RTS
 GETLARGESRC
   JSR GETSRC                           ; grab low 8 bits
   TAX                                  ; move to X
                                        ; fall through grab high 8 bits
 GETSRC
 LZSA_SRC_LO = *+1
 LZSA_SRC_HI = *+2
   LDA $AAAA
   INC GETSRC+1
   BNE GETSRC_DONE
   INC GETSRC+2
 GETSRC_DONE
   RTS
 }
--- a/Tools/unix/lzsa/asm/6502/decompress_small_v2.asm
+++ b/Tools/unix/lzsa/asm/6502/decompress_small_v2.asm
@ -0,0 +1,336 @@
 ; -----------------------------------------------------------------------------
 ; Decompress raw LZSA2 block.
 ; Create one with lzsa -r -f2 <original_file> <compressed_file>
 ;
 ; in:
 ; * LZSA_SRC_LO and LZSA_SRC_HI contain the compressed raw block address
 ; * LZSA_DST_LO and LZSA_DST_HI contain the destination buffer address
 ;
 ; out:
 ; * LZSA_DST_LO and LZSA_DST_HI contain the last decompressed byte address, +1
 ;
 ; -----------------------------------------------------------------------------
 ; Backward decompression is also supported, use lzsa -r -b -f2 <original_file> <compressed_file>
 ; To use it, also define BACKWARD_DECOMPRESS=1 before including this code!
 ;
 ; in:
 ; * LZSA_SRC_LO/LZSA_SRC_HI must contain the address of the last byte of compressed data
 ; * LZSA_DST_LO/LZSA_DST_HI must contain the address of the last byte of the destination buffer
 ;
 ; out:
 ; * LZSA_DST_LO/LZSA_DST_HI contain the last decompressed byte address, -1
 ;
 ; -----------------------------------------------------------------------------
 ;
 ;  Copyright (C) 2019 Emmanuel Marty
 ;
 ;  This software is provided 'as-is', without any express or implied
 ;  warranty.  In no event will the authors be held liable for any damages
 ;  arising from the use of this software.
 ;
 ;  Permission is granted to anyone to use this software for any purpose,
 ;  including commercial applications, and to alter it and redistribute it
 ;  freely, subject to the following restrictions:
 ;
 ;  1. The origin of this software must not be misrepresented; you must not
 ;     claim that you wrote the original software. If you use this software
 ;     in a product, an acknowledgment in the product documentation would be
 ;     appreciated but is not required.
 ;  2. Altered source versions must be plainly marked as such, and must not be
 ;     misrepresented as being the original software.
 ;  3. This notice may not be removed or altered from any source distribution.
 ; -----------------------------------------------------------------------------
 NIBCOUNT = $FC                          ; zero-page flag: bit 0 set while a buffered nibble is pending
 ; DECOMPRESS_LZSA2: entry point of the LZSA2 decompressor.
 ; Register use in the copy loops: X = low byte of the remaining count,
 ; Y = number of 256-byte passes (high byte, plus one when the low byte is
 ; non-zero).
 DECOMPRESS_LZSA2
   LDY #$00
   STY NIBCOUNT                         ; no nibble buffered yet
 DECODE_TOKEN
   JSR GETSRC                           ; read token byte: XYZ|LL|MMM
   PHA                                  ; preserve token on stack
   AND #$18                             ; isolate literals count (LL)
   BEQ NO_LITERALS                      ; skip if no literals to copy
   LSR                                  ; shift literals count into place
   LSR
   LSR
   CMP #$03                             ; LITERALS_RUN_LEN_V2?
   BCC PREPARE_COPY_LITERALS            ; if less, count is directly embedded in token
   JSR GETNIBBLE                        ; get extra literals length nibble
                                        ; add nibble to len from token
                                        ; (GETNIBBLE returns with carry set)
   ADC #$02                             ; (LITERALS_RUN_LEN_V2) minus carry
   CMP #$12                             ; LITERALS_RUN_LEN_V2 + 15 ?
   BCC PREPARE_COPY_LITERALS            ; if less, literals count is complete
   JSR GETSRC                           ; get extra byte of variable literals count
                                        ; the carry is always set by the CMP above
                                        ; GETSRC doesn't change it
   SBC #$EE                             ; overflow?
                                        ; with carry set this is A + 18 - 256
 PREPARE_COPY_LITERALS
   TAX
   BCC PREPARE_COPY_LITERALS_HIGH       ; if not, literals count is complete
                                        ; handle 16 bits literals count
                                        ; literals count = directly these 16 bits
   JSR GETLARGESRC                      ; grab low 8 bits in X, high 8 bits in A
   TAY                                  ; put high 8 bits in Y
 PREPARE_COPY_LITERALS_HIGH
   TXA                                  ; set Z from the low byte of the count
   BEQ COPY_LITERALS                    ; low byte zero: Y already counts full passes
   INY                                  ; otherwise one extra (partial) pass
 COPY_LITERALS
   JSR GETPUT                           ; copy one byte of literals
   DEX
   BNE COPY_LITERALS
   DEY
   BNE COPY_LITERALS
 ; Match offset decoding. The top three token bits (XYZ) are shifted out of
 ; A one at a time to select the offset form. Every form except rep-match
 ; ends at GOT_OFFSET_LO with the low byte in A and the high byte in X.
 NO_LITERALS
   PLA                                  ; retrieve token from stack
   PHA                                  ; preserve token again
   ASL                                  ; carry = X bit
   BCS REPMATCH_OR_LARGE_OFFSET         ; 1YZ: rep-match or 13/16 bit offset
   ASL                                  ; 0YZ: 5 or 9 bit offset
   BCS OFFSET_9_BIT         
                                        ; 00Z: 5 bit offset
   LDX #$FF                             ; set offset bits 15-8 to 1
   JSR GETCOMBINEDBITS                  ; rotate Z bit into bit 0, read nibble for bits 4-1
   ORA #$E0                             ; set bits 7-5 to 1
   BNE GOT_OFFSET_LO                    ; go store low byte of match offset and prepare match
 OFFSET_9_BIT                            ; 01Z: 9 bit offset
   ;;ASL                                  ; shift Z (offset bit 8) in place
   ROL                                  ; two rotates bring the Z bit (bit 7 of A) around to bit 0
   ROL
   AND #$01
   EOR #$FF                             ; set offset bits 15-9 to 1
   BNE GOT_OFFSET_HI                    ; go store high byte, read low byte of match offset and prepare match
                                        ; (*same as JMP GOT_OFFSET_HI but shorter)
 REPMATCH_OR_LARGE_OFFSET
   ASL                                  ; 13 bit offset?
                                        ; (carry = Y bit, N flag = Z bit)
   BCS REPMATCH_OR_16_BIT               ; handle rep-match or 16-bit offset if not
                                        ; 10Z: 13 bit offset
   JSR GETCOMBINEDBITS                  ; rotate Z bit into bit 8, read nibble for bits 12-9
   ADC #$DE                             ; set bits 15-13 to 1 and subtract 2 (to subtract 512)
   BNE GOT_OFFSET_HI                    ; go store high byte, read low byte of match offset and prepare match
                                        ; (*same as JMP GOT_OFFSET_HI but shorter)
 REPMATCH_OR_16_BIT                      ; rep-match or 16 bit offset
   ;;ASL                                  ; XYZ=111?
   BMI REP_MATCH                        ; reuse previous offset if so (rep-match)
                                        ; 110: handle 16 bit offset
   JSR GETSRC                           ; grab high 8 bits
 GOT_OFFSET_HI
   TAX
   JSR GETSRC                           ; grab low 8 bits
 GOT_OFFSET_LO
   STA OFFSLO                           ; store low byte of match offset
   STX OFFSHI                           ; store high byte of match offset
 ; Compute the back-reference address from the destination pointer and the
 ; stored match offset, decode the match length, then copy the match.
 ; OFFSLO/OFFSHI are immediate operands patched at run time; because they
 ; keep their last value, a rep-match reuses the previous offset by jumping
 ; straight here. The operand of the LDA at COPY_MATCH_LOOP is the
 ; back-reference address and is stepped after every byte.
 REP_MATCH
 !ifdef BACKWARD_DECOMPRESS {
   ; Backward decompression - subtract match offset
   SEC                                  ; subtract match offset from dest
   LDA PUTDST+1                         ; low 8 bits
 OFFSLO = *+1
   SBC #$AA
   STA COPY_MATCH_LOOP+1                ; store back reference address
   LDA PUTDST+2
 OFFSHI = *+1
   SBC #$AA                             ; high 8 bits
   STA COPY_MATCH_LOOP+2                ; store high 8 bits of address
   SEC                                  ; the ADC #$01 below expects carry set
 } else {
   ; Forward decompression - add match offset
   CLC                                  ; add dest + match offset
   LDA PUTDST+1                         ; low 8 bits
 OFFSLO = *+1
   ADC #$AA
   STA COPY_MATCH_LOOP+1                ; store back reference address
 OFFSHI = *+1
   LDA #$AA                             ; high 8 bits
   ADC PUTDST+2
   STA COPY_MATCH_LOOP+2                ; store high 8 bits of address
 }
   PLA                                  ; retrieve token from stack again
   AND #$07                             ; isolate match len (MMM)
   ADC #$01                             ; add MIN_MATCH_SIZE_V2 and carry
   CMP #$09                             ; MIN_MATCH_SIZE_V2 + MATCH_RUN_LEN_V2?
   BCC PREPARE_COPY_MATCH               ; if less, length is directly embedded in token
   JSR GETNIBBLE                        ; get extra match length nibble
                                        ; add nibble to len from token
                                        ; (GETNIBBLE returns with carry set)
   ADC #$08                             ; (MIN_MATCH_SIZE_V2 + MATCH_RUN_LEN_V2) minus carry
   CMP #$18                             ; MIN_MATCH_SIZE_V2 + MATCH_RUN_LEN_V2 + 15?
   BCC PREPARE_COPY_MATCH               ; if less, match length is complete
   JSR GETSRC                           ; get extra byte of variable match length
                                        ; the carry is always set by the CMP above
                                        ; GETSRC doesn't change it
   SBC #$E8                             ; overflow?
                                        ; with carry set this is A + 24 - 256
 PREPARE_COPY_MATCH
   TAX
   BCC PREPARE_COPY_MATCH_Y             ; if not, the match length is complete
   BEQ DECOMPRESSION_DONE               ; if EOD code, bail
                                        ; Handle 16 bits match length
   JSR GETLARGESRC                      ; grab low 8 bits in X, high 8 bits in A
   TAY                                  ; put high 8 bits in Y
 PREPARE_COPY_MATCH_Y
   TXA                                  ; set Z from the low byte of the count
   BEQ COPY_MATCH_LOOP                  ; low byte zero: Y already counts full passes
   INY                                  ; otherwise one extra (partial) pass
 COPY_MATCH_LOOP
   LDA $AAAA                            ; get one byte of backreference
   JSR PUTDST                           ; copy to destination
 !ifdef BACKWARD_DECOMPRESS {
   ; Backward decompression -- put backreference bytes backward
   LDA COPY_MATCH_LOOP+1                ; low byte zero? then borrow from high byte
   BNE GETMATCH_DONE
   DEC COPY_MATCH_LOOP+2
 GETMATCH_DONE
   DEC COPY_MATCH_LOOP+1
 } else {
   ; Forward decompression -- put backreference bytes forward
   INC COPY_MATCH_LOOP+1
   BNE GETMATCH_DONE
   INC COPY_MATCH_LOOP+2
 GETMATCH_DONE
 }
   DEX
   BNE COPY_MATCH_LOOP
   DEY
   BNE COPY_MATCH_LOOP
   JMP DECODE_TOKEN
 ; GETCOMBINEDBITS: build five offset bits from the token's Z bit and a nibble.
 ; in:  A = token shifted left so that the Z bit sits in bit 7
 ; out: A = (nibble << 1) | (inverted Z bit), carry clear
 ; DECOMPRESSION_DONE shares the final RTS; the end-of-data branch lands
 ; there with the token already pulled off the stack.
 GETCOMBINEDBITS
   EOR #$80                             ; invert the Z bit
   ASL                                  ; move it into the carry
   PHP                                  ; keep the carry across the nibble fetch
   JSR GETNIBBLE                        ; get nibble into bits 0-3 (for offset bits 1-4)
   PLP                                  ; merge Z bit as the carry bit (for offset bit 0)
 COMBINEDBITZ
   ROL                                  ; nibble -> bits 1-4; carry(!Z bit) -> bit 0 ; carry cleared
 DECOMPRESSION_DONE
   RTS
 ; GETNIBBLE: return the next 4-bit value from the stream in A (bits 0-3).
 ; Nibbles come two per byte, high nibble first: a fresh byte is stored in
 ; the operand of the LDA below (NIBBLES) and its high nibble is returned;
 ; the following call returns the low nibble of that stored byte.
 ; NIBCOUNT bit 0 records whether the low nibble is still pending.
 ; Always returns with the carry set.
 GETNIBBLE
 NIBBLES = *+1
   LDA #$AA                             ; last byte fetched (operand patched at run time)
   LSR NIBCOUNT                         ; carry = nibble pending; flag cleared
   BCS HAS_NIBBLES
   INC NIBCOUNT                         ; mark the low nibble of the new byte as pending
   JSR GETSRC                           ; get 2 nibbles
   STA NIBBLES
   LSR 
   LSR 
   LSR 
   LSR 
   SEC                                  ; same carry state as the pending-nibble path
 HAS_NIBBLES
   AND #$0F                             ; isolate low 4 bits of nibble
   RTS
 ; Byte I/O helpers. The source and destination addresses live in the
 ; operands of the LDA/STA instructions below: LZSA_SRC_LO/HI and
 ; LZSA_DST_LO/HI are defined as the addresses of those operand bytes.
 ;   GETPUT      - copy one byte from source to destination
 ;   PUTDST      - store A at the destination and step the destination address
 ;   GETLARGESRC - read two bytes: first into X, second into A
 ;   GETSRC      - read one byte into A and step the source address
 ; None of the instructions used here modify the carry flag.
 !ifdef BACKWARD_DECOMPRESS {
   ; Backward decompression -- get and put bytes backward
 GETPUT
   JSR GETSRC
 PUTDST
 LZSA_DST_LO = *+1
 LZSA_DST_HI = *+2
   STA $AAAA
   LDA PUTDST+1                         ; low byte zero? then borrow from high byte
   BNE PUTDST_DONE
   DEC PUTDST+2
 PUTDST_DONE
   DEC PUTDST+1
   RTS
 GETLARGESRC
   JSR GETSRC                           ; grab low 8 bits
   TAX                                  ; move to X
                                        ; fall through grab high 8 bits
 GETSRC
 LZSA_SRC_LO = *+1
 LZSA_SRC_HI = *+2
   LDA $AAAA
   PHA                                  ; keep the fetched byte while A tests the pointer
   LDA GETSRC+1                         ; low byte zero? then borrow from high byte
   BNE GETSRC_DONE
   DEC GETSRC+2
 GETSRC_DONE
   DEC GETSRC+1
   PLA                                  ; return the fetched byte in A
   RTS
 } else {
   ; Forward decompression -- get and put bytes forward
 GETPUT
   JSR GETSRC
 PUTDST
 LZSA_DST_LO = *+1
 LZSA_DST_HI = *+2
   STA $AAAA
   INC PUTDST+1
   BNE PUTDST_DONE
   INC PUTDST+2
 PUTDST_DONE
   RTS
 GETLARGESRC
   JSR GETSRC                           ; grab low 8 bits
   TAX                                  ; move to X
                                        ; fall through grab high 8 bits
 GETSRC
 LZSA_SRC_LO = *+1
 LZSA_SRC_HI = *+2
   LDA $AAAA
   INC GETSRC+1
   BNE GETSRC_DONE
   INC GETSRC+2
 GETSRC_DONE
   RTS
 }
--- a/Tools/unix/lzsa/asm/8088/LZSA1FTA.ASM
+++ b/Tools/unix/lzsa/asm/8088/LZSA1FTA.ASM
@ -0,0 +1,250 @@
 ;  lzsa1fta.asm time-efficient decompressor implementation for 8088
 ;  Turbo Assembler IDEAL mode dialect; can also be assembled with NASM.
 ;
 ;  Usual DOS assembler SMALL model assumptions apply.  This code:
 ;  - Assumes it was invoked via NEAR call (change RET to RETF for FAR calls)
 ;  - Is interrupt-safe
 ;  - Is not re-entrant (do not decompress while already running decompression)
 ;  - Trashes all data and segment registers
 ;
 ;  Copyright (C) 2019 Jim Leonard, Emmanuel Marty
 ;
 ;  This software is provided 'as-is', without any express or implied
 ;  warranty.  In no event will the authors be held liable for any damages
 ;  arising from the use of this software.
 ;
 ;  Permission is granted to anyone to use this software for any purpose,
 ;  including commercial applications, and to alter it and redistribute it
 ;  freely, subject to the following restrictions:
 ;
 ;  1. The origin of this software must not be misrepresented; you must not
 ;     claim that you wrote the original software. If you use this software
 ;     in a product, an acknowledgment in the product documentation would be
 ;     appreciated but is not required.
 ;  2. Altered source versions must be plainly marked as such, and must not be
 ;     misrepresented as being the original software.
 ;  3. This notice may not be removed or altered from any source distribution.
        ;Assembler setup: Ideal-mode syntax, 8086/8088 instruction set only.
        IDEAL
        P8086
 ;The table and the routine below share one paragraph-aligned public segment
 ;named CODE; the assembler is told to assume CS and DS both address it.
 SEGMENT CODE para public
 ASSUME  cs:CODE, ds:CODE
 ;Entry point exported to other modules.
 PUBLIC  lzsa1_decompress_speed
 ;  ---------------------------------------------------------------------------
 ;  Decompress raw LZSA1 block
 ;  inputs:
 ;  * ds:si: raw LZSA1 block
 ;  * es:di: output buffer
 ;  output:
 ;  * ax:    decompressed size
 ;  ---------------------------------------------------------------------------
 ; Must declare this in the code segment:
 ; (the lookup at @@got_literals uses a CS segment override, because DS
 ; addresses the caller's compressed data while decompressing)
 ; SHR4table[n] = n shr 4 for n = 00h..7Fh (128 entries). It converts the
 ; masked token value (LLL in bits 4-6) into a literal count with one XLAT.
 SHR4table:
        DB 00h,00h,00h,00h,00h,00h,00h,00h,00h,00h,00h,00h,00h,00h,00h,00h
        DB 01h,01h,01h,01h,01h,01h,01h,01h,01h,01h,01h,01h,01h,01h,01h,01h
        DB 02h,02h,02h,02h,02h,02h,02h,02h,02h,02h,02h,02h,02h,02h,02h,02h
        DB 03h,03h,03h,03h,03h,03h,03h,03h,03h,03h,03h,03h,03h,03h,03h,03h
        DB 04h,04h,04h,04h,04h,04h,04h,04h,04h,04h,04h,04h,04h,04h,04h,04h
        DB 05h,05h,05h,05h,05h,05h,05h,05h,05h,05h,05h,05h,05h,05h,05h,05h
        DB 06h,06h,06h,06h,06h,06h,06h,06h,06h,06h,06h,06h,06h,06h,06h,06h
        DB 07h,07h,07h,07h,07h,07h,07h,07h,07h,07h,07h,07h,07h,07h,07h,07h
 PROC    lzsa1_decompress_speed  NEAR
 ;Register roles while decoding (as established by the code below):
 ;  ds:si  next compressed byte         es:di  next output byte
 ;  bx     offset of SHR4table (for xlat)
 ;  dx     copy of the current token, later the match offset
 ;  cx     byte count of the pending copy; every copy leaves it at 0, which
 ;         is what lets "xchg cx,ax" clear ah at @@decode_token
 ;  bp     temporary home for ds or si while ds:si is aimed at the output
 ;The original di stays on the stack until @@done_decompressing.
 lzsa1_start:
        push    di              ;remember decompression offset
        cld                     ;ensure string ops move forward
        mov     bx,offset SHR4table
        xor     cx,cx
@@decode_token:
        xchg    cx,ax           ;clear ah (cx = 0 from match copy's rep movsb)
        lodsb                   ;read token byte: O|LLL|MMMM
        mov     dx,ax           ;copy our token to dl for later MMMM handling
        and     al,070H         ;isolate literals length in token (LLL)
        jz      @@check_offset_size ;if LLL=0, we have no literals; goto match
        cmp     al,070H         ;LITERALS_RUN_LEN?
        jne     @@got_literals  ;no, we have full count from token; go copy
        lodsb                   ;grab extra length byte
        add     al,07H          ;add LITERALS_RUN_LEN
        jnc     @@got_literals_exact ;if no overflow, we have full count
 ;The add wrapped past 255. A zero result means the extra byte was 249
 ;(16-bit length follows); otherwise it was 250 (one more length byte follows).
        je      @@big_literals
@@mid_literals:
        lodsb                   ;grab single extra length byte
        inc     ah              ;add 256
        xchg    cx,ax           ;with longer counts, we can save some time
        shr     cx,1            ;by doing a word copy instead of a byte copy.
        rep     movsw           ;We don't need to account for overlap because
        adc     cx,0            ;source for literals isn't the output buffer.
        rep     movsb
        jmp     @@check_offset_size
@@big_literals:
        lodsw                   ;grab 16-bit extra length
        xchg    cx,ax           ;with longer counts, we can save some time
        shr     cx,1            ;by doing a word copy instead of a byte copy.
        rep     movsw
 ;cx is 0 after the rep and CF still holds the bit shifted out by shr, so
 ;this leaves cx=1 exactly when the length was odd.
        adc     cx,0
        rep     movsb
        jmp     @@check_offset_size
@@got_literals:
        segcs   xlat            ;shift literals length into place
@@got_literals_exact:
        xchg    cx,ax
        rep     movsb           ;copy cx literals from ds:si to es:di
@@check_offset_size:
        test    dl,dl           ;check match offset size in token (O bit)
        js      @@get_long_offset ;load absolute 16-bit match offset
        mov     ah,0ffh         ;set up high byte
        lodsb                   ;load low byte
@@get_match_length:
        xchg    dx,ax           ;dx: match offset  ax: original token
        and     al,0FH          ;isolate match length in token (MMMM)
        cmp     al,0FH          ;MATCH_RUN_LEN?
        jne     @@got_matchlen_short  ;no, we have the full match length from the token, go copy
        lodsb                   ;grab extra length byte
        add     al,012H         ;add MIN_MATCH_SIZE + MATCH_RUN_LEN
        jnc     @@do_long_copy  ;if no overflow, we have the entire length
 ;The add wrapped past 255. A non-zero result means the extra byte was 239
 ;(one more length byte follows); zero means 238 (16-bit length follows).
        jne     @@mid_matchlen
        lodsw                   ;grab 16-bit length
        xchg    cx,ax           ;get ready to do a long copy
        jcxz    @@done_decompressing ;wait, is it the EOD marker? Exit if so
        jmp     @@copy_len_preset ;otherwise, do the copy
@@get_long_offset:
        lodsw                   ;Get 2-byte match offset
        jmp     @@get_match_length
@@got_matchlen_short:
        add     al,3            ;add MIN_MATCH_SIZE
        xchg    cx,ax           ;copy match length into cx
        mov     bp,ds           ;save ds
        mov     ax,es
        mov     ds,ax           ;ds=es
        xchg    ax,si           ;save si
        mov     si,di           ;ds:si now points at back reference in output data
 ;dx is added as-is, so the offset is used as a negative displacement from di
        add     si,dx
        rep     movsb           ;copy match
        xchg    si,ax           ;restore si
        mov     ds,bp           ;restore ds
        jmp     @@decode_token  ;go decode another token
@@done_decompressing:
        pop     ax              ;retrieve the original decompression offset
        xchg    di,ax           ;compute decompressed size
        sub     ax,di
        ret                     ;done decompressing, exit to caller
 ;With a confirmed longer match length, we have an opportunity to optimize for
 ;the case where a single byte is repeated long enough that we can benefit
 ;from rep movsw to perform the run (instead of rep movsb).
@@mid_matchlen:
        lodsb                   ;grab single extra length byte
        inc     ah              ;add 256
@@do_long_copy:
        xchg    cx,ax           ;copy match length into cx
@@copy_len_preset:
        push    ds              ;save ds
        mov     bp,es
        mov     ds,bp           ;ds=es
        mov     bp,si           ;save si
        mov     si,di           ;ds:si now points at back reference in output data
        add     si,dx
        cmp     dx,-2           ;do we have a byte/word run to optimize?
        jae     @@do_run        ;perform a run
 ;You may be tempted to change "jae" to "jge" because DX is a signed number.
 ;Don't!  The total window is 64k, so if you treat this as a signed comparison,
 ;you will get incorrect results for offsets over 32K.
 ;If we're here, we have a long copy and it isn't byte-overlapping (if it
 ;overlapped, we'd be in @@do_run)  So, let's copy faster with REP MOVSW.
 ;This won't affect 8088 that much, but it speeds up 8086 and higher.
        shr     cx,1
        rep     movsw
        adc     cx,0
        rep     movsb
        mov     si,bp           ;restore si
        pop     ds
        jmp     @@decode_token  ;go decode another token
@@do_run:
 ;Flags are still those of "cmp dx,-2": equal means offset -2 (a repeating
 ;two-byte pattern), otherwise the offset is -1 (a single repeated byte).
        je      @@do_run_2      ;fall through to byte (common) if not word run
@@do_run_1:
        lodsb                   ;load first byte of run into al
        mov     ah,al
        shr     cx,1
        rep     stosw           ;perform word run
        adc     cx,0
        rep     stosb           ;finish word run
        mov     si,bp           ;restore si
        pop     ds
        jmp     @@decode_token  ;go decode another token
@@do_run_2:
        lodsw                   ;load first word of run
        shr     cx,1
        rep     stosw           ;perform word run
        adc     cx,0            ;despite 2-byte offset, compressor might
        rep     stosb           ;output odd length. better safe than sorry.
        mov     si,bp           ;restore si
        pop     ds
        jmp     @@decode_token  ;go decode another token
 ENDP    lzsa1_decompress_speed
 ENDS    CODE
 END
 ;Speed optimization history (decompression times in microseconds @ 4.77 MHz):
 ; original E. Marty code    shuttle 123208 alice 65660 robotron 407338 ***
 ; table for shr al,4        shuttle 120964 alice 63230 robotron 394733 +++
 ; push/pop to mov/mov       shuttle 118176 alice 61835 robotron 386762 +++
 ; movsw for literalcpys     shuttle 124102 alice 64908 robotron 400220 --- rb
 ; stosw for byte runs       shuttle 118897 alice 65040 robotron 403518 --- rb
 ; better stosw for runs     shuttle 117712 alice 65040 robotron 403343 +--
 ; disable RLE by default    shuttle 116924 alice 60783 robotron 381226 +++
 ; optimize got_matchlen     shuttle 115294 alice 59588 robotron 374330 +++
 ; fall through to getML     shuttle 113258 alice 59572 robotron 372004 +++
 ; fall through to midLI     shuttle 113258 alice 59572 robotron 375060 ..- rb
 ; fall through midMaLen     shuttle 113247 alice 59572 robotron 372004 +.+
 ; movsw for litlen > 255    shuttle 113247 alice 59572 robotron 371612 ..+
 ; rep stosw for long runs   shuttle 113247 alice 59572 robotron 371612 ...
 ; rep movsw for long cpys   shuttle 113247 alice 59572 robotron 371035 ..+
 ; xchg/dec ah -> mov ah,val shuttle 112575 alice 59272 robotron 369198 +++
 ; force >12h len.to longcpy shuttle 101998 alice 59266 robotron 364459 +.+
 ; more efficient run branch shuttle 102239 alice 59297 robotron 364716 --- rb
 ; even more eff. run branch shuttle 101998 alice 59266 robotron 364459 ***
 ; BUGFIX - bad sign compare shuttle 101955 alice 59225 robotron 364117 +++
 ; reverse 16-bit len compar shuttle 102000 alice 59263 robotron 364460 --- rb
 ; jcxz for EOD detection    no change to speed, but is 1 byte shorter  +++
 ; force movsw for literals  shuttle 107183 alice 62555 robotron 379524 --- rb
 ; defer shr4 until necessry shuttle 102069 alice 60236 robotron 364096 ---
 ; skip literals if LLL=0    shuttle  98655 alice 57849 robotron 363358 ---
 ; fall through to mid_liter shuttle  98595 alice 57789 robotron 361998 +++
 ; == jumptable experiments begin ==
 ; jumptable for small copys shuttle 101594 alice 61078 robotron 386018 ---
 ; start:xchg instead of mov shuttle 100948 alice 60467 robotron 381112 +++
 ; use table for LLL=0 check shuttle 106972 alice 63333 robotron 388304 --- rb
 ; jmptbl to fallthrough mov shuttle 102532 alice 60760 robotron 383070 ---
 ; cpy fallthrough check_ofs shuttle  98939 alice 58917 robotron 371019 +**
 ; single jumptable jump     shuttle  97528 alice 57264 robotron 362194 ++*
 ; conditional check for L=7 shuttle  98610 alice 58521 robotron 368153 --- rb
 ; rip out the jumptable :-/ shuttle  97616 alice 57128 robotron 360697 +++
 ; defer add MIN_MATCH_SIZE  shuttle  97250 alice 57004 robotron 361191 ++?
 ; cache constants in regs   shuttle 104681 alice 59939 robotron 380125 --- rb
--- a/Tools/unix/lzsa/asm/8088/LZSA1JMP.ASM
+++ b/Tools/unix/lzsa/asm/8088/LZSA1JMP.ASM
@ -0,0 +1,523 @@
 ; lzsa1jmp.asm time-efficient decompressor implementation for 808x CPUs.
 ; Turbo Assembler IDEAL mode dialect.
 ; (Is supposed to also assemble with NASM's IDEAL mode support, but YMMV.)
 ;
 ; This code assembles to about 3K of lookup tables and unrolled code,
 ; but the tradeoff for that size is the absolute fastest decompressor
 ; of LZSA1 block data for 808x CPUs.
 ; If you need moderately fast code with less size, see LZSA1FTA.ASM.
 ; If you need the smallest decompression code, see decompress_small_v1.S.
 ;
 ; Usual DOS assembler SMALL model assumptions apply.  This code:
 ; - Assumes it was invoked via NEAR call (change RET to RETF for FAR calls)
 ; - Is interrupt-safe
 ; - Is not re-entrant (do not decompress while already running decompression)
 ; - Trashes all data and segment registers
 ;
 ; Copyright (C) 2019 Jim Leonard, Emmanuel Marty
 ;
 ; This software is provided 'as-is', without any express or implied
 ; warranty.  In no event will the authors be held liable for any damages
 ; arising from the use of this software.
 ;
 ; Permission is granted to anyone to use this software for any purpose,
 ; including commercial applications, and to alter it and redistribute it
 ; freely, subject to the following restrictions:
 ;
 ; 1. The origin of this software must not be misrepresented; you must not
 ;    claim that you wrote the original software. If you use this software
 ;    in a product, an acknowledgment in the product documentation would be
 ;    appreciated but is not required.
 ; 2. Altered source versions must be plainly marked as such, and must not be
 ;    misrepresented as being the original software.
 ; 3. This notice may not be removed or altered from any source distribution.
 ;
 ; ===========================================================================
 ;
 ; The key area to concentrate on when optimizing LZSA1 decompression speed is
 ; reducing time spent handling the shortest matches. This is for two reasons:
 ;   1. shorter matches are more common
 ;   2. short matches are least efficient in terms of decomp speed per byte
 ; You can confirm #1 using the --stats mode of the compressor.
 ;
 ; Branches are costly on 8086.  To ensure we branch as little as possible, a
 ; jumptable will be used to branch directly to as many direct decode paths as
 ; possible.  This will burn up 512 bytes of RAM for a jumptable, and a few
 ; hundred bytes of duplicated program code (rather than JMP/CALL common code
 ; blocks, we inline them to avoid the branch overhead).
 ;
 ; ===========================================================================
 ;
 ; === LZSA1 block reference:
 ;
 ; Blocks encoded as LZSA1 are composed from consecutive commands.
 ; Each command follows this format:
 ;
 ; token: <O|LLL|MMMM>
 ; optional extra literal length
 ; literal values
 ; match offset low
 ; optional match offset high
 ; optional extra encoded match length
 ;
 ;
 ; === LZSA1 Token Reference:
 ;
 ; 7 6 5 4 3 2 1 0
 ; O L L L M M M M
 ;
 ; L: 3-bit literals length (0-6, or 7 if extended). If the number of literals for
 ; this command is 0 to 6, the length is encoded in the token and no extra bytes
 ; are required. Otherwise, a value of 7 is encoded and extra bytes follow as
 ; 'optional extra literal length'
 ;
 ; M: 4-bit encoded match length (0-14, or 15 if extended). Likewise, if the
 ; encoded match length for this command is 0 to 14, it is directly stored,
 ; otherwise 15 is stored and extra bytes follow as 'optional extra encoded match
 ; length'. Except for the last command in a block, a command always contains a
 ; match, so the encoded match length is the actual match length, offset by the
 ; minimum which is 3 bytes. For instance, an actual match length of 10 bytes to
 ; be copied, is encoded as 7.
 ;
 ; O: set for a 2-bytes match offset, clear for a 1-byte match offset
 ;
 ;
 ; === Decoding extended literal length:
 ;
 ; If the literals length is 7 or more, then an extra byte follows here, with
 ; three possible values:
 ;
 ;   0-248: the value is added to the 7 stored in the token.
 ;   250: a second byte follows. The final literals value is 256 + the second byte.
 ;   249: a little-endian 16-bit value follows, forming the final literals value.
 ;
 ;
 ; === Decoding match offsets:
 ;
 ; match offset low: The low 8 bits of the match offset follows.
 ;
 ; optional match offset high: If the 'O' bit (bit 7) is set in the token, the
 ; high 8 bits of the match offset follow, otherwise they are understood to be all
 ; set to 1. For instance, a short offset of 0x70 is interpreted as 0xff70
 ;
 ;
 ; === Decoding extra encoded match length:
 ;
 ; optional extra encoded match length: If the encoded match length is 15 or more,
 ; the 'M' bits in the token form the value 15, and an extra byte follows here,
 ; with three possible types of value.
 ;
 ;  0-237: the value is added to the 15 stored in the token. The final value is 3 + 15 + this byte.
 ;  239:   a second byte follows. The final match length is 256 + the second byte.
 ;  238:   a second and third byte follow, forming a little-endian 16-bit value.
 ;         The final encoded match length is that 16-bit value.
 ;
 ; ===========================================================================
        IDEAL   ; Use Turbo Assembler IDEAL syntax checking
        P8086   ; Restrict code generation to the 808x and later
        JUMPS   ; Perform fixups for out-of-bound conditional jumps
                ; This is required for the (L=07 & M=0Fh) decode paths as they
                ; have the most code, but these are uncommon paths so the
                ; tiny speed loss in just these paths is not a concern.
 ; The jump table and the routine share one paragraph-aligned public segment
 ; named CODE; the assembler is told to assume CS and DS both address it.
 SEGMENT CODE para public
 ASSUME  cs:CODE, ds:CODE
 ; Entry point exported to other modules.
 PUBLIC  lzsa1_decompress_speed_jumptable
 ; EQU helper statements (so we can construct a jump table without going crazy)
 ; Naming scheme: l<N>m<x><b>
 ;   <N> = literal count held in the token (0-6), or 'e' for extended (LLL=7)
 ;   <x> = 'l' when the match length is in the token, 'e' when MMMM=Fh
 ;   <b> = number of match offset bytes (1 or 2)
 ; minmatch is the value added to a token's MMMM field to get the real match
 ; length; litrunlen is the value added to the extra literal length byte.
 minmatch EQU 3
 litrunlen EQU 7
 leml1 EQU OFFSET lit_ext_mat_len_1b
 leme1 EQU OFFSET lit_ext_mat_ext_1b
 leml2 EQU OFFSET lit_ext_mat_len_2b
 leme2 EQU OFFSET lit_ext_mat_ext_2b
 ;short-circuit special cases for 0 through 6 literal copies:
 ;each lit_len_* path starts with six MOVSB instructions, so an entry point
 ;k bytes past the label executes only the remaining 6-k of them.
 l6ml1 EQU OFFSET lit_len_mat_len_1b
 l6me1 EQU OFFSET lit_len_mat_ext_1b
 l6ml2 EQU OFFSET lit_len_mat_len_2b
 l6me2 EQU OFFSET lit_len_mat_ext_2b
 l5ml1 EQU OFFSET lit_len_mat_len_1b + 1
 l5me1 EQU OFFSET lit_len_mat_ext_1b + 1
 l5ml2 EQU OFFSET lit_len_mat_len_2b + 1
 l5me2 EQU OFFSET lit_len_mat_ext_2b + 1
 l4ml1 EQU OFFSET lit_len_mat_len_1b + 2
 l4me1 EQU OFFSET lit_len_mat_ext_1b + 2
 l4ml2 EQU OFFSET lit_len_mat_len_2b + 2
 l4me2 EQU OFFSET lit_len_mat_ext_2b + 2
 l3ml1 EQU OFFSET lit_len_mat_len_1b + 3
 l3me1 EQU OFFSET lit_len_mat_ext_1b + 3
 l3ml2 EQU OFFSET lit_len_mat_len_2b + 3
 l3me2 EQU OFFSET lit_len_mat_ext_2b + 3
 l2ml1 EQU OFFSET lit_len_mat_len_1b + 4
 l2me1 EQU OFFSET lit_len_mat_ext_1b + 4
 l2ml2 EQU OFFSET lit_len_mat_len_2b + 4
 l2me2 EQU OFFSET lit_len_mat_ext_2b + 4
 l1ml1 EQU OFFSET lit_len_mat_len_1b + 5
 l1me1 EQU OFFSET lit_len_mat_ext_1b + 5
 l1ml2 EQU OFFSET lit_len_mat_len_2b + 5
 l1me2 EQU OFFSET lit_len_mat_ext_2b + 5
 l0ml1 EQU OFFSET lit_len_mat_len_1b + 6 ; MMMM handling comes after LLL code
 l0me1 EQU OFFSET lit_len_mat_ext_1b + 6 ; MMMM handling comes after LLL code
 l0ml2 EQU OFFSET lit_len_mat_len_2b + 6 ; MMMM handling comes after LLL code
 l0me2 EQU OFFSET lit_len_mat_ext_2b + 6 ; MMMM handling comes after LLL code
 ; === Hand-written (!) jumptable actually begins here.
 ; Located before the program code results in an extra JMP and 3 wasted bytes,
 ; but it makes the code easier to follow in this location.
 ; Relocate the jump table after the ENDP directive to save 3 bytes.
 ;
 ; Layout: 256 word entries indexed by the raw token byte. Each row below is
 ; one value of the token's high nibble (O|LLL) and each column one value of
 ; MMMM. Only column F (extended match length) uses the "me" entry points;
 ; rows 7 and F (LLL=7) use the extended-literal paths.
 ;
 ; 7 6 5 4 3 2 1 0
 ; O L L L M M M M
 ;
 ;         0     1     2     3     4     5     6     7     8     9     a     b     c     d     e     f
 jtbl DW l0ml1,l0ml1,l0ml1,l0ml1,l0ml1,l0ml1,l0ml1,l0ml1,l0ml1,l0ml1,l0ml1,l0ml1,l0ml1,l0ml1,l0ml1,l0me1 ;0
     DW l1ml1,l1ml1,l1ml1,l1ml1,l1ml1,l1ml1,l1ml1,l1ml1,l1ml1,l1ml1,l1ml1,l1ml1,l1ml1,l1ml1,l1ml1,l1me1 ;1
     DW l2ml1,l2ml1,l2ml1,l2ml1,l2ml1,l2ml1,l2ml1,l2ml1,l2ml1,l2ml1,l2ml1,l2ml1,l2ml1,l2ml1,l2ml1,l2me1 ;2
     DW l3ml1,l3ml1,l3ml1,l3ml1,l3ml1,l3ml1,l3ml1,l3ml1,l3ml1,l3ml1,l3ml1,l3ml1,l3ml1,l3ml1,l3ml1,l3me1 ;3
     DW l4ml1,l4ml1,l4ml1,l4ml1,l4ml1,l4ml1,l4ml1,l4ml1,l4ml1,l4ml1,l4ml1,l4ml1,l4ml1,l4ml1,l4ml1,l4me1 ;4
     DW l5ml1,l5ml1,l5ml1,l5ml1,l5ml1,l5ml1,l5ml1,l5ml1,l5ml1,l5ml1,l5ml1,l5ml1,l5ml1,l5ml1,l5ml1,l5me1 ;5
     DW l6ml1,l6ml1,l6ml1,l6ml1,l6ml1,l6ml1,l6ml1,l6ml1,l6ml1,l6ml1,l6ml1,l6ml1,l6ml1,l6ml1,l6ml1,l6me1 ;6
     DW leml1,leml1,leml1,leml1,leml1,leml1,leml1,leml1,leml1,leml1,leml1,leml1,leml1,leml1,leml1,leme1 ;7
     DW l0ml2,l0ml2,l0ml2,l0ml2,l0ml2,l0ml2,l0ml2,l0ml2,l0ml2,l0ml2,l0ml2,l0ml2,l0ml2,l0ml2,l0ml2,l0me2 ;8
     DW l1ml2,l1ml2,l1ml2,l1ml2,l1ml2,l1ml2,l1ml2,l1ml2,l1ml2,l1ml2,l1ml2,l1ml2,l1ml2,l1ml2,l1ml2,l1me2 ;9
     DW l2ml2,l2ml2,l2ml2,l2ml2,l2ml2,l2ml2,l2ml2,l2ml2,l2ml2,l2ml2,l2ml2,l2ml2,l2ml2,l2ml2,l2ml2,l2me2 ;a
     DW l3ml2,l3ml2,l3ml2,l3ml2,l3ml2,l3ml2,l3ml2,l3ml2,l3ml2,l3ml2,l3ml2,l3ml2,l3ml2,l3ml2,l3ml2,l3me2 ;b
     DW l4ml2,l4ml2,l4ml2,l4ml2,l4ml2,l4ml2,l4ml2,l4ml2,l4ml2,l4ml2,l4ml2,l4ml2,l4ml2,l4ml2,l4ml2,l4me2 ;c
     DW l5ml2,l5ml2,l5ml2,l5ml2,l5ml2,l5ml2,l5ml2,l5ml2,l5ml2,l5ml2,l5ml2,l5ml2,l5ml2,l5ml2,l5ml2,l5me2 ;d
     DW l6ml2,l6ml2,l6ml2,l6ml2,l6ml2,l6ml2,l6ml2,l6ml2,l6ml2,l6ml2,l6ml2,l6ml2,l6ml2,l6ml2,l6ml2,l6me2 ;e
     DW leml2,leml2,leml2,leml2,leml2,leml2,leml2,leml2,leml2,leml2,leml2,leml2,leml2,leml2,leml2,leme2 ;f
 PROC    lzsa1_decompress_speed_jumptable  NEAR
 ; ---------------------------------------------------------------------------
 ; Decompress raw LZSA1 block
 ; inputs:
 ; * ds:si: raw LZSA1 block
 ; * es:di: output buffer
 ; output:
 ; * ax:    decompressed size
 ; register roles while decoding (as used by the macros and paths below):
 ; * bp:    0 + token after the token fetch, then the match offset
 ; * bx:    jump table index, later the saved ds during a match copy
 ; * cx:    byte count of the pending copy (every copy leaves it at 0)
 ; * dx:    saved si on the byte/word run paths of do_match_copy_long
 ; ---------------------------------------------------------------------------
 ; Fetch a 1-byte match offset (token O bit clear). The high byte is forced to
 ; FFh. Expects bp = 0 + token; leaves bp = offset and ax = 0 + token.
 MACRO get_byte_match_offset
        mov     ah,0ffh         ;O=0, so set up offset's high byte
        lodsb                   ;load low byte; ax=match offset
        xchg    bp,ax           ;bp=match offset  ax=00 + original token
 ENDM
 ; Fetch a 2-byte match offset (token O bit set).
 ; Expects bp = 0 + token; leaves bp = offset and ax = 0 + token.
 MACRO get_word_match_offset
        lodsw                   ;ax=match offset
        xchg    bp,ax           ;bp=match offset  ax=00 + original token
 ENDM
 MACRO do_match_copy_long
 LOCAL do_run, do_run_w
 ; Copies a long match as optimally as possible.
 ; requirements: cx=length, bp=negative offset, ds:si=compdata, es:di=output
 ; trashes: ax, bx (and dx on the do_run / do_run_w paths)
 ; must leave cx=0 at exit
 ; Every path ends with "jmp decode_token", so code placed after an expansion
 ; of this macro is only reachable through a label.
        mov     bx,ds           ;save ds
        mov     ax,es
        mov     ds,ax           ;ds=es
        xchg    ax,si           ;save si
        lea     si,[bp+di]      ;si = output buffer + negative match offset
        cmp     bp,-2           ;do we have a byte/word run to optimize?
        jae     do_run          ;perform a run if so, otherwise fall through
 ;You may be tempted to change "jae" to "jge" because BP is a signed number.
 ;Don't!  The total window is 64k, so if you treat this as a signed comparison,
 ;you will get incorrect results for offsets over 32K.
 ;If we're here, we have a long copy and it isn't byte-overlapping (if it
 ;overlapped, we'd be in do_run)  So, let's copy faster with REP MOVSW.
 ;This affects 8088 only slightly, but is a bigger win on 8086 and higher.
        shr     cx,1
        rep     movsw
 ;cx is 0 after the rep and CF still holds the bit shifted out by shr, so
 ;adding the carry to cl alone is enough to pick up an odd trailing byte.
        adc     cl,0
        rep     movsb
        xchg    si,ax           ;restore si
        mov     ds,bx           ;restore ds
        jmp     decode_token
 do_run:
 ;Flags are still those of "cmp bp,-2": equal means offset -2 (a repeating
 ;two-byte pattern), otherwise the offset is -1 (a single repeated byte).
        je      do_run_w        ;if applicable, handle word-sized value faster
        xchg    dx,ax           ;save si into dx, as ax is getting trashed
        lodsb                   ;load first byte of run into al
        mov     ah,al
        shr     cx,1
        rep     stosw           ;perform word run
        adc     cl,0
        rep     stosb           ;finish word run
        mov     si,dx           ;restore si
        mov     ds,bx           ;restore ds
        jmp     decode_token
 do_run_w:
        xchg    dx,ax           ;save si into dx, as ax is getting trashed
        lodsw                   ;load first word of run
        shr     cx,1
        rep     stosw           ;perform word run
        adc     cl,0            ;despite 2-byte offset, compressor might
        rep     stosb           ;output odd length. better safe than sorry.
        mov     si,dx           ;restore si
        mov     ds,bx           ;restore ds
        jmp     decode_token
 ENDM
 MACRO do_match_copy
 ; Copies a shorter match with as little overhead as possible.
 ; requirements: cx=length, bp=negative offset, ds:si=compdata, es:di=output
 ; trashes: ax, bx
 ; must leave cx=0 at exit
 ; Ends with "jmp decode_token", so nothing after an expansion of this macro
 ; is reached by falling through.
        mov     bx,ds           ;save ds
        mov     ax,es
        mov     ds,ax           ;ds=es
        xchg    ax,si           ;save si
        lea     si,[bp+di]      ;si = output buffer + negative match offset
 ;byte-wise copy from the already-written output to the current position
        rep     movsb
        xchg    si,ax           ;restore si
        mov     ds,bx           ;restore ds
        jmp     decode_token
 ENDM
 MACRO do_literal_copy
 ; Copies a literal sequence using words.
 ; Meant for longer lengths; for 128 bytes or less, use REP MOVSB.
 ; requirements: cx=length, ds:si=compdata, es:di=output
 ; must leave cx=0 at exit
 ; Unlike the match copy macros, this one falls through to whatever follows.
        shr     cx,1
        rep     movsw
 ;cx is 0 here and CF still holds the odd-length bit from shr
        adc     cl,0
        rep     movsb
 ENDM
 ; Handle MMMM=0-Eh: the match length comes straight from the token.
 ; Expects ax = 0 + token and bp = match offset. Does not fall through,
 ; because do_match_copy ends with a jump back to decode_token.
 MACRO copy_small_match_len
        and     al,0FH          ;isolate length in token (MMMM)
        add     al,minmatch     ;ax=match length
        xchg    cx,ax           ;cx=match length
        do_match_copy           ;copy match with cx=length, bp=offset
 ENDM
 MACRO copy_large_match_len
 LOCAL val239, val238, EOD
 ; Handle MMMM=Fh
 ; Assumptions: ah=0 from get_????_match_offset's xchg
 ; Expects bp = match offset. Each do_match_copy_long expansion ends with a
 ; jump to decode_token, so the labelled sections below never run in sequence.
        lodsb                   ;grab extra match length byte
        add     al,0Fh+minmatch ;add MATCH_RUN_LEN + MIN_MATCH_SIZE
 ;A zero sum can only come from the byte 238 (238+18 wraps to 0), so ZF is
 ;tested first; a carry without zero can only come from 239.
        jz      val238          ;if zf & cf, 238: get 16-bit match length
        jc      val239          ;if cf,      239: get extra match length byte
        xchg    cx,ax           ;otherwise, we have our match length
        do_match_copy_long      ;copy match with cx=length, bp=offset
 val239:
        lodsb                   ;ah=0; grab single extra length byte
        inc     ah              ;ax=256+length byte
        xchg    cx,ax
        do_match_copy_long      ;copy match with cx=length, bp=offset
 val238:
        lodsw                   ;grab 16-bit length
        xchg    cx,ax
        jcxz    EOD             ;is it the EOD marker? Exit if so
        do_match_copy_long      ;copy match with cx=length, bp=offset
 EOD:
        jmp     done_decompressing
 ENDM
 ; Entry: save the starting output offset for the final size computation and
 ; clear cx so that the first "xchg cx,ax" below yields ax=0.
 lzsa1_start:
        push    di              ;remember decompression offset
        cld                     ;ensure string ops move forward
        xor     cx,cx
 ; Fetch the next token and dispatch on all 8 bits of it through jtbl.
 ; Every decode path jumps back here with cx=0.
 decode_token:
        xchg    cx,ax           ;clear ah (cx = 0 from match copy's REP)
        lodsb                   ;read token byte: O|LLL|MMMM
        mov     bp,ax           ;preserve 0+token in bp for later MMMM handling
        mov     bx,ax           ;prep for table lookup
        shl     bx,1            ;adjust for offset word size
        jmp     [cs:jtbl+bx]    ;jump directly to relevant decode path
 ; There are eight basic decode paths for an LZSA1 token.  Each of these
 ; paths perform only the necessary actions to decode the token and then
 ; fetch the next token.  This results in a lot of code duplication, but
 ; it is the only way to get down to two branches per token (jump to unique
 ; decode path, then jump back to next token) for the most common cases.
 ; Path #1: LLL=0-6, MMMM=0-Eh, O=0 (1-byte match offset)
 ; Handle LLL=0-6 by jumping directly into # of bytes to copy (6 down to 1)
 ; The jump table enters this run of MOVSBs at label+(6-LLL), so exactly LLL
 ; literals are copied; do not insert anything between the label and them.
 lit_len_mat_len_1b:
        movsb
        movsb
        movsb
        movsb
        movsb
        movsb
        get_byte_match_offset
        copy_small_match_len
 ; Path #2: LLL=0-6, MMMM=Fh,   O=0 (1-byte match offset)
 ; The jump table enters this run of MOVSBs at label+(6-LLL), so exactly LLL
 ; literals are copied; do not insert anything between the label and them.
 lit_len_mat_ext_1b:
        movsb
        movsb
        movsb
        movsb
        movsb
        movsb
        get_byte_match_offset
        copy_large_match_len
 ; Path #3: LLL=7,   MMMM=0-Eh, O=0 (1-byte match offset)
 ; The three literal-length cases are separate straight-line sequences: each
 ; one ends in copy_small_match_len, which jumps back to decode_token.
 lit_ext_mat_len_1b:
 ; on entry: ax=0 + token, bp=ax
        lodsb                   ;grab extra literal length byte
        add     al,litrunlen    ;add 7h literal run length
 ;A zero sum can only come from the byte 249 (249+7 wraps to 0), so ZF is
 ;tested first; a carry without zero can only come from 250.
        jz      @@val249_3      ;if zf & cf, 249: get 16-bit literal length
        jc      @@val250_3      ;if cf,      250: get extra literal length byte
        xchg    cx,ax           ;otherwise, we have our literal length
        do_literal_copy         ;this might be better as rep movsw !!! benchmark
        get_byte_match_offset
        copy_small_match_len
@@val250_3:
        lodsb                   ;ah=0; grab single extra length byte
        inc     ah              ;ax=256+length byte
        xchg    cx,ax
        do_literal_copy
        get_byte_match_offset
        copy_small_match_len
@@val249_3:
        lodsw                   ;grab 16-bit length
        xchg    cx,ax
        do_literal_copy
        get_byte_match_offset
        copy_small_match_len
 ; Path #4: LLL=7,   MMMM=Fh,   O=0 (1-byte match offset)
 ; The three literal-length cases are separate straight-line sequences: each
 ; one ends in copy_large_match_len, which never falls through.
 lit_ext_mat_ext_1b:
 ; on entry: ax=0 + token, bp=ax
        lodsb                   ;grab extra literal length byte
        add     al,litrunlen    ;add 7h literal run length
 ;A zero sum can only come from the byte 249 (249+7 wraps to 0), so ZF is
 ;tested first; a carry without zero can only come from 250.
        jz      @@val249_4      ;if zf & cf, 249: get 16-bit literal length
        jc      @@val250_4      ;if cf,      250: get extra literal length byte
        xchg    cx,ax           ;otherwise, we have our literal length
        do_literal_copy         ;this might be better as rep movsw !!! benchmark
        get_byte_match_offset
        copy_large_match_len
@@val250_4:
        lodsb                   ;ah=0; grab single extra length byte
        inc     ah              ;ax=256+length byte
        xchg    cx,ax
        do_literal_copy
        get_byte_match_offset
        copy_large_match_len
@@val249_4:
        lodsw                   ;grab 16-bit length
        xchg    cx,ax
        do_literal_copy
        get_byte_match_offset
        copy_large_match_len
 ; Path #5: LLL=0-6, MMMM=0-Eh, O=1 (2-byte match offset)
 ; Handle LLL=0-6 by jumping directly into # of bytes to copy (6 down to 1)
 ; The jump table enters this run of MOVSBs at label+(6-LLL), so exactly LLL
 ; literals are copied; do not insert anything between the label and them.
 lit_len_mat_len_2b:
        movsb
        movsb
        movsb
        movsb
        movsb
        movsb
        get_word_match_offset
        copy_small_match_len
 ; Path #6: LLL=0-6, MMMM=Fh,   O=1 (2-byte match offset)
 ; The jump table enters this run of MOVSBs at label+(6-LLL), so exactly LLL
 ; literals are copied; do not insert anything between the label and them.
 lit_len_mat_ext_2b:
        movsb
        movsb
        movsb
        movsb
        movsb
        movsb
        get_word_match_offset
        copy_large_match_len
 ; Path #7: LLL=7,   MMMM=0-Eh, O=1 (2-byte match offset)
 ; The three literal-length cases are separate straight-line sequences: each
 ; one ends in copy_small_match_len, which jumps back to decode_token.
 lit_ext_mat_len_2b:
 ; on entry: ax=0 + token, bp=ax
        lodsb                   ;grab extra literal length byte
        add     al,litrunlen    ;add 7h literal run length
 ;A zero sum can only come from the byte 249 (249+7 wraps to 0), so ZF is
 ;tested first; a carry without zero can only come from 250.
        jz      @@val249_7      ;if zf & cf, 249: get 16-bit literal length
        jc      @@val250_7      ;if cf,      250: get extra literal length byte
        xchg    cx,ax           ;otherwise, we have our literal length
        do_literal_copy         ;this might be better as rep movsw !!! benchmark
        get_word_match_offset
        copy_small_match_len
@@val250_7:
        lodsb                   ;ah=0; grab single extra length byte
        inc     ah              ;ax=256+length byte
        xchg    cx,ax
        do_literal_copy
        get_word_match_offset
        copy_small_match_len
@@val249_7:
        lodsw                   ;grab 16-bit length
        xchg    cx,ax
        do_literal_copy
        get_word_match_offset
        copy_small_match_len
 ; Path #8: LLL=7,   MMMM=Fh,   O=1 (2-byte match offset)
 ; The three literal-length cases are separate straight-line sequences: each
 ; one ends in copy_large_match_len, which never falls through.
 lit_ext_mat_ext_2b:
 ; on entry: ax=0 + token, bp=ax
        lodsb                   ;grab extra literal length byte
        add     al,litrunlen    ;add 7h literal run length
 ;A zero sum can only come from the byte 249 (249+7 wraps to 0), so ZF is
 ;tested first; a carry without zero can only come from 250.
        jz      @@val249_8      ;if zf & cf, 249: get 16-bit literal length
        jc      @@val250_8      ;if cf,      250: get extra literal length byte
        xchg    cx,ax           ;otherwise, we have our literal length
        do_literal_copy         ;this might be better as rep movsw !!! benchmark
        get_word_match_offset
        copy_large_match_len
@@val250_8:
        lodsb                   ;ah=0; grab single extra length byte
        inc     ah              ;ax=256+length byte
        xchg    cx,ax
        do_literal_copy
        get_word_match_offset
        copy_large_match_len
@@val249_8:
        lodsw                   ;grab 16-bit length
        xchg    cx,ax
        do_literal_copy
        get_word_match_offset
        copy_large_match_len
 done_decompressing:
 ;return # of decompressed bytes in ax
        pop     ax              ;retrieve the original decompression offset
        sub     di,ax           ;adjust for original offset
        xchg    di,ax           ;return adjusted value in ax
        ret                     ;done decompressing, exit to caller
 ENDP    lzsa1_decompress_speed_jumptable
 ENDS    CODE
 END
 ;Speed optimization history (decompression times in microseconds @ 4.77 MHz):
 ; defer add MIN_MATCH_SIZE  shuttle  97207 alice 57200 robotron 362884 ++*
 ; jumptable rewrite, no RLE shuttle  97744 alice 46905 robotron 309032 -++
 ; adc cx,0 -> adc cl,0      shuttle  97744 alice 46893 robotron 309032 .+.!
 ; jumptable rewrite w/RLE   shuttle  88776 alice 50433 robotron 319222 +--
 ; short match copies movsb  shuttle  97298 alice 49769 robotron 326282 ---rb
 ; long match copy #1 16-bit shuttle  92490 alice 46905 robotron 308722 +*+
 ; long match copy #2 extraB shuttle  92464 alice 46905 robotron 308371 +.+
 ; long match copy #3 0f->ed shuttle  86765 alice 46864 robotron 303895 +++!
--- a/Tools/unix/lzsa/asm/8088/LZSA2FTA.ASM
+++ b/Tools/unix/lzsa/asm/8088/LZSA2FTA.ASM
@ -0,0 +1,302 @@
 ;  lzsa2fta.asm - LZSA v2 time-efficient decompressor implementation for 8088
 ;  Turbo Assembler IDEAL mode dialect; can also be assembled with NASM.
 ;
 ;  Usual DOS assembler SMALL model assumptions apply.  This code:
 ;  - Assumes it was invoked via NEAR call (change RET to RETF for FAR calls)
 ;  - Is interrupt-safe
 ;  - Is not re-entrant (do not decompress while already running decompression)
 ;  - Trashes all data and segment registers
 ;
 ;  Copyright (C) 2019 Jim Leonard, Emmanuel Marty
 ;
 ;  This software is provided 'as-is', without any express or implied
 ;  warranty.  In no event will the authors be held liable for any damages
 ;  arising from the use of this software.
 ;
 ;  Permission is granted to anyone to use this software for any purpose,
 ;  including commercial applications, and to alter it and redistribute it
 ;  freely, subject to the following restrictions:
 ;
 ;  1. The origin of this software must not be misrepresented; you must not
 ;     claim that you wrote the original software. If you use this software
 ;     in a product, an acknowledgment in the product documentation would be
 ;     appreciated but is not required.
 ;  2. Altered source versions must be plainly marked as such, and must not be
 ;     misrepresented as being the original software.
 ;  3. This notice may not be removed or altered from any source distribution.
        IDEAL                   ;use Turbo Assembler Ideal-mode syntax
        P8086                   ;only accept 8086/8088 instructions
        MODEL SMALL
        CODESEG
 ;HANDLE_WORD_RUN is an assembly-time switch for the long match copy path:
 ;  0        only matches at offset -1 (byte runs) take the REP STOSW path
 ;  non-zero matches at offset -2 (word runs) take a REP STOSW path as well
 ;
 ;While LZSA2 is technically capable of generating a match offset of -2,
 ;this sequence never actually showed up in my LZSA2 test corpus, likely due
 ;to compressor optimizations and the LZSA2 format itself.  If you know your
 ;test data will contain a match offset of -2, you can enable code to write
 ;out the sequence very quickly at the cost of 18 bytes of code.
 HANDLE_WORD_RUN EQU 0
 ;Make the entry point visible to other modules at link time.
 PUBLIC  lzsa2_decompress_speed
 ;  ---------------------------------------------------------------------------
 ;  Decompress raw LZSA2 block
 ;  inputs:
 ;  * ds:si: raw LZSA2 block
 ;  * es:di: output buffer
 ;  output:
 ;  * ax:    decompressed size
 ;  register use while running:
 ;  * bx:    nybble reader state (see get_nybble below)
 ;  * dl:    current token byte, until the match copy reuses dx to save ds
 ;  * bp:    last match offset; it is added to di to find the back reference
 ;           and is left untouched by a rep-match token
 ;  * cx:    copy counter; it is zero every time @@decode_token is reached
 ;  The caller's di stays on the stack until @@done_decompressing pops it.
 ;  ---------------------------------------------------------------------------
 PROC    lzsa2_decompress_speed  NEAR
 ;get_nybble: cl := next 4-bit value of the nybble stream (ch is not written).
 ;bl holds the byte the nybbles are taken from; bh toggles between 01h and
 ;0FFh.  NEG BH gives a negative result on the 1st, 3rd, 5th... use, and that
 ;is when a new byte is loaded from ds:si.  Its high nybble is handed out
 ;first, the low nybble on the following use.  ax is preserved (it is swapped
 ;out of the way around LODSB); the flags are not.
 MACRO get_nybble
 LOCAL has_nybble
        neg     bh              ;nybble ready?
        jns     has_nybble
        xchg    bx,ax           ;borrow al for lodsb; bx keeps the caller's ax
        lodsb                   ;load two nybbles
        xchg    bx,ax           ;bl := new byte, ax restored
 has_nybble:
        mov     cl,4            ;swap 4 high and low bits of nybble
        ror     bl,cl
        mov     cl,0FH
        and     cl,bl
 ENDM
 lzsa2_speed_start:
        push    di              ;remember decompression offset
        cld                     ;make string operations go forward
        xor     cx,cx
        mov     bx,0100H        ;bx used by get_nybble (bh=01h: nothing buffered)
@@decode_token:
        mov     ax,cx           ;clear ah - cx is zero (and must stay that way)
        lodsb                   ;read token byte: XYZ|LL|MMMM
        mov     dx,ax           ;keep copy of token in dl
        and     al,018H         ;isolate literals length in token (LL)
        jz      @@check_offset  ;no literals? stop decoding, go to matches
 ;At this point al still holds LL in bits 3-4 (08h, 10h or 18h), i.e. a
 ;literal count of 1, 2, or 3.
 ;3 = not done yet (an extra length nybble follows).
        cmp     al,(2 shl 3)    ;compare with LL=2 in its unshifted position
        jb      @@lit1b         ;LZSA2 output 1-byte more often, so test first
        je      @@lit2b
        mov     cl,3
        shr     al,cl           ;shift literals length into place
        get_nybble              ;cl := get extra literals length nybble
        add     al,cl           ;add len from token to nybble
        cmp     al,012H         ;LITERALS_RUN_LEN_V2 + 15 ?
        jne     @@got_literals  ;if not, we have the full literals count
        lodsb                   ;grab extra length byte
        add     al,012H         ;overflow?
        jnc     @@got_literals_big ;if not, we have a big full literals count
        lodsw                   ;grab 16-bit extra length
 ;For larger counts, it pays to set up a faster copy
@@got_literals_big:
        xchg    cx,ax           ;cx := literal count
        shr     cx,1            ;word count; carry := odd trailing byte
        rep     movsw
        adc     cx,0            ;cx := 1 if the count was odd (rep keeps carry)
        rep     movsb
        jmp     @@check_offset
@@got_literals:
        xchg    cx,ax
        rep     movsb           ;copy cx literals from ds:si to es:di
        jmp     @@check_offset
 ;LZSA2 likes to produce tiny literals of 1 or 2 bytes.  Handle them here.
@@lit2b:movsb
@@lit1b:movsb
@@check_offset:
        test    dl,dl           ;check match offset mode in token (X bit)
        js      @@rep_match_or_large_offset
        cmp     dl,040H         ;check if this is a 5 or 9-bit offset (Y bit)
        jnb     @@offset_9_bit
        ;5 bit offset:
        xchg    cx,ax           ;ax := 0 (cx is zero here); cx takes old ax (ch=0)
        mov     al,020H         ;isolate Z and move it up to bit 7
        and     al,dl
        shl     al,1
        shl     al,1
        get_nybble              ;get nybble (it becomes offset bits 1-4)
        or      al,cl           ;merge nybble
        rol     al,1            ;Z wraps around into bit 0, nybble into bits 1-4
        xor     al,0E1H         ;set offset bits 7-5 to 1, invert bit 0 (Z)
        dec     ah              ;set offset bits 15-8 to 1
        jmp     @@get_match_length
@@rep_match_or_16_bit:
        test    dl,020H         ;test bit Z: set = rep-match, clear = 16-bit
        jne     @@repeat_match  ;rep-match
        ;16 bit offset:
        lodsw                   ;Get 2-byte match offset
        xchg    ah,al           ;the first byte read is the high byte
        jmp     @@get_match_length
@@offset_9_bit:
        ;9 bit offset:
        xchg    cx,ax           ;ax := 0 (cx is zero here); cx takes old ax (ch=0)
        lodsb                   ;get 8 bit offset from stream in A
        dec     ah              ;set offset bits 15-8 to 1
        test    dl,020H         ;test bit Z (inverted offset bit 8)
        je      @@get_match_length
        dec     ah              ;Z is set: clear offset bit 8 (ah=0FEh)
        jmp     @@get_match_length
@@rep_match_or_large_offset:
        cmp     dl,0c0H         ;check if this is a 13-bit offset
                                ;or a 16-bit offset/rep match (Y bit)
        jnb     @@rep_match_or_16_bit
        ;13 bit offset:
        mov     ah,020H         ;isolate Z and move it up to bit 7 of ah
        and     ah,dl
        shl     ah,1
        shl     ah,1
        get_nybble              ;get nybble (it becomes offset bits 9-12)
        or      ah,cl           ;merge nybble
        rol     ah,1            ;Z wraps into offset bit 8, nybble into 9-12
        xor     ah,0E1H         ;set offset bits 15-13 to 1, invert bit 8 (Z)
        sub     ah,2            ;subtract 512
        lodsb                   ;load match offset bits 0-7
@@get_match_length:
        mov     bp,ax           ;bp:=offset
@@repeat_match:
        mov     ax,dx           ;ax: original token
        and     al,07H          ;isolate match length in token (MMM)
        add     al,2            ;add MIN_MATCH_SIZE_V2
        cmp     al,09H          ;MIN_MATCH_SIZE_V2 + MATCH_RUN_LEN_V2?
        jne     @@got_matchlen  ;no, we have full match length from token
        get_nybble              ;get extra match length nybble
        add     al,cl           ;add len from token to nybble
        cmp     al,018H         ;MIN_MATCH_SIZE_V2 + MATCH_RUN_LEN_V2 + 15?
        jne     @@got_matchlen  ;no, token + nybble give the full match length
        lodsb                   ;grab extra length byte
        add     al,018H         ;overflow?
        jnc     @@got_matchlen_big  ;if not, we have entire (big) length
        je      @@done_decompressing ; sum wrapped to exactly 0: EOD code
        lodsw                   ;grab 16-bit length
 ;If we're here, we have a larger match copy and can optimize how we do that
@@got_matchlen_big:
        xchg    cx,ax           ;copy match length into cx
        mov     dx,ds           ;save ds
        mov     ax,es
        mov     ds,ax           ;ds:=es
        xchg    si,ax           ;dx:ax = old ds:si
        mov     si,di           ;ds:si now points at back reference in output data
        add     si,bp
 IF HANDLE_WORD_RUN
        cmp     bp,-2           ;do we have a byte/word run to optimize?
        jae     @@do_run        ;perform a run
 ELSE
        cmp     bp,-1           ;do we have a byte run to optimize?
        je      @@do_run_1      ;perform a byte run
 ENDIF
 ;(HANDLE_WORD_RUN build only:) You may be tempted to change "jae" to "jge"
 ;because BP holds a signed number.
 ;Don't!  The total window is 64k, so if you treat this as a signed comparison,
 ;you will get incorrect results for offsets over 32K.
 ;
 ;If we're here, we have a long copy and it isn't byte-overlapping (if it
 ;overlapped, we'd be in @@do_run_1)  So, let's copy faster with REP MOVSW.
 ;This won't affect 8088 that much, but it speeds up 8086 and higher.
        shr     cx,1            ;word count; carry := odd trailing byte
        rep     movsw
        adc     cx,0            ;cx := 1 if the length was odd
        rep     movsb
        xchg    si,ax           ;si := saved compressed-data pointer
        mov     ds,dx           ;restore ds:si
        jmp     @@decode_token  ;go decode another token
 ;Smaller match copies handled here:
@@got_matchlen:
        xchg    cx,ax           ;copy match length into cx
        mov     dx,ds           ;save ds
        mov     ax,es
        mov     ds,ax           ;ds:=es
        xchg    si,ax           ;dx:ax = old ds:si
        mov     si,di           ;ds:si = back reference in output data
        add     si,bp
        rep     movsb           ;copy match
        xchg    si,ax
        mov     ds,dx           ;restore ds:si
        jmp     @@decode_token  ;go decode another token
@@done_decompressing:
        pop     ax              ;retrieve the original decompression offset
        xchg    di,ax           ;compute decompressed size
        sub     ax,di           ;ax := final di - original di
        ret                     ;done
 IF HANDLE_WORD_RUN
@@do_run:
        je      @@do_run_2      ;fall through to byte (common) if not word run
 ENDIF
 ;Offset -1: the match repeats the previous output byte, so store it directly.
@@do_run_1:
        push    ax              ;ax holds the saved compressed-data pointer
        lodsb                   ;load first byte of run into al
        mov     ah,al
        shr     cx,1
        rep     stosw           ;perform word run
        adc     cx,0
        rep     stosb           ;finish word run
        pop     si              ;si := saved compressed-data pointer
        mov     ds,dx           ;restore ds
        jmp     @@decode_token  ;go decode another token
 IF HANDLE_WORD_RUN
 ;Offset -2: the match repeats the previous output word.
@@do_run_2:
        push    ax              ;ax holds the saved compressed-data pointer
        lodsw                   ;load first word of run
        shr     cx,1
        rep     stosw           ;perform word run
        adc     cx,0            ;despite 2-byte offset, compressor might
        rep     stosb           ;output odd length. better safe than sorry.
        pop     si              ;si := saved compressed-data pointer
        mov     ds,dx           ;restore ds
        jmp     @@decode_token  ;go decode another token
 ENDIF
 ENDP    lzsa2_decompress_speed
 ENDS
 END
 ;Speed optimization history (decompression times in microseconds @ 4.77 MHz):
 ;Compression corpus:shuttle alice robotro rletest largetx linewar ...... ..
 ;Start of exercise   160828 113311 665900  238507 1053865 1004237 ******
 ;add al,val -> al,cl 160813 113296 668721  237484 1053604 1003815 ++-+++
 ;sub ah,2 -> dec dec 160907 113585 666744  237484 1056651 1005172 --+*-- rb
 ;mov ax,cx->xchgcxax 159741 112460 660594  237477 1046770  998323 ++++++
 ;unroll get_nibble   152552 106327 621119  237345  982381  942373 ++++++
 ;early exit if LL=0  147242 103842 615559  239318  946863  942932 +++-+-
 ;push/pop->mov/mov   145447 100832 604822  237288  927017  931366 ++++++
 ;push/pop->mov/mov(2)143214  98817 592920  239298  908217  910955 +++-++
 ;rep stos for -1, -2 143289 102812 617087  237164  942081  940688 ---+-- rb
 ;larger literal cpys 143214  98817 591940  238296  907237  909657 **++++
 ;larger copys & runs 132440  98802 586551  178768  904129  896709 ++++++ :-)
 ;smaller lit. copies 131991  99131 583933  177760  901824  898308 +-+++-
 ;swap smal lit compa 131828  99022 585121  177757  901793  894054 ++-*++
 ;compare before shif 130587  95970 569908  177753  889221  872461 +++*++
 ;getmatchlength base 130587  95970 570634  177753  893536  871556 ...... ===
 ; f->rep_match_or_16 xxxxxx  xxxxx 569910  xxxxxx  889266  871435 ..+.++
 ; f->rep_match_or_la 129966  94748 566169  xxxxxx  880870  867030 +++.++ +++
 ; f->offset_9_bit    132126  95258 568869  xxxxxx  893169  870364 -++.-+
 ;final fallthrough   129966  94748 566169  177753  880870  865023 ******
--- a/Tools/unix/lzsa/asm/8088/decompress_small_v1.S
+++ b/Tools/unix/lzsa/asm/8088/decompress_small_v1.S
@ -0,0 +1,120 @@
 ;  decompress_small_v1.S - space-efficient LZSA1 decompressor implementation for 8088
 ;
 ;  Copyright (C) 2019 Emmanuel Marty
 ;
 ;  This software is provided 'as-is', without any express or implied
 ;  warranty.  In no event will the authors be held liable for any damages
 ;  arising from the use of this software.
 ;
 ;  Permission is granted to anyone to use this software for any purpose,
 ;  including commercial applications, and to alter it and redistribute it
 ;  freely, subject to the following restrictions:
 ;
 ;  1. The origin of this software must not be misrepresented; you must not
 ;     claim that you wrote the original software. If you use this software
 ;     in a product, an acknowledgment in the product documentation would be
 ;     appreciated but is not required.
 ;  2. Altered source versions must be plainly marked as such, and must not be
 ;     misrepresented as being the original software.
 ;  3. This notice may not be removed or altered from any source distribution.
   segment .text
   bits 16
 ;  ---------------------------------------------------------------------------
 ;  Decompress raw LZSA1 block
 ;  inputs:
 ;  * ds:si: raw LZSA1 block
 ;  * es:di: output buffer
 ;  output:
 ;  * ax:    decompressed size
 ;
 ;  Size trick used three times below: a lone "db" opcode byte makes the CPU
 ;  decode the following instruction bytes as the operand of a CMP, so they
 ;  are skipped without spending a 2-byte jump.  Only the flags are disturbed,
 ;  and nothing reads them before they are set again.
 ;  * db 81H + inc ah + lodsb  decodes as one CMP SI,imm16
 ;  * db 3CH + lodsw           decodes as one CMP AL,imm8
 ;  ---------------------------------------------------------------------------
 lzsa1_decompress:
   push di                 ; remember decompression offset
   cld                     ; make string operations (lods, movs, stos..) move forward
   xor cx,cx
 .decode_token:
   mov ax,cx               ; clear ah - cx is zero from above or from after rep movsb in .copy_match
   lodsb                   ; read token byte: O|LLL|MMMM
   mov dx,ax               ; keep token in dl
   and al,070H             ; isolate literals length in token (LLL)
   mov cl,4
   shr al,cl               ; shift literals length into place
   cmp al,07H              ; LITERALS_RUN_LEN?
   jne .got_literals       ; no, we have the full literals count from the token, go copy
   lodsb                   ; grab extra length byte
   add al,07H              ; add LITERALS_RUN_LEN
   jnc .got_literals       ; if no overflow, we have the full literals count, go copy
   jne .mid_literals       ; sum wrapped to non-zero: one more length byte follows
   lodsw                   ; grab 16-bit extra length
   db 81H                  ; mask inc ah/lodsb
                           ; (*like jmp short .got_literals but faster)
 .mid_literals:
   inc ah                  ; add 256
   lodsb                   ; grab single extra length byte
 .got_literals:
   xchg cx,ax              ; cx: literals count  ax: old cx (a small value, ah = 0)
   rep movsb               ; copy cx literals from ds:si to es:di
   test dl,dl              ; check match offset size in token (O bit)
   js .get_long_offset
   dec cx                  ; cx: 0FFFFh
   xchg cx,ax              ; ah to 0xff - cx was zero from the rep movsb above
   lodsb                   ; al: low byte of the 1-byte match offset
   db 3CH                  ; mask lodsw
                           ; (*like jmp short .get_match_length but faster)
 .get_long_offset:
   lodsw                   ; Get 2-byte match offset
 .get_match_length:
   xchg dx,ax              ; dx: match offset  ax: original token
   and al,0FH              ; isolate match length in token (MMMM)
   add al,3                ; add MIN_MATCH_SIZE
   cmp al,012H             ; MATCH_RUN_LEN?
   jne .got_matchlen       ; no, we have the full match length from the token, go copy
   lodsb                   ; grab extra length byte
   add al,012H             ; add MIN_MATCH_SIZE + MATCH_RUN_LEN
   jnc .got_matchlen       ; if no overflow, we have the entire length
   jne .mid_matchlen       ; sum wrapped to non-zero: one more length byte follows
   lodsw                   ; grab 16-bit length
   test ax,ax              ; bail if we hit EOD
   je short .done_decompressing
   db 81H                  ; mask inc ah/lodsb
                           ; (*like jmp short .got_matchlen but faster)
 .mid_matchlen:
   inc ah                  ; add 256
   lodsb                   ; grab single extra length byte
 .got_matchlen:
   xchg cx,ax              ; copy match length into cx
   push ds                 ; save ds:si (current pointer to compressed data)
   xchg si,ax              ; ax: saved si
   push es
   pop ds                  ; ds = es, the match source is the output buffer
   mov si,di               ; ds:si now points at back reference in output data
   add si,dx
   rep movsb               ; copy match
   xchg si,ax              ; restore ds:si
   pop ds
   jmp short .decode_token ; go decode another token
 .done_decompressing:
   pop ax                  ; retrieve the original decompression offset
   xchg ax,di              ; compute decompressed size
   sub ax,di               ; ax = final di - original di
   ret                     ; done
--- a/Tools/unix/lzsa/asm/8088/decompress_small_v2.S
+++ b/Tools/unix/lzsa/asm/8088/decompress_small_v2.S
@ -0,0 +1,176 @@
 ;  decompress_small_v2.S - space-efficient LZSA2 decompressor implementation for 8088
 ;
 ;  Copyright (C) 2019 Emmanuel Marty
 ;
 ;  This software is provided 'as-is', without any express or implied
 ;  warranty.  In no event will the authors be held liable for any damages
 ;  arising from the use of this software.
 ;
 ;  Permission is granted to anyone to use this software for any purpose,
 ;  including commercial applications, and to alter it and redistribute it
 ;  freely, subject to the following restrictions:
 ;
 ;  1. The origin of this software must not be misrepresented; you must not
 ;     claim that you wrote the original software. If you use this software
 ;     in a product, an acknowledgment in the product documentation would be
 ;     appreciated but is not required.
 ;  2. Altered source versions must be plainly marked as such, and must not be
 ;     misrepresented as being the original software.
 ;  3. This notice may not be removed or altered from any source distribution.
   segment .text
   bits 16
 ;  ---------------------------------------------------------------------------
 ;  Decompress raw LZSA2 block
 ;  inputs:
 ;  * ds:si: raw LZSA2 block
 ;  * es:di: output buffer
 ;  output:
 ;  * ax:    decompressed size
 ;  register use while running:
 ;  * bx:    nibble reader state (see .get_nibble)
 ;  * dl:    current token byte
 ;  * bp:    last match offset, reused unchanged by a rep-match token
 ;  * cx:    copy counter; zero every time .decode_token is reached
 ;  ---------------------------------------------------------------------------
 lzsa2_decompress:
   push di                 ; remember decompression offset
   cld                     ; make string operations (lods, movs, stos..) move forward
   xor cx,cx
   mov bx,0100H            ; nibble reader: bh=01h means no nibble is buffered
   xor bp,bp               ; start with a match offset of 0
 .decode_token:
   mov ax,cx               ; clear ah - cx is zero from above or from after rep movsb in .copy_match
   lodsb                   ; read token byte: XYZ|LL|MMMM
   mov dx,ax               ; keep token in dl
   and al,018H             ; isolate literals length in token (LL)
   mov cl,3
   shr al,cl               ; shift literals length into place
   cmp al,03H              ; LITERALS_RUN_LEN_V2?
   jne .got_literals       ; no, we have the full literals count from the token, go copy
   call .get_nibble        ; get extra literals length nibble
   add al,cl               ; add len from token to nibble
   cmp al,012H             ; LITERALS_RUN_LEN_V2 + 15 ?
   jne .got_literals       ; if not, we have the full literals count, go copy
   lodsb                   ; grab extra length byte
   add al,012H             ; overflow?
   jnc .got_literals       ; if not, we have the full literals count, go copy
   lodsw                   ; grab 16-bit extra length
 .got_literals:
   xchg cx,ax              ; cx: literals count  ax: old cx (a small value, ah = 0)
   rep movsb               ; copy cx literals from ds:si to es:di
   test dl,0C0h            ; check match offset mode in token (X bit)
                           ; the flags of this test are reused below:
                           ; SF = X, ZF = neither X nor Y set, PF = X and Y equal
   js .rep_match_or_large_offset
   ;;cmp dl,040H             ; check if this is a 5 or 9-bit offset (Y bit)
                           ; discovered via the test with bit 6 set
   xchg cx,ax              ; clear ah - cx is zero from the rep movsb above
                           ; (xchg leaves the flags of the test intact)
   jne .offset_9_bit       ; X clear and result non-zero: Y is set
                           ; 5 bit offset
   cmp dl,020H             ; test bit 5: carry is set when Z is clear
   call .get_nibble_x      ; al: offset bits 0-7
   jmp short .dec_offset_top
 .offset_9_bit:             ; 9 bit offset
   lodsb                   ; get 8 bit offset from stream in A
   dec ah                  ; set offset bits 15-8 to 1
   test dl,020H            ; test bit Z (inverted offset bit 8)
   je .get_match_length
 .dec_offset_top:
   dec ah                  ; 9-bit offset: Z is set, so clear offset bit 8
                           ; 5-bit offset: set offset bits 15-8 to 1
   jmp short .get_match_length
 .rep_match_or_large_offset:
   ;;cmp dl,0c0H             ; check if this is a 13-bit offset or a 16-bit offset/rep match (Y bit)
   jpe .rep_match_or_16_bit ; even parity of (dl and 0C0h): X and Y both set
                           ; 13 bit offset
   cmp dl,0A0H             ; test bit 5 (knowing that bit 7 is also set)
                           ; carry is set when Z is clear
   xchg ah,al              ; al: 0 (ah was clear), ready to build the high byte
   call .get_nibble_x      ; al: offset bits 8-15
   sub al,2                ; subtract 512
   jmp short .get_match_length_1
 .rep_match_or_16_bit:
   test dl,020H            ; test bit Z: set = rep-match, clear = 16-bit offset
   jne .repeat_match       ; rep-match
                           ; 16 bit offset
   lodsb                   ; get high byte of the 2-byte match offset
 .get_match_length_1:
   xchg ah,al              ; move offset bits 8-15 into ah
   lodsb                   ; load match offset bits 0-7
 .get_match_length:
   xchg bp,ax              ; bp: offset
 .repeat_match:
   xchg ax,dx              ; ax: original token
   and al,07H              ; isolate match length in token (MMM)
   add al,2                ; add MIN_MATCH_SIZE_V2
   cmp al,09H              ; MIN_MATCH_SIZE_V2 + MATCH_RUN_LEN_V2?
   jne .got_matchlen       ; no, we have the full match length from the token, go copy
   call .get_nibble        ; get extra match length nibble
   add al,cl               ; add len from token to nibble
   cmp al,018H             ; MIN_MATCH_SIZE_V2 + MATCH_RUN_LEN_V2 + 15?
   jne .got_matchlen       ; no, token + nibble give the full match length, go copy
   lodsb                   ; grab extra length byte
   add al,018H             ; overflow?
   jnc .got_matchlen       ; if not, we have the entire length
   je short .done_decompressing ; sum wrapped to exactly 0: EOD code
   lodsw                   ; grab 16-bit length
 .got_matchlen:
   xchg cx,ax              ; copy match length into cx
   push ds                 ; save ds:si (current pointer to compressed data)
   xchg si,ax              ; ax: saved si
   push es
   pop ds                  ; ds = es, the match source is the output buffer
   lea si,[bp+di]          ; ds:si now points at back reference in output data
   rep movsb               ; copy match
   xchg si,ax              ; restore ds:si
   pop ds
   jmp .decode_token       ; go decode another token
 .done_decompressing:
   pop ax                  ; retrieve the original decompression offset
   xchg di,ax              ; compute decompressed size
   sub ax,di               ; ax = final di - original di
   ret                     ; done
 ; .get_nibble_x: build one offset byte in al from the Z bit and a nibble.
 ; on entry: al = 0, carry = result of comparing dl against the Z bit value
 ; on exit:  al bits 7-5 = 1, bits 4-1 = nibble, bit 0 = inverted Z
 ; Used for the low byte of a 5-bit offset and the high byte of a 13-bit one.
 .get_nibble_x:
   cmc                     ; carry set if bit Z (token bit 5) was set
   rcr al,1                ; move Z into bit 7 of al
   call .get_nibble        ; get nibble (cl), it ends up in bits 1-4
   or al,cl                ; merge nibble
   rol al,1                ; Z wraps around into bit 0, nibble into bits 1-4
   xor al,0E1H             ; set bits 7-5 to 1 and invert bit 0 (Z)
   ret
 ; .get_nibble: cl = next 4-bit value of the nibble stream (ch is not written).
 ; bl holds the byte the nibbles are taken from, bh toggles between 01h and
 ; 0FFh; a new byte is loaded from ds:si on every other call and its high
 ; nibble is returned first.  ax is preserved, flags are not.
 .get_nibble:
   neg bh                  ; nibble ready?
   jns .has_nibble
   xchg bx,ax              ; borrow al for lodsb; bx keeps the caller's ax
   lodsb                   ; load two nibbles
   xchg bx,ax              ; bl: new byte, ax restored
 .has_nibble:
   mov cl,4                ; swap 4 high and low bits of nibble
   ror bl,cl
   mov cl,0FH
   and cl,bl
   ret
--- a/Tools/unix/lzsa/asm/8088/decompress_speed_v1.S
+++ b/Tools/unix/lzsa/asm/8088/decompress_speed_v1.S
@ -0,0 +1,236 @@
 ;  decompress_speed_v1.S - time-efficient decompressor implementation for 8088
 ;  NASM syntax.
 ;
 ;  Usual DOS assembler SMALL model assumptions apply.  This code:
 ;  - Assumes it was invoked via NEAR call (change RET to RETF for FAR calls)
 ;  - Is interrupt-safe
 ;  - Is not re-entrant (do not decompress while already running decompression)
 ;  - Trashes all data and segment registers
 ;
 ;  Copyright (C) 2019 Jim Leonard, Emmanuel Marty
 ;
 ;  This software is provided 'as-is', without any express or implied
 ;  warranty.  In no event will the authors be held liable for any damages
 ;  arising from the use of this software.
 ;
 ;  Permission is granted to anyone to use this software for any purpose,
 ;  including commercial applications, and to alter it and redistribute it
 ;  freely, subject to the following restrictions:
 ;
 ;  1. The origin of this software must not be misrepresented; you must not
 ;     claim that you wrote the original software. If you use this software
 ;     in a product, an acknowledgment in the product documentation would be
 ;     appreciated but is not required.
 ;  2. Altered source versions must be plainly marked as such, and must not be
 ;     misrepresented as being the original software.
 ;  3. This notice may not be removed or altered from any source distribution.
        segment .text
        bits 16
 ;  ---------------------------------------------------------------------------
 ;  Decompress raw LZSA1 block
 ;  inputs:
 ;  * ds:si: raw LZSA1 block
 ;  * es:di: output buffer
 ;  output:
 ;  * ax:    decompressed size
 ;  ---------------------------------------------------------------------------
 ; 128-entry lookup table: entry i holds (i shr 4), i.e. 16 copies of each
 ; value 0..7.  The decoder reads it with a CS-prefixed XLAT, so it must be
 ; assembled into the code segment.
 SHR4table:
        times 16 DB 00h
        times 16 DB 01h
        times 16 DB 02h
        times 16 DB 03h
        times 16 DB 04h
        times 16 DB 05h
        times 16 DB 06h
        times 16 DB 07h
 ; lzsa1_decompress_speed: decompress the raw LZSA1 block at ds:si to es:di
 ; and return the number of bytes written in ax.
 ; register use while running:
 ; * bx:    address of SHR4table (for cs xlat)
 ; * dl/dx: token byte first, then the match offset
 ; * bp:    scratch used to save ds or si around a match copy
 ; * cx:    copy counter; zero every time .decode_token is reached
 ; The caller's di stays on the stack until .done_decompressing pops it.
 lzsa1_decompress_speed:
        push    di              ;remember decompression offset
        cld                     ;ensure string ops move forward
        mov     bx,SHR4table
        xor     cx,cx
 .decode_token:
        xchg    cx,ax           ;clear ah (cx = 0 from match copy's rep movsb)
                                ;cx takes the old ax; every path reloads cx
                                ;before using it as a count
        lodsb                   ;read token byte: O|LLL|MMMM
        mov     dx,ax           ;copy our token to dl for later MMMM handling
        and     al,070H         ;isolate literals length in token (LLL)
        jz      .check_offset_size ;if LLL=0, we have no literals; goto match
        cmp     al,070H         ;LITERALS_RUN_LEN?
        jne     .got_literals  ;no, we have full count from token; go copy
        lodsb                   ;grab extra length byte
        add     al,07H          ;add LITERALS_RUN_LEN
        jnc     .got_literals_exact ;if no overflow, we have full count
        je      .big_literals   ;sum wrapped to exactly 0: 16-bit count follows
 .mid_literals:
        lodsb                   ;grab single extra length byte
        inc     ah              ;add 256
        xchg    cx,ax           ;with longer counts, we can save some time
        shr     cx,1            ;by doing a word copy instead of a byte copy.
        rep     movsw           ;We don't need to account for overlap because
        adc     cx,0            ;source for literals isn't the output buffer.
        rep     movsb           ;copies the odd trailing byte, if any
        jmp     .check_offset_size
 .big_literals:
        lodsw                   ;grab 16-bit extra length
        xchg    cx,ax           ;with longer counts, we can save some time
        shr     cx,1            ;by doing a word copy instead of a byte copy.
        rep     movsw
        adc     cx,0            ;cx := 1 if the count was odd (rep keeps carry)
        rep     movsb
        jmp     .check_offset_size
 .got_literals:
        cs   xlat               ;shift literals length into place
                                ;(al := cs:[bx+al], i.e. al shr 4 via SHR4table)
 .got_literals_exact:
        xchg    cx,ax
        rep     movsb           ;copy cx literals from ds:si to es:di
 .check_offset_size:
        test    dl,dl           ;check match offset size in token (O bit)
        js      .get_long_offset ;load absolute 16-bit match offset
        mov     ah,0ffh         ;set up high byte
        lodsb                   ;load low byte
 .get_match_length:
        xchg    dx,ax           ;dx: match offset  ax: original token
        and     al,0FH          ;isolate match length in token (MMMM)
        cmp     al,0FH          ;MATCH_RUN_LEN?
        jne     .got_matchlen_short  ;no, we have the full match length from the token, go copy
        lodsb                   ;grab extra length byte
        add     al,012H         ;add MIN_MATCH_SIZE + MATCH_RUN_LEN
        jnc     .do_long_copy  ;if no overflow, we have the entire length
        jne     .mid_matchlen   ;sum wrapped to non-zero: one more byte follows
        lodsw                   ;grab 16-bit length
        xchg    cx,ax           ;get ready to do a long copy
        jcxz    .done_decompressing ;wait, is it the EOD marker? Exit if so
        jmp     .copy_len_preset ;otherwise, do the copy
 .get_long_offset:
        lodsw                   ;Get 2-byte match offset
        jmp     .get_match_length
 .got_matchlen_short:
        add     al,3            ;add MIN_MATCH_SIZE
        xchg    cx,ax           ;copy match length into cx
        mov     bp,ds           ;save ds
        mov     ax,es
        mov     ds,ax           ;ds=es
        xchg    ax,si           ;save si
        mov     si,di           ;ds:si now points at back reference in output data
        add     si,dx
        rep     movsb           ;copy match
        xchg    si,ax           ;restore si
        mov     ds,bp           ;restore ds
        jmp     .decode_token  ;go decode another token
 .done_decompressing:
        pop     ax              ;retrieve the original decompression offset
        xchg    di,ax           ;compute decompressed size
        sub     ax,di           ;ax := final di - original di
        ret                     ;done decompressing, exit to caller
 ;With a confirmed longer match length, we have an opportunity to optimize for
 ;the case where a single byte (or word) is repeated long enough that we can
 ;benefit from rep stosw to perform the run (instead of rep movsb).
 .mid_matchlen:
        lodsb                   ;grab single extra length byte
        inc     ah              ;add 256
 .do_long_copy:
        xchg    cx,ax           ;copy match length into cx
 .copy_len_preset:
        push    ds              ;save ds
        mov     bp,es
        mov     ds,bp           ;ds=es
        mov     bp,si           ;save si
        mov     si,di           ;ds:si now points at back reference in output data
        add     si,dx
        cmp     dx,-2           ;do we have a byte/word run to optimize?
        jae     .do_run        ;perform a run
 ;You may be tempted to change "jae" to "jge" because DX is a signed number.
 ;Don't!  The total window is 64k, so if you treat this as a signed comparison,
 ;you will get incorrect results for offsets over 32K.
 ;If we're here, we have a long copy and it isn't byte-overlapping (if it
 ;overlapped, we'd be in .do_run_1)  So, let's copy faster with REP MOVSW.
 ;This won't affect 8088 that much, but it speeds up 8086 and higher.
        shr     cx,1            ;word count; carry := odd trailing byte
        rep     movsw
        adc     cx,0            ;cx := 1 if the length was odd
        rep     movsb
        mov     si,bp           ;restore si
        pop     ds
        jmp     .decode_token  ;go decode another token
 .do_run:
        je      .do_run_2      ;fall through to byte (common) if not word run
 ;Offset -1: the match repeats the previous output byte.
 .do_run_1:
        lodsb                   ;load first byte of run into al
        mov     ah,al
        shr     cx,1
        rep     stosw           ;perform word run
        adc     cx,0
        rep     stosb           ;finish word run
        mov     si,bp           ;restore si
        pop     ds
        jmp     .decode_token  ;go decode another token
 ;Offset -2: the match repeats the previous output word.
 .do_run_2:
        lodsw                   ;load first word of run
        shr     cx,1
        rep     stosw           ;perform word run
        adc     cx,0            ;despite 2-byte offset, compressor might
        rep     stosb           ;output odd length. better safe than sorry.
        mov     si,bp           ;restore si
        pop     ds
        jmp     .decode_token  ;go decode another token
 ;Speed optimization history (decompression times in microseconds @ 4.77 MHz):
 ; original E. Marty code    shuttle 123208 alice 65660 robotron 407338 ***
 ; table for shr al,4        shuttle 120964 alice 63230 robotron 394733 +++
 ; push/pop to mov/mov       shuttle 118176 alice 61835 robotron 386762 +++
 ; movsw for literalcpys     shuttle 124102 alice 64908 robotron 400220 --- rb
 ; stosw for byte runs       shuttle 118897 alice 65040 robotron 403518 --- rb
 ; better stosw for runs     shuttle 117712 alice 65040 robotron 403343 +--
 ; disable RLE by default    shuttle 116924 alice 60783 robotron 381226 +++
 ; optimize got_matchlen     shuttle 115294 alice 59588 robotron 374330 +++
 ; fall through to getML     shuttle 113258 alice 59572 robotron 372004 +++
 ; fall through to midLI     shuttle 113258 alice 59572 robotron 375060 ..- rb
 ; fall through midMaLen     shuttle 113247 alice 59572 robotron 372004 +.+
 ; movsw for litlen > 255    shuttle 113247 alice 59572 robotron 371612 ..+
 ; rep stosw for long runs   shuttle 113247 alice 59572 robotron 371612 ...
 ; rep movsw for long cpys   shuttle 113247 alice 59572 robotron 371035 ..+
 ; xchg/dec ah -> mov ah,val shuttle 112575 alice 59272 robotron 369198 +++
 ; force >12h len.to longcpy shuttle 101998 alice 59266 robotron 364459 +.+
 ; more efficient run branch shuttle 102239 alice 59297 robotron 364716 --- rb
 ; even more eff. run branch shuttle 101998 alice 59266 robotron 364459 ***
 ; BUGFIX - bad sign compare shuttle 101955 alice 59225 robotron 364117 +++
 ; reverse 16-bit len compar shuttle 102000 alice 59263 robotron 364460 --- rb
 ; jcxz for EOD detection    no change to speed, but is 1 byte shorter  +++
 ; force movsw for literals  shuttle 107183 alice 62555 robotron 379524 --- rb
 ; defer shr4 until necessry shuttle 102069 alice 60236 robotron 364096 ---
 ; skip literals if LLL=0    shuttle  98655 alice 57849 robotron 363358 ---
 ; fall through to mid_liter shuttle  98595 alice 57789 robotron 361998 +++
 ; == jumptable experiments begin ==
 ; jumptable for small copys shuttle 101594 alice 61078 robotron 386018 ---
 ; start:xchg instead of mov shuttle 100948 alice 60467 robotron 381112 +++
 ; use table for LLL=0 check shuttle 106972 alice 63333 robotron 388304 --- rb
 ; jmptbl to fallthrough mov shuttle 102532 alice 60760 robotron 383070 ---
 ; cpy fallthrough check_ofs shuttle  98939 alice 58917 robotron 371019 +**
 ; single jumptable jump     shuttle  97528 alice 57264 robotron 362194 ++*
 ; conditional check for L=7 shuttle  98610 alice 58521 robotron 368153 --- rb
 ; rip out the jumptable :-/ shuttle  97616 alice 57128 robotron 360697 +++
 ; defer add MIN_MATCH_SIZE  shuttle  97250 alice 57004 robotron 361191 ++?
 ; cache constants in regs   shuttle 104681 alice 59939 robotron 380125 --- rb
--- a/Tools/unix/lzsa/asm/8088/decompress_speed_v2.S
+++ b/Tools/unix/lzsa/asm/8088/decompress_speed_v2.S
@ -0,0 +1,288 @@
 ;  decompress_speed_v2.S - LZSA v2 time-efficient decompressor implementation for 8088
 ;  NASM syntax.
 ;
 ;  Usual DOS assembler SMALL model assumptions apply.  This code:
 ;  - Assumes it was invoked via NEAR call (change RET to RETF for FAR calls)
 ;  - Is interrupt-safe
 ;  - Is not re-entrant (do not decompress while already running decompression)
 ;  - Trashes all data and segment registers
 ;
 ;  Copyright (C) 2019 Jim Leonard, Emmanuel Marty
 ;
 ;  This software is provided 'as-is', without any express or implied
 ;  warranty.  In no event will the authors be held liable for any damages
 ;  arising from the use of this software.
 ;
 ;  Permission is granted to anyone to use this software for any purpose,
 ;  including commercial applications, and to alter it and redistribute it
 ;  freely, subject to the following restrictions:
 ;
 ;  1. The origin of this software must not be misrepresented; you must not
 ;     claim that you wrote the original software. If you use this software
 ;     in a product, an acknowledgment in the product documentation would be
 ;     appreciated but is not required.
 ;  2. Altered source versions must be plainly marked as such, and must not be
 ;     misrepresented as being the original software.
 ;  3. This notice may not be removed or altered from any source distribution.
        segment .text
        bits 16                 ;assemble as 16-bit code
 ;While LZSA2 is technically capable of generating a match offset of -2,
 ;this sequence never actually showed up in my LZSA2 test corpus, likely due
 ;to compressor optimizations and the LZSA2 format itself.  If you know your
 ;test data will contain a match offset of -2, you can enable code to write
 ;out the sequence very quickly at the cost of 18 bytes of code.
 ;Set to a non-zero value to assemble the word-run path (.do_run/.do_run_2).
 ;With 0 only offset -1 (byte run) gets the rep stos shortcut; an offset of -2
 ;on the long-copy path is then handled by the generic rep movsw copy.
 HANDLE_WORD_RUN EQU 0
 ;  ---------------------------------------------------------------------------
 ;  Decompress raw LZSA2 block
 ;  inputs:
 ;  * ds:si: raw LZSA2 block
 ;  * es:di: output buffer
 ;  output:
 ;  * ax:    decompressed size
 ;  ---------------------------------------------------------------------------
 ;get_nybble: fetch the next 4-bit value of the nybble stream into cl.
 ;State is kept in bx: bh is negated on every use, so it alternates between
 ;0FFh and 01h (the decompressor starts it at 01h); bl holds the most recently
 ;loaded byte, i.e. a pair of nybbles.  When the negated bh is negative a fresh
 ;byte is read from ds:si into bl, otherwise the remaining nybble of the
 ;previous byte is used.
 ;  in:  bx = nybble state, ds:si = compressed input
 ;  out: cl = nybble (0-15); bx updated; si advanced if a byte was read
 ;  ax and ch are preserved; flags are modified.
 %macro get_nybble 0
        neg     bh              ;nybble ready?
        jns     %%has_nybble
        xchg    bx,ax           ;park ax in bx so lodsb can use al
        lodsb                   ;load two nybbles
        xchg    bx,ax           ;bl := new byte, bh and ax back where they were
 %%has_nybble:
        mov     cl,4            ;swap 4 high and low bits of nybble
        ror     bl,cl           ;nybble to deliver is now in bl bits 0-3
        mov     cl,0FH
        and     cl,bl           ;cl := nybble; the other half stays in bl bits 4-7
 %endmacro
 ;Register use inside the decoder:
 ;  ds:si  compressed input               es:di  output position
 ;  bx     get_nybble state               bp     current match offset (negative);
 ;  dx     dl = current token, dh = 0;           kept across tokens for rep-matches
 ;         reused to hold ds while a match is copied
 ;  cx     zero whenever .decode_token and .check_offset are reached; in between
 ;         it carries a copy count or the nybble returned by get_nybble
 ;The initial di is pushed on entry and popped at .done_decompressing to
 ;compute the decompressed size returned in ax.
 lzsa2_decompress_speed:
        push    di              ;remember decompression offset
        cld                     ;make string operations go forward
        xor     cx,cx
        mov     bx,0100H        ;bx used by get_nybble
 .decode_token:
        mov     ax,cx           ;clear ah - cx is zero (and must stay that way)
        lodsb                   ;read token byte: XYZ|LL|MMMM
        mov     dx,ax           ;keep copy of token in dl
        and     al,018H         ;isolate literals length in token (LL)
        jz      .check_offset  ;no literals? stop decoding, go to matches
 ;At this point, al can be in three (unshifted) states: 1, 2, or 3.
 ;3 = not done yet.
        cmp     al,(2 << 3)    ;compare with LL=2: below is LL=1, above is LL=3
        jb      .lit1b         ;LZSA2 output 1-byte more often, so test first
        je      .lit2b
        mov     cl,3
        shr     al,cl           ;shift literals length into place
        get_nybble              ;cl := get extra literals length nybble
        add     al,cl           ;add len from token to nybble
        cmp     al,012H         ;LITERALS_RUN_LEN_V2 + 15 ?
        jne     .got_literals  ;if not, we have the full literals count
        lodsb                   ;grab extra length byte
        add     al,012H         ;overflow?
        jnc     .got_literals_big ;if not, we have a big full literals count
        lodsw                   ;grab 16-bit extra length
 ;For larger counts, it pays to set up a faster copy
 .got_literals_big:
        xchg    cx,ax
        shr     cx,1            ;word count; carry keeps the odd byte
        rep     movsw           ;rep movs leaves flags alone, so carry survives
        adc     cx,0            ;cx := 1 if the count was odd, else 0
        rep     movsb
        jmp     .check_offset
 .got_literals:
        xchg    cx,ax
        rep     movsb           ;copy cx literals from ds:si to es:di
        jmp     .check_offset
 ;LZSA2 likes to produce tiny literals of 1 or 2 bytes.  Handle them here.
 .lit2b:movsb
 .lit1b:movsb
 .check_offset:
        test    dl,dl           ;check match offset mode in token (X bit)
        js      .rep_match_or_large_offset
        cmp     dl,040H         ;check if this is a 5 or 9-bit offset (Y bit)
        jnb     .offset_9_bit
        ;5 bit offset:
        xchg    cx,ax           ;clear ah - cx is zero at .check_offset
        mov     al,020H         ;shift Z (offset bit 4) in place
        and     al,dl
        shl     al,1
        shl     al,1            ;Z now in al bit 7
        get_nybble              ;get nybble for offset bits 0-3
        or      al,cl           ;merge nybble
        rol     al,1            ;Z -> bit 0, nybble -> bits 4-1
        xor     al,0E1H         ;set offset bits 7-5 to 1 (and invert bit 0)
        dec     ah              ;set offset bits 15-8 to 1
        jmp     .get_match_length
 .rep_match_or_16_bit:
        test    dl,020H         ;test bit Z: set means rep-match
        jne     .repeat_match  ;rep-match: reuse the offset already in bp
        ;16 bit offset:
        lodsw                   ;Get 2-byte match offset
        xchg    ah,al           ;first stream byte is the high byte
        jmp     .get_match_length
 .offset_9_bit:
        ;9 bit offset:
        xchg    cx,ax           ;clear ah - cx is zero at .check_offset
        lodsb                   ;get 8 bit offset from stream in A
        dec     ah              ;set offset bits 15-8 to 1
        test    dl,020H         ;test bit Z (offset bit 8)
        je      .get_match_length
        dec     ah              ;Z is set: clear offset bit 8 (ah := 0FEh)
        jmp     .get_match_length
 .rep_match_or_large_offset:
        cmp     dl,0c0H         ;check if this is a 13-bit offset
                                ;or a 16-bit offset/rep match (Y bit)
        jnb     .rep_match_or_16_bit
        ;13 bit offset:
        mov     ah,020H         ;shift Z (offset bit 12) in place
        and     ah,dl
        shl     ah,1
        shl     ah,1            ;Z now in ah bit 7
        get_nybble              ;get nybble for offset bits 8-11
        or      ah,cl           ;merge nybble
        rol     ah,1            ;Z -> bit 0, nybble -> bits 4-1
        xor     ah,0E1H         ;set offset bits 15-13 to 1
        sub     ah,2            ;subtract 512
        lodsb                   ;load match offset bits 0-7
 .get_match_length:
        mov     bp,ax           ;bp:=offset
 .repeat_match:
        mov     ax,dx           ;ax: original token
        and     al,07H          ;isolate match length in token (MMM)
        add     al,2            ;add MIN_MATCH_SIZE_V2
        cmp     al,09H          ;MIN_MATCH_SIZE_V2 + MATCH_RUN_LEN_V2?
        jne     .got_matchlen  ;no, we have full match length from token
        get_nybble              ;get extra match length nybble
        add     al,cl           ;add len from token to nybble
        cmp     al,018H         ;MIN_MATCH_SIZE_V2 + MATCH_RUN_LEN_V2 + 15?
        jne     .got_matchlen  ;no, token + nybble give the full match length
        lodsb                   ;grab extra length byte
        add     al,018H         ;overflow?
        jnc     .got_matchlen_big  ;if not, we have entire (big) length
        je      .done_decompressing ; sum wrapped to exactly zero: EOD code
        lodsw                   ;grab 16-bit length
 ;If we're here, we have a larger match copy and can optimize how we do that
 .got_matchlen_big:
        xchg    cx,ax           ;copy match length into cx
        mov     dx,ds           ;save ds
        mov     ax,es
        mov     ds,ax           ;ds:=es
        xchg    si,ax           ;dx:ax = old ds:si
        mov     si,di           ;ds:si now points at back reference in output data
        add     si,bp
 %if HANDLE_WORD_RUN
        cmp     bp,-2           ;do we have a byte/word run to optimize?
        jae     .do_run        ;perform a run
 %else
        cmp     bp,-1           ;do we have a byte run to optimize?
        je      .do_run_1      ;perform a byte run
 %endif
 ;You may be tempted to change "jae" to "jge" because BP is a signed number.
 ;Don't!  The total window is 64k, so if you treat this as a signed comparison,
 ;you will get incorrect results for offsets over 32K.
 ;
 ;If we're here, we have a long copy and it isn't byte-overlapping (if it
 ;overlapped, we'd be in .do_run_1)  So, let's copy faster with REP MOVSW.
 ;This won't affect 8088 that much, but it speeds up 8086 and higher.
        shr     cx,1            ;word count; carry keeps the odd byte
        rep     movsw
        adc     cx,0            ;cx := 1 if the length was odd, else 0
        rep     movsb
        xchg    si,ax
        mov     ds,dx           ;restore ds:si
        jmp     .decode_token  ;go decode another token
 ;Smaller match copies handled here:
 .got_matchlen:
        xchg    cx,ax           ;copy match length into cx
        mov     dx,ds           ;save ds
        mov     ax,es
        mov     ds,ax           ;ds:=es
        xchg    si,ax           ;dx:ax = old ds:si
        mov     si,di           ;ds:si = back reference in output data
        add     si,bp
        rep     movsb           ;copy match
        xchg    si,ax
        mov     ds,dx           ;restore ds:si
        jmp     .decode_token  ;go decode another token
 .done_decompressing:
        pop     ax              ;retrieve the original decompression offset
        xchg    di,ax           ;compute decompressed size
        sub     ax,di           ;ax := final di - initial di
        ret                     ;done
 %if HANDLE_WORD_RUN
 .do_run:
        je      .do_run_2      ;fall through to byte (common) if not word run
 %endif
 ;Offset -1: the match repeats the previous output byte, so fill with stos.
 ;On entry dx:ax hold the saved input ds:si and ds:si points at the run source.
 .do_run_1:
        push    ax              ;keep saved input si; ax is needed for the fill value
        lodsb                   ;load first byte of run into al
        mov     ah,al
        shr     cx,1
        rep     stosw           ;perform word run
        adc     cx,0
        rep     stosb           ;finish word run
        pop     si              ;restore input si
        mov     ds,dx           ;restore input ds
        jmp     .decode_token  ;go decode another token
 %if HANDLE_WORD_RUN
 ;Offset -2: the match repeats the previous output word.
 .do_run_2:
        push    ax              ;keep saved input si
        lodsw                   ;load first word of run
        shr     cx,1
        rep     stosw           ;perform word run
        adc     cx,0            ;despite 2-byte offset, compressor might
        rep     stosb           ;output odd length. better safe than sorry.
        pop     si              ;restore input si
        mov     ds,dx           ;restore input ds
        jmp     .decode_token  ;go decode another token
 %endif
 ;Speed optimization history (decompression times in microseconds @ 4.77 MHz):
 ;Compression corpus:shuttle alice robotro rletest largetx linewar ...... ..
 ;Start of exercise   160828 113311 665900  238507 1053865 1004237 ******
 ;add al,val -> al,cl 160813 113296 668721  237484 1053604 1003815 ++-+++
 ;sub ah,2 -> dec dec 160907 113585 666744  237484 1056651 1005172 --+*-- rb
 ;mov ax,cx->xchgcxax 159741 112460 660594  237477 1046770  998323 ++++++
 ;unroll get_nibble   152552 106327 621119  237345  982381  942373 ++++++
 ;early exit if LL=0  147242 103842 615559  239318  946863  942932 +++-+-
 ;push/pop->mov/mov   145447 100832 604822  237288  927017  931366 ++++++
 ;push/pop->mov/mov(2)143214  98817 592920  239298  908217  910955 +++-++
 ;rep stos for -1, -2 143289 102812 617087  237164  942081  940688 ---+-- rb
 ;larger literal cpys 143214  98817 591940  238296  907237  909657 **++++
 ;larger copys & runs 132440  98802 586551  178768  904129  896709 ++++++ :-)
 ;smaller lit. copies 131991  99131 583933  177760  901824  898308 +-+++-
 ;swap smal lit compa 131828  99022 585121  177757  901793  894054 ++-*++
 ;compare before shif 130587  95970 569908  177753  889221  872461 +++*++
 ;getmatchlength base 130587  95970 570634  177753  893536  871556 ...... ===
 ; f->rep_match_or_16 xxxxxx  xxxxx 569910  xxxxxx  889266  871435 ..+.++
 ; f->rep_match_or_la 129966  94748 566169  xxxxxx  880870  867030 +++.++ +++
 ; f->offset_9_bit    132126  95258 568869  xxxxxx  893169  870364 -++.-+
 ;final fallthrough   129966  94748 566169  177753  880870  865023 ******
--- a/Tools/unix/lzsa/asm/x86/decompress_small_v1.asm
+++ b/Tools/unix/lzsa/asm/x86/decompress_small_v1.asm
@ -0,0 +1,120 @@
 ;  decompress_small_v1.asm - space-efficient decompressor implementation for x86
 ;
 ;  Copyright (C) 2019 Emmanuel Marty
 ;
 ;  This software is provided 'as-is', without any express or implied
 ;  warranty.  In no event will the authors be held liable for any damages
 ;  arising from the use of this software.
 ;
 ;  Permission is granted to anyone to use this software for any purpose,
 ;  including commercial applications, and to alter it and redistribute it
 ;  freely, subject to the following restrictions:
 ;
 ;  1. The origin of this software must not be misrepresented; you must not
 ;     claim that you wrote the original software. If you use this software
 ;     in a product, an acknowledgment in the product documentation would be
 ;     appreciated but is not required.
 ;  2. Altered source versions must be plainly marked as such, and must not be
 ;     misrepresented as being the original software.
 ;  3. This notice may not be removed or altered from any source distribution.
    segment .text
    bits 32
 ;  ---------------------------------------------------------------------------
 ;  Decompress raw LZSA1 block
 ;  inputs:
 ;  * esi: raw LZSA1 block
 ;  * edi: output buffer
 ;  output:
 ;  * eax:    decompressed size
 ;
 ;  Arguments are taken in registers only; nothing is read from the caller's
 ;  stack.  All registers other than eax are preserved (pushad/popad).
 ;  The routine does not execute cld: the direction flag must be clear on
 ;  entry.  No bounds checking is performed on either buffer.
 ;  ---------------------------------------------------------------------------
    %ifndef BIN
      global lzsa1_decompress
      global _lzsa1_decompress
    %endif
 lzsa1_decompress:
 _lzsa1_decompress:
    pushad                    ; save all registers: entry edi lands at [esp], eax slot at [esp+28]
    xor    ecx, ecx           ; ecx must be zero each time .decode_token is reached
 .decode_token:
    mul    ecx                ; eax = edx = 0 (ecx is zero)
    lodsb                     ; read token byte: O|LLL|MMMM
    mov    dl, al             ; keep token in dl
    and    al, 070H           ; isolate literals length in token (LLL)
    shr    al, 4              ; shift literals length into place
    cmp    al, 07H            ; LITERALS_RUN_LEN?
    jne    .got_literals      ; no, we have the full literals count from the token, go copy
    lodsb                     ; grab extra length byte
    add    al, 07H            ; add LITERALS_RUN_LEN
    jnc    .got_literals      ; if no overflow, we have the full literals count, go copy
    jne    .mid_literals      ; wrapped to non-zero: a single extra byte follows (256 + byte)
    lodsw                     ; wrapped to zero: grab 16-bit extra length
    jmp    .got_literals
 .mid_literals:
    lodsb                     ; grab single extra length byte
    inc    ah                 ; add 256
 .got_literals:
    xchg   ecx, eax           ; ecx = literals count, eax = 0
    rep    movsb              ; copy ecx literals from esi to edi; leaves ecx = 0
    dec    ecx                ; ecx = 0FFFFFFFFH
    xchg   eax, ecx           ; eax = 0FFFFFFFFH, ecx = 0 again
                              ; Pre-filling eax with ones makes both offset forms
                              ; come out as negative 32-bit values: offsets are
                              ; distances of up to 64K, so a 2-byte offset whose
                              ; bit 15 is clear must NOT be sign-extended from 16 bits.
    test   dl, dl             ; check match offset size in token (O bit)
    js     .get_long_offset
    lodsb                     ; 1-byte offset: eax = 0FFFFFFxxH
    jmp    .get_match_length
 .get_long_offset:
    lodsw                     ; 2-byte offset: eax = 0FFFFxxxxH
 .get_match_length:
    xchg   eax, edx           ; edx: match offset  eax: original token
    and    al, 0FH            ; isolate match length in token (MMMM)
    add    al, 3              ; add MIN_MATCH_SIZE
    cmp    al, 012H           ; MATCH_RUN_LEN?
    jne    .got_matchlen      ; no, we have the full match length from the token, go copy
    lodsb                     ; grab extra length byte
    add    al, 012H           ; add MIN_MATCH_SIZE + MATCH_RUN_LEN
    jnc    .got_matchlen      ; if no overflow, we have the entire length
    jne    .mid_matchlen      ; wrapped to non-zero: a single extra byte follows (256 + byte)
    lodsw                     ; wrapped to zero: grab 16-bit length
    test   eax, eax           ; bail if we hit EOD (16-bit length of zero)
    je     .done_decompressing
    jmp    .got_matchlen
 .mid_matchlen:
    lodsb                     ; grab single extra length byte
    inc    ah                 ; add 256
 .got_matchlen:
    xchg   ecx, eax           ; copy match length into ecx (eax = 0)
    xchg   esi, eax           ; park the input pointer in eax
    mov    esi, edi           ; esi now points at back reference in output data
    add    esi, edx           ; edx is already a negative 32-bit displacement
    rep    movsb              ; copy match; leaves ecx = 0 for .decode_token
    xchg   esi, eax           ; restore esi
    jmp    .decode_token      ; go decode another token
 .done_decompressing:
    sub    edi, [esp]         ; bytes written = final edi - entry edi saved by pushad
    mov    [esp+28], edi      ; store into the saved-eax slot so popad returns it in eax
    popad
    ret                       ; done
--- a/Tools/unix/lzsa/asm/x86/decompress_small_v2.asm
+++ b/Tools/unix/lzsa/asm/x86/decompress_small_v2.asm
@ -0,0 +1,181 @@
 ;  decompress_small_v2.asm - space-efficient decompressor implementation for x86
 ;
 ;  Copyright (C) 2019 Emmanuel Marty
 ;
 ;  This software is provided 'as-is', without any express or implied
 ;  warranty.  In no event will the authors be held liable for any damages
 ;  arising from the use of this software.
 ;
 ;  Permission is granted to anyone to use this software for any purpose,
 ;  including commercial applications, and to alter it and redistribute it
 ;  freely, subject to the following restrictions:
 ;
 ;  1. The origin of this software must not be misrepresented; you must not
 ;     claim that you wrote the original software. If you use this software
 ;     in a product, an acknowledgment in the product documentation would be
 ;     appreciated but is not required.
 ;  2. Altered source versions must be plainly marked as such, and must not be
 ;     misrepresented as being the original software.
 ;  3. This notice may not be removed or altered from any source distribution.
    segment .text
    bits 32
 ;  ---------------------------------------------------------------------------
 ;  Decompress raw LZSA2 block
 ;  inputs:
 ;  * esi: raw LZSA2 block
 ;  * edi: output buffer
 ;  output:
 ;  * eax:    decompressed size
 ;
 ;  Arguments are taken in registers only; nothing is read from the caller's
 ;  stack.  All registers other than eax are preserved (pushad/popad).
 ;  The routine does not execute cld: the direction flag must be clear on
 ;  entry.  No bounds checking is performed on either buffer.
 ;
 ;  Internal register use:
 ;  * ebx: nibble reader state (bh alternates 0FFh/01h, bl = last nibble pair)
 ;  * ebp: current match offset, kept across tokens for rep-matches
 ;  * edx: dl = current token
 ;  * ecx: zero each time .decode_token is reached
 ;  ---------------------------------------------------------------------------
    %ifndef BIN
      global lzsa2_decompress
      global _lzsa2_decompress
    %endif
 lzsa2_decompress:
 _lzsa2_decompress:
    pushad                      ; save all registers: entry edi lands at [esp], eax slot at [esp+28]
    xor    ecx, ecx
    xor    ebx, ebx             ; ebx = 0100H
    inc    bh
    xor    ebp, ebp
 .decode_token:
    mul    ecx                  ; eax = edx = 0 (ecx is zero)
    lodsb                       ; read token byte: XYZ|LL|MMMM
    mov    dl, al               ; keep token in dl
    and    al, 018H             ; isolate literals length in token (LL)
    shr    al, 3                ; shift literals length into place
    cmp    al, 03H              ; LITERALS_RUN_LEN_V2?
    jne    .got_literals        ; no, we have the full literals count from the token, go copy
    call   .get_nibble          ; get extra literals length nibble
    add    al, cl               ; add len from token to nibble 
    cmp    al, 012H             ; LITERALS_RUN_LEN_V2 + 15 ?
    jne    .got_literals        ; if not, we have the full literals count, go copy
    lodsb                       ; grab extra length byte
    add    al,012H              ; overflow?
    jnc    .got_literals        ; if not, we have the full literals count, go copy
    lodsw                       ; grab 16-bit extra length
 .got_literals:
    xchg   ecx, eax
    rep    movsb                ; copy ecx literals from esi to edi
    test   dl, 0C0h             ; check match offset mode in token: SF = X bit,
                                ; ZF/PF describe the X and Y bits together
    js     .rep_match_or_large_offset
    ;;cmp dl,040H               ; check if this is a 5 or 9-bit offset (Y bit)
                                ; discovered via the test with bit 6 set
    xchg   ecx, eax             ; clear eax - ecx is zero from the rep movsb above
                                ; (xchg leaves the flags from the test intact)
    jne    .offset_9_bit
                                ; 5 bit offset
    cmp    dl, 020H             ; carry set if bit 5 (Z) is clear
    call   .get_nibble_x
    jmp    .dec_offset_top
 .offset_9_bit:                  ; 9 bit offset
    lodsb                       ; get 8 bit offset from stream in A
    dec    ah                   ; set offset bits 15-8 to 1
    test   dl, 020H             ; test bit Z (offset bit 8)
    je     .get_match_length
 .dec_offset_top:
    dec    ah                   ; 9-bit offset with Z set: clear offset bit 8
                                ; 5-bit offset: set offset bits 15-8 to 1
    jmp    .get_match_length
 .rep_match_or_large_offset:
    ;;cmp dl,0c0H               ; check if this is a 13-bit offset or a 16-bit offset/rep match (Y bit)
    jpe    .rep_match_or_16_bit ; X and Y both set gives even parity in the test result
                                ; 13 bit offset
    cmp    dl, 0A0H             ; test bit 5 (knowing that bit 7 is also set)
    xchg   ah, al
    call   .get_nibble_x
    sub    al, 2                ; subtract 512 (al becomes the high byte below)
    jmp    .get_match_length_1
 .rep_match_or_16_bit:
    test   dl, 020H             ; test bit Z: set means rep-match
    jne    .repeat_match        ; rep-match: reuse the offset already in ebp
                                ; 16 bit offset
    lodsb                       ; Get 2-byte match offset (high byte first)
 .get_match_length_1:
    xchg   ah, al               ; move the high byte into ah
    lodsb                       ; load match offset bits 0-7
 .get_match_length:
    xchg   ebp, eax             ; ebp: offset
 .repeat_match:
    xchg   eax, edx             ; ax: original token
    and    al, 07H              ; isolate match length in token (MMM)
    add    al, 2                ; add MIN_MATCH_SIZE_V2
    cmp    al, 09H              ; MIN_MATCH_SIZE_V2 + MATCH_RUN_LEN_V2?
    jne    .got_matchlen        ; no, we have the full match length from the token, go copy
    call   .get_nibble          ; get extra match length nibble
    add    al, cl               ; add len from token to nibble 
    cmp    al, 018H             ; MIN_MATCH_SIZE_V2 + MATCH_RUN_LEN_V2 + 15?
    jne    .got_matchlen        ; no, token + nibble give the full match length, go copy
    lodsb                       ; grab extra length byte
    add    al,018H              ; overflow?
    jnc    .got_matchlen        ; if not, we have the entire length
    je     .done_decompressing  ; sum wrapped to exactly zero: EOD code
    lodsw                       ; grab 16-bit length
 .got_matchlen:
    xchg   ecx, eax             ; copy match length into ecx
    xchg   esi, eax             ; park the input pointer in eax
    or     ebp, 0FFFF0000H      ; widen the 16-bit offset to a negative 32-bit value.
                                ; Offsets are distances of up to 64K, so a 16-bit
                                ; offset whose bit 15 is clear must not be
                                ; sign-extended (it would point forward).
    lea    esi,[ebp+edi]        ; esi now points at back reference in output data
    rep    movsb                ; copy match
    xchg   esi, eax             ; restore esi
    jmp    .decode_token        ; go decode another token
 .done_decompressing:
    sub    edi, [esp]           ; bytes written = final edi - entry edi saved by pushad
    mov    [esp+28], edi        ; store into the saved-eax slot so popad returns it in eax
    popad
    ret                         ; done
 ; Build the top byte of a 5-bit or 13-bit offset in al.
 ; in:  carry = set when the token's Z bit is clear (from the caller's cmp), al = 0
 ; out: al = 111nnnnz, where nnnn is the next nibble and z the inverted Z bit
 .get_nibble_x:
    cmc                         ; carry set if the Z bit was set
    rcr    al, 1                ; Z into al bit 7
    call   .get_nibble          ; get nibble for offset bits 0-3
    or     al, cl               ; merge nibble
    rol    al, 1                ; Z -> bit 0, nibble -> bits 4-1
    xor    al, 0E1H             ; set offset bits 7-5 to 1 (and invert bit 0)
    ret
 ; Fetch the next 4-bit value from the nibble stream into cl.
 ; A new byte is read from esi on every other call; eax is preserved.
 .get_nibble:
    neg    bh                   ; nibble ready?
    jns    .has_nibble
    xchg   ebx, eax             ; park eax in ebx so lodsb can use al
    lodsb                       ; load two nibbles
    xchg   ebx, eax             ; bl = new byte, eax restored
 .has_nibble:
    mov    cl, 4                ; swap 4 high and low bits of nibble
    ror    bl, cl
    mov    cl, 0FH
    and    cl, bl
    ret
--- a/Tools/unix/lzsa/asm/z80/unlzsa1_fast.asm
+++ b/Tools/unix/lzsa/asm/z80/unlzsa1_fast.asm
@ -0,0 +1,201 @@
 ;
 ;  Speed-optimized LZSA1 decompressor by spke & uniabis (109 bytes)
 ;
 ;  ver.00 by spke for LZSA 0.5.4 (03-24/04/2019, 134 bytes);
 ;  ver.01 by spke for LZSA 0.5.6 (25/04/2019, 110(-24) bytes, +0.2% speed);
 ;  ver.02 by spke for LZSA 1.0.5 (24/07/2019, added support for backward decompression);
 ;  ver.03 by uniabis (30/07/2019, 109(-1) bytes, +3.5% speed);
 ;  ver.04 by spke (31/07/2019, small re-organization of macros);
 ;  ver.05 by uniabis (22/08/2019, 107(-2) bytes, same speed);
 ;  ver.06 by spke for LZSA 1.0.7 (27/08/2019, 111(+4) bytes, +2.1% speed);
 ;  ver.07 by spke for LZSA 1.1.0 (25/09/2019, added full revision history);
 ;  ver.08 by spke for LZSA 1.1.2 (22/10/2019, re-organized macros and added an option for unrolled copying of long matches);
 ;  ver.09 by spke for LZSA 1.2.1 (02/01/2020, 109(-2) bytes, same speed)
 ;
 ;  The data must be compressed using the command line compressor by Emmanuel Marty
 ;  The compression is done as follows:
 ;
 ;  lzsa.exe -f1 -r <sourcefile> <outfile>
 ;
 ;  where option -r asks for the generation of raw (frame-less) data.
 ;
 ;  The decompression is done in the standard way:
 ;
 ;  ld hl,FirstByteOfCompressedData
 ;  ld de,FirstByteOfMemoryForDecompressedData
 ;  call DecompressLZSA1
 ;
 ;  Backward compression is also supported; you can compress files backward using:
 ;
 ;  lzsa.exe -f1 -r -b <sourcefile> <outfile>
 ;
 ;  and decompress the resulting files using:
 ;
 ;  ld hl,LastByteOfCompressedData
 ;  ld de,LastByteOfMemoryForDecompressedData
 ;  call DecompressLZSA1
 ;
 ;  (do not forget to uncomment the BACKWARD_DECOMPRESS option in the decompressor).
 ;
 ;  Of course, LZSA compression algorithms are (c) 2019 Emmanuel Marty,
 ;  see https://github.com/emmanuel-marty/lzsa for more information
 ;
 ;  Drop me an email if you have any comments/ideas/suggestions: zxintrospec@gmail.com
 ;
 ;  This software is provided 'as-is', without any express or implied
 ;  warranty.  In no event will the authors be held liable for any damages
 ;  arising from the use of this software.
 ;
 ;  Permission is granted to anyone to use this software for any purpose,
 ;  including commercial applications, and to alter it and redistribute it
 ;  freely, subject to the following restrictions:
 ;
 ;  1. The origin of this software must not be misrepresented; you must not
 ;     claim that you wrote the original software. If you use this software
 ;     in a product, an acknowledgment in the product documentation would be
 ;     appreciated but is not required.
 ;  2. Altered source versions must be plainly marked as such, and must not be
 ;     misrepresented as being the original software.
 ;  3. This notice may not be removed or altered from any source distribution.
 ;	DEFINE	UNROLL_LONG_MATCHES						; uncomment for faster decompression of very compressible data (+57 bytes)
 ;	DEFINE	BACKWARD_DECOMPRESS
 	; Direction-dependent primitives.  The forward variants are assembled
 	; unless BACKWARD_DECOMPRESS is defined.
 	;   NEXT_HL    - step HL by one byte in the decompression direction
 	;   ADD_OFFSET - exchange DE and HL, then
 	;                forward:  HL := (old DE) + (old HL)
 	;                backward: HL := (old HL) - (old DE), clobbers A
 	;                in both cases DE ends up holding the old HL
 	;   COPY1      - copy one byte from (HL) to (DE) with ldi/ldd
 	;   COPYBC     - copy BC bytes from (HL) to (DE) with ldir/lddr
 	IFNDEF	BACKWARD_DECOMPRESS
 		MACRO NEXT_HL
 		inc hl
 		ENDM
 		MACRO ADD_OFFSET
 		ex de,hl : add hl,de
 		ENDM
 		MACRO COPY1
 		ldi
 		ENDM
 		MACRO COPYBC
 		ldir
 		ENDM
 	ELSE
 		MACRO NEXT_HL
 		dec hl
 		ENDM
 		MACRO ADD_OFFSET
 		; 16-bit subtraction done bytewise through A: low byte first, then
 		; the high byte with the borrow from the low byte
 		ex de,hl : ld a,e : sub l : ld l,a
 		ld a,d : sbc h : ld h,a						; 4*4+3*4 = 28t / 7 bytes
 		ENDM
 		MACRO COPY1
 		ldd
 		ENDM
 		MACRO COPYBC
 		lddr
 		ENDM
 	ENDIF
@DecompressLZSA1:
 		; Entry: HL = compressed data, DE = destination (see the calling sequence in the file header).
 		; B is cleared once here; every block copy below ends with BC = 0, so a short length
 		; only needs to be loaded into C.
 		ld b,0 : jr ReadToken
 		; reached with A = 0, so XOR fetches the token (LLL = 0, hence A = O|000|MMMM);
 		; NEXT_HL does not touch the flags, so JP M tests the O bit of the token
 NoLiterals:	xor (hl) : NEXT_HL : jp m,LongOffset
 		; one-byte offset: the destination is saved on the stack,
 		; the low byte comes from the stream and the high byte is forced to #FF
 ShortOffset:	push de : ld e,(hl) : ld d,#FF
 		; short matches have length 0+3..14+3
 		add 3 : cp 15+3 : jr nc,LongerMatch
 		; placed here this saves a JP per iteration
 CopyMatch:	ld c,a
 .UseC		NEXT_HL : ex (sp),hl						; BC = len, DE = offset, HL = dest, SP ->[dest,src]
 		ADD_OFFSET							; BC = len, DE = dest, HL = dest-offset, SP->[src]
 		; the first two bytes are copied individually, the rest with the block copy
 		COPY1 : COPY1 : COPYBC						; BC = 0, DE = dest
 .popSrc		pop hl								; HL = src
 ReadToken:	; first a byte token "O|LLL|MMMM" is read from the stream,
 		; where LLL is the number of literals and MMMM is
 		; a length of the match that follows after the literals
 		ld a,(hl) : and #70 : jr z,NoLiterals
 		cp #70 : jr z,MoreLiterals					; LLL=7 means 7+ literals...
 		rrca : rrca : rrca : rrca : ld c,a				; LLL<7 means 0..6 literals...
 		; re-read the token, step past it and copy the C literals that follow
 		ld a,(hl) : NEXT_HL
 		COPYBC
 		; the top bit of token is set if the offset contains two bytes
 		and #8F : jp p,ShortOffset
 LongOffset:	; read second byte of the offset
 		push de : ld e,(hl) : NEXT_HL : ld d,(hl)
 		; A = 128+MMMM here: -128 removes the O bit, +3 is the minimum match length
 		add -128+3 : cp 15+3 : jp c,CopyMatch
 	IFNDEF	UNROLL_LONG_MATCHES
 		; MMMM=15 indicates a multi-byte match length
 LongerMatch:	NEXT_HL : add (hl) : jr nc,CopyMatch
 		; the codes are designed to overflow;
 		; the overflow value 1 means read 1 extra byte
 		; and overflow value 0 means read 2 extra bytes
 		; (NEXT_HL and LD keep the flags, so JR NZ still tests the result of the ADD)
 .code1		ld b,a : NEXT_HL : ld c,(hl) : jr nz,CopyMatch.UseC
 .code0		NEXT_HL : ld b,(hl)
 		; the two-byte match length equal to zero
 		; designates the end-of-data marker
 		ld a,b : or c : jr nz,CopyMatch.UseC
 		; discard the match: restore DE = destination pointer pushed for it and return
 		pop de : ret
 	ELSE
 		; MMMM=15 indicates a multi-byte match length
 LongerMatch:	NEXT_HL : add (hl) : jr c,VeryLongMatch
 		ld c,a
 .UseC		NEXT_HL : ex (sp),hl
 		ADD_OFFSET
 		COPY1 : COPY1
 		; this is an unrolled equivalent of LDIR
 		; A = (-C) mod 16 is the number of copies to skip on the first pass; it is doubled
 		; because each COPY1 is a 2-byte instruction. The value is stored into the
 		; displacement byte of the JR below (self-modifying code, so this variant has to
 		; run from writable memory).
 		xor a : sub c
 		and 16-1 : add a
 		ld (.jrOffset),a : jr nz,$+2
 .jrOffset	EQU $-1
 .fastLDIR	DUP 16
 		COPY1
 		EDUP
 		; P/V stays set by COPY1 while BC is non-zero, i.e. while bytes remain
 		jp pe,.fastLDIR
 		jp CopyMatch.popSrc
 VeryLongMatch:	; the codes are designed to overflow;
 		; the overflow value 1 means read 1 extra byte
 		; and overflow value 0 means read 2 extra bytes
 .code1		ld b,a : NEXT_HL : ld c,(hl) : jr nz,LongerMatch.UseC
 .code0		NEXT_HL : ld b,(hl)
 		; the two-byte match length equal to zero
 		; designates the end-of-data marker
 		ld a,b : or c : jr nz,LongerMatch.UseC
 		; discard the match: restore DE = destination pointer pushed for it and return
 		pop de : ret
 	ENDIF
 MoreLiterals:	; there are three possible situations here
 		; A = #70 on arrival and LLL = 7, so XOR leaves O|000|MMMM; it is parked in AF'
 		; together with its sign flag until the literals have been copied
 		xor (hl) : NEXT_HL : exa
 		ld a,7 : add (hl) : jr c,ManyLiterals
 CopyLiterals:	ld c,a
 .UseC		NEXT_HL : COPYBC
 		; bring back O|000|MMMM and the saved sign flag to choose the offset size
 		exa : jp p,ShortOffset : jr LongOffset
 ManyLiterals:
 		; the literal count overflowed 8 bits, using the same overflow codes as the match length:
 		; 1 = one extra byte follows (B = 1, C = byte), 0 = a two-byte count follows
 .code1		ld b,a : NEXT_HL : ld c,(hl) : jr nz,CopyLiterals.UseC
 .code0		NEXT_HL : ld b,(hl) : jr CopyLiterals.UseC
--- a/Tools/unix/lzsa/asm/z80/unlzsa1_small.asm
+++ b/Tools/unix/lzsa/asm/z80/unlzsa1_small.asm
@ -0,0 +1,135 @@
 ;
 ;  Size-optimized LZSA1 decompressor by spke & uniabis (67 bytes)
 ;
 ;  ver.00 by spke for LZSA 0.5.4 (23/04/2019, 69 bytes);
 ;  ver.01 by spke for LZSA 1.0.5 (24/07/2019, added support for backward decompression);
 ;  ver.02 by uniabis (30/07/2019, 68(-1) bytes, +3.2% speed);
 ;  ver.03 by spke for LZSA 1.0.7 (31/07/2019, small re-organization of macros);
 ;  ver.04 by spke (06/08/2019, 67(-1) bytes, -1.2% speed);
 ;  ver.05 by spke for LZSA 1.1.0 (25/09/2019, added full revision history)
 ;
 ;  The data must be compressed using the command line compressor by Emmanuel Marty
 ;  The compression is done as follows:
 ;
 ;  lzsa.exe -f1 -r <sourcefile> <outfile>
 ;
 ;  where option -r asks for the generation of raw (frame-less) data.
 ;
 ;  The decompression is done in the standard way:
 ;
 ;  ld hl,FirstByteOfCompressedData
 ;  ld de,FirstByteOfMemoryForDecompressedData
 ;  call DecompressLZSA1
 ;
 ;  Backward compression is also supported; you can compress files backward using:
 ;
 ;  lzsa.exe -f1 -r -b <sourcefile> <outfile>
 ;
 ;  and decompress the resulting files using:
 ;
 ;  ld hl,LastByteOfCompressedData
 ;  ld de,LastByteOfMemoryForDecompressedData
 ;  call DecompressLZSA1
 ;
 ;  (do not forget to uncomment the BACKWARD_DECOMPRESS option in the decompressor).
 ;
 ;  Of course, LZSA compression algorithms are (c) 2019 Emmanuel Marty,
 ;  see https://github.com/emmanuel-marty/lzsa for more information
 ;
 ;  Drop me an email if you have any comments/ideas/suggestions: zxintrospec@gmail.com
 ;
 ;  This software is provided 'as-is', without any express or implied
 ;  warranty.  In no event will the authors be held liable for any damages
 ;  arising from the use of this software.
 ;
 ;  Permission is granted to anyone to use this software for any purpose,
 ;  including commercial applications, and to alter it and redistribute it
 ;  freely, subject to the following restrictions:
 ;
 ;  1. The origin of this software must not be misrepresented; you must not
 ;     claim that you wrote the original software. If you use this software
 ;     in a product, an acknowledgment in the product documentation would be
 ;     appreciated but is not required.
 ;  2. Altered source versions must be plainly marked as such, and must not be
 ;     misrepresented as being the original software.
 ;  3. This notice may not be removed or altered from any source distribution.
 ;	DEFINE	BACKWARD_DECOMPRESS
 	IFNDEF	BACKWARD_DECOMPRESS
 		; forward decompression: both pointers move upwards through memory
 		; NEXT_HL: step HL to the next byte of the stream (16-bit INC, flags untouched)
 		MACRO NEXT_HL
 		inc hl
 		ENDM
 		; ADD_OFFSET: swap DE and HL, then HL = HL + DE (used with HL = dest, DE = -offset)
 		MACRO ADD_OFFSET
 		ex de,hl : add hl,de
 		ENDM
 		; BLOCKCOPY: copy BC bytes from (HL) to (DE); leaves BC = 0
 		MACRO BLOCKCOPY
 		ldir
 		ENDM
 	ELSE
 		; backward decompression: both pointers move downwards through memory
 		; NEXT_HL: step HL to the previous byte (16-bit DEC, flags untouched)
 		MACRO NEXT_HL
 		dec hl
 		ENDM
 		; ADD_OFFSET: HL = HL - DE and DE = the old HL (OR A clears carry for the SBC)
 		MACRO ADD_OFFSET
 		push hl : or a : sbc hl,de : pop de				; 11+4+15+10 = 40t / 5 bytes
 		ENDM
 		; BLOCKCOPY: copy BC bytes from (HL) to (DE) moving downwards; leaves BC = 0
 		MACRO BLOCKCOPY
 		lddr
 		ENDM
 	ENDIF
@DecompressLZSA1:
 		; Entry: HL = compressed data, DE = destination (see the calling sequence in the file header).
 		; B is cleared once here; each BLOCKCOPY ends with BC = 0, so lengths that fit
 		; in a byte only need to be loaded into C.
 		ld b,0
 		; first a byte token "O|LLL|MMMM" is read from the stream,
 		; where LLL is the number of literals and MMMM is
 		; a length of the match that follows after the literals
 		; (the token is kept on the stack while the literals are processed)
 ReadToken:	ld a,(hl) : NEXT_HL : push af
 		and #70 : jr z,NoLiterals
 		rrca : rrca : rrca : rrca					; LLL<7 means 0..6 literals...
 		cp #07 : call z,ReadLongBA					; LLL=7 means 7+ literals...
 		ld c,a : BLOCKCOPY
 		; next we read the low byte of the -offset
 		; (the token is popped back into A and the destination takes its place on the stack)
 NoLiterals:	pop af : push de : ld e,(hl) : NEXT_HL : ld d,#FF
 		; the top bit of token is set if
 		; the offset contains the high byte as well
 		or a : jp p,ShortOffset
 LongOffset:	ld d,(hl) : NEXT_HL
 		; last but not least, the match length is read
 ShortOffset:	and #0F : add 3							; MMMM<15 means match lengths 0+3..14+3
 		cp 15+3 : call z,ReadLongBA					; MMMM=15 means lengths 14+3+
 		ld c,a
 		ex (sp),hl							; BC = len, DE = -offset, HL = dest, SP -> [src]
 		ADD_OFFSET							; BC = len, DE = dest, HL = dest+(-offset), SP -> [src]
 		BLOCKCOPY							; BC = 0, DE = dest
 		pop hl : jr ReadToken						; HL = src
 		; a standard routine to read extended codes
 		; into registers B (higher byte) and A (lower byte).
 		; On entry A holds the base value (7 for literals, 15+3 for matches);
 		; the next stream byte is added to it and no overflow means the length is final.
 ReadLongBA:	add (hl) : NEXT_HL : ret nc
 		; the codes are designed to overflow;
 		; the overflow value 1 means read 1 extra byte
 		; and overflow value 0 means read 2 extra bytes
 		; (LD and NEXT_HL keep the flags, so RET NZ still tests the result of the ADD)
 .code1:		ld b,a : ld a,(hl) : NEXT_HL : ret nz
 .code0:		ld c,a : ld b,(hl) : NEXT_HL
 		; the two-byte match length equal to zero
 		; designates the end-of-data marker
 		or b : ld a,c : ret nz
 		; end of data: the first POP drops the return address of ReadLongBA, the second
 		; restores DE = destination pushed for this match; RET then leaves the decompressor
 		pop de : pop de : ret
--- a/Tools/unix/lzsa/asm/z80/unlzsa2_fast.asm
+++ b/Tools/unix/lzsa/asm/z80/unlzsa2_fast.asm
@ -0,0 +1,281 @@
 ;
 ;  Speed-optimized LZSA2 decompressor by spke & uniabis (216 bytes)
 ;
 ;  ver.00 by spke for LZSA 1.0.0 (02-07/06/2019, 218 bytes);
 ;  ver.01 by spke for LZSA 1.0.5 (24/07/2019, added support for backward decompression);
 ;  ver.02 by spke for LZSA 1.0.6 (27/07/2019, fixed a bug in the backward decompressor);
 ;  ver.03 by uniabis (30/07/2019, 213(-5) bytes, +3.8% speed and support for Hitachi HD64180);
 ;  ver.04 by spke for LZSA 1.0.7 (01/08/2019, 214(+1) bytes, +0.2% speed and small re-organization of macros);
 ;  ver.05 by spke (27/08/2019, 216(+2) bytes, +1.1% speed);
 ;  ver.06 by spke for LZSA 1.1.0 (26/09/2019, added full revision history);
 ;  ver.07 by spke for LZSA 1.1.1 (10/10/2019, +0.2% speed and an option for unrolled copying of long matches)
 ;
 ;  The data must be compressed using the command line compressor by Emmanuel Marty
 ;  The compression is done as follows:
 ;
 ;  lzsa.exe -f2 -r <sourcefile> <outfile>
 ;
 ;  where option -r asks for the generation of raw (frame-less) data.
 ;
 ;  The decompression is done in the standard way:
 ;
 ;  ld hl,FirstByteOfCompressedData
 ;  ld de,FirstByteOfMemoryForDecompressedData
 ;  call DecompressLZSA2
 ;
 ;  Backward compression is also supported; you can compress files backward using:
 ;
 ;  lzsa.exe -f2 -r -b <sourcefile> <outfile>
 ;
 ;  and decompress the resulting files using:
 ;
 ;  ld hl,LastByteOfCompressedData
 ;  ld de,LastByteOfMemoryForDecompressedData
 ;  call DecompressLZSA2
 ;
 ;  (do not forget to uncomment the BACKWARD_DECOMPRESS option in the decompressor).
 ;
 ;  Of course, LZSA2 compression algorithms are (c) 2019 Emmanuel Marty,
 ;  see https://github.com/emmanuel-marty/lzsa for more information
 ;
 ;  Drop me an email if you have any comments/ideas/suggestions: zxintrospec@gmail.com
 ;
 ;  This software is provided 'as-is', without any express or implied
 ;  warranty.  In no event will the authors be held liable for any damages
 ;  arising from the use of this software.
 ;
 ;  Permission is granted to anyone to use this software for any purpose,
 ;  including commercial applications, and to alter it and redistribute it
 ;  freely, subject to the following restrictions:
 ;
 ;  1. The origin of this software must not be misrepresented; you must not
 ;     claim that you wrote the original software. If you use this software
 ;     in a product, an acknowledgment in the product documentation would be
 ;     appreciated but is not required.
 ;  2. Altered source versions must be plainly marked as such, and must not be
 ;     misrepresented as being the original software.
 ;  3. This notice may not be removed or altered from any source distribution.
 ;	DEFINE	UNROLL_LONG_MATCHES						; uncomment for faster decompression of very compressible data (+38 bytes)
 ;	DEFINE	BACKWARD_DECOMPRESS						; uncomment for data compressed with option -b
 ;	DEFINE	HD64180								; uncomment for systems using Hitachi HD64180
 	IFNDEF	BACKWARD_DECOMPRESS
 		; forward decompression: both pointers move upwards through memory
 		; NEXT_HL: step HL to the next byte of the stream (16-bit INC, flags untouched)
 		MACRO NEXT_HL
 		inc hl
 		ENDM
 		; ADD_OFFSET: swap DE and HL, then HL = HL + DE (used with HL = dest, DE = offset)
 		MACRO ADD_OFFSET
 		ex de,hl : add hl,de
 		ENDM
 		; COPY1: copy one byte from (HL) to (DE), advance both pointers, decrement BC
 		MACRO COPY1
 		ldi
 		ENDM
 		; COPYBC: copy BC bytes from (HL) to (DE); leaves BC = 0
 		MACRO COPYBC
 		ldir
 		ENDM
 	ELSE
 		; backward decompression: both pointers move downwards through memory
 		; NEXT_HL: step HL to the previous byte (16-bit DEC, flags untouched)
 		MACRO NEXT_HL
 		dec hl
 		ENDM
 		; ADD_OFFSET: swap DE and HL, then HL = DE - HL, computed byte by byte; clobbers A
 		MACRO ADD_OFFSET
 		ex de,hl : ld a,e : sub l : ld l,a
 		ld a,d : sbc h : ld h,a						; 4*4+3*4 = 28t / 7 bytes
 		ENDM
 		; COPY1: copy one byte from (HL) to (DE), step both pointers down, decrement BC
 		MACRO COPY1
 		ldd
 		ENDM
 		; COPYBC: copy BC bytes from (HL) to (DE) moving downwards; leaves BC = 0
 		MACRO COPYBC
 		lddr
 		ENDM
 	ENDIF
 	IFNDEF	HD64180
 		; IX <-> DE transfers through the IXL/IXH register halves
 		MACRO LD_IX_DE
 		ld ixl,e : ld ixh,d
 		ENDM
 		MACRO LD_DE_IX
 		ld e,ixl : ld d,ixh
 		ENDM
 	ELSE
 		; HD64180 build: the same transfers done through the stack instead of IXL/IXH
 		MACRO LD_IX_DE
 		push de : pop ix
 		ENDM
 		MACRO LD_DE_IX
 		push ix : pop de
 		ENDM
 	ENDIF
@DecompressLZSA2:
 		; Entry: HL = compressed data, DE = destination (see the calling sequence in the file header).
 		; IX keeps the most recent match offset (for the "repeated offset" case) and
 		; AF' is used as a one-nibble buffer, so both are modified by this routine.
 		; A' stores next nibble as %1111.... or assumed to contain trash
 		; B is assumed to be 0
 		; Carry in F' set means "no nibble buffered", which is the initial state.
 		ld b,0 : scf : exa : jr ReadToken
 		; literal count did not fit in the nibble: 18 + next byte, or, if that overflows,
 		; a two-byte count (the token parked in B is moved back to A first)
 ManyLiterals:	ld a,18 : add (hl) : NEXT_HL : jr nc,CopyLiterals
 		ld c,(hl) : NEXT_HL
 		ld a,b : ld b,(hl)
 		jr ReadToken.NEXTHLuseBC
 		; LL = 3: the token is parked in B while a nibble is fetched
 MoreLiterals:	ld b,(hl) : NEXT_HL
 		; nibble fetch: if F' had carry clear, A' already holds the buffered nibble;
 		; otherwise a new byte is read, its low nibble is buffered as #F0|n (OR clears
 		; carry to mark the buffer as full) and its high nibble is returned as #F0|n
 		scf : exa : jr nc,.noUpdate
 			ld a,(hl) : or #F0 : exa
 			ld a,(hl) : NEXT_HL : or #0F
 			rrca : rrca : rrca : rrca
 .noUpdate	;sub #F0-3 : cp 15+3 : jr z,ManyLiterals
 		; nibble 15 (A = #FF) selects ManyLiterals, otherwise A = nibble + 3
 		inc a : jr z,ManyLiterals : sub #F0-3+1
 		; C = literal count, A = token again, B back to 0 for the copy
 CopyLiterals:	ld c,a : ld a,b : ld b,0
 		COPYBC
 		push de : or a : jp p,CASE0xx ;: jr CASE1xx
 		cp %11000000 : jr c,CASE10x
 CASE11x		cp %11100000 : jr c,CASE110
 		; "111": repeated offset
 CASE111:	LD_DE_IX : jr MatchLen
 		; reached with the flags of "AND %00011000": non-zero means LL = 3
 Literals0011:	jr nz,MoreLiterals
 		; if "LL" of the byte token is equal to 0,
 		; there are no literals to copy
 		; (A = 0 here, so OR loads the token and sets the sign flag from its top bit)
 NoLiterals:	or (hl) : NEXT_HL
 		push de : jp m,CASE1xx
 		; short (5 or 9 bit long) offsets
 CASE0xx		ld d,#FF : cp %01000000 : jr c,CASE00x
 		; "01x": the case of the 9-bit offset
 CASE01x:	cp %01100000 : rl d
 ReadOffsetE	ld e,(hl) : NEXT_HL
 		; remember the offset for a later "repeated offset" match
 SaveOffset:	LD_IX_DE
 		; MMM = 7 means an extended match length, otherwise length = MMM + 2
 MatchLen:	inc a : and %00000111 : jr z,LongerMatch : inc a
 CopyMatch:	ld c,a
 .useC		ex (sp),hl						; BC = len, DE = offset, HL = dest, SP ->[dest,src]
 		ADD_OFFSET						; BC = len, DE = dest, HL = dest-offset, SP->[src]
 		COPY1
 		COPYBC
 .popSrc		pop hl
 		; compressed data stream contains records
 		; each record begins with the byte token "XYZ|LL|MMM"
 ReadToken:	ld a,(hl) : and %00011000 : jp pe,Literals0011		; process the cases 00 and 11 separately
 		rrca : rrca : rrca
 		ld c,a : ld a,(hl)					; token is re-read for further processing
 .NEXTHLuseBC	NEXT_HL
 		COPYBC
 		; the token and literals are followed by the offset
 		push de : or a : jp p,CASE0xx
 CASE1xx		cp %11000000 : jr nc,CASE11x
 		; "10x": the case of the 13-bit offset
 		; (carry is already set by the CP that led here, so no SCF is needed before EXA)
 CASE10x:	ld c,a : exa : jr nc,.noUpdate
 			ld a,(hl) : or #F0 : exa
 			ld a,(hl) : NEXT_HL : or #0F
 			rrca : rrca : rrca : rrca
 .noUpdate	ld d,a : ld a,c
 		cp %10100000 : dec d : rl d : jr ReadOffsetE
 		; "110": 16-bit offset
 CASE110:	ld d,(hl) : NEXT_HL : jr ReadOffsetE
 		; "00x": the case of the 5-bit offset
 		; (carry is already set by the CP that led here, so no SCF is needed before EXA)
 CASE00x:	ld c,a : exa : jr nc,.noUpdate
 			ld a,(hl) : or #F0 : exa
 			ld a,(hl) : NEXT_HL : or #0F
 			rrca : rrca : rrca : rrca
 .noUpdate	ld e,a : ld a,c
 		cp %00100000 : rl e : jp SaveOffset
 		; extended match length: nibble + 9, and if the nibble is 15 more bytes follow
 LongerMatch:	scf : exa : jr nc,.noUpdate
 			ld a,(hl) : or #F0 : exa
 			ld a,(hl) : NEXT_HL : or #0F
 			rrca : rrca : rrca : rrca
 .noUpdate	sub #F0-9 : cp 15+9 : jr c,CopyMatch
 	IFNDEF	UNROLL_LONG_MATCHES
 		; 24 + next byte; on overflow a two-byte length is read, unless the sum is
 		; exactly zero (Z from the ADD), which ends the decompression
 LongMatch:	add (hl) : NEXT_HL : jr nc,CopyMatch
 		ld c,(hl) : NEXT_HL
 		ld b,(hl) : NEXT_HL : jr nz,CopyMatch.useC
 		; end of data: restore DE = destination pushed for this match and return
 		pop de : ret
 	ELSE
 		; 24 + next byte; an overflow is handled in VeryLongMatch
 LongMatch:	add (hl) : NEXT_HL : jr c,VeryLongMatch
 		ld c,a
 .useC		ex (sp),hl
 		ADD_OFFSET
 		COPY1
 		; this is an unrolled equivalent of LDIR
 		; A = (-C) mod 8 is the number of copies to skip on the first pass; it is doubled
 		; because each COPY1 is a 2-byte instruction. The value is stored into the
 		; displacement byte of the JR below (self-modifying code, so this variant has to
 		; run from writable memory).
 		xor a : sub c
 		and 8-1 : add a
 		ld (.jrOffset),a : jr nz,$+2
 .jrOffset	EQU $-1
 .fastLDIR	DUP 8
 		COPY1
 		EDUP
 		; P/V stays set by COPY1 while BC is non-zero, i.e. while bytes remain
 		jp pe,.fastLDIR
 		jp CopyMatch.popSrc
 		; a two-byte length is read, unless the sum computed in LongMatch was
 		; exactly zero (Z from the ADD), which ends the decompression
 VeryLongMatch:	ld c,(hl) : NEXT_HL
 		ld b,(hl) : NEXT_HL : jr nz,LongMatch.useC
 		; end of data: restore DE = destination pushed for this match and return
 		pop de : ret
 	ENDIF
--- a/Tools/unix/lzsa/asm/z80/unlzsa2_small.asm
+++ b/Tools/unix/lzsa/asm/z80/unlzsa2_small.asm
@ -0,0 +1,187 @@
 ;
 ;  Size-optimized LZSA2 decompressor by spke & uniabis (139 bytes)
 ;
 ;  ver.00 by spke for LZSA 1.0.0 (02-09/06/2019, 145 bytes);
 ;  ver.01 by spke for LZSA 1.0.5 (24/07/2019, added support for backward decompression);
 ;  ver.02 by uniabis (30/07/2019, 144(-1) bytes, +3.3% speed and support for Hitachi HD64180);
 ;  ver.03 by spke for LZSA 1.0.7 (01/08/2019, 140(-4) bytes, -1.4% speed and small re-organization of macros);
 ;  ver.04 by spke for LZSA 1.1.0 (26/09/2019, removed usage of IY, added full revision history)
 ;  ver.05 by spke for LZSA 1.1.1 (11/10/2019, 139(-1) bytes, +0.1% speed)
 ;
 ;  The data must be compressed using the command line compressor by Emmanuel Marty
 ;  The compression is done as follows:
 ;
 ;  lzsa.exe -f2 -r <sourcefile> <outfile>
 ;
 ;  where option -r asks for the generation of raw (frame-less) data.
 ;
 ;  The decompression is done in the standard way:
 ;
 ;  ld hl,FirstByteOfCompressedData
 ;  ld de,FirstByteOfMemoryForDecompressedData
 ;  call DecompressLZSA2
 ;
 ;  Backward compression is also supported; you can compress files backward using:
 ;
 ;  lzsa.exe -f2 -r -b <sourcefile> <outfile>
 ;
 ;  and decompress the resulting files using:
 ;
 ;  ld hl,LastByteOfCompressedData
 ;  ld de,LastByteOfMemoryForDecompressedData
 ;  call DecompressLZSA2
 ;
 ;  (do not forget to uncomment the BACKWARD_DECOMPRESS option in the decompressor).
 ;
 ;  Of course, LZSA2 compression algorithms are (c) 2019 Emmanuel Marty,
 ;  see https://github.com/emmanuel-marty/lzsa for more information
 ;
 ;  Drop me an email if you have any comments/ideas/suggestions: zxintrospec@gmail.com
 ;
 ;  This software is provided 'as-is', without any express or implied
 ;  warranty.  In no event will the authors be held liable for any damages
 ;  arising from the use of this software.
 ;
 ;  Permission is granted to anyone to use this software for any purpose,
 ;  including commercial applications, and to alter it and redistribute it
 ;  freely, subject to the following restrictions:
 ;
 ;  1. The origin of this software must not be misrepresented; you must not
 ;     claim that you wrote the original software. If you use this software
 ;     in a product, an acknowledgment in the product documentation would be
 ;     appreciated but is not required.
 ;  2. Altered source versions must be plainly marked as such, and must not be
 ;     misrepresented as being the original software.
 ;  3. This notice may not be removed or altered from any source distribution.
 ;
 ;	DEFINE	BACKWARD_DECOMPRESS						; uncomment for data compressed with option -b
 ;	DEFINE	HD64180								; uncomment for systems using Hitachi HD64180
 	IFNDEF	BACKWARD_DECOMPRESS
 		; forward decompression: both pointers move upwards through memory
 		; NEXT_HL: step HL to the next byte of the stream (16-bit INC, flags untouched)
 		MACRO NEXT_HL
 		inc hl
 		ENDM
 		; ADD_OFFSET: swap DE and HL, then HL = HL + DE (used with HL = dest, DE = -offset)
 		MACRO ADD_OFFSET
 		ex de,hl : add hl,de
 		ENDM
 		; BLOCKCOPY: copy BC bytes from (HL) to (DE); leaves BC = 0
 		MACRO BLOCKCOPY
 		ldir
 		ENDM
 	ELSE
 		; backward decompression: both pointers move downwards through memory
 		; NEXT_HL: step HL to the previous byte (16-bit DEC, flags untouched)
 		MACRO NEXT_HL
 		dec hl
 		ENDM
 		; ADD_OFFSET: HL = HL - DE and DE = the old HL (OR A clears carry for the SBC)
 		MACRO ADD_OFFSET
 		push hl : or a : sbc hl,de : pop de				; 11+4+15+10 = 40t / 5 bytes
 		ENDM
 		; BLOCKCOPY: copy BC bytes from (HL) to (DE) moving downwards; leaves BC = 0
 		MACRO BLOCKCOPY
 		lddr
 		ENDM
 	ENDIF
 	IFNDEF	HD64180
 		; IX <-> DE transfers through the IXL/IXH register halves
 		MACRO LD_IX_DE
 		ld ixl,e : ld ixh,d
 		ENDM
 		MACRO LD_DE_IX
 		ld e,ixl : ld d,ixh
 		ENDM
 	ELSE
 		; HD64180 build: the same transfers done through the stack instead of IXL/IXH
 		MACRO LD_IX_DE
 		push de : pop ix
 		ENDM
 		MACRO LD_DE_IX
 		push ix : pop de
 		ENDM
 	ENDIF
@DecompressLZSA2:
 		; Entry: HL = compressed data, DE = destination (see the calling sequence in the file header).
 		; IX keeps the most recent match offset and AF' is used as a one-nibble buffer,
 		; so both are modified by this routine.
 		; B = 0 for the block copies; A' = 0 with a clear sign flag means "no nibble buffered"
 		xor a : ld b,a : exa : jr ReadToken
 		; "00x": 5-bit offset, four bits come from a nibble and one from the token
 CASE00x:	call ReadNibble
 		ld e,a : ld a,c
 		cp %00100000 : rl e : jr SaveOffset
 		; short offsets: the high byte starts as #FF
 CASE0xx		ld d,#FF : cp %01000000 : jr c,CASE00x
 		; "01x": 9-bit offset, the ninth bit comes from the token
 CASE01x:	cp %01100000 : rl d
 OffsetReadE:	ld e,(hl) : NEXT_HL
 		; remember the offset for a later "repeated offset" match
 SaveOffset:	LD_IX_DE
 		; match length = MMM + 2; MMM = 7 (length 9) means an extended code follows
 MatchLen:	and %00000111 : add 2 : cp 9 : call z,ExtendedCode
 CopyMatch:	ld c,a
 		ex (sp),hl							; BC = len, DE = -offset, HL = dest, SP -> [src]
 		ADD_OFFSET							; BC = len, DE = dest, HL = dest+(-offset), SP -> [src]
 		BLOCKCOPY							; BC = 0, DE = dest
 		pop hl								; HL = src
 		; each record starts with a token; it is kept on the stack while literals are processed
 ReadToken:	ld a,(hl) : NEXT_HL : push af
 		and %00011000 : jr z,NoLiterals
 		rrca : rrca : rrca
 		; parity is even only for LL = 3 (two bits set), which means an extended literal count
 		call pe,ExtendedCode
 		ld c,a
 		BLOCKCOPY
 		; the token is popped back into A and the destination takes its place on the stack
 NoLiterals:	pop af : push de
 		or a : jp p,CASE0xx
 CASE1xx		cp %11000000 : jr nc,CASE11x
 		; "10x": 13-bit offset, the high byte is built from a nibble and one token bit
 CASE10x:	call ReadNibble
 		ld d,a : ld a,c
 		cp %10100000 ;: rl d
 		dec d : rl d : DB #CA ; jr OffsetReadE				; #CA is JP Z,.. to skip all commands in CASE110 before jr OffsetReadE
 		; "110": 16-bit offset, the high byte is read from the stream
 CASE110:	ld d,(hl) : NEXT_HL : jr OffsetReadE
 CASE11x		cp %11100000 : jr c,CASE110
 		; "111": reuse the offset saved in IX
 CASE111:	LD_DE_IX : jr MatchLen
 		; extended length: on entry A holds the base value (saved in C by ReadNibble);
 		; result = base + nibble, unless the nibble is 15
 ExtendedCode:	call ReadNibble : inc a : jr z,ExtraByte
 		sub #F0+1 : add c : ret
 		; nibble was 15: base + 15 + next byte; on overflow a two-byte value is returned
 		; in B (high) and A (low), unless the sum was exactly zero (Z from the ADD),
 		; which ends the decompression
 ExtraByte	ld a,15 : add c : add (hl) : NEXT_HL : ret nc
 		ld a,(hl) : NEXT_HL
 		ld b,(hl) : NEXT_HL : ret nz
 		; end of data: drop the return address of ExtendedCode and restore DE = destination
 		pop de : pop de							; RET is not needed, because RET from ReadNibble is sufficient
 		; ReadNibble: saves A in C and returns the next nibble in A as #F0|n.
 		; A buffered nibble sits in A' with the sign flag set; XOR A marks the buffer empty.
 ReadNibble:	ld c,a : xor a : exa : ret m
 		; buffer empty: read a byte, keep its low nibble in A' and return its high nibble
 UpdateNibble	ld a,(hl) : or #F0 : exa
 		ld a,(hl) : NEXT_HL : or #0F
 		rrca : rrca : rrca : rrca : ret
--- a/Tools/unix/lzsa/pareto_graph.png
+++ b/Tools/unix/lzsa/pareto_graph.png
--- a/Tools/unix/lzsa/src/dictionary.c
+++ b/Tools/unix/lzsa/src/dictionary.c
@ -0,0 +1,101 @@
 /*
 * dictionary.c - dictionary implementation
 *
 * Copyright (C) 2019 Emmanuel Marty
 *
 * This software is provided 'as-is', without any express or implied
 * warranty.  In no event will the authors be held liable for any damages
 * arising from the use of this software.
 *
 * Permission is granted to anyone to use this software for any purpose,
 * including commercial applications, and to alter it and redistribute it
 * freely, subject to the following restrictions:
 *
 * 1. The origin of this software must not be misrepresented; you must not
 *    claim that you wrote the original software. If you use this software
 *    in a product, an acknowledgment in the product documentation would be
 *    appreciated but is not required.
 * 2. Altered source versions must be plainly marked as such, and must not be
 *    misrepresented as being the original software.
 * 3. This notice may not be removed or altered from any source distribution.
 */
 /*
 * Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
 *
 * Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
 * With help, ideas, optimizations and speed measurements by spke <zxintrospec@gmail.com>
 * With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
 * Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
 *
 */
 #include <stdio.h>
 #include <stdlib.h>
 #include "format.h"
 #include "lib.h"
 /**
 * Load dictionary contents
 *
 * At most BLOCK_SIZE bytes are loaded; when the file is larger than that, only its
 * last BLOCK_SIZE bytes are kept. On success the returned buffer belongs to the caller,
 * who releases it with lzsa_dictionary_free(). On failure the outputs are set to
 * NULL and 0, so they can be handed to lzsa_dictionary_free() unconditionally.
 *
 * @param pszDictionaryFilename name of dictionary file, or NULL for none
 * @param ppDictionaryData pointer to returned dictionary contents (receives NULL when no dictionary is loaded)
 * @param pDictionaryDataSize pointer to returned size of dictionary contents in bytes (receives 0 when no dictionary is loaded)
 *
 * @return LZSA_OK for success, LZSA_ERROR_MEMORY if the buffer cannot be allocated,
 *         or LZSA_ERROR_DICTIONARY if the file cannot be opened, positioned or read
 */
 int lzsa_dictionary_load(const char *pszDictionaryFilename, void **ppDictionaryData, int *pDictionaryDataSize) {
   unsigned char *pDictionaryData;
   FILE *pDictionaryFile;
   size_t nBytesRead = 0;
   int nResult = LZSA_OK;

   /* Define the outputs before anything can fail, so that error paths never leave garbage behind */
   *ppDictionaryData = NULL;
   *pDictionaryDataSize = 0;

   if (!pszDictionaryFilename) {
      return LZSA_OK;
   }

   pDictionaryData = (unsigned char *)malloc(BLOCK_SIZE);
   if (!pDictionaryData) {
      return LZSA_ERROR_MEMORY;
   }

   pDictionaryFile = fopen(pszDictionaryFilename, "rb");
   if (!pDictionaryFile) {
      free(pDictionaryData);
      return LZSA_ERROR_DICTIONARY;
   }

   /* Measure the file by seeking to its end */
   fseek(pDictionaryFile, 0, SEEK_END);
 #ifdef _WIN32
   __int64 nDictionaryFileSize = _ftelli64(pDictionaryFile);
 #else
   off_t nDictionaryFileSize = ftello(pDictionaryFile);
 #endif

   if (nDictionaryFileSize > BLOCK_SIZE) {
      /* Use the last BLOCK_SIZE bytes of the dictionary. The stream sits at end-of-file here,
       * so a failed seek would otherwise be mistaken for an empty dictionary. */
      if (fseek(pDictionaryFile, -BLOCK_SIZE, SEEK_END) != 0) {
         nResult = LZSA_ERROR_DICTIONARY;
      }
   }
   else {
      /* Also taken when the size could not be determined: read from the start if the stream allows it */
      fseek(pDictionaryFile, 0, SEEK_SET);
   }

   if (nResult == LZSA_OK) {
      nBytesRead = fread(pDictionaryData, 1, BLOCK_SIZE, pDictionaryFile);

      /* fread() reports a short count for both end-of-file and errors; only ferror() tells them apart */
      if (ferror(pDictionaryFile)) {
         nResult = LZSA_ERROR_DICTIONARY;
      }
   }

   fclose(pDictionaryFile);

   if (nResult != LZSA_OK) {
      free(pDictionaryData);
      return nResult;
   }

   *ppDictionaryData = pDictionaryData;
   *pDictionaryDataSize = (int)nBytesRead;
   return LZSA_OK;
 }
 /**
 * Free dictionary contents
 *
 * Releases the buffer referenced by *ppDictionaryData, if any, and resets
 * *ppDictionaryData to NULL so the same variable can be freed again harmlessly.
 *
 * @param ppDictionaryData pointer to pointer to dictionary contents
 */
 void lzsa_dictionary_free(void **ppDictionaryData) {
   void *pContents = *ppDictionaryData;

   /* Detach first, then release; free() ignores a NULL pointer */
   *ppDictionaryData = NULL;
   free(pContents);
 }
--- a/Tools/unix/lzsa/src/dictionary.h
+++ b/Tools/unix/lzsa/src/dictionary.h
@ -0,0 +1,64 @@
 /*
 * dictionary.h - dictionary definitions
 *
 * Copyright (C) 2019 Emmanuel Marty
 *
 * This software is provided 'as-is', without any express or implied
 * warranty.  In no event will the authors be held liable for any damages
 * arising from the use of this software.
 *
 * Permission is granted to anyone to use this software for any purpose,
 * including commercial applications, and to alter it and redistribute it
 * freely, subject to the following restrictions:
 *
 * 1. The origin of this software must not be misrepresented; you must not
 *    claim that you wrote the original software. If you use this software
 *    in a product, an acknowledgment in the product documentation would be
 *    appreciated but is not required.
 * 2. Altered source versions must be plainly marked as such, and must not be
 *    misrepresented as being the original software.
 * 3. This notice may not be removed or altered from any source distribution.
 */
 /*
 * Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
 *
 * Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
 * With help, ideas, optimizations and speed measurements by spke <zxintrospec@gmail.com>
 * With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
 * Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
 *
 */
 #ifndef _DICTIONARY_H
 #define _DICTIONARY_H
 #include <stdlib.h>
 #ifdef __cplusplus
 extern "C" {
 #endif
 /**
 * Load dictionary contents
 *
 * At most BLOCK_SIZE bytes are loaded; for a larger file, only its last BLOCK_SIZE bytes
 * are used. The returned buffer is heap-allocated and must be released with
 * lzsa_dictionary_free().
 *
 * @param pszDictionaryFilename name of dictionary file, or NULL for none
 * @param ppDictionaryData pointer to returned dictionary contents; receives NULL when pszDictionaryFilename is NULL
 * @param pDictionaryDataSize pointer to returned size of dictionary contents in bytes; receives 0 when pszDictionaryFilename is NULL
 *
 * @return LZSA_OK for success, or an error value from lzsa_status_t
 */
 int lzsa_dictionary_load(const char *pszDictionaryFilename, void **ppDictionaryData, int *pDictionaryDataSize);
 /**
 * Free dictionary contents
 *
 * Frees *ppDictionaryData if it is not NULL and sets it to NULL afterwards.
 *
 * @param ppDictionaryData pointer to pointer to dictionary contents
 */
 void lzsa_dictionary_free(void **ppDictionaryData);
 #ifdef __cplusplus
 }
 #endif
 #endif /* _DICTIONARY_H */
--- a/Tools/unix/lzsa/src/expand_block_v1.c
+++ b/Tools/unix/lzsa/src/expand_block_v1.c
@ -0,0 +1,224 @@
 /*
 * expand_block_v1.c - LZSA1 block decompressor implementation
 *
 * Copyright (C) 2019 Emmanuel Marty
 *
 * This software is provided 'as-is', without any express or implied
 * warranty.  In no event will the authors be held liable for any damages
 * arising from the use of this software.
 *
 * Permission is granted to anyone to use this software for any purpose,
 * including commercial applications, and to alter it and redistribute it
 * freely, subject to the following restrictions:
 *
 * 1. The origin of this software must not be misrepresented; you must not
 *    claim that you wrote the original software. If you use this software
 *    in a product, an acknowledgment in the product documentation would be
 *    appreciated but is not required.
 * 2. Altered source versions must be plainly marked as such, and must not be
 *    misrepresented as being the original software.
 * 3. This notice may not be removed or altered from any source distribution.
 */
 /*
 * Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
 *
 * Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
 * With help, ideas, optimizations and speed measurements by spke <zxintrospec@gmail.com>
 * With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
 * Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
 *
 */
 #include <stdlib.h>
 #include <string.h>
 #include "format.h"
 #include "expand_block_v1.h"
 #if defined(_MSC_VER)
 #define FORCE_INLINE __forceinline
 #else /* !_MSC_VER */
 #define FORCE_INLINE __attribute__((always_inline))
 #endif /* _MSC_VER */
 /**
 * Decode the extended part of an LZSA1 literals length
 *
 * The next input byte is added to *nLiterals. Two byte values are special: 250 replaces
 * the length with 256 plus the following byte, and 249 replaces it with the following
 * little-endian 16-bit value.
 *
 * @param ppInBlock pointer to the current input position, advanced past the consumed bytes on success only
 * @param pInBlockEnd end of the input data (one past the last readable byte)
 * @param nLiterals length taken from the token on entry, decoded length on success
 *
 * @return 0 for success, -1 if the input ends before the length is complete
 */
 static inline FORCE_INLINE int lzsa_build_literals_len_v1(const unsigned char **ppInBlock, const unsigned char *pInBlockEnd, unsigned int *nLiterals) {
   const unsigned char *pCursor = *ppInBlock;
   unsigned int nExtra;

   if (pCursor >= pInBlockEnd)
      return -1;

   nExtra = *pCursor++;
   *nLiterals += nExtra;

   switch (nExtra) {
   case 250:
      /* one more byte: length is 256..511 */
      if (pCursor >= pInBlockEnd)
         return -1;
      *nLiterals = 256 + (unsigned int)pCursor[0];
      pCursor += 1;
      break;

   case 249:
      /* two more bytes: full 16-bit length, low byte first */
      if ((pCursor + 1) >= pInBlockEnd)
         return -1;
      *nLiterals = (unsigned int)pCursor[0] | (((unsigned int)pCursor[1]) << 8);
      pCursor += 2;
      break;

   default:
      /* the single byte already completed the length */
      break;
   }

   *ppInBlock = pCursor;
   return 0;
 }
 /**
 * Decode the extended part of an LZSA1 match length
 *
 * The next input byte is added to *nMatchLen. Two byte values are special: 239 replaces
 * the length with 256 plus the following byte, and 238 replaces it with the following
 * little-endian 16-bit value.
 *
 * @param ppInBlock pointer to the current input position, advanced past the consumed bytes on success only
 * @param pInBlockEnd end of the input data (one past the last readable byte)
 * @param nMatchLen length taken from the token on entry, decoded length on success
 *
 * @return 0 for success, -1 if the input ends before the length is complete
 */
 static inline FORCE_INLINE int lzsa_build_match_len_v1(const unsigned char **ppInBlock, const unsigned char *pInBlockEnd, unsigned int *nMatchLen) {
   const unsigned char *pCursor = *ppInBlock;
   unsigned int nExtra;

   if (pCursor >= pInBlockEnd)
      return -1;

   nExtra = *pCursor++;
   *nMatchLen += nExtra;

   switch (nExtra) {
   case 239:
      /* one more byte: length is 256..511 */
      if (pCursor >= pInBlockEnd)
         return -1;
      *nMatchLen = 256 + (unsigned int)pCursor[0];
      pCursor += 1;
      break;

   case 238:
      /* two more bytes: full 16-bit length, low byte first */
      if ((pCursor + 1) >= pInBlockEnd)
         return -1;
      *nMatchLen = (unsigned int)pCursor[0] | (((unsigned int)pCursor[1]) << 8);
      pCursor += 2;
      break;

   default:
      /* the single byte already completed the length */
      break;
   }

   *ppInBlock = pCursor;
   return 0;
 }
 /**
 * Decompress one LZSA1 data block
 *
 * Matches may reference any byte between pOutData and the current write position, so the
 * nOutDataOffset bytes in front of this block act as its dictionary. All bounds are checked
 * as sizes before any pointer is formed, so malformed input produces -1 rather than
 * out-of-range pointer arithmetic.
 *
 * @param pInBlock pointer to compressed data
 * @param nBlockSize size of compressed data, in bytes
 * @param pOutData pointer to output decompression buffer (previously decompressed bytes + room for decompressing this block)
 * @param nOutDataOffset starting index of where to store decompressed bytes in output buffer (and size of previously decompressed bytes)
 * @param nBlockMaxSize maximum number of bytes that may be written for this block, counted from nOutDataOffset
 *
 * @return size of decompressed data in bytes, or -1 for error (including a negative size or offset argument)
 */
 int lzsa_decompressor_expand_block_v1(const unsigned char *pInBlock, int nBlockSize, unsigned char *pOutData, int nOutDataOffset, int nBlockMaxSize) {
   /* Negative sizes would put the end pointers in front of the buffers they describe */
   if (nBlockSize < 0 || nOutDataOffset < 0 || nBlockMaxSize < 0)
      return -1;

   const unsigned char *pInBlockEnd = pInBlock + nBlockSize;
   unsigned char *pCurOutData = pOutData + nOutDataOffset;
   const unsigned char *pOutDataEnd = pCurOutData + nBlockMaxSize;

   /* Invariant for every size computed below: pInBlock <= pInBlockEnd and pOutData <= pCurOutData <= pOutDataEnd */
   while (pInBlock < pInBlockEnd) {
      const unsigned char token = *pInBlock++;
      unsigned int nLiterals = (unsigned int)((token & 0x70) >> 4);

      if (nLiterals != LITERALS_RUN_LEN_V1 && (size_t)(pInBlockEnd - pInBlock) >= 8 && (size_t)(pOutDataEnd - pCurOutData) > 18) {
         /* Fast path: copy a fixed 8 bytes and keep only the first nLiterals (0..6) of them */
         memcpy(pCurOutData, pInBlock, 8);
         pInBlock += nLiterals;
         pCurOutData += nLiterals;
      }
      else {
         if (nLiterals == LITERALS_RUN_LEN_V1) {
            if (lzsa_build_literals_len_v1(&pInBlock, pInBlockEnd, &nLiterals))
               return -1;
         }

         if (nLiterals != 0) {
            if (nLiterals <= (size_t)(pInBlockEnd - pInBlock) &&
               nLiterals <= (size_t)(pOutDataEnd - pCurOutData)) {
               memcpy(pCurOutData, pInBlock, nLiterals);
               pInBlock += nLiterals;
               pCurOutData += nLiterals;
            }
            else {
               return -1;
            }
         }
      }

      if ((size_t)(pInBlockEnd - pInBlock) > 1) { /* The last token in the block does not include match information */
         unsigned int nMatchOffset;

         /* Offsets are stored as the low bits of a negative number; invert them to get the distance minus one */
         nMatchOffset = ((unsigned int)(*pInBlock++)) ^ 0xff;
         if (token & 0x80) {
            nMatchOffset |= (((unsigned int)(*pInBlock++)) << 8) ^ 0xff00;
         }
         nMatchOffset++;

         /* Validate the distance before forming the source pointer: it must stay inside the output buffer */
         if (nMatchOffset > (size_t)(pCurOutData - pOutData))
            return -1;

         const unsigned char *pSrc = pCurOutData - nMatchOffset;
         unsigned int nMatchLen = (unsigned int)(token & 0x0f);

         if (nMatchLen != MATCH_RUN_LEN_V1 && nMatchOffset >= 8 && (size_t)(pOutDataEnd - pCurOutData) > 18) {
            /* Fast path: short matches never exceed 17 bytes, so copy a fixed 18 and keep what is needed.
             * A distance of at least 8 keeps each 8-byte chunk from overlapping its own source. */
            memcpy(pCurOutData, pSrc, 8);
            memcpy(pCurOutData + 8, pSrc + 8, 8);
            memcpy(pCurOutData + 16, pSrc + 16, 2);
            pCurOutData += (MIN_MATCH_SIZE_V1 + nMatchLen);
         }
         else {
            nMatchLen += MIN_MATCH_SIZE_V1;
            if (nMatchLen == (MATCH_RUN_LEN_V1 + MIN_MATCH_SIZE_V1)) {
               if (lzsa_build_match_len_v1(&pInBlock, pInBlockEnd, &nMatchLen))
                  return -1;
               if (nMatchLen == 0)
                  break; /* A zero match length is the end-of-data marker */
            }

            /* The source lies before the write position, so checking the destination covers both */
            const size_t nOutRoom = (size_t)(pOutDataEnd - pCurOutData);
            if (nMatchLen > nOutRoom)
               return -1;

            /* Do a deterministic, left to right byte copy instead of memcpy() so as to handle overlaps */
            if (nMatchOffset >= 16 && (nOutRoom - nMatchLen) > 33) {
               /* 16 bytes at a time; the last chunk may overshoot the match by up to 15 bytes,
                * which the spare room required above absorbs */
               const unsigned char *pCopySrc = pSrc;
               unsigned char *pCopyDst = pCurOutData;
               const unsigned char *pCopyEndDst = pCurOutData + nMatchLen;

               do {
                  memcpy(pCopyDst, pCopySrc, 16);
                  pCopySrc += 16;
                  pCopyDst += 16;
               } while (pCopyDst < pCopyEndDst);

               pCurOutData += nMatchLen;
            }
            else {
               while (nMatchLen) {
                  *pCurOutData++ = *pSrc++;
                  nMatchLen--;
               }
            }
         }
      }
   }

   return (int)(pCurOutData - (pOutData + nOutDataOffset));
 }
--- a/Tools/unix/lzsa/src/expand_block_v1.h
+++ b/Tools/unix/lzsa/src/expand_block_v1.h
@ -0,0 +1,49 @@
 /*
 * expand_block_v1.h - LZSA1 block decompressor definitions
 *
 * Copyright (C) 2019 Emmanuel Marty
 *
 * This software is provided 'as-is', without any express or implied
 * warranty.  In no event will the authors be held liable for any damages
 * arising from the use of this software.
 *
 * Permission is granted to anyone to use this software for any purpose,
 * including commercial applications, and to alter it and redistribute it
 * freely, subject to the following restrictions:
 *
 * 1. The origin of this software must not be misrepresented; you must not
 *    claim that you wrote the original software. If you use this software
 *    in a product, an acknowledgment in the product documentation would be
 *    appreciated but is not required.
 * 2. Altered source versions must be plainly marked as such, and must not be
 *    misrepresented as being the original software.
 * 3. This notice may not be removed or altered from any source distribution.
 */
 /*
 * Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
 *
 * Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
 * With help, ideas, optimizations and speed measurements by spke <zxintrospec@gmail.com>
 * With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
 * Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
 *
 */
 #ifndef _EXPAND_BLOCK_V1_H
 #define _EXPAND_BLOCK_V1_H
 /**
 * Decompress one LZSA1 data block
 *
 * @param pInBlock pointer to compressed data
 * @param nBlockSize size of compressed data, in bytes
 * @param pOutData pointer to output decompression buffer (previously decompressed bytes + room for decompressing this block)
 * @param nOutDataOffset starting index of where to store decompressed bytes in output buffer (and size of previously decompressed bytes)
 * @param nBlockMaxSize total size of output decompression buffer, in bytes
 *
 * @return size of decompressed data in bytes, or -1 for error
 */
 int lzsa_decompressor_expand_block_v1(const unsigned char *pInBlock, int nBlockSize, unsigned char *pOutData, int nOutDataOffset, int nBlockMaxSize);
 #endif /* _EXPAND_BLOCK_V1_H */
--- a/Tools/unix/lzsa/src/expand_block_v2.c
+++ b/Tools/unix/lzsa/src/expand_block_v2.c
@ -0,0 +1,253 @@
 /*
 * expand_block_v2.c - LZSA2 block decompressor implementation
 *
 * Copyright (C) 2019 Emmanuel Marty
 *
 * This software is provided 'as-is', without any express or implied
 * warranty.  In no event will the authors be held liable for any damages
 * arising from the use of this software.
 *
 * Permission is granted to anyone to use this software for any purpose,
 * including commercial applications, and to alter it and redistribute it
 * freely, subject to the following restrictions:
 *
 * 1. The origin of this software must not be misrepresented; you must not
 *    claim that you wrote the original software. If you use this software
 *    in a product, an acknowledgment in the product documentation would be
 *    appreciated but is not required.
 * 2. Altered source versions must be plainly marked as such, and must not be
 *    misrepresented as being the original software.
 * 3. This notice may not be removed or altered from any source distribution.
 */
 /*
 * Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
 *
 * Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
 * With help, ideas, optimizations and speed measurements by spke <zxintrospec@gmail.com>
 * With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
 * Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
 *
 */
 #include <stdlib.h>
 #include <string.h>
 #include "format.h"
 #include "expand_block_v2.h"
 /* FORCE_INLINE expands to the compiler-specific keyword/attribute that requests
  * unconditional inlining; it is applied to the small per-nibble helpers below. */
 #ifdef _MSC_VER
 #define FORCE_INLINE __forceinline
 #else /* _MSC_VER */
 #define FORCE_INLINE __attribute__((always_inline))
 #endif /* _MSC_VER */
 static inline FORCE_INLINE unsigned int lzsa_get_nibble_v2(const unsigned char **ppInBlock, const unsigned char *pInBlockEnd, int *nCurNibbles, unsigned char *nibbles, unsigned int *nValue) {
   if ((*nCurNibbles ^= 1) != 0) {
      const unsigned char *pInBlock = *ppInBlock;
      if (pInBlock < pInBlockEnd) {
         (*nibbles) = *pInBlock++;
         *ppInBlock = pInBlock;
         (*nValue) = ((unsigned int)((*nibbles) & 0xf0)) >> 4;
         return 0;
      }
      else {
         return -1;
      }
   }
   (*nValue) = (unsigned int)((*nibbles) & 0x0f);
   return 0;
 }
 /**
 * Extend a literals or match length with its variable-size extra encoding
 *
 * A nibble is always read and added to *nLength. When that nibble is 15, one
 * more byte is added; a running total of 257 then means the real length
 * follows as a 16-bit little-endian value, and a running total of 256 is
 * reported as a length of 0.
 *
 * @param ppInBlock pointer to the current input position, advanced past the bytes consumed on success
 * @param pInBlockEnd end of the input data (one past the last readable byte)
 * @param nCurNibbles nibble stream toggle, shared with lzsa_get_nibble_v2()
 * @param nibbles buffered nibble byte, shared with lzsa_get_nibble_v2()
 * @param nLength in: length decoded from the token; out: final length
 *
 * @return 0 for success, -1 if the input ends prematurely
 */
 static inline FORCE_INLINE int lzsa_build_len_v2(const unsigned char **ppInBlock, const unsigned char *pInBlockEnd, int *nCurNibbles, unsigned char *nibbles, unsigned int *nLength) {
    unsigned int nNibble;
    const unsigned char *pCursor;

    if (lzsa_get_nibble_v2(ppInBlock, pInBlockEnd, nCurNibbles, nibbles, &nNibble))
       return -1;

    (*nLength) += nNibble;
    if (nNibble != 15)
       return 0;

    /* The nibble saturated: an extra byte follows in the byte stream */
    pCursor = *ppInBlock;
    if (pCursor >= pInBlockEnd)
       return -1;

    (*nLength) += (unsigned int)pCursor[0];
    pCursor++;

    if ((*nLength) == 257) {
       /* Escape value: the length is stored on 16 bits, low byte first */
       if ((pCursor + 1) >= pInBlockEnd)
          return -1;
       (*nLength) = ((unsigned int)pCursor[0]) | (((unsigned int)pCursor[1]) << 8);
       pCursor += 2;
    }
    else if ((*nLength) == 256) {
       (*nLength) = 0;
    }

    /* The caller's input position is only committed once everything was read */
    *ppInBlock = pCursor;
    return 0;
 }
 /**
 * Decompress one LZSA2 data block
 *
 * The block is a sequence of commands, each introduced by a token byte:
 * bits 3-4 hold the literals count, bits 0-2 the encoded match length,
 * bits 6-7 select the match offset encoding and bit 5 contributes an extra
 * offset bit (or, in the last mode, chooses between a 16 bit offset and
 * re-using the previous offset). Longer lengths and part of the offsets are
 * read from a nibble stream interleaved with the byte stream.
 *
 * @param pInBlock pointer to compressed data
 * @param nBlockSize size of compressed data, in bytes
 * @param pOutData pointer to output decompression buffer (previously decompressed bytes + room for decompressing this block)
 * @param nOutDataOffset starting index of where to store decompressed bytes in output buffer (and size of previously decompressed bytes)
 * @param nBlockMaxSize maximum number of bytes that may be written starting at pOutData + nOutDataOffset
 *
 * @return size of decompressed data in bytes, or -1 for error
 */
 int lzsa_decompressor_expand_block_v2(const unsigned char *pInBlock, int nBlockSize, unsigned char *pOutData, int nOutDataOffset, int nBlockMaxSize) {
    const unsigned char *pInBlockEnd = pInBlock + nBlockSize;
    unsigned char *pCurOutData = pOutData + nOutDataOffset;
    const unsigned char *pOutDataEnd = pCurOutData + nBlockMaxSize;
    /* Fast paths copy fixed-size chunks and may overshoot; they are only taken while this margin is free */
    const unsigned char *pOutDataFastEnd = pOutDataEnd - 20;
    int nCurNibbles = 0;
    unsigned char nibbles = 0;
    int nMatchOffset = 0;
    while (pInBlock < pInBlockEnd) {
       const unsigned char token = *pInBlock++;
       unsigned int nLiterals = (unsigned int)((token & 0x18) >> 3);
       if (nLiterals != LITERALS_RUN_LEN_V2 && (pInBlock + 4) <= pInBlockEnd && pCurOutData < pOutDataFastEnd) {
          /* Short literal run: always copy 4 bytes, then advance by the real count only */
          memcpy(pCurOutData, pInBlock, 4);
          pInBlock += nLiterals;
          pCurOutData += nLiterals;
       }
       else {
          if (nLiterals == LITERALS_RUN_LEN_V2) {
             if (lzsa_build_len_v2(&pInBlock, pInBlockEnd, &nCurNibbles, &nibbles, &nLiterals))
                return -1;
          }
          if (nLiterals != 0) {
             if ((pInBlock + nLiterals) <= pInBlockEnd &&
                (pCurOutData + nLiterals) <= pOutDataEnd) {
                memcpy(pCurOutData, pInBlock, nLiterals);
                pInBlock += nLiterals;
                pCurOutData += nLiterals;
             }
             else {
                return -1;
             }
          }
       }
       if (pInBlock < pInBlockEnd) { /* The last token in the block does not include match information */
          unsigned char nOffsetMode = token & 0xc0;
          unsigned int nValue;
          switch (nOffsetMode) {
          case 0x00:
             /* 5 bit offset */
             if (lzsa_get_nibble_v2(&pInBlock, pInBlockEnd, &nCurNibbles, &nibbles, &nValue))
                return -1;
             nMatchOffset = nValue << 1;
             nMatchOffset |= ((token & 0x20) >> 5);
             nMatchOffset ^= 0x1e;
             nMatchOffset++;
             break;
          case 0x40:
             /* 9 bit offset (the byte read is covered by the enclosing pInBlock < pInBlockEnd test) */
             nMatchOffset = (unsigned int)(*pInBlock++);
             nMatchOffset |= (((unsigned int)(token & 0x20)) << 3);
             nMatchOffset ^= 0x0ff;
             nMatchOffset++;
             break;
          case 0x80:
             /* 13 bit offset */
             if (lzsa_get_nibble_v2(&pInBlock, pInBlockEnd, &nCurNibbles, &nibbles, &nValue))
                return -1;
             /* Fetching the nibble may have consumed the last input byte: re-check before reading the low offset byte */
             if (pInBlock >= pInBlockEnd)
                return -1;
             nMatchOffset = (unsigned int)(*pInBlock++);
             nMatchOffset |= (nValue << 9);
             nMatchOffset |= (((unsigned int)(token & 0x20)) << 3);
             nMatchOffset ^= 0x1eff;
             nMatchOffset += (512 + 1);
             break;
          default:
             /* Check if this is a 16 bit offset or a rep-match */
             if ((token & 0x20) == 0) {
                /* 16 bit offset */
                nMatchOffset = (((unsigned int)(*pInBlock++)) << 8);
                if (pInBlock >= pInBlockEnd) return -1;
                nMatchOffset |= (unsigned int)(*pInBlock++);
                nMatchOffset ^= 0xffff;
                nMatchOffset++;
             }
             /* Otherwise (rep-match) nMatchOffset keeps the value set by the previous command */
             break;
          }
          const unsigned char *pSrc = pCurOutData - nMatchOffset;
          if (pSrc >= pOutData) {
             unsigned int nMatchLen = (unsigned int)(token & 0x07);
             if (nMatchLen != MATCH_RUN_LEN_V2 && nMatchOffset >= 8 && pCurOutData < pOutDataFastEnd && (pSrc + 10) <= pOutDataEnd) {
                /* Short match, source at least 8 bytes back: copy 10 bytes in two non-overlapping steps, advance by the real length */
                memcpy(pCurOutData, pSrc, 8);
                memcpy(pCurOutData + 8, pSrc + 8, 2);
                pCurOutData += (MIN_MATCH_SIZE_V2 + nMatchLen);
             }
             else {
                nMatchLen += MIN_MATCH_SIZE_V2;
                if (nMatchLen == (MATCH_RUN_LEN_V2 + MIN_MATCH_SIZE_V2)) {
                   if (lzsa_build_len_v2(&pInBlock, pInBlockEnd, &nCurNibbles, &nibbles, &nMatchLen))
                      return -1;
                   /* A decoded length of 0 ends the block: leave the command loop */
                   if (nMatchLen == 0)
                      break;
                }
                if ((pSrc + nMatchLen) <= pOutDataEnd) {
                   if ((pCurOutData + nMatchLen) <= pOutDataEnd) {
                      /* Do a deterministic, left to right byte copy instead of memcpy() so as to handle overlaps */
                      if (nMatchOffset >= 16 && (pCurOutData + nMatchLen) < (pOutDataFastEnd - 15)) {
                         /* Source and destination are at least 16 bytes apart: copy in 16-byte chunks, possibly overshooting into the reserved margin */
                         const unsigned char *pCopySrc = pSrc;
                         unsigned char *pCopyDst = pCurOutData;
                         const unsigned char *pCopyEndDst = pCurOutData + nMatchLen;
                         do {
                            memcpy(pCopyDst, pCopySrc, 16);
                            pCopySrc += 16;
                            pCopyDst += 16;
                         } while (pCopyDst < pCopyEndDst);
                         pCurOutData += nMatchLen;
                      }
                      else {
                         while (nMatchLen) {
                            *pCurOutData++ = *pSrc++;
                            nMatchLen--;
                         }
                      }
                   }
                   else {
                      return -1;
                   }
                }
                else {
                   return -1;
                }
             }
          }
          else {
             /* The match would start before the beginning of the output buffer */
             return -1;
          }
       }
    }
    return (int)(pCurOutData - (pOutData + nOutDataOffset));
 }
--- a/Tools/unix/lzsa/src/expand_block_v2.h
+++ b/Tools/unix/lzsa/src/expand_block_v2.h
@ -0,0 +1,49 @@
 /*
 * expand_block_v2.h - LZSA2 block decompressor definitions
 *
 * Copyright (C) 2019 Emmanuel Marty
 *
 * This software is provided 'as-is', without any express or implied
 * warranty.  In no event will the authors be held liable for any damages
 * arising from the use of this software.
 *
 * Permission is granted to anyone to use this software for any purpose,
 * including commercial applications, and to alter it and redistribute it
 * freely, subject to the following restrictions:
 *
 * 1. The origin of this software must not be misrepresented; you must not
 *    claim that you wrote the original software. If you use this software
 *    in a product, an acknowledgment in the product documentation would be
 *    appreciated but is not required.
 * 2. Altered source versions must be plainly marked as such, and must not be
 *    misrepresented as being the original software.
 * 3. This notice may not be removed or altered from any source distribution.
 */
 /*
 * Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
 *
 * Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
 * With help, ideas, optimizations and speed measurements by spke <zxintrospec@gmail.com>
 * With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
 * Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
 *
 */
 #ifndef _EXPAND_BLOCK_V2_H
 #define _EXPAND_BLOCK_V2_H
 /**
 * Decompress one LZSA2 data block
 *
 * @param pInBlock pointer to compressed data
 * @param nBlockSize size of compressed data, in bytes
 * @param pOutData pointer to output decompression buffer (previously decompressed bytes + room for decompressing this block)
 * @param nOutDataOffset starting index of where to store decompressed bytes in output buffer (and size of previously decompressed bytes)
 * @param nBlockMaxSize maximum number of bytes that may be written starting at pOutData + nOutDataOffset (the implementation uses pOutData + nOutDataOffset + nBlockMaxSize as its write limit)
 *
 * @return size of decompressed data in bytes, or -1 for error
 */
 int lzsa_decompressor_expand_block_v2(const unsigned char *pInBlock, int nBlockSize, unsigned char *pOutData, int nOutDataOffset, int nBlockMaxSize);
 #endif /* _EXPAND_BLOCK_V2_H */
--- a/Tools/unix/lzsa/src/expand_context.c
+++ b/Tools/unix/lzsa/src/expand_context.c
@ -0,0 +1,76 @@
 /*
 * expand_context.c - decompressor context implementation
 *
 * Copyright (C) 2019 Emmanuel Marty
 *
 * This software is provided 'as-is', without any express or implied
 * warranty.  In no event will the authors be held liable for any damages
 * arising from the use of this software.
 *
 * Permission is granted to anyone to use this software for any purpose,
 * including commercial applications, and to alter it and redistribute it
 * freely, subject to the following restrictions:
 *
 * 1. The origin of this software must not be misrepresented; you must not
 *    claim that you wrote the original software. If you use this software
 *    in a product, an acknowledgment in the product documentation would be
 *    appreciated but is not required.
 * 2. Altered source versions must be plainly marked as such, and must not be
 *    misrepresented as being the original software.
 * 3. This notice may not be removed or altered from any source distribution.
 */
 /*
 * Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
 *
 * Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
 * With help, ideas, optimizations and speed measurements by spke <zxintrospec@gmail.com>
 * With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
 * Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
 *
 */
 #include <stdlib.h>
 #include <string.h>
 #include "expand_context.h"
 #include "expand_block_v1.h"
 #include "expand_block_v2.h"
 #include "lib.h"
 /**
 * Decompress one data block, dispatching on the format version
 *
 * When LZSA_FLAG_RAW_BACKWARD is set, the input is passed to
 * lzsa_reverse_buffer() before decoding and again afterwards, and the freshly
 * decoded output is passed to lzsa_reverse_buffer() if decoding succeeded.
 *
 * @param pInBlock pointer to compressed data
 * @param nBlockSize size of compressed data, in bytes
 * @param pOutData pointer to output decompression buffer (previously decompressed bytes + room for decompressing this block)
 * @param nOutDataOffset starting index of where to store decompressed bytes in output buffer (and size of previously decompressed bytes)
 * @param nBlockMaxSize output capacity, handed unchanged to the version-specific block decompressor
 * @param nFormatVersion version of format to use (1-2)
 * @param nFlags compression flags (LZSA_FLAG_xxx)
 *
 * @return size of decompressed data in bytes, or -1 for error (including an unknown format version)
 */
 int lzsa_decompressor_expand_block(unsigned char *pInBlock, int nBlockSize, unsigned char *pOutData, int nOutDataOffset, int nBlockMaxSize, const int nFormatVersion, const int nFlags) {
    const int nIsBackward = (nFlags & LZSA_FLAG_RAW_BACKWARD) ? 1 : 0;
    int nResult = -1;

    if (nIsBackward)
       lzsa_reverse_buffer(pInBlock, nBlockSize);

    switch (nFormatVersion) {
    case 1:
       nResult = lzsa_decompressor_expand_block_v1(pInBlock, nBlockSize, pOutData, nOutDataOffset, nBlockMaxSize);
       break;
    case 2:
       nResult = lzsa_decompressor_expand_block_v2(pInBlock, nBlockSize, pOutData, nOutDataOffset, nBlockMaxSize);
       break;
    default:
       /* Unknown version: nResult stays at -1 */
       break;
    }

    if (nIsBackward) {
       /* Only the bytes produced by this call are flipped, and only on success */
       if (nResult != -1)
          lzsa_reverse_buffer(pOutData + nOutDataOffset, nResult);

       /* The input is put through the same call again whether or not decoding succeeded */
       lzsa_reverse_buffer(pInBlock, nBlockSize);
    }

    return nResult;
 }
--- a/Tools/unix/lzsa/src/expand_context.h
+++ b/Tools/unix/lzsa/src/expand_context.h
@ -0,0 +1,61 @@
 /*
 * expand_context.h - decompressor context definitions
 *
 * Copyright (C) 2019 Emmanuel Marty
 *
 * This software is provided 'as-is', without any express or implied
 * warranty.  In no event will the authors be held liable for any damages
 * arising from the use of this software.
 *
 * Permission is granted to anyone to use this software for any purpose,
 * including commercial applications, and to alter it and redistribute it
 * freely, subject to the following restrictions:
 *
 * 1. The origin of this software must not be misrepresented; you must not
 *    claim that you wrote the original software. If you use this software
 *    in a product, an acknowledgment in the product documentation would be
 *    appreciated but is not required.
 * 2. Altered source versions must be plainly marked as such, and must not be
 *    misrepresented as being the original software.
 * 3. This notice may not be removed or altered from any source distribution.
 */
 /*
 * Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
 *
 * Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
 * With help, ideas, optimizations and speed measurements by spke <zxintrospec@gmail.com>
 * With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
 * Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
 *
 */
 #ifndef _EXPAND_CONTEXT_H
 #define _EXPAND_CONTEXT_H
 #include <stdlib.h>
 #ifdef __cplusplus
 extern "C" {
 #endif
 /**
 * Decompress one data block
 *
 * @param pInBlock pointer to compressed data (not const: when LZSA_FLAG_RAW_BACKWARD is set, the implementation calls lzsa_reverse_buffer() on it before decoding and once more before returning)
 * @param nBlockSize size of compressed data, in bytes
 * @param pOutData pointer to output decompression buffer (previously decompressed bytes + room for decompressing this block)
 * @param nOutDataOffset starting index of where to store decompressed bytes in output buffer (and size of previously decompressed bytes)
 * @param nBlockMaxSize total size of output decompression buffer, in bytes
 * @param nFormatVersion version of format to use (1-2); any other value makes the call fail
 * @param nFlags compression flags (LZSA_FLAG_xxx)
 *
 * @return size of decompressed data in bytes, or -1 for error
 */
 int lzsa_decompressor_expand_block(unsigned char *pInBlock, int nBlockSize, unsigned char *pOutData, int nOutDataOffset, int nBlockMaxSize, const int nFormatVersion, const int nFlags);
 #ifdef __cplusplus
 }
 #endif
 #endif /* _EXPAND_CONTEXT_H */
--- a/Tools/unix/lzsa/src/expand_inmem.c
+++ b/Tools/unix/lzsa/src/expand_inmem.c
@ -0,0 +1,163 @@
 /*
 * expand_inmem.c - in-memory decompression implementation
 *
 * Copyright (C) 2019 Emmanuel Marty
 *
 * This software is provided 'as-is', without any express or implied
 * warranty.  In no event will the authors be held liable for any damages
 * arising from the use of this software.
 *
 * Permission is granted to anyone to use this software for any purpose,
 * including commercial applications, and to alter it and redistribute it
 * freely, subject to the following restrictions:
 *
 * 1. The origin of this software must not be misrepresented; you must not
 *    claim that you wrote the original software. If you use this software
 *    in a product, an acknowledgment in the product documentation would be
 *    appreciated but is not required.
 * 2. Altered source versions must be plainly marked as such, and must not be
 *    misrepresented as being the original software.
 * 3. This notice may not be removed or altered from any source distribution.
 */
 /*
 * Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
 *
 * Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
 * With help, ideas, optimizations and speed measurements by spke <zxintrospec@gmail.com>
 * With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
 * Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
 *
 */
 #include <stdlib.h>
 #include <string.h>
 #include "expand_inmem.h"
 #include "lib.h"
 #include "frame.h"
 /* Number of bytes added to the maximum decompressed size estimate for every data frame (one potentially full block) */
 #define BLOCK_SIZE 65536
 /**
 * Get maximum decompressed size of compressed data
 *
 * Validates the stream header, then walks the frames without decompressing
 * them, counting BLOCK_SIZE bytes for each frame that carries data. The walk
 * stops at the first frame with a zero data size or at the end of the input.
 *
 * @param pFileData compressed data
 * @param nFileSize compressed size in bytes
 *
 * @return maximum decompressed size, or (size_t)-1 if the header or a frame is invalid or truncated
 */
 size_t lzsa_get_max_decompressed_size_inmem(const unsigned char *pFileData, size_t nFileSize) {
    const unsigned char *pCurFileData = pFileData;
    const unsigned char *pEndFileData = pCurFileData + nFileSize;
    int nFormatVersion = 0;
    size_t nMaxDecompressedSize = 0;
    const int nHeaderSize = lzsa_get_header_size();

    /* Check header. Sizes are compared with the number of bytes left, so no pointer beyond the input is ever formed */
    if ((size_t)nHeaderSize > (size_t)(pEndFileData - pCurFileData) ||
        lzsa_decode_header(pCurFileData, nHeaderSize, &nFormatVersion) != 0)
       return (size_t)-1;

    pCurFileData += nHeaderSize;

    while (pCurFileData < pEndFileData) {
       unsigned int nBlockDataSize = 0;
       int nIsUncompressed = 0;
       const int nFrameSize = lzsa_get_frame_size();

       /* Decode frame header */
       if ((size_t)nFrameSize > (size_t)(pEndFileData - pCurFileData) ||
           lzsa_decode_frame(pCurFileData, nFrameSize, &nBlockDataSize, &nIsUncompressed) != 0)
          return (size_t)-1;

       pCurFileData += nFrameSize;

       /* A frame without data terminates the walk */
       if (!nBlockDataSize)
          break;

       /* Add one potentially full block to the decompressed size */
       nMaxDecompressedSize += BLOCK_SIZE;

       /* The frame's payload must be entirely present before it is skipped */
       if (nBlockDataSize > (size_t)(pEndFileData - pCurFileData))
          return (size_t)-1;

       pCurFileData += nBlockDataSize;
    }

    return nMaxDecompressedSize;
 }
 /**
 * Decompress data in memory
 *
 * With LZSA_FLAG_RAW_BLOCK the whole input is decoded as a single headerless
 * block, using the format version read from *pFormatVersion. Otherwise the
 * stream header is decoded (which sets *pFormatVersion) and frames are
 * processed one after another until a frame with a zero data size or the end
 * of the input is reached.
 *
 * @param pFileData compressed data
 * @param pOutBuffer buffer for decompressed data
 * @param nFileSize compressed size in bytes
 * @param nMaxOutBufferSize maximum capacity of decompression buffer
 * @param nFlags compression flags (LZSA_FLAG_xxx)
 * @param pFormatVersion pointer to format version, updated if this function is successful (read as input when LZSA_FLAG_RAW_BLOCK is set)
 *
 * @return actual decompressed size, or (size_t)-1 for error
 */
 size_t lzsa_decompress_inmem(unsigned char *pFileData, unsigned char *pOutBuffer, size_t nFileSize, size_t nMaxOutBufferSize, const unsigned int nFlags, int *pFormatVersion) {
    unsigned char *pCurFileData = pFileData;
    const unsigned char *pEndFileData = pCurFileData + nFileSize;
    unsigned char *pCurOutBuffer = pOutBuffer;
    const unsigned char *pEndOutBuffer = pCurOutBuffer + nMaxOutBufferSize;
    int nPreviousBlockSize;
    const int nHeaderSize = lzsa_get_header_size();

    if (nFlags & LZSA_FLAG_RAW_BLOCK) {
       /* A block decoder failure (-1) converts to (size_t)-1 here */
       return (size_t)lzsa_decompressor_expand_block(pFileData, (int)nFileSize, pOutBuffer, 0, (int)nMaxOutBufferSize, *pFormatVersion, nFlags);
    }

    /* Check header. Sizes are compared with the number of bytes left, so no pointer beyond the input is ever formed */
    if ((size_t)nHeaderSize > (size_t)(pEndFileData - pCurFileData) ||
       lzsa_decode_header(pCurFileData, nHeaderSize, pFormatVersion) != 0)
       return (size_t)-1;

    pCurFileData += nHeaderSize;
    nPreviousBlockSize = 0;

    while (pCurFileData < pEndFileData) {
       unsigned int nBlockDataSize = 0;
       int nIsUncompressed = 0;
       const int nFrameSize = lzsa_get_frame_size();

       /* Decode frame header */
       if ((size_t)nFrameSize > (size_t)(pEndFileData - pCurFileData) ||
           lzsa_decode_frame(pCurFileData, nFrameSize, &nBlockDataSize, &nIsUncompressed) != 0)
          return (size_t)-1;

       pCurFileData += nFrameSize;

       if (!nBlockDataSize)
          break;

       /* Whatever the frame type, its payload must be entirely present */
       if (nBlockDataSize > (size_t)(pEndFileData - pCurFileData))
          return (size_t)-1;

       if (!nIsUncompressed) {
          int nDecompressedSize;

          /* Decompress block. The previously decoded block is exposed as the match window by
           * starting the buffer nPreviousBlockSize bytes back. The capacity passed is the room
           * left after the current write position: the LZSA2 block decoder computes its write
           * limit as pOutData + nOutDataOffset + nBlockMaxSize, so adding nPreviousBlockSize to
           * it would place that limit past the end of the output buffer. */
          nDecompressedSize = lzsa_decompressor_expand_block(pCurFileData, (int)nBlockDataSize, pCurOutBuffer - nPreviousBlockSize, nPreviousBlockSize, (int)(pEndOutBuffer - pCurOutBuffer), *pFormatVersion, nFlags);
          if (nDecompressedSize < 0)
             return (size_t)-1;

          pCurOutBuffer += nDecompressedSize;
          nPreviousBlockSize = nDecompressedSize;
       }
       else {
          /* Copy uncompressed block */
          if (nBlockDataSize > (size_t)(pEndOutBuffer - pCurOutBuffer))
             return (size_t)-1;

          memcpy(pCurOutBuffer, pCurFileData, nBlockDataSize);
          pCurOutBuffer += nBlockDataSize;
          /* NOTE(review): nPreviousBlockSize is left untouched here, so a compressed block that
           * follows still gets the window size of the last compressed block - confirm this
           * matches what the compressor assumes. */
       }

       pCurFileData += nBlockDataSize;
    }

    /* Report the size without narrowing it to int */
    return (size_t)(pCurOutBuffer - pOutBuffer);
 }
--- a/Tools/unix/lzsa/src/expand_inmem.h
+++ b/Tools/unix/lzsa/src/expand_inmem.h
@ -0,0 +1,70 @@
 /*
 * expand_inmem.h - in-memory decompression definitions
 *
 * Copyright (C) 2019 Emmanuel Marty
 *
 * This software is provided 'as-is', without any express or implied
 * warranty.  In no event will the authors be held liable for any damages
 * arising from the use of this software.
 *
 * Permission is granted to anyone to use this software for any purpose,
 * including commercial applications, and to alter it and redistribute it
 * freely, subject to the following restrictions:
 *
 * 1. The origin of this software must not be misrepresented; you must not
 *    claim that you wrote the original software. If you use this software
 *    in a product, an acknowledgment in the product documentation would be
 *    appreciated but is not required.
 * 2. Altered source versions must be plainly marked as such, and must not be
 *    misrepresented as being the original software.
 * 3. This notice may not be removed or altered from any source distribution.
 */
 /*
 * Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
 *
 * Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
 * With help, ideas, optimizations and speed measurements by spke <zxintrospec@gmail.com>
 * With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
 * Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
 *
 */
 #ifndef _EXPAND_INMEM_H
 #define _EXPAND_INMEM_H
 #include <stdlib.h>
 #ifdef __cplusplus
 extern "C" {
 #endif
 /**
 * Get maximum decompressed size of compressed data
 *
 * @param pFileData compressed data
 * @param nFileSize compressed size in bytes
 *
 * @return maximum decompressed size, or (size_t)-1 if the stream header or a frame cannot be decoded
 */
 size_t lzsa_get_max_decompressed_size_inmem(const unsigned char *pFileData, size_t nFileSize);
 /**
 * Decompress data in memory
 *
 * @param pFileData compressed data
 * @param pOutBuffer buffer for decompressed data
 * @param nFileSize compressed size in bytes
 * @param nMaxOutBufferSize maximum capacity of decompression buffer
 * @param nFlags compression flags (LZSA_FLAG_xxx)
 * @param pFormatVersion pointer to format version, updated if this function is successful (read as the version to use when LZSA_FLAG_RAW_BLOCK is set)
 *
 * @return actual decompressed size, or -1 for error (the return type is size_t, so compare against (size_t)-1)
 */
 size_t lzsa_decompress_inmem(unsigned char *pFileData, unsigned char *pOutBuffer, size_t nFileSize, size_t nMaxOutBufferSize, const unsigned int nFlags, int *pFormatVersion);
 #ifdef __cplusplus
 }
 #endif
 #endif /* _EXPAND_INMEM_H */
--- a/Tools/unix/lzsa/src/expand_streaming.c
+++ b/Tools/unix/lzsa/src/expand_streaming.c
@ -0,0 +1,236 @@
 /*
 * expand_streaming.c - streaming decompression implementation
 *
 * Copyright (C) 2019 Emmanuel Marty
 *
 * This software is provided 'as-is', without any express or implied
 * warranty.  In no event will the authors be held liable for any damages
 * arising from the use of this software.
 *
 * Permission is granted to anyone to use this software for any purpose,
 * including commercial applications, and to alter it and redistribute it
 * freely, subject to the following restrictions:
 *
 * 1. The origin of this software must not be misrepresented; you must not
 *    claim that you wrote the original software. If you use this software
 *    in a product, an acknowledgment in the product documentation would be
 *    appreciated but is not required.
 * 2. Altered source versions must be plainly marked as such, and must not be
 *    misrepresented as being the original software.
 * 3. This notice may not be removed or altered from any source distribution.
 */
 /*
 * Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
 *
 * Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
 * With help, ideas, optimizations and speed measurements by spke <zxintrospec@gmail.com>
 * With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
 * Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
 *
 */
 #include <stdlib.h>
 #include <string.h>
 #include "expand_streaming.h"
 #include "format.h"
 #include "frame.h"
 #include "lib.h"
 /*-------------- File API -------------- */
 /**
 * Decompress file
 *
 * @param pszInFilename name of input(compressed) file to decompress
 * @param pszOutFilename name of output(decompressed) file to generate
 * @param pszDictionaryFilename name of dictionary file, or NULL for none
 * @param nFlags compression flags (LZSA_FLAG_RAW_BLOCK to decompress a raw block, or 0)
 * @param nFormatVersion default version of format to use (1-2). This is used when decompressing a raw block, otherwise the version is extracted from the source file
 * @param pOriginalSize pointer to returned output(decompressed) size, updated when this function is successful
 * @param pCompressedSize pointer to returned input(compressed) size, updated when this function is successful
 *
 * @return LZSA_OK for success, or an error value from lzsa_status_t
 */
 lzsa_status_t lzsa_decompress_file(const char *pszInFilename, const char *pszOutFilename, const char *pszDictionaryFilename, const unsigned int nFlags, int nFormatVersion,
                                    long long *pOriginalSize, long long *pCompressedSize) {
   lzsa_stream_t inStream, outStream;
   void *pDictionaryData = NULL;
   int nDictionaryDataSize = 0;
   lzsa_status_t nStatus;

   /* Open the compressed source first; nothing needs to be released if this fails */
   if (lzsa_filestream_open(&inStream, pszInFilename, "rb") < 0)
      return LZSA_ERROR_SRC;

   /* Then create the destination, releasing the source if that fails */
   if (lzsa_filestream_open(&outStream, pszOutFilename, "wb") < 0) {
      inStream.close(&inStream);
      return LZSA_ERROR_DST;
   }

   /* Decompress only when the dictionary loaded; a load failure falls through to the common close sequence with its status */
   nStatus = lzsa_dictionary_load(pszDictionaryFilename, &pDictionaryData, &nDictionaryDataSize);
   if (!nStatus) {
      nStatus = lzsa_decompress_stream(&inStream, &outStream, pDictionaryData, nDictionaryDataSize, nFlags, nFormatVersion, pOriginalSize, pCompressedSize);
      lzsa_dictionary_free(&pDictionaryData);
   }

   /* Streams are closed in reverse order of opening */
   outStream.close(&outStream);
   inStream.close(&inStream);
   return nStatus;
 }
 /*-------------- Streaming API -------------- */
 /**
 * Decompress stream
 *
 * @param pInStream input(compressed) stream to decompress
 * @param pOutStream output(decompressed) stream to write to
 * @param pDictionaryData dictionary contents, or NULL for none
 * @param nDictionaryDataSize size of dictionary contents, or 0
 * @param nFlags compression flags (LZSA_FLAG_RAW_BLOCK to decompress a raw block, or 0)
 * @param nFormatVersion default version of format to use (1-2). This is used when decompressing a raw block, otherwise the version is extracted from the source file
 * @param pOriginalSize pointer to returned output(decompressed) size, updated when this function is successful
 * @param pCompressedSize pointer to returned input(compressed) size, updated when this function is successful
 *
 * @return LZSA_OK for success, or an error value from lzsa_status_t
 */
 lzsa_status_t lzsa_decompress_stream(lzsa_stream_t *pInStream, lzsa_stream_t *pOutStream, const void *pDictionaryData, int nDictionaryDataSize, const unsigned int nFlags, int nFormatVersion,
      long long *pOriginalSize, long long *pCompressedSize) {
   long long nOriginalSize = 0LL, nCompressedSize = 0LL;
   unsigned char cFrameData[16];
   unsigned char *pInBlock;
   unsigned char *pOutData;
   /* Framed input starts with a file header that supplies the format version; a raw block has no header,
    * so the caller's nFormatVersion is used as-is */
   if ((nFlags & LZSA_FLAG_RAW_BLOCK) == 0) {
      const int nHeaderSize = lzsa_get_header_size();
      memset(cFrameData, 0, 16);
      if (pInStream->read(pInStream, cFrameData, nHeaderSize) != nHeaderSize) {
         return LZSA_ERROR_SRC;
      }
      if (lzsa_decode_header(cFrameData, nHeaderSize, &nFormatVersion) < 0) {
         return LZSA_ERROR_FORMAT;
      }
      nCompressedSize += (long long)nHeaderSize;
   }
   /* Input buffer for one block of compressed (or stored) data */
   pInBlock = (unsigned char*)malloc(BLOCK_SIZE);
   if (!pInBlock) {
      return LZSA_ERROR_MEMORY;
   }
   /* Output buffer of two blocks: the first half holds the previous output (or the dictionary) so that
    * the current block can reference it, the second half receives the current block's output */
   pOutData = (unsigned char*)malloc(BLOCK_SIZE * 2);
   if (!pOutData) {
      free(pInBlock);
      pInBlock = NULL;
      return LZSA_ERROR_MEMORY;
   }
   int nDecompressionError = 0;
   int nPrevDecompressedSize = 0;
   int nNumBlocks = 0;
   while (!pInStream->eof(pInStream) && !nDecompressionError) {
      unsigned int nBlockSize = 0;
      int nIsUncompressed = 0;
      if (nPrevDecompressedSize != 0) {
         /* Slide the previous block's output so that it ends right where the next block's output begins */
         memcpy(pOutData + BLOCK_SIZE - nPrevDecompressedSize, pOutData + BLOCK_SIZE, nPrevDecompressedSize);
      }
      else if (nDictionaryDataSize && pDictionaryData) {
         /* No previous output yet: seed the window with the dictionary.
          * NOTE(review): assumes 0 < nDictionaryDataSize <= BLOCK_SIZE; this is not validated here - confirm callers guarantee it */
         nPrevDecompressedSize = nDictionaryDataSize;
         memcpy(pOutData + BLOCK_SIZE - nPrevDecompressedSize, pDictionaryData, nPrevDecompressedSize);
      }
      if ((nFlags & LZSA_FLAG_RAW_BLOCK) == 0) {
         /* Framed input: each block is preceded by a frame header giving its size and whether it is stored uncompressed */
         const int nFrameSize = lzsa_get_frame_size();
         memset(cFrameData, 0, 16);
         if (pInStream->read(pInStream, cFrameData, nFrameSize) == nFrameSize) {
            if (lzsa_decode_frame(cFrameData, nFrameSize, &nBlockSize, &nIsUncompressed) < 0) {
               nDecompressionError = LZSA_ERROR_FORMAT;
               nBlockSize = 0;
            }
            nCompressedSize += (long long)nFrameSize;
         }
         else {
            nDecompressionError = LZSA_ERROR_SRC;
            nBlockSize = 0;
         }
      }
      else {
         /* Raw input: a single block, read as up to BLOCK_SIZE bytes on the first pass only */
         if (!nNumBlocks)
            nBlockSize = BLOCK_SIZE;
         else
            nBlockSize = 0;
      }
      if (nBlockSize != 0) {
         int nDecompressedSize = 0;
         /* The frame size field can announce more than the input buffer can hold; refuse such frames */
         if ((int)nBlockSize > BLOCK_SIZE) {
            nDecompressionError = LZSA_ERROR_FORMAT;
            break;
         }
         size_t nReadBytes = pInStream->read(pInStream, pInBlock, nBlockSize);
         if (nFlags & LZSA_FLAG_RAW_BLOCK) {
            /* A raw block has no declared size: whatever was read is the block */
            nBlockSize = (unsigned int)nReadBytes;
         }
         if (nReadBytes == nBlockSize) {
            nCompressedSize += (long long)nReadBytes;
            if (nIsUncompressed) {
               memcpy(pOutData + BLOCK_SIZE, pInBlock, nBlockSize);
               nDecompressedSize = nBlockSize;
            }
            else {
               nDecompressedSize = lzsa_decompressor_expand_block(pInBlock, nBlockSize, pOutData, BLOCK_SIZE, BLOCK_SIZE, nFormatVersion, nFlags);
               if (nDecompressedSize < 0) {
                  nDecompressionError = LZSA_ERROR_DECOMPRESSION;
                  break;
               }
            }
            if (nDecompressedSize != 0) {
               nOriginalSize += (long long)nDecompressedSize;
               if (pOutStream->write(pOutStream, pOutData + BLOCK_SIZE, nDecompressedSize) != nDecompressedSize)
                  nDecompressionError = LZSA_ERROR_DST;
               nPrevDecompressedSize = nDecompressedSize;
               nDecompressedSize = 0;
            }
         }
         else {
            /* Only reachable for framed input: fewer payload bytes arrived than the frame header announced.
             * The source is truncated or unreadable, so report it instead of returning success
             * with partial output */
            nDecompressionError = LZSA_ERROR_SRC;
            break;
         }
         nNumBlocks++;
      }
      else {
         /* Terminal (zero-size) frame, frame error, or raw block already consumed */
         break;
      }
   }
   free(pOutData);
   pOutData = NULL;
   free(pInBlock);
   pInBlock = NULL;
   /* The running totals are reported even when an error ended the loop */
   *pOriginalSize = nOriginalSize;
   *pCompressedSize = nCompressedSize;
   return nDecompressionError;
 }
--- a/Tools/unix/lzsa/src/expand_streaming.h
+++ b/Tools/unix/lzsa/src/expand_streaming.h
@ -0,0 +1,86 @@
 /*
 * expand_streaming.h - streaming decompression definitions
 *
 * Copyright (C) 2019 Emmanuel Marty
 *
 * This software is provided 'as-is', without any express or implied
 * warranty.  In no event will the authors be held liable for any damages
 * arising from the use of this software.
 *
 * Permission is granted to anyone to use this software for any purpose,
 * including commercial applications, and to alter it and redistribute it
 * freely, subject to the following restrictions:
 *
 * 1. The origin of this software must not be misrepresented; you must not
 *    claim that you wrote the original software. If you use this software
 *    in a product, an acknowledgment in the product documentation would be
 *    appreciated but is not required.
 * 2. Altered source versions must be plainly marked as such, and must not be
 *    misrepresented as being the original software.
 * 3. This notice may not be removed or altered from any source distribution.
 */
 /*
 * Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
 *
 * Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
 * With help, ideas, optimizations and speed measurements by spke <zxintrospec@gmail.com>
 * With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
 * Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
 *
 */
 #ifndef _EXPAND_STREAMING_H
 #define _EXPAND_STREAMING_H
 #include "stream.h"
 #ifdef __cplusplus
 extern "C" {
 #endif
 /* Forward declaration */
 typedef enum _lzsa_status_t lzsa_status_t;
 /*-------------- File API -------------- */
 /**
 * Decompress file
 *
 * @param pszInFilename name of input(compressed) file to decompress
 * @param pszOutFilename name of output(decompressed) file to generate
 * @param pszDictionaryFilename name of dictionary file, or NULL for none
 * @param nFlags compression flags (LZSA_FLAG_RAW_BLOCK to decompress a raw block, or 0)
 * @param nFormatVersion default version of format to use (1-2). This is used when decompressing a raw block, otherwise the version is extracted from the source file
 * @param pOriginalSize pointer to returned output(decompressed) size, updated when this function is successful
 * @param pCompressedSize pointer to returned input(compressed) size, updated when this function is successful
 *
 * @return LZSA_OK for success, or an error value from lzsa_status_t
 */
 lzsa_status_t lzsa_decompress_file(const char *pszInFilename, const char *pszOutFilename, const char *pszDictionaryFilename, const unsigned int nFlags, int nFormatVersion,
   long long *pOriginalSize, long long *pCompressedSize);
 /*-------------- Streaming API -------------- */
 /**
 * Decompress stream
 *
 * @param pInStream input(compressed) stream to decompress
 * @param pOutStream output(decompressed) stream to write to
 * @param pDictionaryData dictionary contents, or NULL for none
 * @param nDictionaryDataSize size of dictionary contents, or 0
 * @param nFlags compression flags (LZSA_FLAG_RAW_BLOCK to decompress a raw block, or 0)
 * @param nFormatVersion default version of format to use (1-2). This is used when decompressing a raw block, otherwise the version is extracted from the source file
 * @param pOriginalSize pointer to returned output(decompressed) size, updated when this function is successful
 * @param pCompressedSize pointer to returned input(compressed) size, updated when this function is successful
 *
 * @return LZSA_OK for success, or an error value from lzsa_status_t
 */
 lzsa_status_t lzsa_decompress_stream(lzsa_stream_t *pInStream, lzsa_stream_t *pOutStream, const void *pDictionaryData, int nDictionaryDataSize, const unsigned int nFlags, int nFormatVersion,
   long long *pOriginalSize, long long *pCompressedSize);
 #ifdef __cplusplus
 }
 #endif
 #endif /* _EXPAND_STREAMING_H */
--- a/Tools/unix/lzsa/src/format.h
+++ b/Tools/unix/lzsa/src/format.h
@ -0,0 +1,51 @@
 /*
 * format.h - byte stream format definitions
 *
 * Copyright (C) 2019 Emmanuel Marty
 *
 * This software is provided 'as-is', without any express or implied
 * warranty.  In no event will the authors be held liable for any damages
 * arising from the use of this software.
 *
 * Permission is granted to anyone to use this software for any purpose,
 * including commercial applications, and to alter it and redistribute it
 * freely, subject to the following restrictions:
 *
 * 1. The origin of this software must not be misrepresented; you must not
 *    claim that you wrote the original software. If you use this software
 *    in a product, an acknowledgment in the product documentation would be
 *    appreciated but is not required.
 * 2. Altered source versions must be plainly marked as such, and must not be
 *    misrepresented as being the original software.
 * 3. This notice may not be removed or altered from any source distribution.
 */
 /*
 * Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
 *
 * Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
 * With help, ideas, optimizations and speed measurements by spke <zxintrospec@gmail.com>
 * With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
 * Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
 *
 */
 #ifndef _FORMAT_H
 #define _FORMAT_H
 /* Match offset bounds. NOTE(review): presumably the smallest and largest distance a match may reference - confirm against the block format documents */
 #define MIN_OFFSET 1
 #define MAX_OFFSET 0xffff
 /* NOTE(review): presumably the largest value an extended (variable-length) literals or match length can take - confirm */
 #define MAX_VARLEN 0xffff
 /* Size of one block, in bytes; the streaming decompressor sizes its buffers from it and rejects frames that announce more */
 #define BLOCK_SIZE 65536
 /* LZSA1: shortest match, in bytes (encoded match lengths are offset by this minimum) */
 #define MIN_MATCH_SIZE_V1 3
 /* LZSA1: value of the token's 3-bit literals field that signals that extra literals length bytes follow */
 #define LITERALS_RUN_LEN_V1 7
 /* LZSA1: value of the token's 4-bit match field that signals that extra match length bytes follow */
 #define MATCH_RUN_LEN_V1 15
 /* LZSA2 counterparts of the three values above. NOTE(review): roles assumed by analogy with LZSA1 - confirm against the LZSA2 block format */
 #define MIN_MATCH_SIZE_V2 2
 #define LITERALS_RUN_LEN_V2 3
 #define MATCH_RUN_LEN_V2 7
 #endif /* _FORMAT_H */
--- a/Tools/unix/lzsa/src/frame.c
+++ b/Tools/unix/lzsa/src/frame.c
@ -0,0 +1,189 @@
 /*
 * frame.c - frame implementation
 *
 * Copyright (C) 2019 Emmanuel Marty
 *
 * This software is provided 'as-is', without any express or implied
 * warranty.  In no event will the authors be held liable for any damages
 * arising from the use of this software.
 *
 * Permission is granted to anyone to use this software for any purpose,
 * including commercial applications, and to alter it and redistribute it
 * freely, subject to the following restrictions:
 *
 * 1. The origin of this software must not be misrepresented; you must not
 *    claim that you wrote the original software. If you use this software
 *    in a product, an acknowledgment in the product documentation would be
 *    appreciated but is not required.
 * 2. Altered source versions must be plainly marked as such, and must not be
 *    misrepresented as being the original software.
 * 3. This notice may not be removed or altered from any source distribution.
 */
 /*
 * Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
 *
 * Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
 * With help, ideas, optimizations and speed measurements by spke <zxintrospec@gmail.com>
 * With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
 * Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
 *
 */
 #include <stdlib.h>
 #include <string.h>
 #include "frame.h"
 #define LZSA_ID_0   0x7b
 #define LZSA_ID_1   0x9e
 /**
 * Get compressed file header size
 *
 * @return file header size
 */
 int lzsa_get_header_size(void) {
   /* Two magic bytes plus one byte carrying the format version bits */
   const int nHeaderBytes = 3;

   return nHeaderBytes;
 }
 /**
 * Get compressed frame header size
 *
 * @return frame header size
 */
 int lzsa_get_frame_size(void) {
   /* A frame header is a 24-bit value: 23 bits of block size plus the uncompressed flag in the top bit */
   const int nFrameBytes = 3;

   return nFrameBytes;
 }
 /**
 * Encode file header
 *
 * The header is made of the two magic bytes followed by one byte that holds
 * the format version: 0x00 for version 1, 0x20 for version 2.
 *
 * @param pFrameData encoding buffer
 * @param nMaxFrameDataSize max encoding buffer size, in bytes
 * @param nFormatVersion version of format to encode (1-2)
 *
 * @return number of encoded bytes, or -1 for failure (buffer smaller than 3 bytes, or version other than 1 or 2)
 */
 int lzsa_encode_header(unsigned char *pFrameData, const int nMaxFrameDataSize, int nFormatVersion) {
   /* Refuse buffers that cannot hold the three header bytes */
   if (nMaxFrameDataSize < 3)
      return -1;

   /* Only format versions 1 and 2 can be encoded */
   if (nFormatVersion != 1 && nFormatVersion != 2)
      return -1;

   /* Magic number */
   pFrameData[0] = LZSA_ID_0;
   pFrameData[1] = LZSA_ID_1;

   /* Format version byte */
   if (nFormatVersion == 2)
      pFrameData[2] = 0x20;
   else
      pFrameData[2] = 0;

   return 3;
 }
 /**
 * Encode compressed block frame header
 *
 * The size is written as a 24-bit little-endian value; the top bit of the
 * last byte is left clear, which marks the block as compressed.
 *
 * @param pFrameData encoding buffer
 * @param nMaxFrameDataSize max encoding buffer size, in bytes
 * @param nBlockDataSize compressed block's data size, in bytes (0 to 0x7fffff)
 *
 * @return number of encoded bytes, or -1 for failure (buffer smaller than 3 bytes, or size out of range)
 */
 int lzsa_encode_compressed_block_frame(unsigned char *pFrameData, const int nMaxFrameDataSize, const int nBlockDataSize) {
   /* A negative size does not fit the 23-bit size field; reject it rather than encode its low bits */
   if (nMaxFrameDataSize >= 3 && nBlockDataSize >= 0 && nBlockDataSize <= 0x7fffff) {
      pFrameData[0] = (unsigned char)(nBlockDataSize & 0xff);
      pFrameData[1] = (unsigned char)((nBlockDataSize >> 8) & 0xff);
      pFrameData[2] = (unsigned char)((nBlockDataSize >> 16) & 0x7f);
      return 3;
   }
   else {
      return -1;
   }
 }
 /**
 * Encode uncompressed block frame header
 *
 * The size is written as a 24-bit little-endian value; the top bit of the
 * last byte is set, which marks the block as stored uncompressed.
 *
 * @param pFrameData encoding buffer
 * @param nMaxFrameDataSize max encoding buffer size, in bytes
 * @param nBlockDataSize uncompressed block's data size, in bytes (0 to 0x7fffff)
 *
 * @return number of encoded bytes, or -1 for failure (buffer smaller than 3 bytes, or size out of range)
 */
 int lzsa_encode_uncompressed_block_frame(unsigned char *pFrameData, const int nMaxFrameDataSize, const int nBlockDataSize) {
   /* A negative size does not fit the 23-bit size field; reject it rather than encode its low bits */
   if (nMaxFrameDataSize >= 3 && nBlockDataSize >= 0 && nBlockDataSize <= 0x7fffff) {
      pFrameData[0] = (unsigned char)(nBlockDataSize & 0xff);
      pFrameData[1] = (unsigned char)((nBlockDataSize >> 8) & 0xff);
      pFrameData[2] = (unsigned char)(((nBlockDataSize >> 16) & 0x7f) | 0x80);   /* Uncompressed block */
      return 3;
   }
   else {
      return -1;
   }
 }
 /**
 * Encode terminal frame header
 *
 * @param pFrameData encoding buffer
 * @param nMaxFrameDataSize max encoding buffer size, in bytes
 *
 * @return number of encoded bytes, or -1 for failure
 */
 int lzsa_encode_footer_frame(unsigned char *pFrameData, const int nMaxFrameDataSize) {
   int nIndex;

   /* The buffer must have room for the three frame bytes */
   if (nMaxFrameDataSize < 3)
      return -1;

   /* The end-of-data frame is a frame header with every byte cleared */
   for (nIndex = 0; nIndex < 3; nIndex++)
      pFrameData[nIndex] = 0x00;

   return 3;
 }
 /**
 * Decode file header
 *
 * The header is accepted only when it is exactly 3 bytes long, starts with
 * the two magic bytes, has the low five bits of its third byte clear, and
 * carries 0x00 (version 1) or 0x20 (version 2) in the top three bits.
 *
 * @param pFrameData data bytes
 * @param nFrameDataSize number of bytes to decode
 * @param nFormatVersion pointer to format version (1-2), updated if this function succeeds
 *
 * @return 0 for success, or -1 for failure
 */
 int lzsa_decode_header(const unsigned char *pFrameData, const int nFrameDataSize, int *nFormatVersion) {
   /* The size is checked before any byte is read */
   if (nFrameDataSize != 3)
      return -1;

   /* Magic number */
   if (pFrameData[0] != LZSA_ID_0)
      return -1;
   if (pFrameData[1] != LZSA_ID_1)
      return -1;

   /* Low five bits must be clear */
   if ((pFrameData[2] & 0x1f) != 0)
      return -1;

   /* Top three bits select the format version */
   switch (pFrameData[2] & 0xe0) {
      case 0x00:
         *nFormatVersion = 1;
         return 0;
      case 0x20:
         *nFormatVersion = 2;
         return 0;
      default:
         return -1;
   }
 }
 /**
 * Decode frame header
 *
 * The three bytes are read as a 24-bit little-endian value: the top bit is
 * the uncompressed flag and the remaining 23 bits are the block size.
 *
 * @param pFrameData data bytes
 * @param nFrameDataSize number of bytes to decode
 * @param nBlockSize pointer to block size, updated if this function succeeds (set to 0 if this is the terminal frame)
 * @param nIsUncompressed pointer to uncompressed block flag (1 if the block is stored uncompressed, 0 otherwise), updated if this function succeeds
 *
 * @return 0 for success, or -1 for failure
 */
 int lzsa_decode_frame(const unsigned char *pFrameData, const int nFrameDataSize, unsigned int *nBlockSize, int *nIsUncompressed) {
   unsigned int nFrameValue;

   /* Only exact 3-byte frames are understood; outputs stay untouched otherwise */
   if (nFrameDataSize != 3)
      return -1;

   /* Assemble the little-endian 24-bit value */
   nFrameValue = (unsigned int)pFrameData[0];
   nFrameValue |= ((unsigned int)pFrameData[1]) << 8;
   nFrameValue |= ((unsigned int)pFrameData[2]) << 16;

   /* Split it into the flag (bit 23) and the size (bits 0-22) */
   if (nFrameValue & 0x800000)
      *nIsUncompressed = 1;
   else
      *nIsUncompressed = 0;
   *nBlockSize = nFrameValue & 0x7fffff;

   return 0;
 }
--- a/Tools/unix/lzsa/src/frame.h
+++ b/Tools/unix/lzsa/src/frame.h
@ -0,0 +1,122 @@
 /*
 * frame.h - frame definitions
 *
 * Copyright (C) 2019 Emmanuel Marty
 *
 * This software is provided 'as-is', without any express or implied
 * warranty.  In no event will the authors be held liable for any damages
 * arising from the use of this software.
 *
 * Permission is granted to anyone to use this software for any purpose,
 * including commercial applications, and to alter it and redistribute it
 * freely, subject to the following restrictions:
 *
 * 1. The origin of this software must not be misrepresented; you must not
 *    claim that you wrote the original software. If you use this software
 *    in a product, an acknowledgment in the product documentation would be
 *    appreciated but is not required.
 * 2. Altered source versions must be plainly marked as such, and must not be
 *    misrepresented as being the original software.
 * 3. This notice may not be removed or altered from any source distribution.
 */
 /*
 * Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
 *
 * Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
 * With help, ideas, optimizations and speed measurements by spke <zxintrospec@gmail.com>
 * With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
 * Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
 *
 */
 #ifndef _FRAME_H
 #define _FRAME_H
 #ifdef __cplusplus
 extern "C" {
 #endif
 /**
 * Get compressed file header size
 *
 * @return file header size
 */
 int lzsa_get_header_size(void);
 /**
 * Get compressed frame header size
 *
 * @return frame header size
 */
 int lzsa_get_frame_size(void);
 /**
 * Encode file header
 *
 * @param pFrameData encoding buffer
 * @param nMaxFrameDataSize max encoding buffer size, in bytes
 * @param nFormatVersion version of format to encode (1-2)
 *
 * @return number of encoded bytes, or -1 for failure
 */
 int lzsa_encode_header(unsigned char *pFrameData, const int nMaxFrameDataSize, int nFormatVersion);
 /**
 * Encode compressed block frame header
 *
 * @param pFrameData encoding buffer
 * @param nMaxFrameDataSize max encoding buffer size, in bytes
 * @param nBlockDataSize compressed block's data size, in bytes
 *
 * @return number of encoded bytes, or -1 for failure
 */
 int lzsa_encode_compressed_block_frame(unsigned char *pFrameData, const int nMaxFrameDataSize, const int nBlockDataSize);
 /**
 * Encode uncompressed block frame header
 *
 * @param pFrameData encoding buffer
 * @param nMaxFrameDataSize max encoding buffer size, in bytes
 * @param nBlockDataSize uncompressed block's data size, in bytes
 *
 * @return number of encoded bytes, or -1 for failure
 */
 int lzsa_encode_uncompressed_block_frame(unsigned char *pFrameData, const int nMaxFrameDataSize, const int nBlockDataSize);
 /**
 * Encode terminal frame header
 *
 * @param pFrameData encoding buffer
 * @param nMaxFrameDataSize max encoding buffer size, in bytes
 *
 * @return number of encoded bytes, or -1 for failure
 */
 int lzsa_encode_footer_frame(unsigned char *pFrameData, const int nMaxFrameDataSize);
 /**
 * Decode file header
 *
 * @param pFrameData data bytes
 * @param nFrameDataSize number of bytes to decode
 * @param nFormatVersion pointer to format version (1-2), updated if this function succeeds
 *
 * @return 0 for success, or -1 for failure
 */
 int lzsa_decode_header(const unsigned char *pFrameData, const int nFrameDataSize, int *nFormatVersion);
 /**
 * Decode frame header
 *
 * @param pFrameData data bytes
 * @param nFrameDataSize number of bytes to decode
 * @param nBlockSize pointer to block size, updated if this function succeeds (set to 0 if this is the terminal frame)
 * @param nIsUncompressed pointer to uncompressed block flag (1 if the block is stored uncompressed, 0 otherwise), updated if this function succeeds
 *
 * @return 0 for success, or -1 for failure
 */
 int lzsa_decode_frame(const unsigned char *pFrameData, const int nFrameDataSize, unsigned int *nBlockSize, int *nIsUncompressed);
 #ifdef __cplusplus
 }
 #endif
 #endif /* _FRAME_H */
--- a/Tools/unix/lzsa/src/lib.h
+++ b/Tools/unix/lzsa/src/lib.h
@ -0,0 +1,95 @@
 /*
 * lib.h - LZSA library definitions
 *
 * Copyright (C) 2019 Emmanuel Marty
 *
 * This software is provided 'as-is', without any express or implied
 * warranty.  In no event will the authors be held liable for any damages
 * arising from the use of this software.
 *
 * Permission is granted to anyone to use this software for any purpose,
 * including commercial applications, and to alter it and redistribute it
 * freely, subject to the following restrictions:
 *
 * 1. The origin of this software must not be misrepresented; you must not
 *    claim that you wrote the original software. If you use this software
 *    in a product, an acknowledgment in the product documentation would be
 *    appreciated but is not required.
 * 2. Altered source versions must be plainly marked as such, and must not be
 *    misrepresented as being the original software.
 * 3. This notice may not be removed or altered from any source distribution.
 */
 /*
 * Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
 *
 * Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
 * With help, ideas, optimizations and speed measurements by spke <zxintrospec@gmail.com>
 * With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
 * Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
 *
 */
 #ifndef _LIB_H
 #define _LIB_H
 #include "stream.h"
 #include "dictionary.h"
 #include "frame.h"
 #include "format.h"
 #include "shrink_context.h"
 #include "shrink_streaming.h"
 #include "shrink_inmem.h"
 #include "expand_context.h"
 #include "expand_streaming.h"
 #include "expand_inmem.h"
 #ifdef __cplusplus
 extern "C" {
 #endif
 /** High level status for compression and decompression */
 typedef enum _lzsa_status_t {
   LZSA_OK = 0,                           /**< Success */
   LZSA_ERROR_SRC,                        /**< Error opening or reading input */
   LZSA_ERROR_DST,                        /**< Error opening or writing output */
   LZSA_ERROR_DICTIONARY,                 /**< Error reading dictionary */
   LZSA_ERROR_MEMORY,                     /**< Out of memory */
   /* Compression-specific status codes */
   LZSA_ERROR_COMPRESSION,                /**< Internal compression error */
   LZSA_ERROR_RAW_TOOLARGE,               /**< Input is too large to be compressed to a raw block */
   LZSA_ERROR_RAW_UNCOMPRESSED,           /**< Input is incompressible and raw blocks don't support uncompressed data */
   /* Decompression-specific status codes */
   LZSA_ERROR_FORMAT,                     /**< Invalid input format or magic number when decompressing */
   LZSA_ERROR_DECOMPRESSION,              /**< Internal decompression error */
 } lzsa_status_t;
 /* Compression flags */
 #define LZSA_FLAG_FAVOR_RATIO    (1<<0)      /**< 1 to compress with the best ratio, 0 to trade some compression ratio for extra decompression speed */
 #define LZSA_FLAG_RAW_BLOCK      (1<<1)      /**< 1 to emit raw block */
 #define LZSA_FLAG_RAW_BACKWARD   (1<<2)      /**< 1 to compress or decompress raw block backward */
 /**
 * Reverse bytes in the specified buffer
 *
 * @param pBuffer pointer to buffer whose contents are to be reversed
 * @param nBufferSize size of buffer in bytes
 */
 static inline void lzsa_reverse_buffer(unsigned char *pBuffer, const int nBufferSize) {
   int nFront = 0;
   int nBack = nBufferSize - 1;

   /* Walk inwards from both ends, exchanging bytes until the two positions meet or cross */
   while (nFront < nBack) {
      const unsigned char cSaved = pBuffer[nFront];

      pBuffer[nFront] = pBuffer[nBack];
      pBuffer[nBack] = cSaved;
      nFront++;
      nBack--;
   }
 }
 #ifdef __cplusplus
 }
 #endif
 #endif /* _LIB_H */
--- a/Tools/unix/lzsa/src/libdivsufsort/CHANGELOG.md
+++ b/Tools/unix/lzsa/src/libdivsufsort/CHANGELOG.md
@ -0,0 +1,21 @@
 # libdivsufsort Change Log
 See full changelog at: https://github.com/y-256/libdivsufsort/commits
 ## [2.0.1] - 2010-11-11
 ### Fixed
 * Wrong variable used in `divbwt` function
 * Enclose some string variables with double quotation marks in include/CMakeLists.txt
 * Fix typo in include/CMakeLists.txt
 ## 2.0.0 - 2008-08-23
 ### Changed
 * Switch the build system to [CMake](http://www.cmake.org/)
 * Improve the performance of the suffix-sorting algorithm
 ### Added
 * OpenMP support
 * 64-bit version of divsufsort
 [Unreleased]: https://github.com/y-256/libdivsufsort/compare/2.0.1...HEAD
 [2.0.1]: https://github.com/y-256/libdivsufsort/compare/2.0.0...2.0.1
--- a/Tools/unix/lzsa/src/libdivsufsort/CMakeLists.txt
+++ b/Tools/unix/lzsa/src/libdivsufsort/CMakeLists.txt
@ -0,0 +1,99 @@
 ### cmake file for building libdivsufsort Package ###
 cmake_minimum_required(VERSION 2.4.4)
 # Make the helper modules in CMakeModules/ visible to include(); AppendCompilerFlags supplies
 # the append_c_compiler_flags() macro used in the compiler options section below
 set(CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/CMakeModules")
 include(AppendCompilerFlags)
 ## Project information ##
 project(libdivsufsort C)
 set(PROJECT_VENDOR "Yuta Mori")
 set(PROJECT_CONTACT "yuta.256@gmail.com")
 set(PROJECT_URL "https://github.com/y-256/libdivsufsort")
 set(PROJECT_DESCRIPTION "A lightweight suffix sorting library")
 include(VERSION.cmake)
 ## CPack configuration ##
 set(CPACK_GENERATOR "TGZ;TBZ2;ZIP")
 set(CPACK_SOURCE_GENERATOR "TGZ;TBZ2;ZIP")
 include(ProjectCPack)
 ## Project options ##
 option(BUILD_SHARED_LIBS "Set to OFF to build static libraries" ON)
 option(BUILD_EXAMPLES "Build examples" ON)
 option(BUILD_DIVSUFSORT64 "Build libdivsufsort64" OFF)
 option(USE_OPENMP "Use OpenMP for parallelization" OFF)
 option(WITH_LFS "Enable Large File Support" ON)
 ## Installation directories ##
 # Each directory is a cache entry that starts out empty; when left empty it falls back to a
 # location under CMAKE_INSTALL_PREFIX (or, for pkgconfig, under the library directory)
 set(LIB_SUFFIX "" CACHE STRING "Define suffix of directory name (32 or 64)")
 set(CMAKE_INSTALL_RUNTIMEDIR "" CACHE PATH "Specify the output directory for dll runtimes (default is bin)")
 if(NOT CMAKE_INSTALL_RUNTIMEDIR)
  set(CMAKE_INSTALL_RUNTIMEDIR "${CMAKE_INSTALL_PREFIX}/bin")
 endif(NOT CMAKE_INSTALL_RUNTIMEDIR)
 set(CMAKE_INSTALL_LIBDIR "" CACHE PATH "Specify the output directory for libraries (default is lib)")
 if(NOT CMAKE_INSTALL_LIBDIR)
  set(CMAKE_INSTALL_LIBDIR "${CMAKE_INSTALL_PREFIX}/lib${LIB_SUFFIX}")
 endif(NOT CMAKE_INSTALL_LIBDIR)
 set(CMAKE_INSTALL_INCLUDEDIR "" CACHE PATH "Specify the output directory for header files (default is include)")
 if(NOT CMAKE_INSTALL_INCLUDEDIR)
  set(CMAKE_INSTALL_INCLUDEDIR "${CMAKE_INSTALL_PREFIX}/include")
 endif(NOT CMAKE_INSTALL_INCLUDEDIR)
 set(CMAKE_INSTALL_PKGCONFIGDIR "" CACHE PATH "Specify the output directory for pkgconfig files (default is lib/pkgconfig)")
 if(NOT CMAKE_INSTALL_PKGCONFIGDIR)
  set(CMAKE_INSTALL_PKGCONFIGDIR "${CMAKE_INSTALL_LIBDIR}/pkgconfig")
 endif(NOT CMAKE_INSTALL_PKGCONFIGDIR)
 ## Build type ##
 # Default to a Release build when none was requested; Debug builds get verbose makefiles
 if(NOT CMAKE_BUILD_TYPE)
  set(CMAKE_BUILD_TYPE "Release")
 elseif(CMAKE_BUILD_TYPE STREQUAL "Debug")
  set(CMAKE_VERBOSE_MAKEFILE ON)
 endif(NOT CMAKE_BUILD_TYPE)
 ## Compiler options ##
 # Flags are added through append_c_compiler_flags(), so each one is kept only when a
 # trial compile with it succeeds. Warning flags go to CMAKE_C_FLAGS, optimization flags
 # to CMAKE_C_FLAGS_RELEASE, and the OpenMP flag is only tried when USE_OPENMP is ON
 if(MSVC)
  append_c_compiler_flags("/W4" "VC" CMAKE_C_FLAGS)
  append_c_compiler_flags("/Oi;/Ot;/Ox;/Oy" "VC" CMAKE_C_FLAGS_RELEASE)
  if(USE_OPENMP)
    append_c_compiler_flags("/openmp" "VC" CMAKE_C_FLAGS)
  endif(USE_OPENMP)
 elseif(BORLAND)
  append_c_compiler_flags("-w" "BCC" CMAKE_C_FLAGS)
  append_c_compiler_flags("-Oi;-Og;-Os;-Ov;-Ox" "BCC" CMAKE_C_FLAGS_RELEASE)
 else(MSVC)
  if(CMAKE_COMPILER_IS_GNUCC)
    append_c_compiler_flags("-Wall" "GCC" CMAKE_C_FLAGS)
    append_c_compiler_flags("-fomit-frame-pointer" "GCC" CMAKE_C_FLAGS_RELEASE)
    if(USE_OPENMP)
      append_c_compiler_flags("-fopenmp" "GCC" CMAKE_C_FLAGS)
    endif(USE_OPENMP)
  else(CMAKE_COMPILER_IS_GNUCC)
    # Unknown compiler: try the GCC-style flags, plus several spellings of the OpenMP switch
    append_c_compiler_flags("-Wall" "UNKNOWN" CMAKE_C_FLAGS)
    append_c_compiler_flags("-fomit-frame-pointer" "UNKNOWN" CMAKE_C_FLAGS_RELEASE)
    if(USE_OPENMP)
      append_c_compiler_flags("-fopenmp;-openmp;-omp" "UNKNOWN" CMAKE_C_FLAGS)
    endif(USE_OPENMP)
  endif(CMAKE_COMPILER_IS_GNUCC)
 endif(MSVC)
 ## Add definitions ##
 add_definitions(-DHAVE_CONFIG_H=1 -D__STDC_LIMIT_MACROS -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS)
 ## Add subdirectories ##
 add_subdirectory(pkgconfig)
 add_subdirectory(include)
 add_subdirectory(lib)
 # The examples are optional and controlled by the BUILD_EXAMPLES option declared above
 if(BUILD_EXAMPLES)
  add_subdirectory(examples)
 endif(BUILD_EXAMPLES)
 ## Add 'uninstall' target ##
 # Generate the uninstall script from its template in the build tree, then expose it as a
 # custom target that runs the script with cmake -P
 CONFIGURE_FILE(
  "${CMAKE_CURRENT_SOURCE_DIR}/CMakeModules/cmake_uninstall.cmake.in"
  "${CMAKE_CURRENT_BINARY_DIR}/CMakeModules/cmake_uninstall.cmake"
  IMMEDIATE @ONLY)
 ADD_CUSTOM_TARGET(uninstall
  "${CMAKE_COMMAND}" -P "${CMAKE_CURRENT_BINARY_DIR}/CMakeModules/cmake_uninstall.cmake")
--- a/Tools/unix/lzsa/src/libdivsufsort/CMakeModules/AppendCompilerFlags.cmake
+++ b/Tools/unix/lzsa/src/libdivsufsort/CMakeModules/AppendCompilerFlags.cmake
@ -0,0 +1,38 @@
 include(CheckCSourceCompiles)
 include(CheckCXXSourceCompiles)
 # append_c_compiler_flags(<flags> <name> <result>)
 #
 # Test-compiles a trivial C program once per flag and appends every flag the
 # C compiler accepts to the variable named by <result>.
 #   _flags  - ;-separated list of candidate compiler flags
 #   _name   - tag used in the name of each check variable, HAVE_<NAME>_<FLAG>
 #   _result - name of the variable that receives the accepted flags; each
 #             flag is appended with a single separating space in front
 # CMAKE_REQUIRED_FLAGS is saved on entry and restored before returning.
 macro(append_c_compiler_flags _flags _name _result)
  set(SAFE_CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS})
  # Normalize the tag into an identifier: -, +, / and space become _, upper case.
  string(REGEX REPLACE "[-+/ ]" "_" cname "${_name}")
  string(TOUPPER "${cname}" cname)
  foreach(flag ${_flags})
    # Derive the flag part of the check variable name: strip the leading
    # option characters, then normalize the rest the same way as the tag.
    string(REGEX REPLACE "^[-+/ ]+(.*)[-+/ ]*$" "\\1" flagname "${flag}")
    string(REGEX REPLACE "[-+/ ]" "_" flagname "${flagname}")
    string(TOUPPER "${flagname}" flagname)
    set(have_flag "HAVE_${cname}_${flagname}")
    # Compile the probe with this flag only.
    set(CMAKE_REQUIRED_FLAGS "${flag}")
    check_c_source_compiles("int main() { return 0; }" ${have_flag})
    if(${have_flag})
      set(${_result} "${${_result}} ${flag}")
    endif(${have_flag})
  endforeach(flag)
  set(CMAKE_REQUIRED_FLAGS ${SAFE_CMAKE_REQUIRED_FLAGS})
 endmacro(append_c_compiler_flags)
 # append_cxx_compiler_flags(<flags> <name> <result>)
 #
 # C++ counterpart of append_c_compiler_flags: test-compiles a trivial C++
 # program once per flag and appends every accepted flag to the variable
 # named by <result>.
 #   _flags  - ;-separated list of candidate compiler flags
 #   _name   - tag used in the name of each check variable, HAVE_<NAME>_<FLAG>
 #   _result - name of the variable that receives the accepted flags; each
 #             flag is appended with a single separating space in front
 # CMAKE_REQUIRED_FLAGS is saved on entry and restored before returning.
 macro(append_cxx_compiler_flags _flags _name _result)
  set(SAFE_CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS})
  # Normalize the tag into an identifier: -, +, / and space become _, upper case.
  string(REGEX REPLACE "[-+/ ]" "_" cname "${_name}")
  string(TOUPPER "${cname}" cname)
  foreach(flag ${_flags})
    # Derive the flag part of the check variable name: strip the leading
    # option characters, then normalize the rest the same way as the tag.
    string(REGEX REPLACE "^[-+/ ]+(.*)[-+/ ]*$" "\\1" flagname "${flag}")
    string(REGEX REPLACE "[-+/ ]" "_" flagname "${flagname}")
    string(TOUPPER "${flagname}" flagname)
    set(have_flag "HAVE_${cname}_${flagname}")
    # Compile the probe with this flag only.
    set(CMAKE_REQUIRED_FLAGS "${flag}")
    check_cxx_source_compiles("int main() { return 0; }" ${have_flag})
    if(${have_flag})
      set(${_result} "${${_result}} ${flag}")
    endif(${have_flag})
  endforeach(flag)
  set(CMAKE_REQUIRED_FLAGS ${SAFE_CMAKE_REQUIRED_FLAGS})
 endmacro(append_cxx_compiler_flags)
--- a/Tools/unix/lzsa/src/libdivsufsort/CMakeModules/CheckFunctionKeywords.cmake
+++ b/Tools/unix/lzsa/src/libdivsufsort/CMakeModules/CheckFunctionKeywords.cmake
@ -0,0 +1,15 @@
 include(CheckCSourceCompiles)
 # check_function_keywords(<wordlist>)
 #
 # For every keyword in the ;-separated <wordlist>, test-compiles a small C
 # program that declares a function prefixed with that keyword and records
 # the outcome in HAVE_<KEYWORD> (the keyword upper-cased, with -, +, /,
 # space and parentheses replaced by _).
 #
 # NOTE(review): _result is not a parameter of this macro, so the references
 # to it below expand to whatever variable named _result exists in the
 # calling scope (possibly nothing). It looks like a result-variable argument
 # was intended but never declared - confirm before relying on anything
 # other than the HAVE_<KEYWORD> variables.
 macro(check_function_keywords _wordlist)
  set(${_result} "")
  foreach(flag ${_wordlist})
    string(REGEX REPLACE "[-+/ ()]" "_" flagname "${flag}")
    string(TOUPPER "${flagname}" flagname)
    set(have_flag "HAVE_${flagname}")
    check_c_source_compiles("${flag} void func(); void func() { } int main() { func(); return 0; }" ${have_flag})
    if(${have_flag} AND NOT ${_result})
      set(${_result} "${flag}")
 #      break()
    endif(${have_flag} AND NOT ${_result})
  endforeach(flag)
 endmacro(check_function_keywords)
--- a/Tools/unix/lzsa/src/libdivsufsort/CMakeModules/CheckLFS.cmake
+++ b/Tools/unix/lzsa/src/libdivsufsort/CMakeModules/CheckLFS.cmake
@ -0,0 +1,109 @@
 ## Checks for large file support ##
 include(CheckIncludeFile)
 include(CheckSymbolExists)
 include(CheckTypeSize)
 # check_lfs(<isenable>)
 #
 # Selects the file offset type and stdio functions to use for large files
 # and stores the choice in five variables:
 #   LFS_OFF_T - offset type            LFS_FOPEN - open function
 #   LFS_FSEEK - seek function          LFS_FTELL - tell function
 #   LFS_PRID  - printf conversion for LFS_OFF_T (a macro name or a quoted
 #               C string literal)
 # <isenable> is the name of a variable; the probes only run when it is true.
 # Three variants are tried in order (off_t, off64_t, __int64). If none
 # matches, or probing is disabled, the plain long/fopen/fseek/ftell set is
 # used.
 macro(check_lfs _isenable)
  set(LFS_OFF_T "")
  set(LFS_FOPEN "")
  set(LFS_FSEEK "")
  set(LFS_FTELL "")
  set(LFS_PRID "")
  if(${_isenable})
    # Probe with the large-file feature macros defined; the previous
    # definitions are restored at the end of this branch.
    set(SAFE_CMAKE_REQUIRED_DEFINITIONS "${CMAKE_REQUIRED_DEFINITIONS}")
    set(CMAKE_REQUIRED_DEFINITIONS ${CMAKE_REQUIRED_DEFINITIONS}
        -D_LARGEFILE_SOURCE -D_LARGE_FILES -D_FILE_OFFSET_BITS=64
        -D__STDC_LIMIT_MACROS -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS)
    check_include_file("sys/types.h" HAVE_SYS_TYPES_H)
    check_include_file("inttypes.h" HAVE_INTTYPES_H)
    check_include_file("stddef.h" HAVE_STDDEF_H)
    check_include_file("stdint.h" HAVE_STDINT_H)
    # LFS type1: 8 <= sizeof(off_t), fseeko, ftello
    check_type_size("off_t" SIZEOF_OFF_T)
    if(SIZEOF_OFF_T GREATER 7)
      check_symbol_exists("fseeko" "stdio.h" HAVE_FSEEKO)
      check_symbol_exists("ftello" "stdio.h" HAVE_FTELLO)
      if(HAVE_FSEEKO AND HAVE_FTELLO)
        set(LFS_OFF_T "off_t")
        set(LFS_FOPEN "fopen")
        set(LFS_FSEEK "fseeko")
        set(LFS_FTELL "ftello")
        # Prefer PRIdMAX; otherwise pick a length modifier from the type sizes.
        check_symbol_exists("PRIdMAX" "inttypes.h" HAVE_PRIDMAX)
        if(HAVE_PRIDMAX)
          set(LFS_PRID "PRIdMAX")
        else(HAVE_PRIDMAX)
          check_type_size("long" SIZEOF_LONG)
          check_type_size("int" SIZEOF_INT)
          if(SIZEOF_OFF_T GREATER SIZEOF_LONG)
            set(LFS_PRID "\"lld\"")
          elseif(SIZEOF_LONG GREATER SIZEOF_INT)
            set(LFS_PRID "\"ld\"")
          else(SIZEOF_OFF_T GREATER SIZEOF_LONG)
            set(LFS_PRID "\"d\"")
          endif(SIZEOF_OFF_T GREATER SIZEOF_LONG)
        endif(HAVE_PRIDMAX)
      endif(HAVE_FSEEKO AND HAVE_FTELLO)
    endif(SIZEOF_OFF_T GREATER 7)
    # LFS type2: 8 <= sizeof(off64_t), fopen64, fseeko64, ftello64
    if(NOT LFS_OFF_T)
      check_type_size("off64_t" SIZEOF_OFF64_T)
      if(SIZEOF_OFF64_T GREATER 7)
        check_symbol_exists("fopen64" "stdio.h" HAVE_FOPEN64)
        check_symbol_exists("fseeko64" "stdio.h" HAVE_FSEEKO64)
        check_symbol_exists("ftello64" "stdio.h" HAVE_FTELLO64)
        if(HAVE_FOPEN64 AND HAVE_FSEEKO64 AND HAVE_FTELLO64)
          set(LFS_OFF_T "off64_t")
          set(LFS_FOPEN "fopen64")
          set(LFS_FSEEK "fseeko64")
          set(LFS_FTELL "ftello64")
          # Same printf conversion selection as for type1.
          check_symbol_exists("PRIdMAX" "inttypes.h" HAVE_PRIDMAX)
          if(HAVE_PRIDMAX)
            set(LFS_PRID "PRIdMAX")
          else(HAVE_PRIDMAX)
            check_type_size("long" SIZEOF_LONG)
            check_type_size("int" SIZEOF_INT)
            if(SIZEOF_OFF64_T GREATER SIZEOF_LONG)
              set(LFS_PRID "\"lld\"")
            elseif(SIZEOF_LONG GREATER SIZEOF_INT)
              set(LFS_PRID "\"ld\"")
            else(SIZEOF_OFF64_T GREATER SIZEOF_LONG)
              set(LFS_PRID "\"d\"")
            endif(SIZEOF_OFF64_T GREATER SIZEOF_LONG)
          endif(HAVE_PRIDMAX)
        endif(HAVE_FOPEN64 AND HAVE_FSEEKO64 AND HAVE_FTELLO64)
      endif(SIZEOF_OFF64_T GREATER 7)
    endif(NOT LFS_OFF_T)
    # LFS type3: 8 <= sizeof(__int64), _fseeki64, _ftelli64
    if(NOT LFS_OFF_T)
      check_type_size("__int64" SIZEOF___INT64)
      if(SIZEOF___INT64 GREATER 7)
        check_symbol_exists("_fseeki64" "stdio.h" HAVE__FSEEKI64)
        check_symbol_exists("_ftelli64" "stdio.h" HAVE__FTELLI64)
        if(HAVE__FSEEKI64 AND HAVE__FTELLI64)
          set(LFS_OFF_T "__int64")
          set(LFS_FOPEN "fopen")
          set(LFS_FSEEK "_fseeki64")
          set(LFS_FTELL "_ftelli64")
          set(LFS_PRID  "\"I64d\"")
        endif(HAVE__FSEEKI64 AND HAVE__FTELLI64)
      endif(SIZEOF___INT64 GREATER 7)
    endif(NOT LFS_OFF_T)
    set(CMAKE_REQUIRED_DEFINITIONS "${SAFE_CMAKE_REQUIRED_DEFINITIONS}")
  endif(${_isenable})
  # Fallback: no large-file variant found, or probing disabled.
  if(NOT LFS_OFF_T)
    ## not found
    set(LFS_OFF_T "long")
    set(LFS_FOPEN "fopen")
    set(LFS_FSEEK "fseek")
    set(LFS_FTELL "ftell")
    set(LFS_PRID  "\"ld\"")
  endif(NOT LFS_OFF_T)
 endmacro(check_lfs)
--- a/Tools/unix/lzsa/src/libdivsufsort/CMakeModules/ProjectCPack.cmake
+++ b/Tools/unix/lzsa/src/libdivsufsort/CMakeModules/ProjectCPack.cmake
@ -0,0 +1,38 @@
 # Fills in the CPack package description from the PROJECT_* variables and
 # then includes the CPack module.
 # NOTE(review): the PROJECT_* variables are presumably set by the including
 # CMakeLists.txt / VERSION.cmake before this file is included - confirm.
 # If the cmake version includes cpack, use it
 IF(EXISTS "${CMAKE_ROOT}/Modules/CPack.cmake")
  SET(CPACK_PACKAGE_DESCRIPTION_SUMMARY "${PROJECT_DESCRIPTION}")
  SET(CPACK_PACKAGE_VENDOR "${PROJECT_VENDOR}")
  SET(CPACK_PACKAGE_DESCRIPTION_FILE "${CMAKE_CURRENT_SOURCE_DIR}/README.md")
  SET(CPACK_RESOURCE_FILE_LICENSE "${CMAKE_CURRENT_SOURCE_DIR}/LICENSE")
  SET(CPACK_PACKAGE_VERSION_MAJOR "${PROJECT_VERSION_MAJOR}")
  SET(CPACK_PACKAGE_VERSION_MINOR "${PROJECT_VERSION_MINOR}")
  SET(CPACK_PACKAGE_VERSION_PATCH "${PROJECT_VERSION_PATCH}")
 #  SET(CPACK_PACKAGE_INSTALL_DIRECTORY "${PROJECT_NAME} ${PROJECT_VERSION}")
  SET(CPACK_SOURCE_PACKAGE_FILE_NAME "${PROJECT_NAME}-${PROJECT_VERSION_FULL}")
  # Default system name is <system>-<processor>, unless the caller set one.
  IF(NOT DEFINED CPACK_SYSTEM_NAME)
    SET(CPACK_SYSTEM_NAME "${CMAKE_SYSTEM_NAME}-${CMAKE_SYSTEM_PROCESSOR}")
  ENDIF(NOT DEFINED CPACK_SYSTEM_NAME)
  # On Windows the name is replaced by win64-/win32-<processor>, chosen by
  # CMAKE_CL_64.
  IF(${CPACK_SYSTEM_NAME} MATCHES Windows)
    IF(CMAKE_CL_64)
      SET(CPACK_SYSTEM_NAME win64-${CMAKE_SYSTEM_PROCESSOR})
    ELSE(CMAKE_CL_64)
      SET(CPACK_SYSTEM_NAME win32-${CMAKE_SYSTEM_PROCESSOR})
    ENDIF(CMAKE_CL_64)
  ENDIF(${CPACK_SYSTEM_NAME} MATCHES Windows)
  # Binary package name: <source package name>-<system name>.
  IF(NOT DEFINED CPACK_PACKAGE_FILE_NAME)
    SET(CPACK_PACKAGE_FILE_NAME "${CPACK_SOURCE_PACKAGE_FILE_NAME}-${CPACK_SYSTEM_NAME}")
  ENDIF(NOT DEFINED CPACK_PACKAGE_FILE_NAME)
  SET(CPACK_PACKAGE_CONTACT "${PROJECT_CONTACT}")
  IF(UNIX)
    SET(CPACK_STRIP_FILES "")
    SET(CPACK_SOURCE_STRIP_FILES "")
 #    SET(CPACK_PACKAGE_EXECUTABLES "ccmake" "CMake")
  ENDIF(UNIX)
  # Patterns excluded from source packages: VCS metadata, build trees and
  # editor backup files.
  SET(CPACK_SOURCE_IGNORE_FILES "/CVS/" "/build/" "/\\\\.build/" "/\\\\.svn/" "~$")
  # include CPack model once all variables are set
  INCLUDE(CPack)
 ENDIF(EXISTS "${CMAKE_ROOT}/Modules/CPack.cmake")
--- a/Tools/unix/lzsa/src/libdivsufsort/CMakeModules/cmake_uninstall.cmake.in
+++ b/Tools/unix/lzsa/src/libdivsufsort/CMakeModules/cmake_uninstall.cmake.in
@ -0,0 +1,36 @@
 # Uninstall script template. Removes every file listed in the install
 # manifest of the build tree, honouring the DESTDIR environment variable as
 # a path prefix, and finally removes the manifest itself.
 # Stop immediately when there is no manifest (nothing was installed).
 IF(NOT EXISTS "@CMAKE_CURRENT_BINARY_DIR@/install_manifest.txt")
  MESSAGE(FATAL_ERROR "Cannot find install manifest: \"@CMAKE_CURRENT_BINARY_DIR@/install_manifest.txt\"")
 ENDIF(NOT EXISTS "@CMAKE_CURRENT_BINARY_DIR@/install_manifest.txt")
 # Turn the manifest (one path per line) into a list.
 FILE(READ "@CMAKE_CURRENT_BINARY_DIR@/install_manifest.txt" files)
 STRING(REGEX REPLACE "\n" ";" files "${files}")
 # Pass 1: record for each entry, by index, whether the file still exists
 # (UNINSTALL_CHECK_<index> is 1 or 0).
 SET(NUM 0)
 FOREACH(file ${files})
  IF(EXISTS "$ENV{DESTDIR}${file}")
    MESSAGE(STATUS "Looking for \"$ENV{DESTDIR}${file}\" - found")
    SET(UNINSTALL_CHECK_${NUM} 1)
  ELSE(EXISTS "$ENV{DESTDIR}${file}")
    MESSAGE(STATUS "Looking for \"$ENV{DESTDIR}${file}\" - not found")
    SET(UNINSTALL_CHECK_${NUM} 0)
  ENDIF(EXISTS "$ENV{DESTDIR}${file}")
  MATH(EXPR NUM "1 + ${NUM}")
 ENDFOREACH(file)
 # Pass 2: remove the files found in pass 1; the first failing removal
 # aborts the script.
 SET(NUM 0)
 FOREACH(file ${files})
  IF(${UNINSTALL_CHECK_${NUM}})
    MESSAGE(STATUS "Uninstalling \"$ENV{DESTDIR}${file}\"")
    EXEC_PROGRAM(
      "@CMAKE_COMMAND@" ARGS "-E remove \"$ENV{DESTDIR}${file}\""
      OUTPUT_VARIABLE rm_out
      RETURN_VALUE rm_retval
      )
    IF(NOT "${rm_retval}" STREQUAL 0)
      MESSAGE(FATAL_ERROR "Problem when removing \"$ENV{DESTDIR}${file}\"")
    ENDIF(NOT "${rm_retval}" STREQUAL 0)
  ENDIF(${UNINSTALL_CHECK_${NUM}})
  MATH(EXPR NUM "1 + ${NUM}")
 ENDFOREACH(file)
 FILE(REMOVE "@CMAKE_CURRENT_BINARY_DIR@/install_manifest.txt")
--- a/Tools/unix/lzsa/src/libdivsufsort/LICENSE
+++ b/Tools/unix/lzsa/src/libdivsufsort/LICENSE
@ -0,0 +1,21 @@
 The MIT License (MIT)
 Copyright (c) 2003 Yuta Mori All rights reserved.
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
 in the Software without restriction, including without limitation the rights
 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 copies of the Software, and to permit persons to whom the Software is
 furnished to do so, subject to the following conditions:
 The above copyright notice and this permission notice shall be included in all
 copies or substantial portions of the Software.
 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 SOFTWARE.
--- a/Tools/unix/lzsa/src/libdivsufsort/README.md
+++ b/Tools/unix/lzsa/src/libdivsufsort/README.md
@ -0,0 +1,140 @@
 # libdivsufsort
 libdivsufsort is a software library that implements a lightweight suffix array construction algorithm.
 ## News
 * 2015-03-21: The project has moved from [Google Code](http://code.google.com/p/libdivsufsort/) to [GitHub](https://github.com/y-256/libdivsufsort)
 ## Introduction
 This library provides a simple and efficient C API to construct a suffix array and a Burrows-Wheeler transformed string from a given string over a constant-size alphabet.
 The algorithm runs in O(n log n) worst-case time using only 5n+O(1) bytes of memory space, where n is the length of
 the string.
 ## Build requirements
 * An ANSI C Compiler (e.g. GNU GCC)
 * [CMake](http://www.cmake.org/ "CMake") version 2.4.2 or newer
 * CMake-supported build tool
 ## Building on GNU/Linux
 1. Get the source code from GitHub. You can either
    * use git to clone the repository
    ```
    git clone https://github.com/y-256/libdivsufsort.git
    ```
    * or download a [zip file](../../archive/master.zip) directly
 2. Create a `build` directory in the package source directory.
 ```shell
 $ cd libdivsufsort
 $ mkdir build
 $ cd build
 ```
 3. Configure the package for your system.
 If you want to install to a different location, change the -DCMAKE_INSTALL_PREFIX option.
 ```shell
 $ cmake -DCMAKE_BUILD_TYPE="Release" \
 -DCMAKE_INSTALL_PREFIX="/usr/local" ..
 ```
 4. Compile the package.
 ```shell
 $ make
 ```
 5. (Optional) Install the library and header files.
 ```shell
 $ sudo make install
 ```
 ## API
 ```c
 /* Data types */
 typedef int32_t saint_t;
 typedef int32_t saidx_t;
 typedef uint8_t sauchar_t;
 /*
 * Constructs the suffix array of a given string.
 * @param T[0..n-1] The input string.
 * @param SA[0..n-1] The output array of suffixes.
 * @param n The length of the given string.
 * @return 0 if no error occurred, -1 or -2 otherwise.
 */
 saint_t
 divsufsort(const sauchar_t *T, saidx_t *SA, saidx_t n);
 /*
 * Constructs the Burrows-Wheeler transformed string of a given string.
 * @param T[0..n-1] The input string.
 * @param U[0..n-1] The output string. (can be T)
 * @param A[0..n-1] The temporary array. (can be NULL)
 * @param n The length of the given string.
 * @return The primary index if no error occurred, -1 or -2 otherwise.
 */
 saidx_t
 divbwt(const sauchar_t *T, sauchar_t *U, saidx_t *A, saidx_t n);
 ```
 ## Example Usage
 ```c
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include <divsufsort.h>
 int main() {
    // input data
    char *Text = "abracadabra";
    int n = strlen(Text);
    int i, j;
    // allocate
    int *SA = (int *)malloc(n * sizeof(int));
    // sort
    divsufsort((unsigned char *)Text, SA, n);
    // output
    for(i = 0; i < n; ++i) {
        printf("SA[%2d] = %2d: ", i, SA[i]);
        for(j = SA[i]; j < n; ++j) {
            printf("%c", Text[j]);
        }
        printf("$\n");
    }
    // deallocate
    free(SA);
    return 0;
 }
 ```
 See the [examples](examples) directory for a few other examples.
 ## Benchmarks
 See [Benchmarks](https://github.com/y-256/libdivsufsort/blob/wiki/SACA_Benchmarks.md) page for details.
 ## License
 libdivsufsort is released under the [MIT license](LICENSE "MIT license").
 > The MIT License (MIT)
 >
 > Copyright (c) 2003 Yuta Mori All rights reserved.
 >
 > Permission is hereby granted, free of charge, to any person obtaining a copy
 > of this software and associated documentation files (the "Software"), to deal
 > in the Software without restriction, including without limitation the rights
 > to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 > copies of the Software, and to permit persons to whom the Software is
 > furnished to do so, subject to the following conditions:
 >
 > The above copyright notice and this permission notice shall be included in all
 > copies or substantial portions of the Software.
 >
 > THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 > IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 > FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 > AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 > LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 > OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 > SOFTWARE.
 ## Author
 * Yuta Mori
--- a/Tools/unix/lzsa/src/libdivsufsort/VERSION.cmake
+++ b/Tools/unix/lzsa/src/libdivsufsort/VERSION.cmake
@ -0,0 +1,23 @@
 # Project (package) version. PROJECT_VERSION is "<major>.<minor>" and
 # PROJECT_VERSION_FULL is "<major>.<minor>.<patch><extra>".
 set(PROJECT_VERSION_MAJOR "2")
 set(PROJECT_VERSION_MINOR "0")
 set(PROJECT_VERSION_PATCH "2")
 set(PROJECT_VERSION_EXTRA "-1")
 set(PROJECT_VERSION "${PROJECT_VERSION_MAJOR}.${PROJECT_VERSION_MINOR}")
 set(PROJECT_VERSION_FULL "${PROJECT_VERSION_MAJOR}.${PROJECT_VERSION_MINOR}.${PROJECT_VERSION_PATCH}${PROJECT_VERSION_EXTRA}")
 # Library version numbers, kept separately from the project version above.
 set(LIBRARY_VERSION "3.0.1")
 set(LIBRARY_SOVERSION "3")
 ## Git revision number ##
 # In a git checkout, PROJECT_VERSION_FULL is replaced by the output of
 # "git describe --tags HEAD" with a leading "v" and surrounding whitespace
 # removed. If git fails or prints nothing, the value set above is kept.
 if(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/.git")
  execute_process(COMMAND git describe --tags HEAD
    WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}"
    OUTPUT_VARIABLE GIT_DESCRIBE_TAGS ERROR_QUIET)
  if(GIT_DESCRIBE_TAGS)
    string(REGEX REPLACE "^v(.*)" "\\1" GIT_REVISION "${GIT_DESCRIBE_TAGS}")
    string(STRIP "${GIT_REVISION}" GIT_REVISION)
    if(GIT_REVISION)
      set(PROJECT_VERSION_FULL "${GIT_REVISION}")
    endif(GIT_REVISION)
  endif(GIT_DESCRIBE_TAGS)
 endif(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/.git")
--- a/Tools/unix/lzsa/src/libdivsufsort/examples/CMakeLists.txt
+++ b/Tools/unix/lzsa/src/libdivsufsort/examples/CMakeLists.txt
@ -0,0 +1,11 @@
 ## Add definitions ##
 # Large-file feature macros for the example programs.
 add_definitions(-D_LARGEFILE_SOURCE -D_LARGE_FILES -D_FILE_OFFSET_BITS=64)
 ## Targets ##
 # Headers are looked up in the sibling include directory of both the source
 # tree and the build tree; the library is looked up in the sibling lib
 # directory of the build tree.
 include_directories("${CMAKE_CURRENT_SOURCE_DIR}/../include"
                     "${CMAKE_CURRENT_BINARY_DIR}/../include")
 link_directories("${CMAKE_CURRENT_BINARY_DIR}/../lib")
 # One executable per example source file <name>.c, linked against divsufsort.
 foreach(src suftest mksary sasearch bwt unbwt)
  add_executable(${src} ${src}.c)
  target_link_libraries(${src} divsufsort)
 endforeach(src)
--- a/Tools/unix/lzsa/src/libdivsufsort/examples/bwt.c
+++ b/Tools/unix/lzsa/src/libdivsufsort/examples/bwt.c
@ -0,0 +1,220 @@
 /*
 * bwt.c for libdivsufsort
 * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person
 * obtaining a copy of this software and associated documentation
 * files (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use,
 * copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following
 * conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */
 #if HAVE_CONFIG_H
 # include "config.h"
 #endif
 #include <stdio.h>
 #if HAVE_STRING_H
 # include <string.h>
 #endif
 #if HAVE_STDLIB_H
 # include <stdlib.h>
 #endif
 #if HAVE_MEMORY_H
 # include <memory.h>
 #endif
 #if HAVE_STDDEF_H
 # include <stddef.h>
 #endif
 #if HAVE_STRINGS_H
 # include <strings.h>
 #endif
 #if HAVE_SYS_TYPES_H
 # include <sys/types.h>
 #endif
 #if HAVE_IO_H && HAVE_FCNTL_H
 # include <io.h>
 # include <fcntl.h>
 #endif
 #include <time.h>
 #include <divsufsort.h>
 #include "lfs.h"
 /*
  * Writes n to fp as four bytes, least significant byte first.
  * Returns the value reported by fwrite, i.e. the number of bytes written
  * (4 on success).
  */
 static
 size_t
 write_int(FILE *fp, saidx_t n) {
  unsigned char bytes[4];
  int k;
  /* Byte k carries bits 8k .. 8k+7 of n. */
  for(k = 0; k < 4; ++k) {
    bytes[k] = (unsigned char)((n >> (8 * k)) & 0xff);
  }
  return fwrite(bytes, 1, sizeof(bytes), fp);
 }
 /*
  * Prints the program banner (including the library version) and the usage
  * text to stderr, then terminates the process with the given exit status.
  * Does not return.
  */
 static
 void
 print_help(const char *progname, int status) {
  const char *version = divsufsort_version();
  fprintf(stderr,
          "bwt, a burrows-wheeler transform program, version %s.\n"
          "usage: %s [-b num] INFILE OUTFILE\n"
          "  -b num    set block size to num MiB [1..512] (default: 32)\n\n",
          version, progname);
  exit(status);
 }
 int
 main(int argc, const char *argv[]) {
  FILE *fp, *ofp;
  const char *fname, *ofname;
  sauchar_t *T;
  saidx_t *SA;
  LFS_OFF_T n;
  size_t m;
  saidx_t pidx;
  clock_t start,finish;
  saint_t i, blocksize = 32, needclose = 3;
  /* Check arguments. */
  if((argc == 1) ||
     (strcmp(argv[1], "-h") == 0) ||
     (strcmp(argv[1], "--help") == 0)) { print_help(argv[0], EXIT_SUCCESS); }
  if((argc != 3) && (argc != 5)) { print_help(argv[0], EXIT_FAILURE); }
  i = 1;
  if(argc == 5) {
    if(strcmp(argv[i], "-b") != 0) { print_help(argv[0], EXIT_FAILURE); }
    blocksize = atoi(argv[i + 1]);
    if(blocksize < 0) { blocksize = 1; }
    else if(512 < blocksize) { blocksize = 512; }
    i += 2;
  }
  blocksize <<= 20;
  /* Open a file for reading. */
  if(strcmp(argv[i], "-") != 0) {
 #if HAVE_FOPEN_S
    if(fopen_s(&fp, fname = argv[i], "rb") != 0) {
 #else
    if((fp = LFS_FOPEN(fname = argv[i], "rb")) == NULL) {
 #endif
      fprintf(stderr, "%s: Cannot open file `%s': ", argv[0], fname);
      perror(NULL);
      exit(EXIT_FAILURE);
    }
  } else {
 #if HAVE__SETMODE && HAVE__FILENO
    if(_setmode(_fileno(stdin), _O_BINARY) == -1) {
      fprintf(stderr, "%s: Cannot set mode: ", argv[0]);
      perror(NULL);
      exit(EXIT_FAILURE);
    }
 #endif
    fp = stdin;
    fname = "stdin";
    needclose ^= 1;
  }
  i += 1;
  /* Open a file for writing. */
  if(strcmp(argv[i], "-") != 0) {
 #if HAVE_FOPEN_S
    if(fopen_s(&ofp, ofname = argv[i], "wb") != 0) {
 #else
    if((ofp = LFS_FOPEN(ofname = argv[i], "wb")) == NULL) {
 #endif
      fprintf(stderr, "%s: Cannot open file `%s': ", argv[0], ofname);
      perror(NULL);
      exit(EXIT_FAILURE);
    }
  } else {
 #if HAVE__SETMODE && HAVE__FILENO
    if(_setmode(_fileno(stdout), _O_BINARY) == -1) {
      fprintf(stderr, "%s: Cannot set mode: ", argv[0]);
      perror(NULL);
      exit(EXIT_FAILURE);
    }
 #endif
    ofp = stdout;
    ofname = "stdout";
    needclose ^= 2;
  }
  /* Get the file size. */
  if(LFS_FSEEK(fp, 0, SEEK_END) == 0) {
    n = LFS_FTELL(fp);
    rewind(fp);
    if(n < 0) {
      fprintf(stderr, "%s: Cannot ftell `%s': ", argv[0], fname);
      perror(NULL);
      exit(EXIT_FAILURE);
    }
    if(0x20000000L < n) { n = 0x20000000L; }
    if((blocksize == 0) || (n < blocksize)) { blocksize = (saidx_t)n; }
  } else if(blocksize == 0) { blocksize = 32 << 20; }
  /* Allocate 5blocksize bytes of memory. */
  T = (sauchar_t *)malloc(blocksize * sizeof(sauchar_t));
  SA = (saidx_t *)malloc(blocksize * sizeof(saidx_t));
  if((T == NULL) || (SA == NULL)) {
    fprintf(stderr, "%s: Cannot allocate memory.\n", argv[0]);
    exit(EXIT_FAILURE);
  }
  /* Write the blocksize. */
  if(write_int(ofp, blocksize) != 4) {
    fprintf(stderr, "%s: Cannot write to `%s': ", argv[0], ofname);
    perror(NULL);
    exit(EXIT_FAILURE);
  }
  fprintf(stderr, "  BWT (blocksize %" PRIdSAINT_T ") ... ", blocksize);
  start = clock();
  for(n = 0; 0 < (m = fread(T, sizeof(sauchar_t), blocksize, fp)); n += m) {
    /* Burrows-Wheeler Transform. */
    pidx = divbwt(T, T, SA, m);
    if(pidx < 0) {
      fprintf(stderr, "%s (bw_transform): %s.\n",
        argv[0],
        (pidx == -1) ? "Invalid arguments" : "Cannot allocate memory");
      exit(EXIT_FAILURE);
    }
    /* Write the bwted data. */
    if((write_int(ofp, pidx) != 4) ||
       (fwrite(T, sizeof(sauchar_t), m, ofp) != m)) {
      fprintf(stderr, "%s: Cannot write to `%s': ", argv[0], ofname);
      perror(NULL);
      exit(EXIT_FAILURE);
    }
  }
  if(ferror(fp)) {
    fprintf(stderr, "%s: Cannot read from `%s': ", argv[0], fname);
    perror(NULL);
    exit(EXIT_FAILURE);
  }
  finish = clock();
  fprintf(stderr, "%" PRIdOFF_T " bytes: %.4f sec\n",
    n, (double)(finish - start) / (double)CLOCKS_PER_SEC);
  /* Close files */
  if(needclose & 1) { fclose(fp); }
  if(needclose & 2) { fclose(ofp); }
  /* Deallocate memory. */
  free(SA);
  free(T);
  return 0;
 }
--- a/Tools/unix/lzsa/src/libdivsufsort/examples/mksary.c
+++ b/Tools/unix/lzsa/src/libdivsufsort/examples/mksary.c
@ -0,0 +1,193 @@
 /*
 * mksary.c for libdivsufsort
 * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person
 * obtaining a copy of this software and associated documentation
 * files (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use,
 * copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following
 * conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */
 #if HAVE_CONFIG_H
 # include "config.h"
 #endif
 #include <stdio.h>
 #if HAVE_STRING_H
 # include <string.h>
 #endif
 #if HAVE_STDLIB_H
 # include <stdlib.h>
 #endif
 #if HAVE_MEMORY_H
 # include <memory.h>
 #endif
 #if HAVE_STDDEF_H
 # include <stddef.h>
 #endif
 #if HAVE_STRINGS_H
 # include <strings.h>
 #endif
 #if HAVE_SYS_TYPES_H
 # include <sys/types.h>
 #endif
 #if HAVE_IO_H && HAVE_FCNTL_H
 # include <io.h>
 # include <fcntl.h>
 #endif
 #include <time.h>
 #include <divsufsort.h>
 #include "lfs.h"
 /*
  * Prints the program banner (including the library version) and the usage
  * line to stderr, then terminates the process with the given exit status.
  * Does not return.
  */
 static
 void
 print_help(const char *progname, int status) {
  const char *version = divsufsort_version();
  fprintf(stderr,
          "mksary, a simple suffix array builder, version %s.\n"
          "usage: %s INFILE OUTFILE\n\n",
          version, progname);
  exit(status);
 }
 /*
  * mksary INFILE OUTFILE
  *
  * Reads INFILE completely, builds its suffix array with divsufsort() and
  * writes the array to OUTFILE as raw saidx_t values. "-" selects stdin /
  * stdout respectively. The input must be seekable and smaller than
  * 0x7fffffff bytes.
  *
  * Returns 0 on success. Every error is reported on stderr and terminates
  * the process with EXIT_FAILURE, including a failure to flush or close the
  * output.
  */
 int
 main(int argc, const char *argv[]) {
  FILE *fp, *ofp;
  const char *fname, *ofname;
  sauchar_t *T;
  saidx_t *SA;
  LFS_OFF_T n;
  clock_t start, finish;
  /* bit 0 = close the input, bit 1 = close the output; a bit is cleared
     when the corresponding standard stream is used instead of a file. */
  saint_t needclose = 3;
  /* Check arguments. */
  if((argc == 1) ||
     (strcmp(argv[1], "-h") == 0) ||
     (strcmp(argv[1], "--help") == 0)) { print_help(argv[0], EXIT_SUCCESS); }
  if(argc != 3) { print_help(argv[0], EXIT_FAILURE); }
  /* Open a file for reading. */
  if(strcmp(argv[1], "-") != 0) {
 #if HAVE_FOPEN_S
    if(fopen_s(&fp, fname = argv[1], "rb") != 0) {
 #else
    if((fp = LFS_FOPEN(fname = argv[1], "rb")) == NULL) {
 #endif
      fprintf(stderr, "%s: Cannot open file `%s': ", argv[0], fname);
      perror(NULL);
      exit(EXIT_FAILURE);
    }
  } else {
 #if HAVE__SETMODE && HAVE__FILENO
    if(_setmode(_fileno(stdin), _O_BINARY) == -1) {
      fprintf(stderr, "%s: Cannot set mode: ", argv[0]);
      perror(NULL);
      exit(EXIT_FAILURE);
    }
 #endif
    fp = stdin;
    fname = "stdin";
    needclose ^= 1;
  }
  /* Open a file for writing. */
  if(strcmp(argv[2], "-") != 0) {
 #if HAVE_FOPEN_S
    if(fopen_s(&ofp, ofname = argv[2], "wb") != 0) {
 #else
    if((ofp = LFS_FOPEN(ofname = argv[2], "wb")) == NULL) {
 #endif
      fprintf(stderr, "%s: Cannot open file `%s': ", argv[0], ofname);
      perror(NULL);
      exit(EXIT_FAILURE);
    }
  } else {
 #if HAVE__SETMODE && HAVE__FILENO
    if(_setmode(_fileno(stdout), _O_BINARY) == -1) {
      fprintf(stderr, "%s: Cannot set mode: ", argv[0]);
      perror(NULL);
      exit(EXIT_FAILURE);
    }
 #endif
    ofp = stdout;
    ofname = "stdout";
    needclose ^= 2;
  }
  /* Get the file size. */
  if(LFS_FSEEK(fp, 0, SEEK_END) == 0) {
    n = LFS_FTELL(fp);
    rewind(fp);
    if(n < 0) {
      fprintf(stderr, "%s: Cannot ftell `%s': ", argv[0], fname);
      perror(NULL);
      exit(EXIT_FAILURE);
    }
    /* n is narrowed to saidx_t below, so larger inputs are rejected. */
    if(0x7fffffff <= n) {
      fprintf(stderr, "%s: Input file `%s' is too big.\n", argv[0], fname);
      exit(EXIT_FAILURE);
    }
  } else {
    fprintf(stderr, "%s: Cannot fseek `%s': ", argv[0], fname);
    perror(NULL);
    exit(EXIT_FAILURE);
  }
  /* Allocate 5n bytes of memory. */
  T = (sauchar_t *)malloc((size_t)n * sizeof(sauchar_t));
  SA = (saidx_t *)malloc((size_t)n * sizeof(saidx_t));
  if((T == NULL) || (SA == NULL)) {
    fprintf(stderr, "%s: Cannot allocate memory.\n", argv[0]);
    exit(EXIT_FAILURE);
  }
  /* Read n bytes of data. */
  if(fread(T, sizeof(sauchar_t), (size_t)n, fp) != (size_t)n) {
    fprintf(stderr, "%s: %s `%s': ",
      argv[0],
      (ferror(fp) || !feof(fp)) ? "Cannot read from" : "Unexpected EOF in",
      fname);
    perror(NULL);
    exit(EXIT_FAILURE);
  }
  if(needclose & 1) { fclose(fp); }
  /* Construct the suffix array. */
  fprintf(stderr, "%s: %" PRIdOFF_T " bytes ... ", fname, n);
  start = clock();
  if(divsufsort(T, SA, (saidx_t)n) != 0) {
    fprintf(stderr, "%s: Cannot allocate memory.\n", argv[0]);
    exit(EXIT_FAILURE);
  }
  finish = clock();
  fprintf(stderr, "%.4f sec\n", (double)(finish - start) / (double)CLOCKS_PER_SEC);
  /* Write the suffix array. */
  if(fwrite(SA, sizeof(saidx_t), (size_t)n, ofp) != (size_t)n) {
    fprintf(stderr, "%s: Cannot write to `%s': ", argv[0], ofname);
    perror(NULL);
    exit(EXIT_FAILURE);
  }
  /* fwrite may only have filled the stdio buffer. Flush explicitly (this
     also covers stdout) and check the close, so that a late write failure
     such as a full disk is reported instead of being silently ignored. */
  if((fflush(ofp) != 0) ||
     ((needclose & 2) && (fclose(ofp) != 0))) {
    fprintf(stderr, "%s: Cannot write to `%s': ", argv[0], ofname);
    perror(NULL);
    exit(EXIT_FAILURE);
  }
  /* Deallocate memory. */
  free(SA);
  free(T);
  return 0;
 }
--- a/Tools/unix/lzsa/src/libdivsufsort/examples/sasearch.c
+++ b/Tools/unix/lzsa/src/libdivsufsort/examples/sasearch.c
@ -0,0 +1,165 @@
 /*
 * sasearch.c for libdivsufsort
 * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person
 * obtaining a copy of this software and associated documentation
 * files (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use,
 * copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following
 * conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */
 #if HAVE_CONFIG_H
 # include "config.h"
 #endif
 #include <stdio.h>
 #if HAVE_STRING_H
 # include <string.h>
 #endif
 #if HAVE_STDLIB_H
 # include <stdlib.h>
 #endif
 #if HAVE_MEMORY_H
 # include <memory.h>
 #endif
 #if HAVE_STDDEF_H
 # include <stddef.h>
 #endif
 #if HAVE_STRINGS_H
 # include <strings.h>
 #endif
 #if HAVE_SYS_TYPES_H
 # include <sys/types.h>
 #endif
 #if HAVE_IO_H && HAVE_FCNTL_H
 # include <io.h>
 # include <fcntl.h>
 #endif
 #include <divsufsort.h>
 #include "lfs.h"
 /*
  * Prints the program banner (including the library version) and the usage
  * line to stderr, then terminates the process with the given exit status.
  * Does not return.
  */
 static
 void
 print_help(const char *progname, int status) {
  const char *version = divsufsort_version();
  fprintf(stderr,
          "sasearch, a simple SA-based full-text search tool, version %s\n"
          "usage: %s PATTERN FILE SAFILE\n\n",
          version, progname);
  exit(status);
 }
 /*
  * sasearch PATTERN FILE SAFILE
  *
  * Loads FILE and its suffix array SAFILE (raw saidx_t values, one per byte
  * of FILE, the layout mksary writes), looks PATTERN up with sa_search() and
  * prints the suffix array entry of every match to stdout, one per line.
  *
  * FILE must be smaller than 0x7fffffff bytes because its size is passed to
  * the library as a saidx_t.
  *
  * Returns 0 on success. Every error is reported on stderr and terminates
  * the process with EXIT_FAILURE.
  */
 int
 main(int argc, const char *argv[]) {
  FILE *fp;
  const char *P;
  sauchar_t *T;
  saidx_t *SA;
  LFS_OFF_T n;
  size_t Psize;
  saidx_t i, size, left;
  if((argc == 1) ||
     (strcmp(argv[1], "-h") == 0) ||
     (strcmp(argv[1], "--help") == 0)) { print_help(argv[0], EXIT_SUCCESS); }
  if(argc != 4) { print_help(argv[0], EXIT_FAILURE); }
  P = argv[1];
  Psize = strlen(P);
  /* Open a file for reading. */
 #if HAVE_FOPEN_S
  if(fopen_s(&fp, argv[2], "rb") != 0) {
 #else
  if((fp = LFS_FOPEN(argv[2], "rb")) == NULL) {
 #endif
    fprintf(stderr, "%s: Cannot open file `%s': ", argv[0], argv[2]);
    perror(NULL);
    exit(EXIT_FAILURE);
  }
  /* Get the file size. */
  if(LFS_FSEEK(fp, 0, SEEK_END) == 0) {
    n = LFS_FTELL(fp);
    rewind(fp);
    if(n < 0) {
      fprintf(stderr, "%s: Cannot ftell `%s': ", argv[0], argv[2]);
      perror(NULL);
      exit(EXIT_FAILURE);
    }
    /* n is narrowed to saidx_t for sa_search() below; reject sizes that
       would be truncated (same limit as mksary uses). */
    if(0x7fffffff <= n) {
      fprintf(stderr, "%s: Input file `%s' is too big.\n", argv[0], argv[2]);
      exit(EXIT_FAILURE);
    }
  } else {
    fprintf(stderr, "%s: Cannot fseek `%s': ", argv[0], argv[2]);
    perror(NULL);
    exit(EXIT_FAILURE);
  }
  /* Allocate 5n bytes of memory. */
  T = (sauchar_t *)malloc((size_t)n * sizeof(sauchar_t));
  SA = (saidx_t *)malloc((size_t)n * sizeof(saidx_t));
  if((T == NULL) || (SA == NULL)) {
    fprintf(stderr, "%s: Cannot allocate memory.\n", argv[0]);
    exit(EXIT_FAILURE);
  }
  /* Read n bytes of data. */
  if(fread(T, sizeof(sauchar_t), (size_t)n, fp) != (size_t)n) {
    fprintf(stderr, "%s: %s `%s': ",
      argv[0],
      (ferror(fp) || !feof(fp)) ? "Cannot read from" : "Unexpected EOF in",
      argv[2]);
    perror(NULL);
    exit(EXIT_FAILURE);
  }
  fclose(fp);
  /* Open the SA file for reading. */
 #if HAVE_FOPEN_S
  if(fopen_s(&fp, argv[3], "rb") != 0) {
 #else
  if((fp = LFS_FOPEN(argv[3], "rb")) == NULL) {
 #endif
    fprintf(stderr, "%s: Cannot open file `%s': ", argv[0], argv[3]);
    perror(NULL);
    exit(EXIT_FAILURE);
  }
  /* Read n * sizeof(saidx_t) bytes of data. */
  if(fread(SA, sizeof(saidx_t), (size_t)n, fp) != (size_t)n) {
    fprintf(stderr, "%s: %s `%s': ",
      argv[0],
      (ferror(fp) || !feof(fp)) ? "Cannot read from" : "Unexpected EOF in",
      argv[3]);
    perror(NULL);
    exit(EXIT_FAILURE);
  }
  fclose(fp);
  /* Search and print: the matches are SA[left .. left + size - 1]. */
  size = sa_search(T, (saidx_t)n,
                   (const sauchar_t *)P, (saidx_t)Psize,
                   SA, (saidx_t)n, &left);
  for(i = 0; i < size; ++i) {
    fprintf(stdout, "%" PRIdSAIDX_T "\n", SA[left + i]);
  }
  /* Deallocate memory. */
  free(SA);
  free(T);
  return 0;
 }
--- a/Tools/unix/lzsa/src/libdivsufsort/examples/suftest.c
+++ b/Tools/unix/lzsa/src/libdivsufsort/examples/suftest.c
@ -0,0 +1,164 @@
 /*
 * suftest.c for libdivsufsort
 * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person
 * obtaining a copy of this software and associated documentation
 * files (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use,
 * copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following
 * conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */
 #if HAVE_CONFIG_H
 # include "config.h"
 #endif
 #include <stdio.h>
 #if HAVE_STRING_H
 # include <string.h>
 #endif
 #if HAVE_STDLIB_H
 # include <stdlib.h>
 #endif
 #if HAVE_MEMORY_H
 # include <memory.h>
 #endif
 #if HAVE_STDDEF_H
 # include <stddef.h>
 #endif
 #if HAVE_STRINGS_H
 # include <strings.h>
 #endif
 #if HAVE_SYS_TYPES_H
 # include <sys/types.h>
 #endif
 #if HAVE_IO_H && HAVE_FCNTL_H
 # include <io.h>
 # include <fcntl.h>
 #endif
 #include <time.h>
 #include <divsufsort.h>
 #include "lfs.h"
 /* Prints the suftest banner (including the library version string) and the
    usage line to stderr, then terminates the process with `status`. */
 static void
 print_help(const char *progname, int status) {
  const char *version = divsufsort_version();
  fprintf(stderr, "suftest, a suffixsort tester, version %s.\n", version);
  fprintf(stderr, "usage: %s FILE\n\n", progname);
  exit(status);
 }
 /*
 * Entry point of suftest.
 *
 * Loads FILE (or stdin when FILE is "-") into memory, builds its suffix
 * array with divsufsort(), prints the elapsed time to stderr and verifies
 * the result with sufcheck().
 *
 * Returns 0 on success. Every failure prints a diagnostic to stderr and
 * terminates the process with EXIT_FAILURE.
 */
 int
 main(int argc, const char *argv[]) {
  FILE *fp;
  const char *fname;
  sauchar_t *T;
  saidx_t *SA;
  LFS_OFF_T n;
  size_t count;
  clock_t start, finish;
  saint_t needclose = 1;
  /* Check arguments. */
  if((argc == 1) ||
     (strcmp(argv[1], "-h") == 0) ||
     (strcmp(argv[1], "--help") == 0)) { print_help(argv[0], EXIT_SUCCESS); }
  if(argc != 2) { print_help(argv[0], EXIT_FAILURE); }
  /* Open a file for reading. */
  if(strcmp(argv[1], "-") != 0) {
 #if HAVE_FOPEN_S
    if(fopen_s(&fp, fname = argv[1], "rb") != 0) {
 #else
    if((fp = LFS_FOPEN(fname = argv[1], "rb")) == NULL) {
 #endif
      fprintf(stderr, "%s: Cannot open file `%s': ", argv[0], fname);
      perror(NULL);
      exit(EXIT_FAILURE);
    }
  } else {
 #if HAVE__SETMODE && HAVE__FILENO
    if(_setmode(_fileno(stdin), _O_BINARY) == -1) {
      fprintf(stderr, "%s: Cannot set mode: ", argv[0]);
      perror(NULL);
      exit(EXIT_FAILURE);
    }
 #endif
    fp = stdin;
    fname = "stdin";
    needclose = 0;
  }
  /* Get the file size. */
  if(LFS_FSEEK(fp, 0, SEEK_END) == 0) {
    n = LFS_FTELL(fp);
    if(n < 0) {
      /* Report before rewinding so errno still describes the failed ftell. */
      fprintf(stderr, "%s: Cannot ftell `%s': ", argv[0], fname);
      perror(NULL);
      exit(EXIT_FAILURE);
    }
    rewind(fp);
    /* The length is later narrowed to saidx_t, so cap it below 2^31 - 1. */
    if(0x7fffffff <= n) {
      fprintf(stderr, "%s: Input file `%s' is too big.\n", argv[0], fname);
      exit(EXIT_FAILURE);
    }
  } else {
    fprintf(stderr, "%s: Cannot fseek `%s': ", argv[0], fname);
    perror(NULL);
    exit(EXIT_FAILURE);
  }
  /* Allocate 5n bytes of memory. malloc(0) is allowed to return NULL, so
     request at least one element; otherwise an empty input could be
     misreported as an allocation failure. */
  count = (n > 0) ? (size_t)n : 1;
  T = (sauchar_t *)malloc(count * sizeof(sauchar_t));
  SA = (saidx_t *)malloc(count * sizeof(saidx_t));
  if((T == NULL) || (SA == NULL)) {
    fprintf(stderr, "%s: Cannot allocate memory.\n", argv[0]);
    exit(EXIT_FAILURE);
  }
  /* Read n bytes of data. */
  if(fread(T, sizeof(sauchar_t), (size_t)n, fp) != (size_t)n) {
    /* Use fname (not argv[1]) so stdin is reported as "stdin", matching
       every other diagnostic in this function. */
    fprintf(stderr, "%s: %s `%s': ",
      argv[0],
      (ferror(fp) || !feof(fp)) ? "Cannot read from" : "Unexpected EOF in",
      fname);
    perror(NULL);
    exit(EXIT_FAILURE);
  }
  /* stdin is left open; only a file opened above is closed here. */
  if(needclose & 1) { fclose(fp); }
  /* Construct the suffix array. */
  fprintf(stderr, "%s: %" PRIdOFF_T " bytes ... ", fname, n);
  start = clock();
  if(divsufsort(T, SA, (saidx_t)n) != 0) {
    fprintf(stderr, "%s: Cannot allocate memory.\n", argv[0]);
    exit(EXIT_FAILURE);
  }
  finish = clock();
  fprintf(stderr, "%.4f sec\n", (double)(finish - start) / (double)CLOCKS_PER_SEC);
  /* Check the suffix array. */
  if(sufcheck(T, SA, (saidx_t)n, 1) != 0) { exit(EXIT_FAILURE); }
  /* Deallocate memory. */
  free(SA);
  free(T);
  return 0;
 }
--- a/Tools/unix/lzsa/src/libdivsufsort/examples/unbwt.c
+++ b/Tools/unix/lzsa/src/libdivsufsort/examples/unbwt.c
@ -0,0 +1,207 @@
 /*
 * unbwt.c for libdivsufsort
 * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person
 * obtaining a copy of this software and associated documentation
 * files (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use,
 * copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following
 * conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */
 #if HAVE_CONFIG_H
 # include "config.h"
 #endif
 #include <stdio.h>
 #if HAVE_STRING_H
 # include <string.h>
 #endif
 #if HAVE_STDLIB_H
 # include <stdlib.h>
 #endif
 #if HAVE_MEMORY_H
 # include <memory.h>
 #endif
 #if HAVE_STDDEF_H
 # include <stddef.h>
 #endif
 #if HAVE_STRINGS_H
 # include <strings.h>
 #endif
 #if HAVE_SYS_TYPES_H
 # include <sys/types.h>
 #endif
 #if HAVE_IO_H && HAVE_FCNTL_H
 # include <io.h>
 # include <fcntl.h>
 #endif
 #include <time.h>
 #include <divsufsort.h>
 #include "lfs.h"
 /*
 * Reads four bytes from fp and decodes them as a 32-bit little-endian
 * integer (c[0] is the least significant byte).
 *
 * @param fp The stream to read from.
 * @param n Receives the decoded value; written only when all four bytes
 *          were read.
 * @return The number of bytes actually read (4 on success).
 */
 static
 size_t
 read_int(FILE *fp, saidx_t *n) {
  unsigned char c[4];
  size_t m = fread(c, sizeof(unsigned char), 4, fp);
  if(m == 4) {
    /* Assemble the value in unsigned arithmetic: shifting a promoted
       (signed) int left by 24 is undefined behavior whenever the top bit
       of c[3] is set. The final conversion keeps the sign of the 32-bit
       pattern, as the original expression did in practice. */
    unsigned int u = ((unsigned int)c[0] <<  0) | ((unsigned int)c[1] <<  8) |
                     ((unsigned int)c[2] << 16) | ((unsigned int)c[3] << 24);
    *n = (saidx_t)(int)u;
  }
  return m;
 }
 /* Prints the unbwt banner (including the library version string) and the
    usage line to stderr, then terminates the process with `status`. */
 static void
 print_help(const char *progname, int status) {
  const char *version = divsufsort_version();
  fprintf(stderr, "unbwt, an inverse burrows-wheeler transform program, version %s.\n", version);
  fprintf(stderr, "usage: %s INFILE OUTFILE\n\n", progname);
  exit(status);
 }
 int
 main(int argc, const char *argv[]) {
  FILE *fp, *ofp;
  const char *fname, *ofname;
  sauchar_t *T;
  saidx_t *A;
  LFS_OFF_T n;
  size_t m;
  saidx_t pidx;
  clock_t start, finish;
  saint_t err, blocksize, needclose = 3;
  /* Check arguments. */
  if((argc == 1) ||
     (strcmp(argv[1], "-h") == 0) ||
     (strcmp(argv[1], "--help") == 0)) { print_help(argv[0], EXIT_SUCCESS); }
  if(argc != 3) { print_help(argv[0], EXIT_FAILURE); }
  /* Open a file for reading. */
  if(strcmp(argv[1], "-") != 0) {
 #if HAVE_FOPEN_S
    if(fopen_s(&fp, fname = argv[1], "rb") != 0) {
 #else
    if((fp = LFS_FOPEN(fname = argv[1], "rb")) == NULL) {
 #endif
      fprintf(stderr, "%s: Cannot open file `%s': ", argv[0], fname);
      perror(NULL);
      exit(EXIT_FAILURE);
    }
  } else {
 #if HAVE__SETMODE && HAVE__FILENO
    if(_setmode(_fileno(stdin), _O_BINARY) == -1) {
      fprintf(stderr, "%s: Cannot set mode: ", argv[0]);
      perror(NULL);
      exit(EXIT_FAILURE);
    }
 #endif
    fp = stdin;
    fname = "stdin";
    needclose ^= 1;
  }
  /* Open a file for writing. */
  if(strcmp(argv[2], "-") != 0) {
 #if HAVE_FOPEN_S
    if(fopen_s(&ofp, ofname = argv[2], "wb") != 0) {
 #else
    if((ofp = LFS_FOPEN(ofname = argv[2], "wb")) == NULL) {
 #endif
      fprintf(stderr, "%s: Cannot open file `%s': ", argv[0], ofname);
      perror(NULL);
      exit(EXIT_FAILURE);
    }
  } else {
 #if HAVE__SETMODE && HAVE__FILENO
    if(_setmode(_fileno(stdout), _O_BINARY) == -1) {
      fprintf(stderr, "%s: Cannot set mode: ", argv[0]);
      perror(NULL);
      exit(EXIT_FAILURE);
    }
 #endif
    ofp = stdout;
    ofname = "stdout";
    needclose ^= 2;
  }
  /* Read the blocksize. */
  if(read_int(fp, &blocksize) != 4) {
    fprintf(stderr, "%s: Cannot read from `%s': ", argv[0], fname);
    perror(NULL);
    exit(EXIT_FAILURE);
  }
  /* Allocate 5blocksize bytes of memory. */
  T = (sauchar_t *)malloc(blocksize * sizeof(sauchar_t));
  A = (saidx_t *)malloc(blocksize * sizeof(saidx_t));
  if((T == NULL) || (A == NULL)) {
    fprintf(stderr, "%s: Cannot allocate memory.\n", argv[0]);
    exit(EXIT_FAILURE);
  }
  fprintf(stderr, "UnBWT (blocksize %" PRIdSAINT_T ") ... ", blocksize);
  start = clock();
  for(n = 0; (m = read_int(fp, &pidx)) != 0; n += m) {
    /* Read blocksize bytes of data. */
    if((m != 4) || ((m = fread(T, sizeof(sauchar_t), blocksize, fp)) == 0)) {
      fprintf(stderr, "%s: %s `%s': ",
        argv[0],
        (ferror(fp) || !feof(fp)) ? "Cannot read from" : "Unexpected EOF in",
        fname);
      perror(NULL);
      exit(EXIT_FAILURE);
    }
    /* Inverse Burrows-Wheeler Transform. */
    if((err = inverse_bw_transform(T, T, A, m, pidx)) != 0) {
      fprintf(stderr, "%s (reverseBWT): %s.\n",
        argv[0],
        (err == -1) ? "Invalid data" : "Cannot allocate memory");
      exit(EXIT_FAILURE);
    }
    /* Write m bytes of data. */
    if(fwrite(T, sizeof(sauchar_t), m, ofp) != m) {
      fprintf(stderr, "%s: Cannot write to `%s': ", argv[0], ofname);
      perror(NULL);
      exit(EXIT_FAILURE);
    }
  }
  if(ferror(fp)) {
    fprintf(stderr, "%s: Cannot read from `%s': ", argv[0], fname);
    perror(NULL);
    exit(EXIT_FAILURE);
  }
  finish = clock();
  fprintf(stderr, "%" PRIdOFF_T " bytes: %.4f sec\n",
    n, (double)(finish - start) / (double)CLOCKS_PER_SEC);
  /* Close files */
  if(needclose & 1) { fclose(fp); }
  if(needclose & 2) { fclose(ofp); }
  /* Deallocate memory. */
  free(A);
  free(T);
  return 0;
 }
--- a/Tools/unix/lzsa/src/libdivsufsort/include/CMakeLists.txt
+++ b/Tools/unix/lzsa/src/libdivsufsort/include/CMakeLists.txt
@ -0,0 +1,162 @@
 include(CheckIncludeFiles)
 include(CheckIncludeFile)
 include(CheckSymbolExists)
 include(CheckTypeSize)
 include(CheckFunctionKeywords)
 include(CheckLFS)
 ## Checks for header files ##
 # Each probe records its result in the named HAVE_* variable.
 check_include_file("inttypes.h" HAVE_INTTYPES_H)
 check_include_file("memory.h" HAVE_MEMORY_H)
 check_include_file("stddef.h" HAVE_STDDEF_H)
 check_include_file("stdint.h" HAVE_STDINT_H)
 check_include_file("stdlib.h" HAVE_STDLIB_H)
 check_include_file("string.h" HAVE_STRING_H)
 check_include_file("strings.h" HAVE_STRINGS_H)
 check_include_file("sys/types.h" HAVE_SYS_TYPES_H)
 # INCFILE is the #include line substituted into the generated divsufsort
 # headers: prefer <inttypes.h>, fall back to <stdint.h>, otherwise nothing.
 if(HAVE_INTTYPES_H)
  set(INCFILE "#include <inttypes.h>")
 elseif(HAVE_STDINT_H)
  set(INCFILE "#include <stdint.h>")
 else(HAVE_INTTYPES_H)
  set(INCFILE "")
 endif(HAVE_INTTYPES_H)
 ## create configuration files from .cmake file ##
 # Everything in this section is only needed by the example programs.
 if(BUILD_EXAMPLES)
  ## Checks for WinIO ##
  # Windows-only probes for binary-mode stdio helpers; the results feed the
  # "for WinIO" HAVE_* entries of config.h.cmake.
  if(WIN32)
    check_include_file("io.h" HAVE_IO_H)
    check_include_file("fcntl.h" HAVE_FCNTL_H)
    check_symbol_exists("_setmode" "io.h;fcntl.h" HAVE__SETMODE)
    # The un-prefixed setmode is only probed when _setmode is unavailable.
    if(NOT HAVE__SETMODE)
      check_symbol_exists("setmode" "io.h;fcntl.h" HAVE_SETMODE)
    endif(NOT HAVE__SETMODE)
    check_symbol_exists("_fileno" "stdio.h" HAVE__FILENO)
    check_symbol_exists("fopen_s" "stdio.h" HAVE_FOPEN_S)
    check_symbol_exists("_O_BINARY" "fcntl.h" HAVE__O_BINARY)
  endif(WIN32)
  ## Checks for large file support ##
  # check_lfs() comes from the CheckLFS module included at the top of this
  # file. NOTE(review): the variables it sets are consumed by lfs.h.cmake,
  # which is not visible here - confirm against that template.
  check_lfs(WITH_LFS)
  # @ONLY: substitute only @VAR@ references, leaving any ${...} text intact.
  configure_file("${CMAKE_CURRENT_SOURCE_DIR}/lfs.h.cmake" "${CMAKE_CURRENT_BINARY_DIR}/lfs.h" @ONLY)
 endif(BUILD_EXAMPLES)
 ## generate config.h ##
 # Probe each listed keyword; the HAVE_* results are read below and in the
 # DLL import/export section further down.
 check_function_keywords("inline;__inline;__inline__;__declspec(dllexport);__declspec(dllimport)")
 # INLINE becomes the first supported spelling, or empty when none is.
 if(HAVE_INLINE)
  set(INLINE "inline")
 elseif(HAVE___INLINE)
  set(INLINE "__inline")
 elseif(HAVE___INLINE__)
  set(INLINE "__inline__")
 else(HAVE_INLINE)
  set(INLINE "")
 endif(HAVE_INLINE)
 # No @ONLY here: config.h.cmake relies on both #cmakedefine and ${...}
 # substitution.
 configure_file("${CMAKE_CURRENT_SOURCE_DIR}/config.h.cmake" "${CMAKE_CURRENT_BINARY_DIR}/config.h")
 ## Checks for types ##
 # sauchar_t (8bit)
 # Prefer uint8_t; otherwise accept unsigned char only if it is one byte.
 check_type_size("uint8_t" UINT8_T)
 if(HAVE_UINT8_T)
  set(SAUCHAR_TYPE "uint8_t")
 else(HAVE_UINT8_T)
  check_type_size("unsigned char" SIZEOF_UNSIGNED_CHAR)
  if("${SIZEOF_UNSIGNED_CHAR}" STREQUAL "1")
    set(SAUCHAR_TYPE "unsigned char")
  else("${SIZEOF_UNSIGNED_CHAR}" STREQUAL "1")
    message(FATAL_ERROR "Cannot find unsigned 8-bit integer type")
  endif("${SIZEOF_UNSIGNED_CHAR}" STREQUAL "1")
 endif(HAVE_UINT8_T)
 # saint_t (32bit)
 # SAINT32_TYPE is the C type and SAINT32_PRId the matching printf format:
 # either the PRId32 macro name or a quoted conversion string.
 check_type_size("int32_t" INT32_T)
 if(HAVE_INT32_T)
  set(SAINT32_TYPE "int32_t")
  check_symbol_exists("PRId32" "inttypes.h" HAVE_PRID32)
  if(HAVE_PRID32)
    set(SAINT32_PRId "PRId32")
  else(HAVE_PRID32)
    set(SAINT32_PRId "\"d\"")
  endif(HAVE_PRID32)
 else(HAVE_INT32_T)
  # No int32_t: pick the first built-in type that is exactly 4 bytes wide,
  # trying int, long, short and __int32 in that order.
  check_type_size("int" SIZEOF_INT)
  check_type_size("long" SIZEOF_LONG)
  check_type_size("short" SIZEOF_SHORT)
  check_type_size("__int32" SIZEOF___INT32)
  if("${SIZEOF_INT}" STREQUAL "4")
    set(SAINT32_TYPE "int")
    set(SAINT32_PRId "\"d\"")
  elseif("${SIZEOF_LONG}" STREQUAL "4")
    set(SAINT32_TYPE "long")
    set(SAINT32_PRId "\"ld\"")
  elseif("${SIZEOF_SHORT}" STREQUAL "4")
    set(SAINT32_TYPE "short")
    set(SAINT32_PRId "\"d\"")
  elseif("${SIZEOF___INT32}" STREQUAL "4")
    set(SAINT32_TYPE "__int32")
    set(SAINT32_PRId "\"d\"")
  else("${SIZEOF_INT}" STREQUAL "4")
    message(FATAL_ERROR "Cannot find 32-bit integer type")
  endif("${SIZEOF_INT}" STREQUAL "4")
 endif(HAVE_INT32_T)
 # saint64_t (64bit)
 # Only probed when the 64-bit library variant is requested. Mirrors the
 # 32-bit selection above, producing SAINT64_TYPE and SAINT64_PRId.
 if(BUILD_DIVSUFSORT64)
  check_type_size("int64_t" INT64_T)
  if(HAVE_INT64_T)
    set(SAINT64_TYPE "int64_t")
    check_symbol_exists("PRId64" "inttypes.h" HAVE_PRID64)
    if(HAVE_PRID64)
      set(SAINT64_PRId "PRId64")
    else(HAVE_PRID64)
      set(SAINT64_PRId "\"lld\"")
    endif(HAVE_PRID64)
  else(HAVE_INT64_T)
    # No int64_t: pick the first built-in type that is exactly 8 bytes wide,
    # trying int, long, long long and __int64 in that order.
    check_type_size("int" SIZEOF_INT)
    check_type_size("long" SIZEOF_LONG)
    check_type_size("long long" SIZEOF_LONG_LONG)
    check_type_size("__int64" SIZEOF___INT64)
    if("${SIZEOF_INT}" STREQUAL "8")
      set(SAINT64_TYPE "int")
      set(SAINT64_PRId "\"d\"")
    elseif("${SIZEOF_LONG}" STREQUAL "8")
      set(SAINT64_TYPE "long")
      set(SAINT64_PRId "\"ld\"")
    elseif("${SIZEOF_LONG_LONG}" STREQUAL "8")
      set(SAINT64_TYPE "long long")
      set(SAINT64_PRId "\"lld\"")
    elseif("${SIZEOF___INT64}" STREQUAL "8")
      set(SAINT64_TYPE "__int64")
      set(SAINT64_PRId "\"I64d\"")
    else("${SIZEOF_INT}" STREQUAL "8")
      # Unlike the 32-bit case this is SEND_ERROR, not FATAL_ERROR: the
      # error is reported and the 64-bit variant is switched off so the
      # rest of this script skips it.
      message(SEND_ERROR "Cannot find 64-bit integer type")
      set(BUILD_DIVSUFSORT64 OFF)
    endif("${SIZEOF_INT}" STREQUAL "8")
  endif(HAVE_INT64_T)
 endif(BUILD_DIVSUFSORT64)
 ## generate divsufsort.h ##
 # Import/export decorations stay empty unless a shared library is built and
 # the corresponding __declspec keyword was detected.
 set(DIVSUFSORT_IMPORT "")
 set(DIVSUFSORT_EXPORT "")
 if(BUILD_SHARED_LIBS)
  if(HAVE___DECLSPEC_DLLIMPORT_)
    set(DIVSUFSORT_IMPORT "__declspec(dllimport)")
  endif(HAVE___DECLSPEC_DLLIMPORT_)
  if(HAVE___DECLSPEC_DLLEXPORT_)
    set(DIVSUFSORT_EXPORT "__declspec(dllexport)")
  endif(HAVE___DECLSPEC_DLLEXPORT_)
 endif(BUILD_SHARED_LIBS)
 # The same template is configured up to twice. First pass: W64BIT empty and
 # a 32-bit index type, producing divsufsort.h.
 set(W64BIT "")
 set(SAINDEX_TYPE "${SAINT32_TYPE}")
 set(SAINDEX_PRId "${SAINT32_PRId}")
 set(SAINT_PRId "${SAINT32_PRId}")
 configure_file("${CMAKE_CURRENT_SOURCE_DIR}/divsufsort.h.cmake"
               "${CMAKE_CURRENT_BINARY_DIR}/divsufsort.h" @ONLY)
 install(FILES "${CMAKE_CURRENT_BINARY_DIR}/divsufsort.h" DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
 # Second pass (optional): W64BIT is "64" and the index type is 64-bit,
 # producing divsufsort64.h. SAINT_PRId is left at its 32-bit value.
 if(BUILD_DIVSUFSORT64)
  set(W64BIT "64")
  set(SAINDEX_TYPE "${SAINT64_TYPE}")
  set(SAINDEX_PRId "${SAINT64_PRId}")
  configure_file("${CMAKE_CURRENT_SOURCE_DIR}/divsufsort.h.cmake"
                 "${CMAKE_CURRENT_BINARY_DIR}/divsufsort64.h" @ONLY)
  install(FILES "${CMAKE_CURRENT_BINARY_DIR}/divsufsort64.h" DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
 endif(BUILD_DIVSUFSORT64)
--- a/Tools/unix/lzsa/src/libdivsufsort/include/config.h.cmake
+++ b/Tools/unix/lzsa/src/libdivsufsort/include/config.h.cmake
@ -0,0 +1,81 @@
 /*
 * config.h for libdivsufsort
 * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person
 * obtaining a copy of this software and associated documentation
 * files (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use,
 * copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following
 * conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */
 #ifndef _CONFIG_H
 #define _CONFIG_H 1
 #ifdef __cplusplus
 extern "C" {
 #endif /* __cplusplus */
 /** Define to the version of this package. **/
 #cmakedefine PROJECT_VERSION_FULL "${PROJECT_VERSION_FULL}"
 /** Define to 1 if you have the header files. **/
 #cmakedefine HAVE_INTTYPES_H 1
 #cmakedefine HAVE_STDDEF_H 1
 #cmakedefine HAVE_STDINT_H 1
 #cmakedefine HAVE_STDLIB_H 1
 #cmakedefine HAVE_STRING_H 1
 #cmakedefine HAVE_STRINGS_H 1
 #cmakedefine HAVE_MEMORY_H 1
 #cmakedefine HAVE_SYS_TYPES_H 1
 /** for WinIO **/
 #cmakedefine HAVE_IO_H 1
 #cmakedefine HAVE_FCNTL_H 1
 #cmakedefine HAVE__SETMODE 1
 #cmakedefine HAVE_SETMODE 1
 #cmakedefine HAVE__FILENO 1
 #cmakedefine HAVE_FOPEN_S 1
 #cmakedefine HAVE__O_BINARY 1
 /* Fallbacks used only when _setmode was not detected: alias it to the
    un-prefixed setmode if that one exists, so callers can test
    HAVE__SETMODE alone. */
 #ifndef HAVE__SETMODE
 # if HAVE_SETMODE
 #  define _setmode setmode
 #  define HAVE__SETMODE 1
 # endif
 /* If the alias above took effect but _O_BINARY is missing, define it as 0.
    Note this check sits inside the #ifndef, so it is skipped when _setmode
    was detected directly. */
 # if HAVE__SETMODE && !HAVE__O_BINARY
 #  define _O_BINARY 0
 #  define HAVE__O_BINARY 1
 # endif
 #endif
 /** for inline **/
 #ifndef INLINE
 # define INLINE @INLINE@
 #endif
 /** for VC++ warning **/
 #ifdef _MSC_VER
 #pragma warning(disable: 4127)
 #endif
 #ifdef __cplusplus
 } /* extern "C" */
 #endif /* __cplusplus */
 #endif /* _CONFIG_H */
--- a/Tools/unix/lzsa/src/libdivsufsort/include/divsufsort.h
+++ b/Tools/unix/lzsa/src/libdivsufsort/include/divsufsort.h
@ -0,0 +1,189 @@
 /*
 * divsufsort.h for libdivsufsort
 * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person
 * obtaining a copy of this software and associated documentation
 * files (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use,
 * copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following
 * conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */
 #ifndef _DIVSUFSORT_H
 #define _DIVSUFSORT_H 1
 #ifdef __cplusplus
 extern "C" {
 #endif /* __cplusplus */
 #define DIVSUFSORT_API
 /*- Datatypes -*/
 #ifndef SAUCHAR_T
 #define SAUCHAR_T
 typedef unsigned char sauchar_t;
 #endif /* SAUCHAR_T */
 #ifndef SAINT_T
 #define SAINT_T
 typedef int saint_t;
 #endif /* SAINT_T */
 #ifndef SAIDX_T
 #define SAIDX_T
 typedef int saidx_t;
 #endif /* SAIDX_T */
 #ifndef PRIdSAIDX_T
 #define PRIdSAIDX_T "d"
 #endif
 /*- divsufsort context */
 /* State object passed to divsufsort_init(), divsufsort_build_array() and
    divsufsort_destroy().
    NOTE(review): bucket_A and bucket_B look like working tables owned by
    the context - confirm their size and lifetime in the implementation. */
 typedef struct _divsufsort_ctx_t {
   saidx_t *bucket_A;
   saidx_t *bucket_B;
 } divsufsort_ctx_t;
 /*- Prototypes -*/
 /**
 * Initialize suffix array context
 *
 * @param ctx suffix array context to initialize
 *
 * @return 0 for success, or non-zero in case of an error
 */
 int divsufsort_init(divsufsort_ctx_t *ctx);
 /**
 * Destroy suffix array context
 *
 * @param ctx suffix array context to destroy
 */
 void divsufsort_destroy(divsufsort_ctx_t *ctx);
 /**
 * Constructs the suffix array of a given string.
 * @param ctx suffix array context (presumably one prepared with
 *            divsufsort_init() - confirm against the implementation)
 * @param T[0..n-1] The input string.
 * @param SA[0..n-1] The output array of suffixes.
 * @param n The length of the given string.
 * @return 0 if no error occurred, -1 or -2 otherwise.
 */
 DIVSUFSORT_API
 saint_t divsufsort_build_array(divsufsort_ctx_t *ctx, const sauchar_t *T, saidx_t *SA, saidx_t n);
 #if 0
 /**
 * Constructs the burrows-wheeler transformed string of a given string.
 * @param T[0..n-1] The input string.
 * @param U[0..n-1] The output string. (can be T)
 * @param A[0..n-1] The temporary array. (can be NULL)
 * @param n The length of the given string.
 * @return The primary index if no error occurred, -1 or -2 otherwise.
 */
 DIVSUFSORT_API
 saidx_t
 divbwt(const sauchar_t *T, sauchar_t *U, saidx_t *A, saidx_t n);
 /**
 * Returns the version of the divsufsort library.
 * @return The version number string.
 */
 DIVSUFSORT_API
 const char *
 divsufsort_version(void);
 /**
 * Constructs the burrows-wheeler transformed string of a given string and suffix array.
 * @param T[0..n-1] The input string.
 * @param U[0..n-1] The output string. (can be T)
 * @param SA[0..n-1] The suffix array. (can be NULL)
 * @param n The length of the given string.
 * @param idx The output primary index.
 * @return 0 if no error occurred, -1 or -2 otherwise.
 */
 DIVSUFSORT_API
 saint_t
 bw_transform(const sauchar_t *T, sauchar_t *U,
             saidx_t *SA /* can NULL */,
             saidx_t n, saidx_t *idx);
 /**
 * Inverse BW-transforms a given BWTed string.
 * @param T[0..n-1] The input string.
 * @param U[0..n-1] The output string. (can be T)
 * @param A[0..n-1] The temporary array. (can be NULL)
 * @param n The length of the given string.
 * @param idx The primary index.
 * @return 0 if no error occurred, -1 or -2 otherwise.
 */
 DIVSUFSORT_API
 saint_t
 inverse_bw_transform(const sauchar_t *T, sauchar_t *U,
                     saidx_t *A /* can NULL */,
                     saidx_t n, saidx_t idx);
 /**
 * Checks the correctness of a given suffix array.
 * @param T[0..n-1] The input string.
 * @param SA[0..n-1] The input suffix array.
 * @param n The length of the given string.
 * @param verbose The verbose mode.
 * @return 0 if no error occurred.
 */
 DIVSUFSORT_API
 saint_t
 sufcheck(const sauchar_t *T, const saidx_t *SA, saidx_t n, saint_t verbose);
 /**
 * Search for the pattern P in the string T.
 * @param T[0..Tsize-1] The input string.
 * @param Tsize The length of the given string.
 * @param P[0..Psize-1] The input pattern string.
 * @param Psize The length of the given pattern string.
 * @param SA[0..SAsize-1] The input suffix array.
 * @param SAsize The length of the given suffix array.
 * @param left The output index (position in SA of the first match).
 * @return The count of matches if no error occurred, -1 otherwise.
 */
 DIVSUFSORT_API
 saidx_t
 sa_search(const sauchar_t *T, saidx_t Tsize,
          const sauchar_t *P, saidx_t Psize,
          const saidx_t *SA, saidx_t SAsize,
          saidx_t *left);
 /**
 * Search for the character c in the string T.
 * @param T[0..Tsize-1] The input string.
 * @param Tsize The length of the given string.
 * @param SA[0..SAsize-1] The input suffix array.
 * @param SAsize The length of the given suffix array.
 * @param c The input character.
 * @param left The output index.
 * @return The count of matches if no error occurred, -1 otherwise.
 */
 DIVSUFSORT_API
 saidx_t
 sa_simplesearch(const sauchar_t *T, saidx_t Tsize,
                const saidx_t *SA, saidx_t SAsize,
                saint_t c, saidx_t *left);
 #endif
 #ifdef __cplusplus
 } /* extern "C" */
 #endif /* __cplusplus */
 #endif /* _DIVSUFSORT_H */
--- a/Tools/unix/lzsa/src/libdivsufsort/include/divsufsort.h.cmake
+++ b/Tools/unix/lzsa/src/libdivsufsort/include/divsufsort.h.cmake
@ -0,0 +1,180 @@
 /*
 * divsufsort@W64BIT@.h for libdivsufsort@W64BIT@
 * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person
 * obtaining a copy of this software and associated documentation
 * files (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use,
 * copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following
 * conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */
 #ifndef _DIVSUFSORT@W64BIT@_H
 #define _DIVSUFSORT@W64BIT@_H 1
 #ifdef __cplusplus
 extern "C" {
 #endif /* __cplusplus */
@INCFILE@
 #ifndef DIVSUFSORT_API
 # ifdef DIVSUFSORT_BUILD_DLL
 #  define DIVSUFSORT_API @DIVSUFSORT_EXPORT@
 # else
 #  define DIVSUFSORT_API @DIVSUFSORT_IMPORT@
 # endif
 #endif
 /*- Datatypes -*/
 #ifndef SAUCHAR_T
 #define SAUCHAR_T
 typedef @SAUCHAR_TYPE@ sauchar_t;
 #endif /* SAUCHAR_T */
 #ifndef SAINT_T
 #define SAINT_T
 typedef @SAINT32_TYPE@ saint_t;
 #endif /* SAINT_T */
 #ifndef SAIDX@W64BIT@_T
 #define SAIDX@W64BIT@_T
 typedef @SAINDEX_TYPE@ saidx@W64BIT@_t;
 #endif /* SAIDX@W64BIT@_T */
 #ifndef PRIdSAINT_T
 #define PRIdSAINT_T @SAINT_PRId@
 #endif /* PRIdSAINT_T */
 #ifndef PRIdSAIDX@W64BIT@_T
 #define PRIdSAIDX@W64BIT@_T @SAINDEX_PRId@
 #endif /* PRIdSAIDX@W64BIT@_T */
 /*- Prototypes -*/
 /**
 * Constructs the suffix array of a given string.
 * @param T[0..n-1] The input string.
 * @param SA[0..n-1] The output array of suffixes.
 * @param n The length of the given string.
 * @return 0 if no error occurred, -1 or -2 otherwise.
 */
 DIVSUFSORT_API
 saint_t
 divsufsort@W64BIT@(const sauchar_t *T, saidx@W64BIT@_t *SA, saidx@W64BIT@_t n);
 /**
 * Constructs the burrows-wheeler transformed string of a given string.
 * @param T[0..n-1] The input string.
 * @param U[0..n-1] The output string. (can be T)
 * @param A[0..n-1] The temporary array. (can be NULL)
 * @param n The length of the given string.
 * @return The primary index if no error occurred, -1 or -2 otherwise.
 */
 DIVSUFSORT_API
 saidx@W64BIT@_t
 divbwt@W64BIT@(const sauchar_t *T, sauchar_t *U, saidx@W64BIT@_t *A, saidx@W64BIT@_t n);
 /**
 * Returns the version of the divsufsort library.
 * @return The version number string.
 */
 DIVSUFSORT_API
 const char *
 divsufsort@W64BIT@_version(void);
 /**
 * Constructs the burrows-wheeler transformed string of a given string and suffix array.
 * @param T[0..n-1] The input string.
 * @param U[0..n-1] The output string. (can be T)
 * @param SA[0..n-1] The suffix array. (can be NULL)
 * @param n The length of the given string.
 * @param idx The output primary index.
 * @return 0 if no error occurred, -1 or -2 otherwise.
 */
 DIVSUFSORT_API
 saint_t
 bw_transform@W64BIT@(const sauchar_t *T, sauchar_t *U,
             saidx@W64BIT@_t *SA /* can NULL */,
             saidx@W64BIT@_t n, saidx@W64BIT@_t *idx);
 /**
 * Inverse BW-transforms a given BWTed string.
 * @param T[0..n-1] The input string.
 * @param U[0..n-1] The output string. (can be T)
 * @param A[0..n-1] The temporary array. (can be NULL)
 * @param n The length of the given string.
 * @param idx The primary index.
 * @return 0 if no error occurred, -1 or -2 otherwise.
 */
 DIVSUFSORT_API
 saint_t
 inverse_bw_transform@W64BIT@(const sauchar_t *T, sauchar_t *U,
                     saidx@W64BIT@_t *A /* can NULL */,
                     saidx@W64BIT@_t n, saidx@W64BIT@_t idx);
 /**
 * Checks the correctness of a given suffix array.
 * @param T[0..n-1] The input string.
 * @param SA[0..n-1] The input suffix array.
 * @param n The length of the given string.
 * @param verbose The verbose mode.
 * @return 0 if no error occurred.
 */
 DIVSUFSORT_API
 saint_t
 sufcheck@W64BIT@(const sauchar_t *T, const saidx@W64BIT@_t *SA, saidx@W64BIT@_t n, saint_t verbose);
 /**
 * Search for the pattern P in the string T.
 * @param T[0..Tsize-1] The input string.
 * @param Tsize The length of the given string.
 * @param P[0..Psize-1] The input pattern string.
 * @param Psize The length of the given pattern string.
 * @param SA[0..SAsize-1] The input suffix array.
 * @param SAsize The length of the given suffix array.
 * @param left The output index (position in SA of the first match).
 * @return The count of matches if no error occurred, -1 otherwise.
 */
 DIVSUFSORT_API
 saidx@W64BIT@_t
 sa_search@W64BIT@(const sauchar_t *T, saidx@W64BIT@_t Tsize,
          const sauchar_t *P, saidx@W64BIT@_t Psize,
          const saidx@W64BIT@_t *SA, saidx@W64BIT@_t SAsize,
          saidx@W64BIT@_t *left);
 /**
 * Search for the character c in the string T.
 * @param T[0..Tsize-1] The input string.
 * @param Tsize The length of the given string.
 * @param SA[0..SAsize-1] The input suffix array.
 * @param SAsize The length of the given suffix array.
 * @param c The input character.
 * @param left The output index.
 * @return The count of matches if no error occurred, -1 otherwise.
 */
 DIVSUFSORT_API
 saidx@W64BIT@_t
 sa_simplesearch@W64BIT@(const sauchar_t *T, saidx@W64BIT@_t Tsize,
                const saidx@W64BIT@_t *SA, saidx@W64BIT@_t SAsize,
                saint_t c, saidx@W64BIT@_t *left);
 #ifdef __cplusplus
 } /* extern "C" */
 #endif /* __cplusplus */
 #endif /* _DIVSUFSORT@W64BIT@_H */
--- a/Tools/unix/lzsa/src/libdivsufsort/include/divsufsort_config.h
+++ b/Tools/unix/lzsa/src/libdivsufsort/include/divsufsort_config.h
@ -0,0 +1,9 @@
 /* Fixed configuration for divsufsort_private.h, which includes this header
    and uses the HAVE_* values to decide which standard headers to pull in. */
 #define HAVE_STRING_H 1
 #define HAVE_STDLIB_H 1
 #define HAVE_MEMORY_H 1
 #define HAVE_STDINT_H 1
 /* Keyword the library sources use wherever they write INLINE. */
 #define INLINE inline
 #ifdef _MSC_VER
 /* Silence MSVC warning C4244 (conversion with possible loss of data). */
 #pragma warning( disable : 4244 )
 #endif /* _MSC_VER */
--- a/Tools/unix/lzsa/src/libdivsufsort/include/divsufsort_private.h
+++ b/Tools/unix/lzsa/src/libdivsufsort/include/divsufsort_private.h
@ -0,0 +1,205 @@
 /*
 * divsufsort_private.h for libdivsufsort
 * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person
 * obtaining a copy of this software and associated documentation
 * files (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use,
 * copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following
 * conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */
 #ifndef _DIVSUFSORT_PRIVATE_H
 #define _DIVSUFSORT_PRIVATE_H 1
 #ifdef __cplusplus
 extern "C" {
 #endif /* __cplusplus */
 #include "divsufsort_config.h"
 #include <assert.h>
 #include <stdio.h>
 #if HAVE_STRING_H
 # include <string.h>
 #endif
 #if HAVE_STDLIB_H
 # include <stdlib.h>
 #endif
 #if HAVE_MEMORY_H
 # include <memory.h>
 #endif
 #if HAVE_STDDEF_H
 # include <stddef.h>
 #endif
 #if HAVE_STRINGS_H
 # include <strings.h>
 #endif
 #if HAVE_INTTYPES_H
 # include <inttypes.h>
 #else
 # if HAVE_STDINT_H
 #  include <stdint.h>
 # endif
 #endif
 /* Index-width selection.
    When BUILD_DIVSUFSORT64 is defined, include the 64-bit public header and
    alias the unsuffixed names used by the library sources (saidx_t,
    divsufsort, sssort, trsort, ...) to their *64 counterparts. Otherwise the
    plain header is included and the unsuffixed names are used as-is. */
 #if defined(BUILD_DIVSUFSORT64)
 # include "divsufsort64.h"
 # ifndef SAIDX_T
 #  define SAIDX_T
 #  define saidx_t saidx64_t
 # endif /* SAIDX_T */
 # ifndef PRIdSAIDX_T
 #  define PRIdSAIDX_T PRIdSAIDX64_T
 # endif /* PRIdSAIDX_T */
 /* Public entry points. */
 # define divsufsort divsufsort64
 # define divbwt divbwt64
 # define divsufsort_version divsufsort64_version
 # define bw_transform bw_transform64
 # define inverse_bw_transform inverse_bw_transform64
 # define sufcheck sufcheck64
 # define sa_search sa_search64
 # define sa_simplesearch sa_simplesearch64
 /* Private helpers declared at the bottom of this header. */
 # define sssort sssort64
 # define trsort trsort64
 #else
 # include "divsufsort.h"
 #endif
 /*- Constants -*/
 /* Fallback for environments whose headers did not provide UINT8_MAX. */
 #if !defined(UINT8_MAX)
 # define UINT8_MAX (255)
 #endif /* UINT8_MAX */
 /* ALPHABET_SIZE may be supplied by the build; a value below 1 is discarded
    and replaced by the default of 256 (UINT8_MAX + 1). */
 #if defined(ALPHABET_SIZE) && (ALPHABET_SIZE < 1)
 # undef ALPHABET_SIZE
 #endif
 #if !defined(ALPHABET_SIZE)
 # define ALPHABET_SIZE (UINT8_MAX + 1)
 #endif
 /* for divsufsort.c */
 /* Element counts of the two bucket tables: one slot per character and one
    slot per ordered character pair. */
 #define BUCKET_A_SIZE (ALPHABET_SIZE)
 #define BUCKET_B_SIZE (ALPHABET_SIZE * ALPHABET_SIZE)
 /* for sssort.c */
 /* SS_INSERTIONSORT_THRESHOLD: overridable, clamped to at least 1,
    default 8. */
 #if defined(SS_INSERTIONSORT_THRESHOLD)
 # if SS_INSERTIONSORT_THRESHOLD < 1
 #  undef SS_INSERTIONSORT_THRESHOLD
 #  define SS_INSERTIONSORT_THRESHOLD (1)
 # endif
 #else
 # define SS_INSERTIONSORT_THRESHOLD (8)
 #endif
 /* SS_BLOCKSIZE: overridable, clamped into [0, 32767], default 1024. */
 #if defined(SS_BLOCKSIZE)
 # if SS_BLOCKSIZE < 0
 #  undef SS_BLOCKSIZE
 #  define SS_BLOCKSIZE (0)
 # elif 32768 <= SS_BLOCKSIZE
 #  undef SS_BLOCKSIZE
 #  define SS_BLOCKSIZE (32767)
 # endif
 #else
 # define SS_BLOCKSIZE (1024)
 #endif
 /* minstacksize = log(SS_BLOCKSIZE) / log(3) * 2 */
 /* SS_MISORT_STACKSIZE is picked from SS_BLOCKSIZE; a block size of 0 gets
    the largest stacks, with a bigger one for the 64-bit build. */
 #if SS_BLOCKSIZE == 0
 # if defined(BUILD_DIVSUFSORT64)
 #  define SS_MISORT_STACKSIZE (96)
 # else
 #  define SS_MISORT_STACKSIZE (64)
 # endif
 #elif SS_BLOCKSIZE <= 4096
 # define SS_MISORT_STACKSIZE (16)
 #else
 # define SS_MISORT_STACKSIZE (24)
 #endif
 /* SS_SMERGE_STACKSIZE: doubled for the 64-bit build. */
 #if defined(BUILD_DIVSUFSORT64)
 # define SS_SMERGE_STACKSIZE (64)
 #else
 # define SS_SMERGE_STACKSIZE (32)
 #endif
 /* for trsort.c */
 #define TR_INSERTIONSORT_THRESHOLD (8)
 /* TR_STACKSIZE: larger for the 64-bit build. */
 #if defined(BUILD_DIVSUFSORT64)
 # define TR_STACKSIZE (96)
 #else
 # define TR_STACKSIZE (64)
 #endif
 /*- Macros -*/
 /* SWAP exchanges two lvalues through a temporary named t. The macro does
    not declare t: the calling scope must provide a variable t of a suitable
    type. */
 #ifndef SWAP
 # define SWAP(_a, _b) do { t = (_a); (_a) = (_b); (_b) = t; } while(0)
 #endif /* SWAP */
 /* MIN / MAX evaluate one of their arguments twice, so arguments must be
    free of side effects. */
 #ifndef MIN
 # define MIN(_a, _b) (((_a) < (_b)) ? (_a) : (_b))
 #endif /* MIN */
 #ifndef MAX
 # define MAX(_a, _b) (((_a) > (_b)) ? (_a) : (_b))
 #endif /* MAX */
 /* Explicit-stack helpers.
    All four macros operate on names that must exist in the calling scope:
    an array `stack` of records with fields a, b, c, d (and e for the *5
    variants), an integer `ssize` holding the current depth, and a constant
    STACK_SIZE giving the array capacity.
    The capacity is only checked by assert(), i.e. not at all when NDEBUG is
    defined. */
 /* Push a 4-field record and increment ssize. */
 #define STACK_PUSH(_a, _b, _c, _d)\
  do {\
    assert(ssize < STACK_SIZE);\
    stack[ssize].a = (_a), stack[ssize].b = (_b),\
    stack[ssize].c = (_c), stack[ssize++].d = (_d);\
  } while(0)
 /* Push a 5-field record and increment ssize. */
 #define STACK_PUSH5(_a, _b, _c, _d, _e)\
  do {\
    assert(ssize < STACK_SIZE);\
    stack[ssize].a = (_a), stack[ssize].b = (_b),\
    stack[ssize].c = (_c), stack[ssize].d = (_d), stack[ssize++].e = (_e);\
  } while(0)
 /* Pop a 4-field record into the given lvalues.
    NOTE: when the stack is empty this executes a bare `return;`, leaving
    the enclosing function - so it can only be used inside functions
    returning void. */
 #define STACK_POP(_a, _b, _c, _d)\
  do {\
    assert(0 <= ssize);\
    if(ssize == 0) { return; }\
    (_a) = stack[--ssize].a, (_b) = stack[ssize].b,\
    (_c) = stack[ssize].c, (_d) = stack[ssize].d;\
  } while(0)
 /* Pop a 5-field record; same hidden `return;` on an empty stack as
    STACK_POP. */
 #define STACK_POP5(_a, _b, _c, _d, _e)\
  do {\
    assert(0 <= ssize);\
    if(ssize == 0) { return; }\
    (_a) = stack[--ssize].a, (_b) = stack[ssize].b,\
    (_c) = stack[ssize].c, (_d) = stack[ssize].d, (_e) = stack[ssize].e;\
  } while(0)
 /* for divsufsort.c */
 /* Accessors for the bucket tables; they expect arrays named bucket_A and
    bucket_B in the calling scope.
    BUCKET_B and BUCKET_BSTAR address the SAME array bucket_B with the two
    character arguments in swapped roles: BUCKET_B(c0, c1) uses c1 as the row
    and c0 as the column, BUCKET_BSTAR(c0, c1) uses c0 as the row and c1 as
    the column. The shift/or form is used when ALPHABET_SIZE is exactly 256,
    the multiply/add form otherwise. */
 #define BUCKET_A(_c0) bucket_A[(_c0)]
 #if ALPHABET_SIZE == 256
 #define BUCKET_B(_c0, _c1) (bucket_B[((_c1) << 8) | (_c0)])
 #define BUCKET_BSTAR(_c0, _c1) (bucket_B[((_c0) << 8) | (_c1)])
 #else
 #define BUCKET_B(_c0, _c1) (bucket_B[(_c1) * ALPHABET_SIZE + (_c0)])
 #define BUCKET_BSTAR(_c0, _c1) (bucket_B[(_c0) * ALPHABET_SIZE + (_c1)])
 #endif
 /*- Private Prototypes -*/
 /* sssort.c */
 /* Substring sort used by divsufsort.c on the type B* suffixes.
    As called from sort_typeBstar: Td is the text, PA the table of B*
    start positions, [first, last) the slice of SA to sort, buf/bufsize a
    scratch area, depth the number of leading characters already known to be
    equal (2 at that call site), n the text length, and lastsuffix non-zero
    when the first entry of the slice refers to the last B* suffix.
    NOTE(review): semantics inferred from the single visible call site;
    confirm against sssort.c. */
 void
 sssort(const sauchar_t *Td, const saidx_t *PA,
       saidx_t *first, saidx_t *last,
       saidx_t *buf, saidx_t bufsize,
       saidx_t depth, saidx_t n, saint_t lastsuffix);
 /* trsort.c */
 /* Rank-based sort used by divsufsort.c to finish ordering the B* suffixes;
    called there as trsort(ISAb, SA, m, 1) with m the number of B* suffixes.
    NOTE(review): confirm exact contract against trsort.c. */
 void
 trsort(saidx_t *ISA, saidx_t *SA, saidx_t n, saidx_t depth);
 #ifdef __cplusplus
 } /* extern "C" */
 #endif /* __cplusplus */
 #endif /* _DIVSUFSORT_PRIVATE_H */
--- a/Tools/unix/lzsa/src/libdivsufsort/include/lfs.h.cmake
+++ b/Tools/unix/lzsa/src/libdivsufsort/include/lfs.h.cmake
@ -0,0 +1,56 @@
 /*
 * lfs.h for libdivsufsort
 * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person
 * obtaining a copy of this software and associated documentation
 * files (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use,
 * copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following
 * conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */
 #ifndef _LFS_H
 #define _LFS_H 1
 #ifdef __cplusplus
 extern "C" {
 #endif /* __cplusplus */
 /* Large-file-support shims.
    Outside strict ANSI mode the LFS_* macros take the values of the
    @LFS_...@ placeholders (presumably substituted when this template is
    configured by the build system - confirm). In strict ANSI mode they fall
    back to the plain long / fopen / ftell / fseek family with a "ld" printf
    conversion. */
 #ifndef __STRICT_ANSI__
 # define LFS_OFF_T @LFS_OFF_T@
 # define LFS_FOPEN @LFS_FOPEN@
 # define LFS_FTELL @LFS_FTELL@
 # define LFS_FSEEK @LFS_FSEEK@
 # define LFS_PRId  @LFS_PRID@
 #else
 # define LFS_OFF_T long
 # define LFS_FOPEN fopen
 # define LFS_FTELL ftell
 # define LFS_FSEEK fseek
 # define LFS_PRId "ld"
 #endif
 /* printf conversion for LFS_OFF_T values, unless already defined. */
 #ifndef PRIdOFF_T
 # define PRIdOFF_T LFS_PRId
 #endif
 #ifdef __cplusplus
 } /* extern "C" */
 #endif /* __cplusplus */
 #endif /* _LFS_H */
--- a/Tools/unix/lzsa/src/libdivsufsort/lib/CMakeLists.txt
+++ b/Tools/unix/lzsa/src/libdivsufsort/lib/CMakeLists.txt
@ -0,0 +1,31 @@
 # Build rules for the bundled libdivsufsort (and, optionally, its 64-bit
 # index variant, which compiles the same sources with -DBUILD_DIVSUFSORT64).
 include_directories("${CMAKE_CURRENT_SOURCE_DIR}/../include"
                     "${CMAKE_CURRENT_BINARY_DIR}/../include")
 # The utility source is shipped in this tree as divsufsort_utils.c (not
 # utils.c as in upstream libdivsufsort), so it must be listed under that name.
 set(divsufsort_SRCS divsufsort.c sssort.c trsort.c divsufsort_utils.c)
 ## libdivsufsort ##
 add_library(divsufsort ${divsufsort_SRCS})
 install(TARGETS divsufsort
  RUNTIME DESTINATION ${CMAKE_INSTALL_RUNTIMEDIR}
  LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
  ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR})
 set_target_properties(divsufsort PROPERTIES
  VERSION   "${LIBRARY_VERSION}"
  SOVERSION "${LIBRARY_SOVERSION}"
  DEFINE_SYMBOL DIVSUFSORT_BUILD_DLL
  RUNTIME_OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/../examples")
 ## libdivsufsort64 ##
 if(BUILD_DIVSUFSORT64)
  add_library(divsufsort64 ${divsufsort_SRCS})
  install(TARGETS divsufsort64
    RUNTIME DESTINATION ${CMAKE_INSTALL_RUNTIMEDIR}
    LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
    ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR})
  set_target_properties(divsufsort64 PROPERTIES
    VERSION   "${LIBRARY_VERSION}"
    SOVERSION "${LIBRARY_SOVERSION}"
    DEFINE_SYMBOL DIVSUFSORT_BUILD_DLL
    COMPILE_FLAGS "-DBUILD_DIVSUFSORT64"
    RUNTIME_OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/../examples")
 endif(BUILD_DIVSUFSORT64)
--- a/Tools/unix/lzsa/src/libdivsufsort/lib/divsufsort.c
+++ b/Tools/unix/lzsa/src/libdivsufsort/lib/divsufsort.c
@ -0,0 +1,431 @@
 /*
 * divsufsort.c for libdivsufsort
 * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person
 * obtaining a copy of this software and associated documentation
 * files (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use,
 * copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following
 * conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */
 #include "divsufsort_private.h"
 #ifdef _OPENMP
 # include <omp.h>
 #endif
 /*- Private Functions -*/
 /* Sorts suffixes of type B*.
    Scans T[0..n-1] from right to left, counting type A, B and B* suffixes
    into bucket_A / bucket_B, collects the B* start positions at the top of
    SA, sorts them (sssort on substrings, then trsort on ranks) and finally
    moves them to their bucket positions inside SA.
    Both bucket arrays are fully overwritten. The caller must guarantee
    n >= 1 (T[n - 1] is read unconditionally).
    Returns m, the number of type B* suffixes. */
 static
 saidx_t
 sort_typeBstar(const sauchar_t *T, saidx_t *SA,
               saidx_t *bucket_A, saidx_t *bucket_B,
               saidx_t n) {
  saidx_t *PAb, *ISAb, *buf;
 #ifdef _OPENMP
  saidx_t *curbuf;
  saidx_t l;
 #endif
  saidx_t i, j, k, t, m, bufsize;
  saint_t c0, c1;
 #ifdef _OPENMP
  saint_t d0, d1;
  int tmp;
 #endif
  /* Initialize bucket arrays. */
  for(i = 0; i < BUCKET_A_SIZE; ++i) { bucket_A[i] = 0; }
  for(i = 0; i < BUCKET_B_SIZE; ++i) { bucket_B[i] = 0; }
  /* Count the number of occurrences of the first one or two characters of each
     type A, B and B* suffix. Moreover, store the beginning position of all
     type B* suffixes into the array SA. */
  for(i = n - 1, m = n, c0 = T[n - 1]; 0 <= i;) {
    /* type A suffix. */
    /* Walk left while the character does not get smaller than its right
       neighbour (T[i] >= T[i + 1]); each such position is counted in
       bucket_A under its own first character. */
    do { ++BUCKET_A(c1 = c0); } while((0 <= --i) && ((c0 = T[i]) >= c1));
    if(0 <= i) {
      /* type B* suffix. */
      /* Here T[i] < T[i + 1]: record the position at the top end of SA. */
      ++BUCKET_BSTAR(c0, c1);
      SA[--m] = i;
      /* type B suffix. */
      /* Continue left while T[i] <= T[i + 1]. */
      for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) <= c1); --i, c1 = c0) {
        ++BUCKET_B(c0, c1);
      }
    }
  }
  /* m was used as a descending write index; convert it to a count. */
  m = n - m;
 /*
 note:
  A type B* suffix is lexicographically smaller than a type B suffix that
  begins with the same first two characters.
 */
  /* Calculate the index of start/end point of each bucket. */
  /* i accumulates type A and type B counts, j accumulates type B* counts. */
  for(c0 = 0, i = 0, j = 0; c0 < ALPHABET_SIZE; ++c0) {
    t = i + BUCKET_A(c0);
    BUCKET_A(c0) = i + j; /* start point */
    i = t + BUCKET_B(c0, c0);
    for(c1 = c0 + 1; c1 < ALPHABET_SIZE; ++c1) {
      j += BUCKET_BSTAR(c0, c1);
      BUCKET_BSTAR(c0, c1) = j; /* end point */
      i += BUCKET_B(c0, c1);
    }
  }
  if(0 < m) {
    /* Sort the type B* suffixes by their first two characters. */
    /* PAb: the m B* positions stored at the top of SA; ISAb: rank area that
       starts right after the first m entries of SA. */
    PAb = SA + n - m; ISAb = SA + m;
    for(i = m - 2; 0 <= i; --i) {
      t = PAb[i], c0 = T[t], c1 = T[t + 1];
      SA[--BUCKET_BSTAR(c0, c1)] = i;
    }
    /* The last B* suffix is placed after all the others. */
    t = PAb[m - 1], c0 = T[t], c1 = T[t + 1];
    SA[--BUCKET_BSTAR(c0, c1)] = m - 1;
    /* Sort the type B* substrings using sssort. */
 #ifdef _OPENMP
    /* Parallel variant: the free space between the two halves of SA is
       split evenly into one scratch buffer per thread, and threads pull the
       next bucket to sort from shared state (c0, c1, j) inside a critical
       section. */
    tmp = omp_get_max_threads();
    buf = SA + m, bufsize = (n - (2 * m)) / tmp;
    c0 = ALPHABET_SIZE - 2, c1 = ALPHABET_SIZE - 1, j = m;
 #pragma omp parallel default(shared) private(curbuf, k, l, d0, d1, tmp)
    {
      tmp = omp_get_thread_num();
      curbuf = buf + tmp * bufsize;
      k = 0;
      for(;;) {
        #pragma omp critical(sssort_lock)
        {
          if(0 < (l = j)) {
            d0 = c0, d1 = c1;
            /* Skip buckets holding fewer than two entries. */
            do {
              k = BUCKET_BSTAR(d0, d1);
              if(--d1 <= d0) {
                d1 = ALPHABET_SIZE - 1;
                if(--d0 < 0) { break; }
              }
            } while(((l - k) <= 1) && (0 < (l = k)));
            c0 = d0, c1 = d1, j = k;
          }
        }
        if(l == 0) { break; }
        sssort(T, PAb, SA + k, SA + l,
               curbuf, bufsize, 2, n, *(SA + k) == (m - 1));
      }
    }
 #else
    /* Serial variant: visit every (c0, c1) bucket with c0 < c1 from the
       highest down and sort those holding more than one entry. */
    buf = SA + m, bufsize = n - (2 * m);
    for(c0 = ALPHABET_SIZE - 2, j = m; 0 < j; --c0) {
      for(c1 = ALPHABET_SIZE - 1; c0 < c1; j = i, --c1) {
        i = BUCKET_BSTAR(c0, c1);
        if(1 < (j - i)) {
          sssort(T, PAb, SA + i, SA + j,
                 buf, bufsize, 2, n, *(SA + i) == (m - 1));
        }
      }
    }
 #endif
    /* Compute ranks of type B* substrings. */
    /* Runs of non-negative entries each get their own index as rank and the
       run is summarised by a negative length at its start; runs of negative
       entries are complemented back and all share the rank j of the run's
       last position. */
    for(i = m - 1; 0 <= i; --i) {
      if(0 <= SA[i]) {
        j = i;
        do { ISAb[SA[i]] = i; } while((0 <= --i) && (0 <= SA[i]));
        SA[i + 1] = i - j;
        if(i <= 0) { break; }
      }
      j = i;
      do { ISAb[SA[i] = ~SA[i]] = j; } while(SA[--i] < 0);
      ISAb[SA[i]] = j;
    }
    /* Construct the inverse suffix array of type B* suffixes using trsort. */
    trsort(ISAb, SA, m, 1);
    /* Set the sorted order of type B* suffixes. */
    /* Re-scan T exactly as in the counting pass to rediscover each B*
       position t, and store it (or its complement) at its final rank. */
    for(i = n - 1, j = m, c0 = T[n - 1]; 0 <= i;) {
      for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) >= c1); --i, c1 = c0) { }
      if(0 <= i) {
        t = i;
        for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) <= c1); --i, c1 = c0) { }
        SA[ISAb[--j]] = ((t == 0) || (1 < (t - i))) ? t : ~t;
      }
    }
    /* Calculate the index of start/end point of each bucket. */
    BUCKET_B(ALPHABET_SIZE - 1, ALPHABET_SIZE - 1) = n; /* end point */
    for(c0 = ALPHABET_SIZE - 2, k = m - 1; 0 <= c0; --c0) {
      i = BUCKET_A(c0 + 1) - 1;
      for(c1 = ALPHABET_SIZE - 1; c0 < c1; --c1) {
        t = i - BUCKET_B(c0, c1);
        BUCKET_B(c0, c1) = i; /* end point */
        /* Move all type B* suffixes to the correct position. */
        for(i = t, j = BUCKET_BSTAR(c0, c1);
            j <= k;
            --i, --k) { SA[i] = SA[k]; }
      }
      BUCKET_BSTAR(c0, c0 + 1) = i - BUCKET_B(c0, c0) + 1; /* start point */
      BUCKET_B(c0, c0) = i; /* end point */
    }
  }
  return m;
 }
 /* Constructs the suffix array by using the sorted order of type B* suffixes.
    Expects SA and the bucket tables in the state left by sort_typeBstar(),
    with m being that function's return value. Works in two passes: a
    right-to-left pass per character that fills in the type B suffixes, then
    one left-to-right pass over all of SA that fills in the remaining ones.
    Entries are temporarily stored as their bitwise complement (~s, hence
    negative) to mark them; each pass complements such entries back when it
    reaches them.
    The caller must guarantee n >= 2 (T[n - 2] is read unconditionally). */
 static
 void
 construct_SA(const sauchar_t *T, saidx_t *SA,
             saidx_t *bucket_A, saidx_t *bucket_B,
             saidx_t n, saidx_t m) {
  saidx_t *i, *j, *k;
  saidx_t s;
  saint_t c0, c1, c2;
  if(0 < m) {
    /* Construct the sorted order of type B suffixes by using
       the sorted order of type B* suffixes. */
    for(c1 = ALPHABET_SIZE - 2; 0 <= c1; --c1) {
      /* Scan the suffix array from right to left. */
      /* k is the write cursor into the current (c2, c1) bucket; c2 caches
         which bucket k belongs to so the table is only touched on change. */
      for(i = SA + BUCKET_BSTAR(c1, c1 + 1),
          j = SA + BUCKET_A(c1 + 1) - 1, k = NULL, c2 = -1;
          i <= j;
          --j) {
        if(0 < (s = *j)) {
          assert(T[s] == c1);
          assert(((s + 1) < n) && (T[s] <= T[s + 1]));
          assert(T[s - 1] <= T[s]);
          *j = ~s;
          /* Derive the suffix starting one position earlier. */
          c0 = T[--s];
          /* Store it complemented when its own predecessor character is
             larger, so this pass will not expand it further. */
          if((0 < s) && (T[s - 1] > c0)) { s = ~s; }
          if(c0 != c2) {
            /* Save the cursor of the bucket being left, load the new one. */
            if(0 <= c2) { BUCKET_B(c2, c1) = k - SA; }
            k = SA + BUCKET_B(c2 = c0, c1);
          }
          assert(k < j);
          *k-- = s;
        } else {
          assert(((s == 0) && (T[s] == c1)) || (s < 0));
          *j = ~s;
        }
      }
    }
  }
  /* Construct the suffix array by using
     the sorted order of type B suffixes. */
  /* Seed the scan with the last suffix (position n - 1). */
  k = SA + BUCKET_A(c2 = T[n - 1]);
  *k++ = (T[n - 2] < c2) ? ~(n - 1) : (n - 1);
  /* Scan the suffix array from left to right. */
  for(i = SA, j = SA + n; i < j; ++i) {
    if(0 < (s = *i)) {
      assert(T[s - 1] >= T[s]);
      c0 = T[--s];
      if((s == 0) || (T[s - 1] < c0)) { s = ~s; }
      if(c0 != c2) {
        /* Save the cursor of the bucket being left, load the new one. */
        BUCKET_A(c2) = k - SA;
        k = SA + BUCKET_A(c2 = c0);
      }
      assert(i < k);
      *k++ = s;
    } else {
      assert(s < 0);
      /* Restore a marked entry to its real (non-negative) value. */
      *i = ~s;
    }
  }
 }
 #if 0
 /* NOTE: this function is compiled out by the enclosing #if 0. */
 /* Constructs the burrows-wheeler transformed string directly
   by using the sorted order of type B* suffixes.
   Mirrors construct_SA(), but the finished slots of SA receive characters
   of T instead of suffix positions. Returns the offset within SA of the
   slot whose value was 0 during the left-to-right scan (0 if none was
   seen). */
 static
 saidx_t
 construct_BWT(const sauchar_t *T, saidx_t *SA,
              saidx_t *bucket_A, saidx_t *bucket_B,
              saidx_t n, saidx_t m) {
  saidx_t *i, *j, *k, *orig;
  saidx_t s;
  saint_t c0, c1, c2;
  if(0 < m) {
    /* Construct the sorted order of type B suffixes by using
       the sorted order of type B* suffixes. */
    for(c1 = ALPHABET_SIZE - 2; 0 <= c1; --c1) {
      /* Scan the suffix array from right to left. */
      for(i = SA + BUCKET_BSTAR(c1, c1 + 1),
          j = SA + BUCKET_A(c1 + 1) - 1, k = NULL, c2 = -1;
          i <= j;
          --j) {
        if(0 < (s = *j)) {
          assert(T[s] == c1);
          assert(((s + 1) < n) && (T[s] <= T[s + 1]));
          assert(T[s - 1] <= T[s]);
          c0 = T[--s];
          /* The slot keeps the complemented preceding character rather than
             the complemented position. */
          *j = ~((saidx_t)c0);
          if((0 < s) && (T[s - 1] > c0)) { s = ~s; }
          if(c0 != c2) {
            if(0 <= c2) { BUCKET_B(c2, c1) = k - SA; }
            k = SA + BUCKET_B(c2 = c0, c1);
          }
          assert(k < j);
          *k-- = s;
        } else if(s != 0) {
          *j = ~s;
 #ifndef NDEBUG
        } else {
          assert(T[s] == c1);
 #endif
        }
      }
    }
  }
  /* Construct the BWTed string by using
     the sorted order of type B suffixes. */
  k = SA + BUCKET_A(c2 = T[n - 1]);
  *k++ = (T[n - 2] < c2) ? ~((saidx_t)T[n - 2]) : (n - 1);
  /* Scan the suffix array from left to right. */
  for(i = SA, j = SA + n, orig = SA; i < j; ++i) {
    if(0 < (s = *i)) {
      assert(T[s - 1] >= T[s]);
      c0 = T[--s];
      *i = c0;
      if((0 < s) && (T[s - 1] < c0)) { s = ~((saidx_t)T[s - 1]); }
      if(c0 != c2) {
        BUCKET_A(c2) = k - SA;
        k = SA + BUCKET_A(c2 = c0);
      }
      assert(i < k);
      *k++ = s;
    } else if(s != 0) {
      *i = ~s;
    } else {
      /* Remember where the zero entry was found. */
      orig = i;
    }
  }
  return orig - SA;
 }
 #endif
 /*---------------------------------------------------------------------------*/
 /**
 * Allocate the two bucket tables of a suffix array context
 *
 * @param ctx suffix array context whose bucket_A and bucket_B members are
 *            overwritten with freshly allocated tables
 *
 * @return 0 when both tables were allocated; -1 otherwise, in which case
 *         whatever was allocated is released again through
 *         divsufsort_destroy()
 */
 int divsufsort_init(divsufsort_ctx_t *ctx) {
   ctx->bucket_B = NULL;
   ctx->bucket_A = (saidx_t *)malloc(BUCKET_A_SIZE * sizeof(saidx_t));
   if (ctx->bucket_A == NULL) {
      divsufsort_destroy(ctx);
      return -1;
   }
   ctx->bucket_B = (saidx_t *)malloc(BUCKET_B_SIZE * sizeof(saidx_t));
   if (ctx->bucket_B == NULL) {
      /* Second allocation failed: give the first table back */
      divsufsort_destroy(ctx);
      return -1;
   }
   return 0;
 }
 /**
 * Release the bucket tables held by a suffix array context
 *
 * Both pointers are reset to NULL afterwards, so calling this again on the
 * same context is harmless.
 *
 * @param ctx suffix array context to destroy
 */
 void divsufsort_destroy(divsufsort_ctx_t *ctx) {
   /* free(NULL) is a no-op, so the pointers need no guarding */
   free(ctx->bucket_B);
   ctx->bucket_B = NULL;
   free(ctx->bucket_A);
   ctx->bucket_A = NULL;
 }
 /*- Function -*/
 /**
 * Build the suffix array of T[0..n-1] into SA[0..n-1]
 *
 * @param ctx suffix array context providing the bucket tables; it is only
 *            consulted when n is 3 or more
 * @param T input string
 * @param SA output suffix array
 * @param n length of T and of SA
 *
 * @return 0 for success, -1 if T or SA is NULL or n is negative, -2 if the
 *         context is NULL or its bucket tables are not allocated
 */
 saint_t
 divsufsort_build_array(divsufsort_ctx_t *ctx, const sauchar_t *T, saidx_t *SA, saidx_t n) {
  saidx_t m;
  /* Check arguments. */
  if((T == NULL) || (SA == NULL) || (n < 0)) { return -1; }
  /* Trivial lengths are answered directly and never touch the context. */
  if(n == 0) { return 0; }
  if(n == 1) { SA[0] = 0; return 0; }
  if(n == 2) { m = (T[0] < T[1]); SA[m ^ 1] = 0, SA[m] = 1; return 0; }
  /* A missing or uninitialized context is reported instead of being
     dereferenced. */
  if((ctx == NULL) || (ctx->bucket_A == NULL) || (ctx->bucket_B == NULL)) {
    return -2;
  }
  /* Suffixsort. */
  m = sort_typeBstar(T, SA, ctx->bucket_A, ctx->bucket_B, n);
  construct_SA(T, SA, ctx->bucket_A, ctx->bucket_B, n, m);
  return 0;
 }
 #if 0
 /* NOTE: everything up to the matching #endif is compiled out. */
 /* Computes the Burrows-Wheeler transform of T[0..n-1] into U[0..n-1].
    A is an optional work array; when it is NULL a temporary array of n + 1
    entries is allocated and released here. The bucket tables are always
    allocated locally.
    Returns the primary index (pidx + 1) on success, n itself when n <= 1,
    -1 on invalid arguments and -2 when an allocation failed. */
 saidx_t
 divbwt(const sauchar_t *T, sauchar_t *U, saidx_t *A, saidx_t n) {
  saidx_t *B;
  saidx_t *bucket_A, *bucket_B;
  saidx_t m, pidx, i;
  /* Check arguments. */
  if((T == NULL) || (U == NULL) || (n < 0)) { return -1; }
  else if(n <= 1) { if(n == 1) { U[0] = T[0]; } return n; }
  if((B = A) == NULL) { B = (saidx_t *)malloc((size_t)(n + 1) * sizeof(saidx_t)); }
  bucket_A = (saidx_t *)malloc(BUCKET_A_SIZE * sizeof(saidx_t));
  bucket_B = (saidx_t *)malloc(BUCKET_B_SIZE * sizeof(saidx_t));
  /* Burrows-Wheeler Transform. */
  if((B != NULL) && (bucket_A != NULL) && (bucket_B != NULL)) {
    m = sort_typeBstar(T, B, bucket_A, bucket_B, n);
    pidx = construct_BWT(T, B, bucket_A, bucket_B, n, m);
    /* Copy to output string. */
    /* Entries before pidx are shifted up by one slot to make room for the
       leading T[n - 1]; the entry at pidx itself is skipped. */
    U[0] = T[n - 1];
    for(i = 0; i < pidx; ++i) { U[i + 1] = (sauchar_t)B[i]; }
    for(i += 1; i < n; ++i) { U[i] = (sauchar_t)B[i]; }
    pidx += 1;
  } else {
    pidx = -2;
  }
  /* Allocations that failed are NULL here, which free() accepts. */
  free(bucket_B);
  free(bucket_A);
  if(A == NULL) { free(B); }
  return pidx;
 }
 /* Returns the library version string.
    NOTE(review): PROJECT_VERSION_FULL is not defined by the
    divsufsort_config.h shipped in this tree. */
 const char *
 divsufsort_version(void) {
  return PROJECT_VERSION_FULL;
 }
 #endif
--- a/Tools/unix/lzsa/src/libdivsufsort/lib/divsufsort_utils.c
+++ b/Tools/unix/lzsa/src/libdivsufsort/lib/divsufsort_utils.c
@ -0,0 +1,383 @@
 /*
 * utils.c for libdivsufsort
 * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person
 * obtaining a copy of this software and associated documentation
 * files (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use,
 * copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following
 * conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */
 #include "divsufsort_private.h"
 /*- Private Function -*/
 #if 0
 /* Lower-bound binary search for the inverse BWT: returns the number of
    leading entries of the ascending array A[0..size-1] that are smaller
    than value, i.e. the index of the first entry that is not. */
 static
 saidx_t
 binarysearch_lower(const saidx_t *A, saidx_t size, saidx_t value) {
  saidx_t lo = 0;
  saidx_t half = size >> 1;
  while(size > 0) {
    if(A[lo + half] < value) {
      /* Discard the left part together with the probed element. */
      lo += half + 1;
      /* For an even size the right part is one element shorter. */
      if((size & 1) == 0) { half -= 1; }
    }
    size = half;
    half >>= 1;
  }
  return lo;
 }
 /*- Functions -*/
 /* Burrows-Wheeler transform.
    Writes the transform of T[0..n-1] to U[0..n-1] and the primary index to
    *idx. SA may be NULL, in which case the work is delegated to divbwt();
    otherwise SA must already hold the suffix array of T. T and U may be the
    same buffer, in which case SA is overwritten while transforming.
    Returns 0 on success, -1 on invalid arguments, or the negative error
    code of divbwt(). */
 saint_t
 bw_transform(const sauchar_t *T, sauchar_t *U, saidx_t *SA,
             saidx_t n, saidx_t *idx) {
  saidx_t *A, i, j, p, t;
  saint_t c;
  /* Check arguments. */
  if((T == NULL) || (U == NULL) || (n < 0) || (idx == NULL)) { return -1; }
  if(n <= 1) {
    if(n == 1) { U[0] = T[0]; }
    *idx = n;
    return 0;
  }
  if((A = SA) == NULL) {
    /* No suffix array supplied: let divbwt() build everything itself. */
    i = divbwt(T, U, NULL, n);
    if(0 <= i) { *idx = i; i = 0; }
    return (saint_t)i;
  }
  /* BW transform. */
  if(T == U) {
    /* In-place variant: characters of T that are about to be overwritten
       are parked in the already consumed slots of A. */
    t = n;
    for(i = 0, j = 0; i < n; ++i) {
      p = t - 1;
      t = A[i];
      if(0 <= p) {
        c = T[j];
        U[j] = (j <= p) ? T[p] : (sauchar_t)A[p];
        A[j] = c;
        j++;
      } else {
        *idx = i;
      }
    }
    p = t - 1;
    if(0 <= p) {
      c = T[j];
      U[j] = (j <= p) ? T[p] : (sauchar_t)A[p];
      A[j] = c;
    } else {
      *idx = i;
    }
  } else {
    /* Separate output buffer: emit the character preceding each suffix,
       skipping the entry for suffix 0, whose position gives the index. */
    U[0] = T[n - 1];
    for(i = 0; A[i] != 0; ++i) { U[i + 1] = T[A[i] - 1]; }
    *idx = i + 1;
    for(++i; i < n; ++i) { U[i] = T[A[i] - 1]; }
  }
  /* Not reachable with SA == NULL: that case returned above. */
  if(SA == NULL) {
    /* Deallocate memory. */
    free(A);
  }
  return 0;
 }
 /* Inverse Burrows-Wheeler transform.
    Reconstructs the original string into U[0..n-1] from the transformed
    string T[0..n-1] and its primary index idx. A is an optional work array
    of n entries; when NULL, one is allocated and released here.
    Returns 0 on success, -1 on invalid arguments, -2 when the work array
    could not be allocated. */
 saint_t
 inverse_bw_transform(const sauchar_t *T, sauchar_t *U, saidx_t *A,
                     saidx_t n, saidx_t idx) {
  saidx_t C[ALPHABET_SIZE];
  sauchar_t D[ALPHABET_SIZE];
  saidx_t *B;
  saidx_t i, p;
  saint_t c, d;
  /* Check arguments. */
  if((T == NULL) || (U == NULL) || (n < 0) || (idx < 0) ||
     (n < idx) || ((0 < n) && (idx == 0))) {
    return -1;
  }
  /* Strings of length 0 or 1 are returned without writing to U. */
  if(n <= 1) { return 0; }
  if((B = A) == NULL) {
    /* Allocate n*sizeof(saidx_t) bytes of memory. */
    if((B = (saidx_t *)malloc((size_t)n * sizeof(saidx_t))) == NULL) { return -2; }
  }
  /* Inverse BW transform. */
  /* C: per-character occurrence counts. */
  for(c = 0; c < ALPHABET_SIZE; ++c) { C[c] = 0; }
  for(i = 0; i < n; ++i) { ++C[T[i]]; }
  /* Turn counts into start offsets; D collects the d characters that
     actually occur, in ascending order. */
  for(c = 0, d = 0, i = 0; c < ALPHABET_SIZE; ++c) {
    p = C[c];
    if(0 < p) {
      C[c] = i;
      D[d++] = (sauchar_t)c;
      i += p;
    }
  }
  /* Fill B with the link of every position; positions at or after idx are
     shifted by one. */
  for(i = 0; i < idx; ++i) { B[C[T[i]]++] = i; }
  for( ; i < n; ++i)       { B[C[T[i]]++] = i + 1; }
  /* Compact C to the d used characters (C now holds bucket end offsets). */
  for(c = 0; c < d; ++c) { C[c] = C[D[c]]; }
  /* Follow the links from idx, looking up each character by bucket. */
  for(i = 0, p = idx; i < n; ++i) {
    U[i] = D[binarysearch_lower(C, d, p)];
    p = B[p - 1];
  }
  if(A == NULL) {
    /* Deallocate memory. */
    free(B);
  }
  return 0;
 }
 /* Checks the suffix array SA of the string T.
    With a non-zero verbose, progress and the reason for a failure are
    printed to stderr.
    Returns 0 when SA passes all checks, -1 on invalid arguments, -2 when an
    entry is outside [0, n-1], -3 when two neighbouring suffixes are out of
    order by their first character, and -4 when a suffix is found at a wrong
    position. */
 saint_t
 sufcheck(const sauchar_t *T, const saidx_t *SA,
         saidx_t n, saint_t verbose) {
  saidx_t C[ALPHABET_SIZE];
  saidx_t i, p, q, t;
  saint_t c;
  if(verbose) { fprintf(stderr, "sufcheck: "); }
  /* Check arguments. */
  if((T == NULL) || (SA == NULL) || (n < 0)) {
    if(verbose) { fprintf(stderr, "Invalid arguments.\n"); }
    return -1;
  }
  if(n == 0) {
    if(verbose) { fprintf(stderr, "Done.\n"); }
    return 0;
  }
  /* check range: [0..n-1] */
  for(i = 0; i < n; ++i) {
    if((SA[i] < 0) || (n <= SA[i])) {
      if(verbose) {
        fprintf(stderr, "Out of the range [0,%" PRIdSAIDX_T "].\n"
                        "  SA[%" PRIdSAIDX_T "]=%" PRIdSAIDX_T "\n",
                        n - 1, i, SA[i]);
      }
      return -2;
    }
  }
  /* check first characters. */
  for(i = 1; i < n; ++i) {
    if(T[SA[i - 1]] > T[SA[i]]) {
      if(verbose) {
        fprintf(stderr, "Suffixes in wrong order.\n"
                        "  T[SA[%" PRIdSAIDX_T "]=%" PRIdSAIDX_T "]=%d"
                        " > T[SA[%" PRIdSAIDX_T "]=%" PRIdSAIDX_T "]=%d\n",
                        i - 1, SA[i - 1], T[SA[i - 1]], i, SA[i], T[SA[i]]);
      }
      return -3;
    }
  }
  /* check suffixes. */
  /* C[c] becomes the offset in SA at which the suffixes starting with
     character c begin. */
  for(i = 0; i < ALPHABET_SIZE; ++i) { C[i] = 0; }
  for(i = 0; i < n; ++i) { ++C[T[i]]; }
  for(i = 0, p = 0; i < ALPHABET_SIZE; ++i) {
    t = C[i];
    C[i] = p;
    p += t;
  }
  /* q: expected slot of the last suffix (position n - 1); the cursor of
     its bucket is advanced past it. */
  q = C[T[n - 1]];
  C[T[n - 1]] += 1;
  /* For every suffix in SA order, the suffix one position earlier in T
     (wrapping to n - 1 for suffix 0) must sit at the next free slot of its
     first character's bucket. */
  for(i = 0; i < n; ++i) {
    p = SA[i];
    if(0 < p) {
      c = T[--p];
      t = C[c];
    } else {
      c = T[p = n - 1];
      t = q;
    }
    if((t < 0) || (p != SA[t])) {
      if(verbose) {
        fprintf(stderr, "Suffix in wrong position.\n"
                        "  SA[%" PRIdSAIDX_T "]=%" PRIdSAIDX_T " or\n"
                        "  SA[%" PRIdSAIDX_T "]=%" PRIdSAIDX_T "\n",
                        t, (0 <= t) ? SA[t] : -1, i, SA[i]);
      }
      return -4;
    }
    if(t != q) {
      ++C[c];
      /* Mark the bucket as exhausted once its cursor leaves it. */
      if((n <= C[c]) || (T[SA[C[c]]] != c)) { C[c] = -1; }
    }
  }
  if(1 <= verbose) { fprintf(stderr, "Done.\n"); }
  return 0;
 }
 /* Compares the suffix of T that starts at suf with the pattern P, given
    that the first *match characters are already known to be equal.
    On return *match holds the total number of equal leading characters.
    Returns 0 when the whole pattern matched, the difference of the first
    mismatching characters otherwise, and -1 when the text ended before the
    pattern did. */
 static
 int
 _compare(const sauchar_t *T, saidx_t Tsize,
         const sauchar_t *P, saidx_t Psize,
         saidx_t suf, saidx_t *match) {
  saidx_t tpos = suf + *match;
  saidx_t ppos = *match;
  saint_t diff = 0;
  while((tpos < Tsize) && (ppos < Psize)) {
    diff = T[tpos] - P[ppos];
    if(diff != 0) { break; }
    ++tpos;
    ++ppos;
  }
  *match = ppos;
  if(diff != 0) { return diff; }
  return (ppos == Psize) ? 0 : -1;
 }
 /* Search for the pattern P in the string T.
    Binary-searches the suffix array SA for the range of suffixes that start
    with P. Returns the size of that range (0 if none), or -1 on invalid
    arguments. When idx is not NULL it receives the index in SA of the first
    matching suffix, the point where the search stopped if there is no
    match, or -1 on error / empty input. An empty pattern matches all
    SAsize suffixes. */
 saidx_t
 sa_search(const sauchar_t *T, saidx_t Tsize,
          const sauchar_t *P, saidx_t Psize,
          const saidx_t *SA, saidx_t SAsize,
          saidx_t *idx) {
  saidx_t size, lsize, rsize, half;
  saidx_t match, lmatch, rmatch;
  saidx_t llmatch, lrmatch, rlmatch, rrmatch;
  saidx_t i, j, k;
  saint_t r;
  if(idx != NULL) { *idx = -1; }
  if((T == NULL) || (P == NULL) || (SA == NULL) ||
     (Tsize < 0) || (Psize < 0) || (SAsize < 0)) { return -1; }
  if((Tsize == 0) || (SAsize == 0)) { return 0; }
  if(Psize == 0) { if(idx != NULL) { *idx = 0; } return SAsize; }
  /* lmatch / rmatch: number of pattern characters known to match at the
     left / right border of the current window; their minimum can be skipped
     in the next comparison. */
  for(i = j = k = 0, lmatch = rmatch = 0, size = SAsize, half = size >> 1;
      0 < size;
      size = half, half >>= 1) {
    match = MIN(lmatch, rmatch);
    r = _compare(T, Tsize, P, Psize, SA[i + half], &match);
    if(r < 0) {
      /* Probed suffix sorts before the pattern: continue in the right part. */
      i += half + 1;
      half -= (size & 1) ^ 1;
      lmatch = match;
    } else if(r > 0) {
      /* Probed suffix sorts after the pattern: continue in the left part. */
      rmatch = match;
    } else {
      /* Hit: locate both ends of the matching range, j on the left of the
         probe and k on its right. */
      lsize = half, j = i, rsize = size - half - 1, k = i + half + 1;
      /* left part */
      for(llmatch = lmatch, lrmatch = match, half = lsize >> 1;
          0 < lsize;
          lsize = half, half >>= 1) {
        lmatch = MIN(llmatch, lrmatch);
        r = _compare(T, Tsize, P, Psize, SA[j + half], &lmatch);
        if(r < 0) {
          j += half + 1;
          half -= (lsize & 1) ^ 1;
          llmatch = lmatch;
        } else {
          lrmatch = lmatch;
        }
      }
      /* right part */
      for(rlmatch = match, rrmatch = rmatch, half = rsize >> 1;
          0 < rsize;
          rsize = half, half >>= 1) {
        rmatch = MIN(rlmatch, rrmatch);
        r = _compare(T, Tsize, P, Psize, SA[k + half], &rmatch);
        if(r <= 0) {
          k += half + 1;
          half -= (rsize & 1) ^ 1;
          rlmatch = rmatch;
        } else {
          rrmatch = rmatch;
        }
      }
      break;
    }
  }
  /* Without a hit j == k == 0, so the result is 0 and idx reports i. */
  if(idx != NULL) { *idx = (0 < (k - j)) ? j : i; }
  return k - j;
 }
 /* Search for the character c in the string T.
    Binary-searches the suffix array SA for the range of suffixes whose
    first character is c. Returns the size of that range (0 if none), or -1
    on invalid arguments. When idx is not NULL it receives the index in SA
    of the first such suffix, the point where the search stopped if there is
    none, or -1 on error / empty input. */
 saidx_t
 sa_simplesearch(const sauchar_t *T, saidx_t Tsize,
                const saidx_t *SA, saidx_t SAsize,
                saint_t c, saidx_t *idx) {
  saidx_t size, lsize, rsize, half;
  saidx_t i, j, k, p;
  saint_t r;
  if(idx != NULL) { *idx = -1; }
  if((T == NULL) || (SA == NULL) || (Tsize < 0) || (SAsize < 0)) { return -1; }
  if((Tsize == 0) || (SAsize == 0)) { return 0; }
  for(i = j = k = 0, size = SAsize, half = size >> 1;
      0 < size;
      size = half, half >>= 1) {
    p = SA[i + half];
    /* A suffix position at or beyond Tsize is treated as sorting before c. */
    r = (p < Tsize) ? T[p] - c : -1;
    if(r < 0) {
      i += half + 1;
      half -= (size & 1) ^ 1;
    } else if(r == 0) {
      /* Hit: locate both ends of the matching range, j on the left of the
         probe and k on its right. */
      lsize = half, j = i, rsize = size - half - 1, k = i + half + 1;
      /* left part */
      for(half = lsize >> 1;
          0 < lsize;
          lsize = half, half >>= 1) {
        p = SA[j + half];
        r = (p < Tsize) ? T[p] - c : -1;
        if(r < 0) {
          j += half + 1;
          half -= (lsize & 1) ^ 1;
        }
      }
      /* right part */
      for(half = rsize >> 1;
          0 < rsize;
          rsize = half, half >>= 1) {
        p = SA[k + half];
        r = (p < Tsize) ? T[p] - c : -1;
        if(r <= 0) {
          k += half + 1;
          half -= (rsize & 1) ^ 1;
        }
      }
      break;
    }
  }
  /* Without a hit j == k == 0, so the result is 0 and idx reports i. */
  if(idx != NULL) { *idx = (0 < (k - j)) ? j : i; }
  return k - j;
 }
 #endif
--- a/Tools/unix/lzsa/src/libdivsufsort/lib/sssort.c
+++ b/Tools/unix/lzsa/src/libdivsufsort/lib/sssort.c
@ -0,0 +1,815 @@
 /*
 * sssort.c for libdivsufsort
 * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person
 * obtaining a copy of this software and associated documentation
 * files (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use,
 * copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following
 * conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */
 #include "divsufsort_private.h"
 /*- Private Functions -*/
 /* Logarithm lookup table: lg_table[i] == floor(log2(i)) for 1 <= i <= 255,
    and lg_table[0] == -1. */
 static const saint_t lg_table[256]= {
 -1,0,1,1,2,2,2,2,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
  5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
  6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
  6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
  7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
  7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
  7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
  7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7
 };
 #if (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE)
 /*
  * Integer base-2 logarithm of n, assembled from lg_table one byte at a time.
  * How many bytes of n are examined is fixed at compile time by SS_BLOCKSIZE
  * (and by BUILD_DIVSUFSORT64 when SS_BLOCKSIZE is 0).
  */
 static INLINE
 saint_t
 ss_ilg(saidx_t n) {
 #if SS_BLOCKSIZE == 0
 # if defined(BUILD_DIVSUFSORT64)
  /* Upper 32 bits first; otherwise fall through to the shared 32-bit path. */
  if(n >> 32) {
    if(n >> 48) {
      if(n >> 56) { return 56 + lg_table[(n >> 56) & 0xff]; }
      return 48 + lg_table[(n >> 48) & 0xff];
    }
    if(n >> 40) { return 40 + lg_table[(n >> 40) & 0xff]; }
    return 32 + lg_table[(n >> 32) & 0xff];
  }
 # endif
  if(n & 0xffff0000) {
    if(n & 0xff000000) { return 24 + lg_table[(n >> 24) & 0xff]; }
    return 16 + lg_table[(n >> 16) & 0xff];
  }
  if(n & 0x0000ff00) { return 8 + lg_table[(n >>  8) & 0xff]; }
  return 0 + lg_table[(n >>  0) & 0xff];
 #elif SS_BLOCKSIZE < 256
  /* Direct table lookup: n is used as the index without masking. */
  return lg_table[n];
 #else
  if(n & 0xff00) { return 8 + lg_table[(n >> 8) & 0xff]; }
  return 0 + lg_table[(n >> 0) & 0xff];
 #endif
 }
 #endif /* (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE) */
 #if SS_BLOCKSIZE != 0
 /* Square-root seed table consumed by ss_isqrt().  Spot-checked entries equal
    floor(16 * sqrt(i)) (1 -> 16, 4 -> 32, 64 -> 128, 255 -> 255); presumably
    the whole table follows that rule - not verified entry by entry. */
 static const saint_t sqq_table[256] = {
  0,  16,  22,  27,  32,  35,  39,  42,  45,  48,  50,  53,  55,  57,  59,  61,
 64,  65,  67,  69,  71,  73,  75,  76,  78,  80,  81,  83,  84,  86,  87,  89,
 90,  91,  93,  94,  96,  97,  98,  99, 101, 102, 103, 104, 106, 107, 108, 109,
 110, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126,
 128, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142,
 143, 144, 144, 145, 146, 147, 148, 149, 150, 150, 151, 152, 153, 154, 155, 155,
 156, 157, 158, 159, 160, 160, 161, 162, 163, 163, 164, 165, 166, 167, 167, 168,
 169, 170, 170, 171, 172, 173, 173, 174, 175, 176, 176, 177, 178, 178, 179, 180,
 181, 181, 182, 183, 183, 184, 185, 185, 186, 187, 187, 188, 189, 189, 190, 191,
 192, 192, 193, 193, 194, 195, 195, 196, 197, 197, 198, 199, 199, 200, 201, 201,
 202, 203, 203, 204, 204, 205, 206, 206, 207, 208, 208, 209, 209, 210, 211, 211,
 212, 212, 213, 214, 214, 215, 215, 216, 217, 217, 218, 218, 219, 219, 220, 221,
 221, 222, 222, 223, 224, 224, 225, 225, 226, 226, 227, 227, 228, 229, 229, 230,
 230, 231, 231, 232, 232, 233, 234, 234, 235, 235, 236, 236, 237, 237, 238, 238,
 239, 240, 240, 241, 241, 242, 242, 243, 243, 244, 244, 245, 245, 246, 246, 247,
 247, 248, 248, 249, 249, 250, 250, 251, 251, 252, 252, 253, 253, 254, 254, 255
 };
 /*
  * Integer square root of x, capped at SS_BLOCKSIZE: any x that is at least
  * SS_BLOCKSIZE squared yields SS_BLOCKSIZE.  Smaller values are estimated
  * from sqq_table (refined by one or two averaging steps for large x) and
  * the estimate is lowered by one when its square exceeds x.
  */
 static INLINE
 saidx_t
 ss_isqrt(saidx_t x) {
  saidx_t root, msb;
  if(x >= (SS_BLOCKSIZE * SS_BLOCKSIZE)) { return SS_BLOCKSIZE; }
  /* msb = position of the highest set bit of x (lg_table gives -1 for 0). */
  if(x & 0xffff0000) {
    if(x & 0xff000000) { msb = 24 + lg_table[(x >> 24) & 0xff]; }
    else               { msb = 16 + lg_table[(x >> 16) & 0xff]; }
  } else if(x & 0x0000ff00) {
    msb = 8 + lg_table[(x >>  8) & 0xff];
  } else {
    msb = 0 + lg_table[(x >>  0) & 0xff];
  }
  /* Values below 256 are answered straight from the table. */
  if(msb < 8) { return sqq_table[x] >> 4; }
  if(msb < 16) {
    root = (sqq_table[x >> ((msb - 6) - (msb & 1))] >> (7 - (msb >> 1))) + 1;
  } else {
    root = sqq_table[x >> ((msb - 6) - (msb & 1))] << ((msb >> 1) - 7);
    if(msb >= 24) { root = (root + 1 + x / root) >> 1; }
    root = (root + 1 + x / root) >> 1;
  }
  return (x < (root * root)) ? root - 1 : root;
 }
 #endif /* SS_BLOCKSIZE != 0 */
 /*---------------------------------------------------------------------------*/
 /*
  * Compares two substrings of T byte by byte.
  * The substring for p starts at T + depth + *p and ends (exclusive) at
  * T + *(p + 1) + 2.  Returns the difference of the first differing bytes;
  * if one substring runs out first it orders before the other (-1 / 1), and
  * 0 is returned when both run out together.
  */
 static INLINE
 saint_t
 ss_compare(const sauchar_t *T,
           const saidx_t *p1, const saidx_t *p2,
           saidx_t depth) {
  const sauchar_t *s1 = T + depth + *p1;
  const sauchar_t *s2 = T + depth + *p2;
  const sauchar_t *end1 = T + *(p1 + 1) + 2;
  const sauchar_t *end2 = T + *(p2 + 1) + 2;
  while((s1 < end1) && (s2 < end2) && (*s1 == *s2)) {
    ++s1;
    ++s2;
  }
  if(s1 < end1) {
    return (s2 < end2) ? *s1 - *s2 : 1;
  }
  return (s2 < end2) ? -1 : 0;
 }
 /*---------------------------------------------------------------------------*/
 #if (SS_BLOCKSIZE != 1) && (SS_INSERTIONSORT_THRESHOLD != 1)
 /*
  * Insertion sort of [first, last) using ss_compare() at the given depth.
  * Elements are inserted from the right end towards the left.  When the
  * inserted element compares equal to the entry it lands in front of, that
  * entry is stored bit-complemented (negative); negative entries are moved
  * together with the entry preceding them.
  */
 static
 void
 ss_insertionsort(const sauchar_t *T, const saidx_t *PA,
                 saidx_t *first, saidx_t *last, saidx_t depth) {
  saidx_t *cur, *slot;
  saidx_t item;
  saint_t order;
  for(cur = last - 2; first <= cur; --cur) {
    item = *cur;
    slot = cur + 1;
    for(;;) {
      order = ss_compare(T, PA + item, PA + *slot, depth);
      if(order <= 0) { break; }
      /* Shift the smaller entry, and any negative followers, one to the left. */
      do { *(slot - 1) = *slot; } while((++slot < last) && (*slot < 0));
      if(last <= slot) { break; }
    }
    if(order == 0) { *slot = ~*slot; }
    *(slot - 1) = item;
  }
 }
 #endif /* (SS_BLOCKSIZE != 1) && (SS_INSERTIONSORT_THRESHOLD != 1) */
 /*---------------------------------------------------------------------------*/
 #if (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE)
 /*
  * Sift-down step of a max-heap stored in SA[0..size), keyed by
  * Td[PA[SA[x]]]: the entry at index i sinks until neither child has a
  * larger key.
  */
 static INLINE
 void
 ss_fixdown(const sauchar_t *Td, const saidx_t *PA,
           saidx_t *SA, saidx_t i, saidx_t size) {
  saidx_t child, pick;
  saidx_t held;
  saint_t heldkey, pickkey, sibkey;
  held = SA[i];
  heldkey = Td[PA[held]];
  while((child = 2 * i + 1) < size) {
    pick = child;
    pickkey = Td[PA[SA[child]]];
    /* The right sibling is read unconditionally, as the index 2 * i + 2. */
    ++child;
    sibkey = Td[PA[SA[child]]];
    if(pickkey < sibkey) { pick = child; pickkey = sibkey; }
    if(pickkey <= heldkey) { break; }
    SA[i] = SA[pick];
    i = pick;
  }
  SA[i] = held;
 }
 /*
  * Heapsort of SA[0..size) by the key Td[PA[SA[x]]].
  * The heap is built over an odd number of entries; for an even size the
  * last entry is handled separately before and after heap construction.
  */
 static
 void
 ss_heapsort(const sauchar_t *Td, const saidx_t *PA, saidx_t *SA, saidx_t size) {
  saidx_t i, m;
  saidx_t t;
  saint_t even;
  even = ((size % 2) == 0);
  m = even ? size - 1 : size;
  if(even && (Td[PA[SA[m / 2]]] < Td[PA[SA[m]]])) { SWAP(SA[m], SA[m / 2]); }
  /* Build the heap bottom-up over the first m entries. */
  i = m / 2 - 1;
  while(0 <= i) {
    ss_fixdown(Td, PA, SA, i, m);
    --i;
  }
  if(even) { SWAP(SA[0], SA[m]); ss_fixdown(Td, PA, SA, 0, m); }
  /* Repeatedly move the current maximum behind the shrinking heap. */
  i = m - 1;
  while(0 < i) {
    t = SA[0];
    SA[0] = SA[i];
    ss_fixdown(Td, PA, SA, 0, i);
    SA[i] = t;
    --i;
  }
 }
 /*---------------------------------------------------------------------------*/
 /* Picks, among the three candidates v1, v2 and v3, the one whose key
    Td[PA[*v]] is the median, and returns a pointer to it. */
 static INLINE
 saidx_t *
 ss_median3(const sauchar_t *Td, const saidx_t *PA,
           saidx_t *v1, saidx_t *v2, saidx_t *v3) {
  saidx_t *t; /* presumably the scratch variable the SWAP macro expects */
  /* Order the first pair so that v1 carries the smaller key. */
  if(Td[PA[*v1]] > Td[PA[*v2]]) { SWAP(v1, v2); }
  if(Td[PA[*v2]] <= Td[PA[*v3]]) { return v2; }
  return (Td[PA[*v1]] > Td[PA[*v3]]) ? v1 : v3;
 }
 /* Picks, among the five candidates v1..v5, the one whose key Td[PA[*v]] is
    the median, and returns a pointer to it. */
 static INLINE
 saidx_t *
 ss_median5(const sauchar_t *Td, const saidx_t *PA,
           saidx_t *v1, saidx_t *v2, saidx_t *v3, saidx_t *v4, saidx_t *v5) {
  saidx_t *t; /* presumably the scratch variable the SWAP macro expects */
 #define SS_MEDIAN5_KEY(p) (Td[PA[*(p)]])
  if(SS_MEDIAN5_KEY(v2) > SS_MEDIAN5_KEY(v3)) { SWAP(v2, v3); }
  if(SS_MEDIAN5_KEY(v4) > SS_MEDIAN5_KEY(v5)) { SWAP(v4, v5); }
  if(SS_MEDIAN5_KEY(v2) > SS_MEDIAN5_KEY(v4)) { SWAP(v2, v4); SWAP(v3, v5); }
  if(SS_MEDIAN5_KEY(v1) > SS_MEDIAN5_KEY(v3)) { SWAP(v1, v3); }
  if(SS_MEDIAN5_KEY(v1) > SS_MEDIAN5_KEY(v4)) { SWAP(v1, v4); SWAP(v3, v5); }
  return (SS_MEDIAN5_KEY(v3) > SS_MEDIAN5_KEY(v4)) ? v4 : v3;
 #undef SS_MEDIAN5_KEY
 }
 /*
  * Chooses a pivot inside [first, last) and returns a pointer to it.
  * Up to 32 elements: median of three; up to 512: median of five; beyond
  * that: median of three medians-of-three taken from the front, the middle
  * and the back of the range.
  */
 static INLINE
 saidx_t *
 ss_pivot(const sauchar_t *Td, const saidx_t *PA, saidx_t *first, saidx_t *last) {
  saidx_t *middle, *lo, *mid, *hi;
  saidx_t t;
  t = last - first;
  middle = first + t / 2;
  if(t <= 32) {
    return ss_median3(Td, PA, first, middle, last - 1);
  }
  if(t <= 512) {
    t >>= 2;
    return ss_median5(Td, PA, first, first + t, middle, last - 1 - t, last - 1);
  }
  t >>= 3;
  lo  = ss_median3(Td, PA, first, first + t, first + (t << 1));
  mid = ss_median3(Td, PA, middle - t, middle, middle + t);
  hi  = ss_median3(Td, PA, last - 1 - (t << 1), last - 1 - t, last - 1);
  return ss_median3(Td, PA, lo, mid, hi);
 }
 /*---------------------------------------------------------------------------*/
 /* Binary partition for substrings.
    Splits [first, last) in two: entries x with PA[x] + depth >= PA[x + 1] + 1
    are gathered at the front and stored bit-complemented; all others follow.
    Returns a pointer to the first entry of the second group.  If the front
    group is non-empty its first entry is complemented once more, i.e. it is
    left non-negative.
    NOTE(review): the condition presumably identifies substrings that are
    already exhausted at this depth - confirm against the PA layout. */
 static INLINE
 saidx_t *
 ss_partition(const saidx_t *PA,
                    saidx_t *first, saidx_t *last, saidx_t depth) {
  saidx_t *a, *b;
  saidx_t t;
  for(a = first - 1, b = last;;) {
    /* Advance a over entries that already belong to the front group,
       complementing each one on the way. */
    for(; (++a < b) && ((PA[*a] + depth) >= (PA[*a + 1] + 1));) { *a = ~*a; }
    /* Retreat b over entries that already belong to the back group. */
    for(; (a < --b) && ((PA[*b] + depth) <  (PA[*b + 1] + 1));) { }
    if(b <= a) { break; }
    /* Exchange the misplaced pair; the entry moved to the front is complemented. */
    t = ~*b;
    *b = *a;
    *a = t;
  }
  if(first < a) { *first = ~*first; }
  return a;
 }
 /* Multikey introsort for medium size groups.
    Sorts the entries in [first, last) by the substrings they refer to (through
    PA), comparing one character position (T + depth) at a time.  Recursion is
    replaced by an explicit stack of (first, last, depth, limit) records.
    `limit` starts at ss_ilg(range size) and is decremented per partitioning
    round; when it reaches 0 the range is heap-sorted on the current character
    and then split into runs of equal characters (limit < 0).
    NOTE(review): STACK_PUSH / STACK_POP are defined outside this file section;
    STACK_POP presumably returns from the function when the stack is empty,
    since the main loop has no other exit - confirm. */
 static
 void
 ss_mintrosort(const sauchar_t *T, const saidx_t *PA,
              saidx_t *first, saidx_t *last,
              saidx_t depth) {
 #define STACK_SIZE SS_MISORT_STACKSIZE
  struct { saidx_t *a, *b, c; saint_t d; } stack[STACK_SIZE];
  const sauchar_t *Td;
  saidx_t *a, *b, *c, *d, *e, *f;
  saidx_t s, t;
  saint_t ssize;
  saint_t limit;
  saint_t v, x = 0;
  for(ssize = 0, limit = ss_ilg(last - first);;) {
    /* Small range: finish it with insertion sort and fetch the next job. */
    if((last - first) <= SS_INSERTIONSORT_THRESHOLD) {
 #if 1 < SS_INSERTIONSORT_THRESHOLD
      if(1 < (last - first)) { ss_insertionsort(T, PA, first, last, depth); }
 #endif
      STACK_POP(first, last, depth, limit);
      continue;
    }
    Td = T + depth;
    /* Partitioning budget used up: sort the range on the current character. */
    if(limit-- == 0) { ss_heapsort(Td, PA, first, last - first); }
    if(limit < 0) {
      /* The range is ordered on the current character.  Find the first run
         [first, a) of at least two entries with the same character; single
         entries are skipped by moving `first` forward. */
      for(a = first + 1, v = Td[PA[*first]]; a < last; ++a) {
        if((x = Td[PA[*a]]) != v) {
          if(1 < (a - first)) { break; }
          v = x;
          first = a;
        }
      }
      if(Td[PA[*first] - 1] < v) {
        first = ss_partition(PA, first, a, depth);
      }
      /* Continue with the smaller of [first, a) (one character deeper) and
         [a, last) (same depth); the other one is pushed when it has work left. */
      if((a - first) <= (last - a)) {
        if(1 < (a - first)) {
          STACK_PUSH(a, last, depth, -1);
          last = a, depth += 1, limit = ss_ilg(a - first);
        } else {
          first = a, limit = -1;
        }
      } else {
        if(1 < (last - a)) {
          STACK_PUSH(first, a, depth + 1, ss_ilg(a - first));
          first = a, limit = -1;
        } else {
          last = a, depth += 1, limit = ss_ilg(a - first);
        }
      }
      continue;
    }
    /* choose pivot */
    a = ss_pivot(Td, PA, first, last);
    v = Td[PA[*a]];
    SWAP(*first, *a);
    /* partition */
    /* Three-way split on the pivot character v: entries equal to v are first
       collected at both ends ([first, a) and (d, last)) while b and c close
       in from the left and the right. */
    for(b = first; (++b < last) && ((x = Td[PA[*b]]) == v);) { }
    if(((a = b) < last) && (x < v)) {
      for(; (++b < last) && ((x = Td[PA[*b]]) <= v);) {
        if(x == v) { SWAP(*b, *a); ++a; }
      }
    }
    for(c = last; (b < --c) && ((x = Td[PA[*c]]) == v);) { }
    if((b < (d = c)) && (x > v)) {
      for(; (b < --c) && ((x = Td[PA[*c]]) >= v);) {
        if(x == v) { SWAP(*c, *d); --d; }
      }
    }
    for(; b < c;) {
      SWAP(*b, *c);
      for(; (++b < c) && ((x = Td[PA[*b]]) <= v);) {
        if(x == v) { SWAP(*b, *a); ++a; }
      }
      for(; (b < --c) && ((x = Td[PA[*c]]) >= v);) {
        if(x == v) { SWAP(*c, *d); --d; }
      }
    }
    if(a <= d) {
      /* Swap the equal entries from both ends into the middle, giving
         [first, a) < v, [a, c) == v and [c, last) > v. */
      c = b - 1;
      if((s = a - first) > (t = b - a)) { s = t; }
      for(e = first, f = b - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); }
      if((s = d - c) > (t = last - d - 1)) { s = t; }
      for(e = b, f = last - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); }
      a = first + (b - a), c = last - (d - c);
      /* b marks where the part of the equal group that is sorted further at
         depth + 1 begins. */
      b = (v <= Td[PA[*a] - 1]) ? a : ss_partition(PA, a, c, depth);
      /* Three sub-ranges remain: [first, a) and [c, last) at this depth and
         [b, c) one character deeper.  Two are pushed and one is processed
         next; the order depends on their relative sizes. */
      if((a - first) <= (last - c)) {
        if((last - c) <= (c - b)) {
          STACK_PUSH(b, c, depth + 1, ss_ilg(c - b));
          STACK_PUSH(c, last, depth, limit);
          last = a;
        } else if((a - first) <= (c - b)) {
          STACK_PUSH(c, last, depth, limit);
          STACK_PUSH(b, c, depth + 1, ss_ilg(c - b));
          last = a;
        } else {
          STACK_PUSH(c, last, depth, limit);
          STACK_PUSH(first, a, depth, limit);
          first = b, last = c, depth += 1, limit = ss_ilg(c - b);
        }
      } else {
        if((a - first) <= (c - b)) {
          STACK_PUSH(b, c, depth + 1, ss_ilg(c - b));
          STACK_PUSH(first, a, depth, limit);
          first = c;
        } else if((last - c) <= (c - b)) {
          STACK_PUSH(first, a, depth, limit);
          STACK_PUSH(b, c, depth + 1, ss_ilg(c - b));
          first = c;
        } else {
          STACK_PUSH(first, a, depth, limit);
          STACK_PUSH(c, last, depth, limit);
          first = b, last = c, depth += 1, limit = ss_ilg(c - b);
        }
      }
    } else {
      /* Every entry carries the pivot character: undo the limit decrement
         and move on to the next character position. */
      limit += 1;
      if(Td[PA[*first] - 1] < v) {
        first = ss_partition(PA, first, last, depth);
        limit = ss_ilg(last - first);
      }
      depth += 1;
    }
  }
 #undef STACK_SIZE
 }
 #endif /* (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE) */
 /*---------------------------------------------------------------------------*/
 #if SS_BLOCKSIZE != 0
 /* Exchanges the n elements starting at a with the n elements starting at b,
    pairwise and front to back.  Does nothing when n is not positive. */
 static INLINE
 void
 ss_blockswap(saidx_t *a, saidx_t *b, saidx_t n) {
  saidx_t tmp;
  while(0 < n) {
    tmp = *a;
    *a = *b;
    *b = tmp;
    ++a;
    ++b;
    --n;
  }
 }
 /* In-place rotation of [first, last) around middle, i.e. the blocks
    [first, middle) and [middle, last) trade places.  Equal-sized blocks are
    exchanged with ss_blockswap(); otherwise elements are cycled through a
    single temporary, working from the side of the longer block, and the
    loop repeats on what is still out of place. */
 static INLINE
 void
 ss_rotate(saidx_t *first, saidx_t *middle, saidx_t *last) {
  saidx_t *a, *b, t;
  saidx_t l, r;
  /* l, r: lengths of the left and right blocks still to be rotated. */
  l = middle - first, r = last - middle;
  for(; (0 < l) && (0 < r);) {
    if(l == r) { ss_blockswap(first, middle, l); break; }
    if(l < r) {
      /* Right block is longer: work backwards from the end of the range. */
      a = last - 1, b = middle - 1;
      t = *a;
      do {
        *a-- = *b, *b-- = *a;
        if(b < first) {
          /* One pass over the left block is complete; close the cycle. */
          *a = t;
          last = a;
          if((r -= l + 1) <= l) { break; }
          a -= 1, b = middle - 1;
          t = *a;
        }
      } while(1);
    } else {
      /* Left block is longer: work forwards from the start of the range. */
      a = first, b = middle;
      t = *a;
      do {
        *a++ = *b, *b++ = *a;
        if(last <= b) {
          /* One pass over the right block is complete; close the cycle. */
          *a = t;
          first = a + 1;
          if((l -= r + 1) <= r) { break; }
          a += 1, b = middle;
          t = *a;
        }
      } while(1);
    }
  }
 }
 /*---------------------------------------------------------------------------*/
 /* Merges the two adjacent ranges [first, middle) and [middle, last) without
    an auxiliary buffer.  Repeatedly takes the last entry of the right range,
    binary-searches (ss_compare() at `depth`) for its position in the left
    range and rotates the tail of the left range behind it.
    Entries may be stored bit-complemented (negative); they are decoded with
    ~ before being used as an index into PA.
    NOTE(review): both input ranges are presumably already sorted - confirm. */
 static
 void
 ss_inplacemerge(const sauchar_t *T, const saidx_t *PA,
                saidx_t *first, saidx_t *middle, saidx_t *last,
                saidx_t depth) {
  const saidx_t *p;
  saidx_t *a, *b;
  saidx_t len, half;
  saint_t q, r;
  saint_t x;
  for(;;) {
    /* p: PA slot of the last entry of the right range; x records whether
       that entry was stored complemented. */
    if(*(last - 1) < 0) { x = 1; p = PA + ~*(last - 1); }
    else                { x = 0; p = PA +  *(last - 1); }
    /* Binary search in [first, middle) for the first entry not smaller than
       p; r keeps the comparison result of the last such probe (-1 if none). */
    for(a = first, len = middle - first, half = len >> 1, r = -1;
        0 < len;
        len = half, half >>= 1) {
      b = a + half;
      q = ss_compare(T, PA + ((0 <= *b) ? *b : ~*b), p, depth);
      if(q < 0) {
        a = b + 1;
        half -= (len & 1) ^ 1;
      } else {
        r = q;
      }
    }
    if(a < middle) {
      /* An equal entry was found: flag it by complementing. */
      if(r == 0) { *a = ~*a; }
      ss_rotate(a, middle, last);
      last -= middle - a;
      middle = a;
      if(first == middle) { break; }
    }
    --last;
    /* If the entry just placed was complemented, also step over the negative
       entries in front of it. */
    if(x != 0) { while(*--last < 0) { } }
    if(middle == last) { break; }
  }
 }
 /*---------------------------------------------------------------------------*/
 /* Merge-forward with internal buffer.
    Merges [first, middle) and [middle, last) front to back.  The left range
    is first exchanged with buf via ss_blockswap(), so buf must hold at least
    (middle - first) entries; as entries are written to the output the
    displaced values are written back into buf, so its original contents are
    kept (possibly reordered).  Negative (bit-complemented) entries are copied
    along with the non-negative entry preceding them; on an equal comparison
    the right-hand entry is complemented. */
 static
 void
 ss_mergeforward(const sauchar_t *T, const saidx_t *PA,
                saidx_t *first, saidx_t *middle, saidx_t *last,
                saidx_t *buf, saidx_t depth) {
  saidx_t *a, *b, *c, *bufend;
  saidx_t t;
  saint_t r;
  bufend = buf + (middle - first) - 1;
  ss_blockswap(buf, first, middle - first);
  /* a: output cursor, b: cursor in buf (left run), c: cursor in right run,
     t: value displaced from the first output slot, restored at the end. */
  for(t = *(a = first), b = buf, c = middle;;) {
    r = ss_compare(T, PA + *b, PA + *c, depth);
    if(r < 0) {
      /* Left entry is smaller: emit it and its negative followers. */
      do {
        *a++ = *b;
        if(bufend <= b) { *bufend = t; return; }
        *b++ = *a;
      } while(*b < 0);
    } else if(r > 0) {
      /* Right entry is smaller: emit it and its negative followers. */
      do {
        *a++ = *c, *c++ = *a;
        if(last <= c) {
          /* Right run exhausted: flush what is left in buf. */
          while(b < bufend) { *a++ = *b, *b++ = *a; }
          *a = *b, *b = t;
          return;
        }
      } while(*c < 0);
    } else {
      /* Equal: flag the right entry, then emit the left group followed by
         the right group. */
      *c = ~*c;
      do {
        *a++ = *b;
        if(bufend <= b) { *bufend = t; return; }
        *b++ = *a;
      } while(*b < 0);
      do {
        *a++ = *c, *c++ = *a;
        if(last <= c) {
          while(b < bufend) { *a++ = *b, *b++ = *a; }
          *a = *b, *b = t;
          return;
        }
      } while(*c < 0);
    }
  }
 }
 /* Merge-backward with internal buffer.
    Merges [first, middle) and [middle, last) back to front.  The right range
    is first exchanged with buf via ss_blockswap(), so buf must hold at least
    (last - middle) entries; displaced values are written back into buf as
    the merge proceeds.  p1 / p2 point at the PA slots of the current entries
    of the buffered (right) run and the left run; bit 0 / bit 1 of x record
    that the respective current entry was stored complemented, in which case
    the negative entries are copied first before the flag is cleared. */
 static
 void
 ss_mergebackward(const sauchar_t *T, const saidx_t *PA,
                 saidx_t *first, saidx_t *middle, saidx_t *last,
                 saidx_t *buf, saidx_t depth) {
  const saidx_t *p1, *p2;
  saidx_t *a, *b, *c, *bufend;
  saidx_t t;
  saint_t r;
  saint_t x;
  bufend = buf + (last - middle) - 1;
  ss_blockswap(buf, middle, last - middle);
  x = 0;
  if(*bufend < 0)       { p1 = PA + ~*bufend; x |= 1; }
  else                  { p1 = PA +  *bufend; }
  if(*(middle - 1) < 0) { p2 = PA + ~*(middle - 1); x |= 2; }
  else                  { p2 = PA +  *(middle - 1); }
  /* a: output cursor (moving down), b: cursor in buf, c: cursor in left run,
     t: value displaced from the last output slot, restored at the end. */
  for(t = *(a = last - 1), b = bufend, c = middle - 1;;) {
    r = ss_compare(T, p1, p2, depth);
    if(0 < r) {
      /* Buffered entry is larger: emit it. */
      if(x & 1) { do { *a-- = *b, *b-- = *a; } while(*b < 0); x ^= 1; }
      *a-- = *b;
      if(b <= buf) { *buf = t; break; }
      *b-- = *a;
      if(*b < 0) { p1 = PA + ~*b; x |= 1; }
      else       { p1 = PA +  *b; }
    } else if(r < 0) {
      /* Left entry is larger: emit it. */
      if(x & 2) { do { *a-- = *c, *c-- = *a; } while(*c < 0); x ^= 2; }
      *a-- = *c, *c-- = *a;
      if(c < first) {
        /* Left run exhausted: flush what is left in buf. */
        while(buf < b) { *a-- = *b, *b-- = *a; }
        *a = *b, *b = t;
        break;
      }
      if(*c < 0) { p2 = PA + ~*c; x |= 2; }
      else       { p2 = PA +  *c; }
    } else {
      /* Equal: emit the buffered entry complemented, then the left entry. */
      if(x & 1) { do { *a-- = *b, *b-- = *a; } while(*b < 0); x ^= 1; }
      *a-- = ~*b;
      if(b <= buf) { *buf = t; break; }
      *b-- = *a;
      if(x & 2) { do { *a-- = *c, *c-- = *a; } while(*c < 0); x ^= 2; }
      *a-- = *c, *c-- = *a;
      if(c < first) {
        while(buf < b) { *a-- = *b, *b-- = *a; }
        *a = *b, *b = t;
        break;
      }
      if(*b < 0) { p1 = PA + ~*b; x |= 1; }
      else       { p1 = PA +  *b; }
      if(*c < 0) { p2 = PA + ~*c; x |= 2; }
      else       { p2 = PA +  *c; }
    }
  }
 }
 /* D&C based merge.
    Merges [first, middle) and [middle, last) using a buffer of bufsize
    entries.  When one side fits into the buffer it is merged directly with
    ss_mergebackward() / ss_mergeforward().  Otherwise a binary search finds
    the number m of entries around `middle` that have to change sides, the
    two blocks of m entries are exchanged, and the two resulting sub-merges
    are handled one after the other through an explicit stack.
    `check` / `next` are bit sets consumed by MERGE_CHECK: bit 0 complements
    the first entry of a sub-range unconditionally, bit 1 complements it if it
    compares equal to its predecessor, bit 2 does the same for the entry at
    the end boundary.
    NOTE(review): STACK_PUSH / STACK_POP are defined outside this file section;
    STACK_POP presumably returns when the stack is empty - confirm. */
 static
 void
 ss_swapmerge(const sauchar_t *T, const saidx_t *PA,
             saidx_t *first, saidx_t *middle, saidx_t *last,
             saidx_t *buf, saidx_t bufsize, saidx_t depth) {
 #define STACK_SIZE SS_SMERGE_STACKSIZE
 /* GETIDX: decodes an entry that may be stored bit-complemented. */
 #define GETIDX(a) ((0 <= (a)) ? (a) : (~(a)))
 #define MERGE_CHECK(a, b, c)\
  do {\
    if(((c) & 1) ||\
       (((c) & 2) && (ss_compare(T, PA + GETIDX(*((a) - 1)), PA + *(a), depth) == 0))) {\
      *(a) = ~*(a);\
    }\
    if(((c) & 4) && ((ss_compare(T, PA + GETIDX(*((b) - 1)), PA + *(b), depth) == 0))) {\
      *(b) = ~*(b);\
    }\
  } while(0)
  struct { saidx_t *a, *b, *c; saint_t d; } stack[STACK_SIZE];
  saidx_t *l, *r, *lm, *rm;
  saidx_t m, len, half;
  saint_t ssize;
  saint_t check, next;
  for(check = 0, ssize = 0;;) {
    /* Right side fits into the buffer: merge backward. */
    if((last - middle) <= bufsize) {
      if((first < middle) && (middle < last)) {
        ss_mergebackward(T, PA, first, middle, last, buf, depth);
      }
      MERGE_CHECK(first, last, check);
      STACK_POP(first, middle, last, check);
      continue;
    }
    /* Left side fits into the buffer: merge forward. */
    if((middle - first) <= bufsize) {
      if(first < middle) {
        ss_mergeforward(T, PA, first, middle, last, buf, depth);
      }
      MERGE_CHECK(first, last, check);
      STACK_POP(first, middle, last, check);
      continue;
    }
    /* Binary search for m: the count of entries right of middle that compare
       smaller than their mirror image left of middle. */
    for(m = 0, len = MIN(middle - first, last - middle), half = len >> 1;
        0 < len;
        len = half, half >>= 1) {
      if(ss_compare(T, PA + GETIDX(*(middle + m + half)),
                       PA + GETIDX(*(middle - m - half - 1)), depth) < 0) {
        m += half + 1;
        half -= (len & 1) ^ 1;
      }
    }
    if(0 < m) {
      /* Exchange the m entries on each side of middle, then split the work
         into a left merge (first, lm, l) and a right merge (r, rm, last). */
      lm = middle - m, rm = middle + m;
      ss_blockswap(lm, middle, m);
      l = r = middle, next = 0;
      if(rm < last) {
        if(*rm < 0) {
          *rm = ~*rm;
          if(first < lm) { for(; *--l < 0;) { } next |= 4; }
          next |= 1;
        } else if(first < lm) {
          for(; *r < 0; ++r) { }
          next |= 2;
        }
      }
      /* Process the smaller half next and push the other one. */
      if((l - first) <= (last - r)) {
        STACK_PUSH(r, rm, last, (next & 3) | (check & 4));
        middle = lm, last = l, check = (check & 3) | (next & 4);
      } else {
        if((next & 2) && (r == middle)) { next ^= 6; }
        STACK_PUSH(first, lm, l, (check & 3) | (next & 4));
        first = r, middle = rm, check = (next & 3) | (check & 4);
      }
    } else {
      /* Nothing has to move; only flag an equal pair across the boundary. */
      if(ss_compare(T, PA + GETIDX(*(middle - 1)), PA + *middle, depth) == 0) {
        *middle = ~*middle;
      }
      MERGE_CHECK(first, last, check);
      STACK_POP(first, middle, last, check);
    }
  }
 #undef STACK_SIZE
 }
 #endif /* SS_BLOCKSIZE != 0 */
 /*---------------------------------------------------------------------------*/
 /*- Function -*/
 /* Substring sort
    Sorts the entries in [first, last) by the substrings of T they refer to
    through PA, starting the comparison at character offset `depth`.
      buf, bufsize - caller-supplied merge buffer; when it is too small, the
                     tail of the range itself is used as the buffer instead
      n            - only used to build the end marker (n - 2) for the last
                     suffix comparison below
      lastsuffix   - when non-zero, the entry at `first` is kept out of the
                     sort and inserted afterwards at its proper position
    With SS_BLOCKSIZE == 0 the whole range is sorted by ss_mintrosort();
    otherwise it is sorted in blocks of SS_BLOCKSIZE entries which are then
    merged with ss_swapmerge(). */
 void
 sssort(const sauchar_t *T, const saidx_t *PA,
       saidx_t *first, saidx_t *last,
       saidx_t *buf, saidx_t bufsize,
       saidx_t depth, saidx_t n, saint_t lastsuffix) {
  saidx_t *a;
 #if SS_BLOCKSIZE != 0
  saidx_t *b, *middle, *curbuf;
  saidx_t j, k, curbufsize, limit;
 #endif
  saidx_t i;
  if(lastsuffix != 0) { ++first; }
 #if SS_BLOCKSIZE == 0
  ss_mintrosort(T, PA, first, last, depth);
 #else
  /* If the supplied buffer is smaller than about sqrt(range size), reserve
     the last `limit` entries of the range as buffer; they are sorted and
     merged in separately at the end (limit != 0). */
  if((bufsize < SS_BLOCKSIZE) &&
      (bufsize < (last - first)) &&
      (bufsize < (limit = ss_isqrt(last - first)))) {
    if(SS_BLOCKSIZE < limit) { limit = SS_BLOCKSIZE; }
    buf = middle = last - limit, bufsize = limit;
  } else {
    middle = last, limit = 0;
  }
  /* Sort full blocks; after block i, merge as many equally sized neighbours
     as the trailing 1-bits of i indicate. */
  for(a = first, i = 0; SS_BLOCKSIZE < (middle - a); a += SS_BLOCKSIZE, ++i) {
 #if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE
    ss_mintrosort(T, PA, a, a + SS_BLOCKSIZE, depth);
 #elif 1 < SS_BLOCKSIZE
    ss_insertionsort(T, PA, a, a + SS_BLOCKSIZE, depth);
 #endif
    /* Prefer the still unsorted remainder of the range as merge buffer when
       it is larger than the regular buffer. */
    curbufsize = last - (a + SS_BLOCKSIZE);
    curbuf = a + SS_BLOCKSIZE;
    if(curbufsize <= bufsize) { curbufsize = bufsize, curbuf = buf; }
    for(b = a, k = SS_BLOCKSIZE, j = i; j & 1; b -= k, k <<= 1, j >>= 1) {
      ss_swapmerge(T, PA, b - k, b, b + k, curbuf, curbufsize, depth);
    }
  }
  /* Sort the final, possibly shorter block [a, middle). */
 #if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE
  ss_mintrosort(T, PA, a, middle, depth);
 #elif 1 < SS_BLOCKSIZE
  ss_insertionsort(T, PA, a, middle, depth);
 #endif
  /* Merge the remaining runs, one per set bit of i. */
  for(k = SS_BLOCKSIZE; i != 0; k <<= 1, i >>= 1) {
    if(i & 1) {
      ss_swapmerge(T, PA, a - k, a, middle, buf, bufsize, depth);
      a -= k;
    }
  }
  /* Sort the part that served as buffer and merge it in place. */
  if(limit != 0) {
 #if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE
    ss_mintrosort(T, PA, middle, last, depth);
 #elif 1 < SS_BLOCKSIZE
    ss_insertionsort(T, PA, middle, last, depth);
 #endif
    ss_inplacemerge(T, PA, first, middle, last, depth);
  }
 #endif
  if(lastsuffix != 0) {
    /* Insert last type B* suffix. */
    /* PAi is a two-entry stand-in for the PA slot of the held-back entry:
       its own start position plus n - 2 as the value ss_compare() reads
       from the following slot. */
    saidx_t PAi[2]; PAi[0] = PA[*(first - 1)], PAi[1] = n - 2;
    for(a = first, i = *(first - 1);
        (a < last) && ((*a < 0) || (0 < ss_compare(T, &(PAi[0]), PA + *a, depth)));
        ++a) {
      *(a - 1) = *a;
    }
    *(a - 1) = i;
  }
 }
--- a/Tools/unix/lzsa/src/libdivsufsort/lib/trsort.c
+++ b/Tools/unix/lzsa/src/libdivsufsort/lib/trsort.c
@ -0,0 +1,586 @@
 /*
 * trsort.c for libdivsufsort
 * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person
 * obtaining a copy of this software and associated documentation
 * files (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use,
 * copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following
 * conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */
 #include "divsufsort_private.h"
 /*- Private Functions -*/
 /* Logarithm lookup table: lg_table[i] == floor(log2(i)) for 1 <= i <= 255,
    and lg_table[0] == -1.  Used by tr_ilg(). */
 static const saint_t lg_table[256]= {
 -1,0,1,1,2,2,2,2,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
  5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
  6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
  6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
  7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
  7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
  7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
  7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7
 };
 /*
  * Integer base-2 logarithm of n, assembled from lg_table one byte at a time.
  * With BUILD_DIVSUFSORT64 the upper 32 bits are examined first.
  */
 static INLINE
 saint_t
 tr_ilg(saidx_t n) {
 #if defined(BUILD_DIVSUFSORT64)
  /* Upper 32 bits first; otherwise fall through to the shared 32-bit path. */
  if(n >> 32) {
    if(n >> 48) {
      if(n >> 56) { return 56 + lg_table[(n >> 56) & 0xff]; }
      return 48 + lg_table[(n >> 48) & 0xff];
    }
    if(n >> 40) { return 40 + lg_table[(n >> 40) & 0xff]; }
    return 32 + lg_table[(n >> 32) & 0xff];
  }
 #endif
  if(n & 0xffff0000) {
    if(n & 0xff000000) { return 24 + lg_table[(n >> 24) & 0xff]; }
    return 16 + lg_table[(n >> 16) & 0xff];
  }
  if(n & 0x0000ff00) { return 8 + lg_table[(n >>  8) & 0xff]; }
  return 0 + lg_table[(n >>  0) & 0xff];
 }
 /*---------------------------------------------------------------------------*/
 /*
  * Insertion sort of [first, last) by the key ISAd[x].
  * Elements are inserted from left to right.  When the inserted element has
  * the same key as the entry it lands behind, that entry is stored
  * bit-complemented (negative); negative entries are moved together with
  * the entry following them.
  */
 static
 void
 tr_insertionsort(const saidx_t *ISAd, saidx_t *first, saidx_t *last) {
  saidx_t *cur, *slot;
  saidx_t item, diff;
  for(cur = first + 1; cur < last; ++cur) {
    item = *cur;
    slot = cur - 1;
    for(;;) {
      diff = ISAd[item] - ISAd[*slot];
      if(0 <= diff) { break; }
      /* Shift the larger entry, and any negative predecessors, one to the right. */
      do { *(slot + 1) = *slot; } while((first <= --slot) && (*slot < 0));
      if(slot < first) { break; }
    }
    if(diff == 0) { *slot = ~*slot; }
    *(slot + 1) = item;
  }
 }
 /*---------------------------------------------------------------------------*/
 /*
  * Sift-down step of a max-heap stored in SA[0..size), keyed by ISAd[SA[x]]:
  * the entry at index i sinks until neither child has a larger key.
  */
 static INLINE
 void
 tr_fixdown(const saidx_t *ISAd, saidx_t *SA, saidx_t i, saidx_t size) {
  saidx_t child, pick;
  saidx_t held;
  saidx_t heldkey, pickkey, sibkey;
  held = SA[i];
  heldkey = ISAd[held];
  while((child = 2 * i + 1) < size) {
    pick = child;
    pickkey = ISAd[SA[child]];
    /* The right sibling is read unconditionally, as the index 2 * i + 2. */
    ++child;
    sibkey = ISAd[SA[child]];
    if(pickkey < sibkey) { pick = child; pickkey = sibkey; }
    if(pickkey <= heldkey) { break; }
    SA[i] = SA[pick];
    i = pick;
  }
  SA[i] = held;
 }
 /*
  * Heapsort of SA[0..size) by the key ISAd[SA[x]].
  * The heap is built over an odd number of entries; for an even size the
  * last entry is handled separately before and after heap construction.
  */
 static
 void
 tr_heapsort(const saidx_t *ISAd, saidx_t *SA, saidx_t size) {
  saidx_t i, m;
  saidx_t t;
  saint_t even;
  even = ((size % 2) == 0);
  m = even ? size - 1 : size;
  if(even && (ISAd[SA[m / 2]] < ISAd[SA[m]])) { SWAP(SA[m], SA[m / 2]); }
  /* Build the heap bottom-up over the first m entries. */
  i = m / 2 - 1;
  while(0 <= i) {
    tr_fixdown(ISAd, SA, i, m);
    --i;
  }
  if(even) { SWAP(SA[0], SA[m]); tr_fixdown(ISAd, SA, 0, m); }
  /* Repeatedly move the current maximum behind the shrinking heap. */
  i = m - 1;
  while(0 < i) {
    t = SA[0];
    SA[0] = SA[i];
    tr_fixdown(ISAd, SA, 0, i);
    SA[i] = t;
    --i;
  }
 }
 /*---------------------------------------------------------------------------*/
 /* Picks, among the three candidates v1, v2 and v3, the one whose key
    ISAd[*v] is the median, and returns a pointer to it. */
 static INLINE
 saidx_t *
 tr_median3(const saidx_t *ISAd, saidx_t *v1, saidx_t *v2, saidx_t *v3) {
  saidx_t *t; /* presumably the scratch variable the SWAP macro expects */
  /* Order the first pair so that v1 carries the smaller key. */
  if(ISAd[*v1] > ISAd[*v2]) { SWAP(v1, v2); }
  if(ISAd[*v2] <= ISAd[*v3]) { return v2; }
  return (ISAd[*v1] > ISAd[*v3]) ? v1 : v3;
 }
 /* Picks, among the five candidates v1..v5, the one whose key ISAd[*v] is
    the median, and returns a pointer to it. */
 static INLINE
 saidx_t *
 tr_median5(const saidx_t *ISAd,
           saidx_t *v1, saidx_t *v2, saidx_t *v3, saidx_t *v4, saidx_t *v5) {
  saidx_t *t; /* presumably the scratch variable the SWAP macro expects */
 #define TR_MEDIAN5_KEY(p) (ISAd[*(p)])
  if(TR_MEDIAN5_KEY(v2) > TR_MEDIAN5_KEY(v3)) { SWAP(v2, v3); }
  if(TR_MEDIAN5_KEY(v4) > TR_MEDIAN5_KEY(v5)) { SWAP(v4, v5); }
  if(TR_MEDIAN5_KEY(v2) > TR_MEDIAN5_KEY(v4)) { SWAP(v2, v4); SWAP(v3, v5); }
  if(TR_MEDIAN5_KEY(v1) > TR_MEDIAN5_KEY(v3)) { SWAP(v1, v3); }
  if(TR_MEDIAN5_KEY(v1) > TR_MEDIAN5_KEY(v4)) { SWAP(v1, v4); SWAP(v3, v5); }
  return (TR_MEDIAN5_KEY(v3) > TR_MEDIAN5_KEY(v4)) ? v4 : v3;
 #undef TR_MEDIAN5_KEY
 }
 /*
  * Chooses a pivot inside [first, last) and returns a pointer to it.
  * Up to 32 elements: median of three; up to 512: median of five; beyond
  * that: median of three medians-of-three taken from the front, the middle
  * and the back of the range.
  */
 static INLINE
 saidx_t *
 tr_pivot(const saidx_t *ISAd, saidx_t *first, saidx_t *last) {
  saidx_t *middle, *lo, *mid, *hi;
  saidx_t t;
  t = last - first;
  middle = first + t / 2;
  if(t <= 32) {
    return tr_median3(ISAd, first, middle, last - 1);
  }
  if(t <= 512) {
    t >>= 2;
    return tr_median5(ISAd, first, first + t, middle, last - 1 - t, last - 1);
  }
  t >>= 3;
  lo  = tr_median3(ISAd, first, first + t, first + (t << 1));
  mid = tr_median3(ISAd, middle - t, middle, middle + t);
  hi  = tr_median3(ISAd, last - 1 - (t << 1), last - 1 - t, last - 1);
  return tr_median3(ISAd, lo, mid, hi);
 }
 /*---------------------------------------------------------------------------*/
 /* Work budget consulted through trbudget_check(). */
 typedef struct _trbudget_t trbudget_t;
 struct _trbudget_t {
  saidx_t chance;  /* number of times `remain` may still be replenished */
  saidx_t remain;  /* budget left; reduced by the size of each accepted request */
  saidx_t incval;  /* amount added to `remain` each time a chance is spent */
  saidx_t count;   /* running total of the sizes of refused requests */
 };
 /*
  * Initializes a budget.
  *   chance - number of times the budget may be replenished
  *   incval - initial budget and the amount added per replenishment
  * The refusal counter is reset to zero.
  */
 static INLINE
 void
 trbudget_init(trbudget_t *budget, saidx_t chance, saidx_t incval) {
  budget->chance = chance;
  budget->remain = budget->incval = incval;
  /* trbudget_check() does `count += size`; start from a defined value so a
     caller that forgets to reset the counter never reads an indeterminate
     one. */
  budget->count = 0;
 }
 /*
  * Asks the budget for `size` units of work.
  * Returns 1 when the request is granted (from the remaining budget, or by
  * spending one chance to replenish it) and 0 when it is refused; the sizes
  * of refused requests accumulate in budget->count.
  */
 static INLINE
 saint_t
 trbudget_check(trbudget_t *budget, saidx_t size) {
  if(budget->remain < size) {
    /* Not enough left: either refuse or spend a chance to top up. */
    if(budget->chance == 0) {
      budget->count += size;
      return 0;
    }
    budget->remain += budget->incval - size;
    budget->chance -= 1;
    return 1;
  }
  budget->remain -= size;
  return 1;
 }
 /*---------------------------------------------------------------------------*/
 /* Three-way partition of [first, last) by the key ISAd[x] around the value v.
    Scanning starts at `middle`; entries in [first, middle) are not examined
    (NOTE(review): presumably the caller guarantees they already equal v -
    confirm).  On return *pa and *pb delimit the entries equal to v: keys
    smaller than v lie in [first, *pa) and keys larger than v in [*pb, last).
    If no smaller/larger entry separates the range, *pa and *pb are the
    original first and last. */
 static INLINE
 void
 tr_partition(const saidx_t *ISAd,
             saidx_t *first, saidx_t *middle, saidx_t *last,
             saidx_t **pa, saidx_t **pb, saidx_t v) {
  saidx_t *a, *b, *c, *d, *e, *f;
  saidx_t t, s;
  saidx_t x = 0;
  /* Entries equal to v are first collected at both ends ([first, a) and
     (d, last)) while b and c close in from the left and the right. */
  for(b = middle - 1; (++b < last) && ((x = ISAd[*b]) == v);) { }
  if(((a = b) < last) && (x < v)) {
    for(; (++b < last) && ((x = ISAd[*b]) <= v);) {
      if(x == v) { SWAP(*b, *a); ++a; }
    }
  }
  for(c = last; (b < --c) && ((x = ISAd[*c]) == v);) { }
  if((b < (d = c)) && (x > v)) {
    for(; (b < --c) && ((x = ISAd[*c]) >= v);) {
      if(x == v) { SWAP(*c, *d); --d; }
    }
  }
  for(; b < c;) {
    SWAP(*b, *c);
    for(; (++b < c) && ((x = ISAd[*b]) <= v);) {
      if(x == v) { SWAP(*b, *a); ++a; }
    }
    for(; (b < --c) && ((x = ISAd[*c]) >= v);) {
      if(x == v) { SWAP(*c, *d); --d; }
    }
  }
  if(a <= d) {
    /* Swap the equal entries from both ends into the middle and shrink
       first/last to the bounds of the equal group. */
    c = b - 1;
    if((s = a - first) > (t = b - a)) { s = t; }
    for(e = first, f = b - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); }
    if((s = d - c) > (t = last - d - 1)) { s = t; }
    for(e = b, f = last - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); }
    first += (b - a), last -= (d - c);
  }
  *pa = first, *pb = last;
 }
 /* Fill the middle partition [a, b) from the already ordered left
    partition [first, a) and right partition [b, last).
    Every entry p whose predecessor p - depth is non-negative and currently
    ranked b - SA - 1 contributes that predecessor to the middle: the
    forward pass appends after a - 1, the backward pass prepends before b.
    Each placed suffix has its ISA entry set to its new position in SA. */
 static
 void
 tr_copy(saidx_t *ISA, const saidx_t *SA,
        saidx_t *first, saidx_t *a, saidx_t *b, saidx_t *last,
        saidx_t depth) {
  saidx_t *scan, *fill, *stop;
  saidx_t pos;
  const saidx_t target = b - SA - 1;
  /* forward pass: the scan also walks the entries it has just appended */
  fill = a - 1;
  for(scan = first; scan <= fill; ++scan) {
    pos = *scan - depth;
    if(pos < 0) { continue; }
    if(ISA[pos] != target) { continue; }
    ++fill;
    *fill = pos;
    ISA[pos] = fill - SA;
  }
  /* backward pass: runs until the gap left by the forward pass is closed */
  stop = fill + 1;
  fill = b;
  for(scan = last - 1; stop < fill; --scan) {
    pos = *scan - depth;
    if(pos < 0) { continue; }
    if(ISA[pos] != target) { continue; }
    --fill;
    *fill = pos;
    ISA[pos] = fill - SA;
  }
 }
 /* Variant of tr_copy() that does not assign each placed suffix its own
    rank: consecutive placed suffixes whose ISA[s + depth] values are equal
    share one rank, namely the SA position at which their run was first
    encountered in the direction of the pass.
    Three passes: forward fill after a - 1, a backward re-ranking of
    everything from the forward fill point down to `first`, and a backward
    fill before b. */
 static
 void
 tr_partialcopy(saidx_t *ISA, const saidx_t *SA,
               saidx_t *first, saidx_t *a, saidx_t *b, saidx_t *last,
               saidx_t depth) {
  saidx_t *scan, *fill, *stop;
  saidx_t pos, key, prevkey, group = -1;
  const saidx_t target = b - SA - 1;
  /* pass 1: forward fill */
  prevkey = -1;
  fill = a - 1;
  for(scan = first; scan <= fill; ++scan) {
    pos = *scan - depth;
    if((pos < 0) || (ISA[pos] != target)) { continue; }
    ++fill;
    *fill = pos;
    key = ISA[pos + depth];
    if(key != prevkey) { prevkey = key; group = fill - SA; }
    ISA[pos] = group;
  }
  /* pass 2: walk back over [first, fill] and give each run of equal ranks
     the position of its last element */
  prevkey = -1;
  for(stop = fill; first <= stop; --stop) {
    key = ISA[*stop];
    if(key != prevkey) { prevkey = key; group = stop - SA; }
    if(key != group) { ISA[*stop] = group; }
  }
  /* pass 3: backward fill until the remaining gap is closed */
  prevkey = -1;
  stop = fill + 1;
  fill = b;
  for(scan = last - 1; stop < fill; --scan) {
    pos = *scan - depth;
    if((pos < 0) || (ISA[pos] != target)) { continue; }
    --fill;
    *fill = pos;
    key = ISA[pos + depth];
    if(key != prevkey) { prevkey = key; group = fill - SA; }
    ISA[pos] = group;
  }
 }
 /* Iterative (explicit stack) introsort of the suffix range [first, last)
    of SA, keyed by ISAd[*p], refining the ranks stored in ISA.
    Each work item carries five values: key array, range start, range end,
    limit and trlink. A non-negative limit is the number of partitioning
    rounds left before falling back to heapsort; a negative limit selects
    a special phase:
      -1        tandem repeat partition
      -2        tandem repeat copy
      otherwise the range is already ordered by the current key
                (this function stores -3 for that state)
    Before descending one level (ISAd + incr) the budget is consulted via
    trbudget_check(); when it refuses, the pending copy item referenced by
    trlink is flagged with d = -1, which makes the -2 phase use
    tr_partialcopy() instead of tr_copy().
    NOTE(review): STACK_PUSH5/STACK_POP5, SWAP, tr_ilg, tr_pivot,
    tr_heapsort and tr_insertionsort are defined outside this view.
    STACK_POP5 is presumably what leaves the for(;;) loop once the stack
    is empty, and `t` is presumably the scratch variable of SWAP -- confirm
    against the macro definitions. */
 static
 void
 tr_introsort(saidx_t *ISA, const saidx_t *ISAd,
             saidx_t *SA, saidx_t *first, saidx_t *last,
             trbudget_t *budget) {
 #define STACK_SIZE TR_STACKSIZE
  struct { const saidx_t *a; saidx_t *b, *c; saint_t d, e; }stack[STACK_SIZE];
  saidx_t *a, *b, *c;
  saidx_t t;
  saidx_t v, x = 0;
  /* distance between the rank array and the key array; added to ISAd
     each time the sort descends one level */
  saidx_t incr = ISAd - ISA;
  saint_t limit, next;
  saint_t ssize, trlink = -1;
  for(ssize = 0, limit = tr_ilg(last - first);;) {
    if(limit < 0) {
      if(limit == -1) {
        /* tandem repeat partition */
        tr_partition(ISAd - incr, first, first, last, &a, &b, last - SA - 1);
        /* update ranks */
        if(a < last) {
          for(c = first, v = a - SA - 1; c < a; ++c) { ISA[*c] = v; }
        }
        if(b < last) {
          for(c = a, v = b - SA - 1; c < b; ++c) { ISA[*c] = v; }
        }
        /* push */
        if(1 < (b - a)) {
          /* two entries: the middle bounds (d = 0 selects tr_copy unless
             flagged later) and the -2 item that triggers the copy phase */
          STACK_PUSH5(NULL, a, b, 0, 0);
          STACK_PUSH5(ISAd - incr, first, last, -2, trlink);
          trlink = ssize - 2;
        }
        /* continue with the smaller side, defer the larger one */
        if((a - first) <= (last - b)) {
          if(1 < (a - first)) {
            STACK_PUSH5(ISAd, b, last, tr_ilg(last - b), trlink);
            last = a, limit = tr_ilg(a - first);
          } else if(1 < (last - b)) {
            first = b, limit = tr_ilg(last - b);
          } else {
            STACK_POP5(ISAd, first, last, limit, trlink);
          }
        } else {
          if(1 < (last - b)) {
            STACK_PUSH5(ISAd, first, a, tr_ilg(a - first), trlink);
            first = b, limit = tr_ilg(last - b);
          } else if(1 < (a - first)) {
            last = a, limit = tr_ilg(a - first);
          } else {
            STACK_POP5(ISAd, first, last, limit, trlink);
          }
        }
      } else if(limit == -2) {
        /* tandem repeat copy */
        /* fetch the middle bounds pushed together with this item */
        a = stack[--ssize].b, b = stack[ssize].c;
        if(stack[ssize].d == 0) {
          tr_copy(ISA, SA, first, a, b, last, ISAd - ISA);
        } else {
          /* the entry was flagged: propagate the flag outwards and fall
             back to the partial copy */
          if(0 <= trlink) { stack[trlink].d = -1; }
          tr_partialcopy(ISA, SA, first, a, b, last, ISAd - ISA);
        }
        STACK_POP5(ISAd, first, last, limit, trlink);
      } else {
        /* sorted partition */
        if(0 <= *first) {
          /* leading non-negative entries: each gets its own position as rank */
          a = first;
          do { ISA[*a] = a - SA; } while((++a < last) && (0 <= *a));
          first = a;
        }
        if(first < last) {
          /* a run of negative (bit-complemented) entries followed by one
             non-negative entry forms a group: restore the entries */
          a = first; do { *a = ~*a; } while(*++a < 0);
          next = (ISA[*a] != ISAd[*a]) ? tr_ilg(a - first + 1) : -1;
          if(++a < last) { for(b = first, v = a - SA - 1; b < a; ++b) { ISA[*b] = v; } }
          /* push */
          if(trbudget_check(budget, a - first)) {
            if((a - first) <= (last - a)) {
              STACK_PUSH5(ISAd, a, last, -3, trlink);
              ISAd += incr, last = a, limit = next;
            } else {
              if(1 < (last - a)) {
                STACK_PUSH5(ISAd + incr, first, a, next, trlink);
                first = a, limit = -3;
              } else {
                ISAd += incr, last = a, limit = next;
              }
            }
          } else {
            /* budget refused: flag the pending copy item, skip the group */
            if(0 <= trlink) { stack[trlink].d = -1; }
            if(1 < (last - a)) {
              first = a, limit = -3;
            } else {
              STACK_POP5(ISAd, first, last, limit, trlink);
            }
          }
        } else {
          STACK_POP5(ISAd, first, last, limit, trlink);
        }
      }
      continue;
    }
    /* small ranges: insertion sort, then handle as a sorted partition */
    if((last - first) <= TR_INSERTIONSORT_THRESHOLD) {
      tr_insertionsort(ISAd, first, last);
      limit = -3;
      continue;
    }
    /* round limit exhausted: heapsort, then bit-complement every entry of a
       run of equal keys except the last one so the sorted-partition phase
       can recognise the groups */
    if(limit-- == 0) {
      tr_heapsort(ISAd, first, last - first);
      for(a = last - 1; first < a; a = b) {
        for(x = ISAd[*a], b = a - 1; (first <= b) && (ISAd[*b] == x); --b) { *b = ~*b; }
      }
      limit = -3;
      continue;
    }
    /* choose pivot */
    a = tr_pivot(ISAd, first, last);
    SWAP(*first, *a);
    v = ISAd[*first];
    /* partition */
    tr_partition(ISAd, first, first + 1, last, &a, &b, v);
    if((last - first) != (b - a)) {
      /* limit for the equal part one level deeper; -1 requests the
         tandem repeat phase */
      next = (ISA[*a] != v) ? tr_ilg(b - a) : -1;
      /* update ranks */
      for(c = first, v = a - SA - 1; c < a; ++c) { ISA[*c] = v; }
      if(b < last) { for(c = a, v = b - SA - 1; c < b; ++c) { ISA[*c] = v; } }
      /* push */
      /* the three parts are [first, a), [a, b) and [b, last); the branches
         below order the pushes by the relative sizes of the parts, and the
         equal part [a, b) always continues with ISAd + incr */
      if((1 < (b - a)) && (trbudget_check(budget, b - a))) {
        if((a - first) <= (last - b)) {
          if((last - b) <= (b - a)) {
            if(1 < (a - first)) {
              STACK_PUSH5(ISAd + incr, a, b, next, trlink);
              STACK_PUSH5(ISAd, b, last, limit, trlink);
              last = a;
            } else if(1 < (last - b)) {
              STACK_PUSH5(ISAd + incr, a, b, next, trlink);
              first = b;
            } else {
              ISAd += incr, first = a, last = b, limit = next;
            }
          } else if((a - first) <= (b - a)) {
            if(1 < (a - first)) {
              STACK_PUSH5(ISAd, b, last, limit, trlink);
              STACK_PUSH5(ISAd + incr, a, b, next, trlink);
              last = a;
            } else {
              STACK_PUSH5(ISAd, b, last, limit, trlink);
              ISAd += incr, first = a, last = b, limit = next;
            }
          } else {
            STACK_PUSH5(ISAd, b, last, limit, trlink);
            STACK_PUSH5(ISAd, first, a, limit, trlink);
            ISAd += incr, first = a, last = b, limit = next;
          }
        } else {
          if((a - first) <= (b - a)) {
            if(1 < (last - b)) {
              STACK_PUSH5(ISAd + incr, a, b, next, trlink);
              STACK_PUSH5(ISAd, first, a, limit, trlink);
              first = b;
            } else if(1 < (a - first)) {
              STACK_PUSH5(ISAd + incr, a, b, next, trlink);
              last = a;
            } else {
              ISAd += incr, first = a, last = b, limit = next;
            }
          } else if((last - b) <= (b - a)) {
            if(1 < (last - b)) {
              STACK_PUSH5(ISAd, first, a, limit, trlink);
              STACK_PUSH5(ISAd + incr, a, b, next, trlink);
              first = b;
            } else {
              STACK_PUSH5(ISAd, first, a, limit, trlink);
              ISAd += incr, first = a, last = b, limit = next;
            }
          } else {
            STACK_PUSH5(ISAd, first, a, limit, trlink);
            STACK_PUSH5(ISAd, b, last, limit, trlink);
            ISAd += incr, first = a, last = b, limit = next;
          }
        }
      } else {
        /* equal part is trivial or the budget refused it: only the outer
           parts are processed; a refused non-trivial part flags trlink */
        if((1 < (b - a)) && (0 <= trlink)) { stack[trlink].d = -1; }
        if((a - first) <= (last - b)) {
          if(1 < (a - first)) {
            STACK_PUSH5(ISAd, b, last, limit, trlink);
            last = a;
          } else if(1 < (last - b)) {
            first = b;
          } else {
            STACK_POP5(ISAd, first, last, limit, trlink);
          }
        } else {
          if(1 < (last - b)) {
            STACK_PUSH5(ISAd, first, a, limit, trlink);
            first = b;
          } else if(1 < (a - first)) {
            last = a;
          } else {
            STACK_POP5(ISAd, first, last, limit, trlink);
          }
        }
      }
    } else {
      /* every key in the range equals the pivot: descend one level on the
         whole range if the budget allows, otherwise give up on it */
      if(trbudget_check(budget, last - first)) {
        limit = tr_ilg(last - first), ISAd += incr;
      } else {
        if(0 <= trlink) { stack[trlink].d = -1; }
        STACK_POP5(ISAd, first, last, limit, trlink);
      }
    }
  }
 #undef STACK_SIZE
 }
 /*---------------------------------------------------------------------------*/
 /*- Function -*/
 /* Tandem repeat sort.
    Repeatedly sweeps SA, calling tr_introsort() on every group of
    suffixes that is still unsorted, with the comparison depth
    (ISAd - ISA) doubling after each sweep. Runs of sorted entries are
    recorded in SA as a negative length at the start of the run so later
    sweeps can jump over them. The sweeps stop when *SA <= -n or when a
    sweep leaves nothing unsorted.
    The budget is created with tr_ilg(n) * 2 / 3 refills of n units each. */
 void
 trsort(saidx_t *ISA, saidx_t *SA, saidx_t n, saidx_t depth) {
  trbudget_t budget;
  saidx_t *ISAd = ISA + depth;
  saidx_t *cur, *stop;
  saidx_t head, run, pending;
  trbudget_init(&budget, tr_ilg(n) * 2 / 3, n);
  while(-n < *SA) {
    cur = SA;
    run = 0;
    pending = 0;
    do {
      head = *cur;
      if(head < 0) {
        /* already sorted run of -head entries: step over it and extend
           the run being accumulated */
        cur -= head;
        run += head;
      } else {
        /* flush the accumulated run length to the start of that run */
        if(run != 0) {
          *(cur + run) = run;
          run = 0;
        }
        stop = SA + ISA[head] + 1;
        if(1 < (stop - cur)) {
          budget.count = 0;
          tr_introsort(ISA, ISAd, SA, cur, stop, &budget);
          if(budget.count == 0) {
            /* fully sorted: the whole group starts a new sorted run */
            run = cur - stop;
          } else {
            pending += budget.count;
          }
        } else if((stop - cur) == 1) {
          run = -1;
        }
        cur = stop;
      }
    } while(cur < (SA + n));
    if(run != 0) { *(cur + run) = run; }
    if(pending == 0) { break; }
    ISAd += ISAd - ISA;
  }
 }
--- a/Tools/unix/lzsa/src/libdivsufsort/pkgconfig/CMakeLists.txt
+++ b/Tools/unix/lzsa/src/libdivsufsort/pkgconfig/CMakeLists.txt
@ -0,0 +1,9 @@
 ## generate libdivsufsort.pc ##
 # W64BIT is a suffix substituted into the template: empty for the default
 # library, "64" for the 64-bit variant below.
 set(W64BIT "")
 # Render the template (only the at-sign delimited placeholders are
 # replaced) and install the result into the pkg-config directory.
 configure_file("${CMAKE_CURRENT_SOURCE_DIR}/libdivsufsort.pc.cmake" "${CMAKE_CURRENT_BINARY_DIR}/libdivsufsort.pc" @ONLY)
 install(FILES "${CMAKE_CURRENT_BINARY_DIR}/libdivsufsort.pc" DESTINATION ${CMAKE_INSTALL_PKGCONFIGDIR})
 # When the 64-bit build is enabled, render the same template a second time
 # with the "64" suffix and install it next to the first file.
 if(BUILD_DIVSUFSORT64)
  set(W64BIT "64")
  configure_file("${CMAKE_CURRENT_SOURCE_DIR}/libdivsufsort.pc.cmake" "${CMAKE_CURRENT_BINARY_DIR}/libdivsufsort64.pc" @ONLY)
  install(FILES "${CMAKE_CURRENT_BINARY_DIR}/libdivsufsort64.pc" DESTINATION ${CMAKE_INSTALL_PKGCONFIGDIR})
 endif(BUILD_DIVSUFSORT64)
--- a/Tools/unix/lzsa/src/libdivsufsort/pkgconfig/libdivsufsort.pc.cmake
+++ b/Tools/unix/lzsa/src/libdivsufsort/pkgconfig/libdivsufsort.pc.cmake
@ -0,0 +1,11 @@
 # pkg-config template. Placeholders delimited by at-signs are filled in by
 # CMake; dollar-brace references are left untouched and resolved later by
 # pkg-config itself.
 prefix=@CMAKE_INSTALL_PREFIX@
 exec_prefix=${prefix}
 # NOTE(review): libdir and includedir are taken verbatim from the CMake
 # install variables; if those are relative paths the resulting entries are
 # relative too -- confirm they are set as absolute paths by the build.
 libdir=@CMAKE_INSTALL_LIBDIR@
 includedir=@CMAKE_INSTALL_INCLUDEDIR@
 # The W64BIT suffix is empty for the default library and "64" for the
 # 64-bit variant, so both the package name and the linked library follow it.
 Name: @PROJECT_NAME@@W64BIT@
 Description: @PROJECT_DESCRIPTION@
 Version: @PROJECT_VERSION_FULL@
 URL: @PROJECT_URL@
 Libs: -L${libdir} -ldivsufsort@W64BIT@
 Cflags: -I${includedir}
--- a/Tools/unix/lzsa/src/lzsa.c
+++ b/Tools/unix/lzsa/src/lzsa.c
--- a/Tools/unix/lzsa/src/matchfinder.c
+++ b/Tools/unix/lzsa/src/matchfinder.c
@ -0,0 +1,361 @@
 /*
 * matchfinder.c - LZ match finder implementation
 *
 * The following copying information applies to this specific source code file:
 *
 * Written in 2019 by Emmanuel Marty <marty.emmanuel@gmail.com>
 * Portions written in 2014-2015 by Eric Biggers <ebiggers3@gmail.com>
 *
 * To the extent possible under law, the author(s) have dedicated all copyright
 * and related and neighboring rights to this software to the public domain
 * worldwide via the Creative Commons Zero 1.0 Universal Public Domain
 * Dedication (the "CC0").
 *
 * This software is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 * FOR A PARTICULAR PURPOSE. See the CC0 for more details.
 *
 * You should have received a copy of the CC0 along with this software; if not
 * see <http://creativecommons.org/publicdomain/zero/1.0/>.
 */
 /*
 * Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
 *
 * Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
 * With help, ideas, optimizations and speed measurements by spke <zxintrospec@gmail.com>
 * With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
 * Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
 *
 */
 #include <stdlib.h>
 #include <string.h>
 #include "matchfinder.h"
 #include "format.h"
 #include "lib.h"
 /**
 * Derive a TAG_BITS-wide hash tag from a window index
 *
 * The index is multiplied by a 64-bit odd constant and the topmost
 * TAG_BITS bits of the product are kept (multiplicative hashing).
 *
 * @param nIndex index value
 *
 * @return hash, in the range 0 .. (1 << TAG_BITS) - 1
 */
 static inline int lzsa_get_index_tag(unsigned int nIndex) {
    const unsigned long long nProduct = (unsigned long long)nIndex * 11400714819323198485ULL;
    const unsigned long long nTag = nProduct >> (64ULL - TAG_BITS);
    return (int)nTag;
 }
 /**
 * Parse input data, build suffix array and overlaid data structures to speed up match finding
 *
 * Steps, all performed in place on the compressor's buffers:
 *  1. build the suffix array of the window into pCompressor->intervals;
 *  2. compute the permuted LCP array, using pCompressor->pos_data as scratch space;
 *  3. pack each suffix array entry together with its (clamped) LCP value;
 *  4. convert the packed array into lcp-intervals linked to their parents, and
 *     point every position's pos_data entry at its deepest interval.
 *
 * @param pCompressor compression context
 * @param pInWindow pointer to input data window (previously compressed bytes + bytes to compress)
 * @param nInWindowSize total input size in bytes (previously compressed bytes + bytes to compress)
 *
 * @return 0 for success, non-zero for failure (100 if the suffix array could not be built)
 */
 int lzsa_build_suffix_array(lzsa_compressor *pCompressor, const unsigned char *pInWindow, const int nInWindowSize) {
    unsigned int *intervals = pCompressor->intervals;
    /* Build suffix array from input data */
    if (divsufsort_build_array(&pCompressor->divsufsort_context, pInWindow, (saidx_t*)intervals, nInWindowSize) != 0) {
       return 100;
    }
    int *PLCP = (int*)pCompressor->pos_data;  /* Use temporarily */
    /* Phi and PLCP share the same storage: each Phi[i] is read before PLCP[i] overwrites it */
    int *Phi = PLCP;
    int nCurLen = 0;
    int i;
    /* Compute the permuted LCP first (Kärkkäinen method) */
    /* Phi[p] = position of the suffix that precedes suffix p in suffix array order, -1 for the first one */
    Phi[intervals[0]] = -1;
    for (i = 1; i < nInWindowSize; i++)
       Phi[intervals[i]] = intervals[i - 1];
    for (i = 0; i < nInWindowSize; i++) {
       if (Phi[i] == -1) {
          PLCP[i] = 0;
          continue;
       }
       /* The comparison may not run past the end of the window for whichever of the two suffixes starts later */
       int nMaxLen = (i > Phi[i]) ? (nInWindowSize - i) : (nInWindowSize - Phi[i]);
       while (nCurLen < nMaxLen && pInWindow[i + nCurLen] == pInWindow[Phi[i] + nCurLen]) nCurLen++;
       PLCP[i] = nCurLen;
       /* Carry the match length over to position i + 1, less one character */
       if (nCurLen > 0)
          nCurLen--;
    }
    /* Rotate permuted LCP into the LCP. This has better cache locality than the direct Kasai LCP method. This also
     * saves us from having to build the inverse suffix array index, as the LCP is calculated without it using this method,
     * and the interval builder below doesn't need it either. */
    intervals[0] &= POS_MASK;
    int nMinMatchSize = pCompressor->min_match_size;
    if (pCompressor->format_version >= 2) {
       /* Format version 2 and up: lengths below the minimum match size become 0, lengths are clamped to LCP_MAX, and
        * non-zero lengths carry a hash tag of the position in their low TAG_BITS bits */
       for (i = 1; i < nInWindowSize; i++) {
          int nIndex = (int)(intervals[i] & POS_MASK);
          int nLen = PLCP[nIndex];
          if (nLen < nMinMatchSize)
             nLen = 0;
          if (nLen > LCP_MAX)
             nLen = LCP_MAX;
          int nTaggedLen = 0;
          if (nLen)
             nTaggedLen = (nLen << TAG_BITS) | (lzsa_get_index_tag((unsigned int)nIndex) & ((1 << TAG_BITS) - 1));
          intervals[i] = ((unsigned int)nIndex) | (((unsigned int)nTaggedLen) << LCP_SHIFT);
       }
    }
    else {
       /* Earlier format: no tag, the whole field holds the length, clamped to LCP_AND_TAG_MAX */
       for (i = 1; i < nInWindowSize; i++) {
          int nIndex = (int)(intervals[i] & POS_MASK);
          int nLen = PLCP[nIndex];
          if (nLen < nMinMatchSize)
             nLen = 0;
          if (nLen > LCP_AND_TAG_MAX)
             nLen = LCP_AND_TAG_MAX;
          intervals[i] = ((unsigned int)nIndex) | (((unsigned int)nLen) << LCP_SHIFT);
       }
    }
    /**
     * Build intervals for finding matches
     *
     * Methodology and code fragment taken from wimlib (CC0 license):
     * https://wimlib.net/git/?p=wimlib;a=blob_plain;f=src/lcpit_matchfinder.c;h=a2d6a1e0cd95200d1f3a5464d8359d5736b14cbe;hb=HEAD
     */
    /* SA_and_LCP and intervals are the same array: entries are read at index r while interval records are written
     * at indices handed out by next_interval_idx */
    unsigned int * const SA_and_LCP = intervals;
    unsigned int *pos_data = pCompressor->pos_data;
    unsigned int next_interval_idx;
    /* Stack of currently open intervals; the bottom entry (0) is the root */
    unsigned int *top = pCompressor->open_intervals;
    unsigned int prev_pos = SA_and_LCP[0] & POS_MASK;
    *top = 0;
    intervals[0] = 0;
    next_interval_idx = 1;
    for (int r = 1; r < nInWindowSize; r++) {
       const unsigned int next_pos = SA_and_LCP[r] & POS_MASK;
       const unsigned int next_lcp = SA_and_LCP[r] & LCP_MASK;
       const unsigned int top_lcp = *top & LCP_MASK;
       if (next_lcp == top_lcp) {
          /* Continuing the deepest open interval  */
          pos_data[prev_pos] = *top;
       }
       else if (next_lcp > top_lcp) {
          /* Opening a new interval  */
          *++top = next_lcp | next_interval_idx++;
          pos_data[prev_pos] = *top;
       }
       else {
          /* Closing the deepest open interval  */
          pos_data[prev_pos] = *top;
          for (;;) {
             const unsigned int closed_interval_idx = *top-- & POS_MASK;
             const unsigned int superinterval_lcp = *top & LCP_MASK;
             if (next_lcp == superinterval_lcp) {
                /* Continuing the superinterval */
                intervals[closed_interval_idx] = *top;
                break;
             }
             else if (next_lcp > superinterval_lcp) {
                /* Creating a new interval that is a
                 * superinterval of the one being
                 * closed, but still a subinterval of
                 * its superinterval  */
                *++top = next_lcp | next_interval_idx++;
                intervals[closed_interval_idx] = *top;
                break;
             }
             else {
                /* Also closing the superinterval  */
                intervals[closed_interval_idx] = *top;
             }
          }
       }
       prev_pos = next_pos;
    }
    /* Close any still-open intervals.  */
    pos_data[prev_pos] = *top;
    for (; top > pCompressor->open_intervals; top--)
       intervals[*top & POS_MASK] = *(top - 1);
    /* Success */
    return 0;
 }
 /**
 * Find matches at the specified offset in the input window
 *
 * Besides reporting matches, every call also links the intervals containing the
 * current position to that position, so the function has to be called for each
 * offset in increasing order even when the matches themselves are not wanted
 * (pass nMaxMatches = 0 in that case).
 *
 * @param pCompressor compression context
 * @param nOffset offset to find matches at, in the input window
 * @param pMatches pointer to returned matches
 * @param nMaxMatches maximum number of matches to return (0 for none)
 * @param nInWindowSize total input size in bytes (previously compressed bytes + bytes to compress);
 *                      only compared against 65536 here, to enable the extra candidates
 *                      gathered for format version 2 and up
 *
 * @return number of matches
 */
 int lzsa_find_matches_at(lzsa_compressor *pCompressor, const int nOffset, lzsa_match *pMatches, const int nMaxMatches, const int nInWindowSize) {
    unsigned int *intervals = pCompressor->intervals;
    unsigned int *pos_data = pCompressor->pos_data;
    unsigned int ref;
    unsigned int super_ref;
    unsigned int match_pos;
    lzsa_match *matchptr;
    /* Offset of the most recently stored extra candidate; used to avoid storing the same offset twice in a row */
    int nPrevOffset = 0;
    /**
     * Find matches using intervals
     *
     * Taken from wimlib (CC0 license):
     * https://wimlib.net/git/?p=wimlib;a=blob_plain;f=src/lcpit_matchfinder.c;h=a2d6a1e0cd95200d1f3a5464d8359d5736b14cbe;hb=HEAD
     */
     /* Get the deepest lcp-interval containing the current suffix. */
    ref = pos_data[nOffset];
    pos_data[nOffset] = 0;
    /* Ascend until we reach a visited interval, the root, or a child of the
     * root.  Link unvisited intervals to the current suffix as we go.  */
    while ((super_ref = intervals[ref & POS_MASK]) & LCP_MASK) {
       intervals[ref & POS_MASK] = nOffset | VISITED_FLAG;
       ref = super_ref;
    }
    if (super_ref == 0) {
       /* In this case, the current interval may be any of:
        * (1) the root;
        * (2) an unvisited child of the root */
       if (ref != 0)  /* Not the root?  */
          intervals[ref & POS_MASK] = nOffset | VISITED_FLAG;
       return 0;
    }
    /* Ascend indirectly via pos_data[] links.  */
    match_pos = super_ref & EXCL_VISITED_MASK;
    matchptr = pMatches;
    /* Format version 2 and up with a window below 64 KB: also record the position the visited interval pointed to,
     * before following the pos_data[] links */
    if (pCompressor->format_version >= 2 && nInWindowSize < 65536) {
       if ((matchptr - pMatches) < nMaxMatches) {
          int nMatchOffset = (int)(nOffset - match_pos);
          if (nMatchOffset <= MAX_OFFSET) {
             matchptr->length = (unsigned short)(ref >> (LCP_SHIFT + TAG_BITS));
             matchptr->offset = (unsigned short)nMatchOffset;
             matchptr++;
             nPrevOffset = nMatchOffset;
          }
       }
    }
    for (;;) {
       /* Same condition as the while loop below, unrolled once so that the first hop can be recorded as an
        * extra candidate. NOTE(review): these extra candidates have bit 0x8000 set in their length; how that
        * bit is interpreted is up to the consumer of pMatches, which is not visible here. */
       if ((super_ref = pos_data[match_pos]) > ref) {
          match_pos = intervals[super_ref & POS_MASK] & EXCL_VISITED_MASK;
          if (pCompressor->format_version >= 2 && nInWindowSize < 65536) {
             if ((matchptr - pMatches) < nMaxMatches) {
                int nMatchOffset = (int)(nOffset - match_pos);
                if (nMatchOffset <= MAX_OFFSET && nMatchOffset != nPrevOffset) {
                   matchptr->length = ((unsigned short)(ref >> (LCP_SHIFT + TAG_BITS))) | 0x8000;
                   matchptr->offset = (unsigned short)nMatchOffset;
                   matchptr++;
                   nPrevOffset = nMatchOffset;
                }
             }
          }
       }
       while ((super_ref = pos_data[match_pos]) > ref)
          match_pos = intervals[super_ref & POS_MASK] & EXCL_VISITED_MASK;
       /* Link the current interval to this position, and the matched position to the current interval */
       intervals[ref & POS_MASK] = nOffset | VISITED_FLAG;
       pos_data[match_pos] = ref;
       /* Regular match for the current interval; the length field includes the tag bits only for version 2 and up */
       if ((matchptr - pMatches) < nMaxMatches) {
          int nMatchOffset = (int)(nOffset - match_pos);
          if (nMatchOffset <= MAX_OFFSET && nMatchOffset != nPrevOffset) {
             if (pCompressor->format_version >= 2) {
                matchptr->length = (unsigned short)(ref >> (LCP_SHIFT + TAG_BITS));
             }
             else {
                matchptr->length = (unsigned short)(ref >> LCP_SHIFT);
             }
             matchptr->offset = (unsigned short)nMatchOffset;
             matchptr++;
          }
       }
       if (super_ref == 0)
          break;
       /* Move up to the superinterval and continue from the position it is linked to */
       ref = super_ref;
       match_pos = intervals[ref & POS_MASK] & EXCL_VISITED_MASK;
       if (pCompressor->format_version >= 2 && nInWindowSize < 65536) {
          if ((matchptr - pMatches) < nMaxMatches) {
             int nMatchOffset = (int)(nOffset - match_pos);
             if (nMatchOffset <= MAX_OFFSET && nMatchOffset != nPrevOffset) {
                matchptr->length = ((unsigned short)(ref >> (LCP_SHIFT + TAG_BITS))) | 0x8000;
                matchptr->offset = (unsigned short)nMatchOffset;
                /* The slot is written first and only kept when the length (without the flag bit) exceeds 2 */
                if ((matchptr->length & 0x7fff) > 2) {
                   matchptr++;
                   nPrevOffset = nMatchOffset;
                }
             }
          }
       }
    }
    return (int)(matchptr - pMatches);
 }
 /**
 * Advance the match finder over bytes that were already compressed
 *
 * No matches are kept, but every offset is still visited because the lookup
 * itself lazily updates the interval data that later lookups rely on.
 *
 * @param pCompressor compression context
 * @param nStartOffset current offset in input window (typically 0)
 * @param nEndOffset offset to skip to in input window (typically the number of previously compressed bytes)
 */
 void lzsa_skip_matches(lzsa_compressor *pCompressor, const int nStartOffset, const int nEndOffset) {
    lzsa_match discarded;
    int nPos = nStartOffset;
    while (nPos < nEndOffset) {
       /* a capacity of 0 means nothing is ever written to `discarded` */
       (void)lzsa_find_matches_at(pCompressor, nPos, &discarded, 0, 0);
       nPos++;
    }
 }
 /**
 * Collect the matches for every offset of the data to be compressed
 *
 * Results go to pCompressor->match, nMatchesPerOffset consecutive slots per
 * offset; slots that are not filled by the lookup are zeroed (length 0, offset 0).
 *
 * @param pCompressor compression context
 * @param nMatchesPerOffset maximum number of matches to store for each offset
 * @param nStartOffset current offset in input window (typically the number of previously compressed bytes)
 * @param nEndOffset offset to end finding matches at (typically the size of the total input window in bytes)
 */
 void lzsa_find_all_matches(lzsa_compressor *pCompressor, const int nMatchesPerOffset, const int nStartOffset, const int nEndOffset) {
    lzsa_match *pSlots = pCompressor->match;
    int nPos;
    for (nPos = nStartOffset; nPos < nEndOffset; nPos++, pSlots += nMatchesPerOffset) {
       /* the span being searched is what gets passed as the window size */
       int nFound = lzsa_find_matches_at(pCompressor, nPos, pSlots, nMatchesPerOffset, nEndOffset - nStartOffset);
       /* pad the remaining slots for this offset with empty matches */
       for (; nFound < nMatchesPerOffset; nFound++) {
          pSlots[nFound].length = 0;
          pSlots[nFound].offset = 0;
       }
    }
 }
--- a/Tools/unix/lzsa/src/matchfinder.h
+++ b/Tools/unix/lzsa/src/matchfinder.h
@ -0,0 +1,91 @@
 /*
 * matchfinder.h - LZ match finder definitions
 *
 * Copyright (C) 2019 Emmanuel Marty
 *
 * This software is provided 'as-is', without any express or implied
 * warranty.  In no event will the authors be held liable for any damages
 * arising from the use of this software.
 *
 * Permission is granted to anyone to use this software for any purpose,
 * including commercial applications, and to alter it and redistribute it
 * freely, subject to the following restrictions:
 *
 * 1. The origin of this software must not be misrepresented; you must not
 *    claim that you wrote the original software. If you use this software
 *    in a product, an acknowledgment in the product documentation would be
 *    appreciated but is not required.
 * 2. Altered source versions must be plainly marked as such, and must not be
 *    misrepresented as being the original software.
 * 3. This notice may not be removed or altered from any source distribution.
 */
 /*
 * Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
 *
 * Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
 * With help, ideas, optimizations and speed measurements by spke <zxintrospec@gmail.com>
 * With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
 * Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
 *
 */
 #ifndef _MATCHFINDER_H
 #define _MATCHFINDER_H
 #ifdef __cplusplus
 extern "C" {
 #endif
 /* Forward declarations */
 typedef struct _lzsa_match lzsa_match;
 typedef struct _lzsa_compressor lzsa_compressor;
 /**
 * Parse input data, build suffix array and overlaid data structures to speed up match finding
 *
 * @param pCompressor compression context
 * @param pInWindow pointer to input data window (previously compressed bytes + bytes to compress)
 * @param nInWindowSize total input size in bytes (previously compressed bytes + bytes to compress)
 *
 * @return 0 for success, non-zero for failure
 */
 int lzsa_build_suffix_array(lzsa_compressor *pCompressor, const unsigned char *pInWindow, const int nInWindowSize);
 /**
 * Find matches at the specified offset in the input window
 *
 * @param pCompressor compression context
 * @param nOffset offset to find matches at, in the input window
 * @param pMatches pointer to returned matches
 * @param nMaxMatches maximum number of matches to return (0 for none)
 * @param nInWindowSize total input size in bytes (previously compressed bytes + bytes to compress)
 *
 * @return number of matches
 */
 int lzsa_find_matches_at(lzsa_compressor *pCompressor, const int nOffset, lzsa_match *pMatches, const int nMaxMatches, const int nInWindowSize);
 /**
 * Skip previously compressed bytes
 *
 * @param pCompressor compression context
 * @param nStartOffset current offset in input window (typically 0)
 * @param nEndOffset offset to skip to in input window (typically the number of previously compressed bytes)
 */
 void lzsa_skip_matches(lzsa_compressor *pCompressor, const int nStartOffset, const int nEndOffset);
 /**
 * Find all matches for the data to be compressed
 *
 * @param pCompressor compression context
 * @param nMatchesPerOffset maximum number of matches to store for each offset
 * @param nStartOffset current offset in input window (typically the number of previously compressed bytes)
 * @param nEndOffset offset to end finding matches at (typically the size of the total input window in bytes)
 */
 void lzsa_find_all_matches(lzsa_compressor *pCompressor, const int nMatchesPerOffset, const int nStartOffset, const int nEndOffset);
 #ifdef __cplusplus
 }
 #endif
 #endif /* _MATCHFINDER_H */
--- a/Tools/unix/lzsa/src/shrink_block_v1.c
+++ b/Tools/unix/lzsa/src/shrink_block_v1.c
@ -0,0 +1,702 @@
 /*
 * shrink_block_v1.c - LZSA1 block compressor implementation
 *
 * Copyright (C) 2019 Emmanuel Marty
 *
 * This software is provided 'as-is', without any express or implied
 * warranty.  In no event will the authors be held liable for any damages
 * arising from the use of this software.
 *
 * Permission is granted to anyone to use this software for any purpose,
 * including commercial applications, and to alter it and redistribute it
 * freely, subject to the following restrictions:
 *
 * 1. The origin of this software must not be misrepresented; you must not
 *    claim that you wrote the original software. If you use this software
 *    in a product, an acknowledgment in the product documentation would be
 *    appreciated but is not required.
 * 2. Altered source versions must be plainly marked as such, and must not be
 *    misrepresented as being the original software.
 * 3. This notice may not be removed or altered from any source distribution.
 */
 /*
 * Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
 *
 * Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
 * With help, ideas, optimizations and speed measurements by spke <zxintrospec@gmail.com>
 * With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
 * Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
 *
 */
 #include <stdlib.h>
 #include <string.h>
 #include "lib.h"
 #include "shrink_block_v1.h"
 #include "format.h"
 /**
 * Compute how many bits, beyond the token, are needed to encode a literals length
 *
 * Lengths below LITERALS_RUN_LEN_V1 fit in the token itself. Longer runs need one extra byte
 * while the length is below 256, two extra bytes while it is below 512, and three otherwise.
 *
 * @param nLength literals length
 *
 * @return number of extra bits required (0, 8, 16 or 24)
 */
 static inline int lzsa_get_literals_varlen_size_v1(const int nLength) {
    if (nLength < LITERALS_RUN_LEN_V1)
       return 0;
    if (nLength < 256)
       return 8;
    return (nLength < 512) ? 16 : 24;
 }
 /**
 * Emit the extra literals length bytes, if any, into the output (compressed) buffer. The caller is responsible
 * for making sure beforehand that the buffer has enough room.
 *
 * Nothing is written when the length fits in the token (below LITERALS_RUN_LEN_V1). Otherwise one of three
 * encodings is used: a single byte (length - LITERALS_RUN_LEN_V1) for lengths below 256, the marker 250 followed
 * by (length - 256) for lengths below 512, or the marker 249 followed by the low and high bytes of the length.
 *
 * @param pOutData pointer to output buffer
 * @param nOutOffset current write index into output buffer
 * @param nLength literals length
 *
 * @return write index into output buffer after the emitted bytes
 */
 static inline int lzsa_write_literals_varlen_v1(unsigned char *pOutData, int nOutOffset, int nLength) {
    if (nLength < LITERALS_RUN_LEN_V1)
       return nOutOffset;
    if (nLength < 256) {
       pOutData[nOutOffset] = nLength - LITERALS_RUN_LEN_V1;
       return nOutOffset + 1;
    }
    if (nLength < 512) {
       pOutData[nOutOffset] = 250;
       pOutData[nOutOffset + 1] = nLength - 256;
       return nOutOffset + 2;
    }
    pOutData[nOutOffset] = 249;
    pOutData[nOutOffset + 1] = nLength & 0xff;
    pOutData[nOutOffset + 2] = (nLength >> 8) & 0xff;
    return nOutOffset + 3;
 }
 /**
 * Compute how many bits, beyond the token, are needed to encode a match length
 *
 * Encoded lengths below MATCH_RUN_LEN_V1 fit in the token itself. Otherwise the size depends on the actual
 * match length (encoded length + MIN_MATCH_SIZE_V1): one extra byte below 256, two below 512, three otherwise.
 *
 * @param nLength encoded match length (actual match length - MIN_MATCH_SIZE_V1)
 *
 * @return number of extra bits required (0, 8, 16 or 24)
 */
 static inline int lzsa_get_match_varlen_size_v1(const int nLength) {
    if (nLength < MATCH_RUN_LEN_V1)
       return 0;
    const int nActualLen = nLength + MIN_MATCH_SIZE_V1;
    if (nActualLen < 256)
       return 8;
    return (nActualLen < 512) ? 16 : 24;
 }
 /**
 * Emit the extra encoded match length bytes, if any, into the output (compressed) buffer. The caller is
 * responsible for making sure beforehand that the buffer has enough room.
 *
 * Nothing is written when the encoded length fits in the token (below MATCH_RUN_LEN_V1). Otherwise, based on
 * the actual match length (encoded length + MIN_MATCH_SIZE_V1): a single byte (encoded length - MATCH_RUN_LEN_V1)
 * when it is below 256, the marker 239 followed by (actual length - 256) when it is below 512, or the marker 238
 * followed by the low and high bytes of the actual length.
 *
 * @param pOutData pointer to output buffer
 * @param nOutOffset current write index into output buffer
 * @param nLength encoded match length (actual match length - MIN_MATCH_SIZE_V1)
 *
 * @return write index into output buffer after the emitted bytes
 */
 static inline int lzsa_write_match_varlen_v1(unsigned char *pOutData, int nOutOffset, int nLength) {
    if (nLength < MATCH_RUN_LEN_V1)
       return nOutOffset;
    if ((nLength + MIN_MATCH_SIZE_V1) < 256) {
       pOutData[nOutOffset] = nLength - MATCH_RUN_LEN_V1;
       return nOutOffset + 1;
    }
    if ((nLength + MIN_MATCH_SIZE_V1) < 512) {
       pOutData[nOutOffset] = 239;
       pOutData[nOutOffset + 1] = nLength + MIN_MATCH_SIZE_V1 - 256;
       return nOutOffset + 2;
    }
    pOutData[nOutOffset] = 238;
    pOutData[nOutOffset + 1] = (nLength + MIN_MATCH_SIZE_V1) & 0xff;
    pOutData[nOutOffset + 2] = ((nLength + MIN_MATCH_SIZE_V1) >> 8) & 0xff;
    return nOutOffset + 3;
 }
 /**
 * Compute the number of bits needed to encode a match offset
 *
 * Offsets up to and including 256 take a single byte; anything larger takes two bytes.
 *
 * @param nMatchOffset offset to get cost of
 *
 * @return cost in bits (8 or 16)
 */
 static inline int lzsa_get_offset_cost_v1(const unsigned int nMatchOffset) {
    if (nMatchOffset > 256)
       return 16;
    return 8;
 }
 /**
 * Attempt to pick optimal matches using a forward arrivals parser, so as to produce the smallest possible output that decompresses to the same input
 *
 * Each input position owns a group of arrival slots (indexed as position << MATCHES_PER_ARRIVAL_SHIFT), of which the first
 * NMATCHES_PER_ARRIVAL_V1 are used here. A slot with from_slot == 0 is empty; otherwise from_pos and from_slot - 1 identify
 * the arrival it was reached from. Walking forward, every occupied arrival at position i is extended by one literal (to i + 1)
 * and by every candidate match length (to i + k). A candidate is inserted ahead of the first slot it beats on cost in bits
 * (or on score when costs tie), shifting the following slots down. Finally the chain is walked backwards from the first slot
 * at nEndOffset and the choice made at each visited position is stored in pBestMatch.
 *
 * Nothing is done (pBestMatch is left untouched) if the range is larger than BLOCK_SIZE.
 *
 * @param pCompressor compression context
 * @param pBestMatch array indexed by input window offset, receiving the match length and offset chosen at each position on the selected path (both 0 for a literal)
 * @param nStartOffset current offset in input window (typically the number of previously compressed bytes)
 * @param nEndOffset offset to end finding matches at (typically the size of the total input window in bytes)
 * @param nReduce when non-zero, a candidate only wins a cost tie if its score is strictly lower; when zero, 2 * BLOCK_SIZE is added to the existing slot's score before that comparison (the caller passes 0 for its base parse and 1 for the parse that breaks ties in favor of fewer tokens)
 */
 static void lzsa_optimize_forward_v1(lzsa_compressor *pCompressor, lzsa_match *pBestMatch, const int nStartOffset, const int nEndOffset, const int nReduce) {
    /* Bias the arrivals pointer so that it can be indexed directly with input window offsets */
    lzsa_arrival *arrival = pCompressor->arrival - (nStartOffset << MATCHES_PER_ARRIVAL_SHIFT);
    const int nMinMatchSize = pCompressor->min_match_size;
    const int nFavorRatio = (pCompressor->flags & LZSA_FLAG_FAVOR_RATIO) ? 1 : 0;
    const int nDisableScore = nReduce ? 0 : (2 * BLOCK_SIZE);
    int i, j, n;
    if ((nEndOffset - nStartOffset) > BLOCK_SIZE) return;
    /* Mark every slot for positions nStartOffset..nEndOffset (inclusive) as empty */
    memset(arrival + (nStartOffset << MATCHES_PER_ARRIVAL_SHIFT), 0, sizeof(lzsa_arrival) * ((nEndOffset - nStartOffset + 1) << MATCHES_PER_ARRIVAL_SHIFT));
    /* Seed the starting position: non-zero so that it counts as occupied, negative so that the final backwards walk stops here */
    arrival[nStartOffset << MATCHES_PER_ARRIVAL_SHIFT].from_slot = -1;
    for (i = nStartOffset; i != nEndOffset; i++) {
       int m;
       /* Extend every occupied arrival at position i by a single literal, landing at i + 1 */
       for (j = 0; j < NMATCHES_PER_ARRIVAL_V1 && arrival[(i << MATCHES_PER_ARRIVAL_SHIFT) + j].from_slot; j++) {
          int nPrevCost = arrival[(i << MATCHES_PER_ARRIVAL_SHIFT) + j].cost;
          int nCodingChoiceCost = nPrevCost + 8 /* literal */;
          int nScore = arrival[(i << MATCHES_PER_ARRIVAL_SHIFT) + j].score + 1;
          int nNumLiterals = arrival[(i << MATCHES_PER_ARRIVAL_SHIFT) + j].num_literals + 1;
          /* The run just reached a length at which the literals length encoding grows by one byte */
          if (nNumLiterals == LITERALS_RUN_LEN_V1 || nNumLiterals == 256 || nNumLiterals == 512) {
             nCodingChoiceCost += 8;
          }
          /* Unless ratio is favored, penalize starting a new run of literals */
          if (!nFavorRatio && nNumLiterals == 1)
             nCodingChoiceCost += MODESWITCH_PENALTY;
          for (n = 0; n < NMATCHES_PER_ARRIVAL_V1 /* we only need the literals + short match cost + long match cost cases */; n++) {
             lzsa_arrival *pDestArrival = &arrival[((i + 1) << MATCHES_PER_ARRIVAL_SHIFT) + n];
             if (pDestArrival->from_slot == 0 ||
                nCodingChoiceCost < pDestArrival->cost ||
                (nCodingChoiceCost == pDestArrival->cost && nScore < (pDestArrival->score + nDisableScore))) {
                /* Insert at slot n: shift the following slots down by one, dropping the last one */
                memmove(&arrival[((i + 1) << MATCHES_PER_ARRIVAL_SHIFT) + n + 1],
                   &arrival[((i + 1) << MATCHES_PER_ARRIVAL_SHIFT) + n],
                   sizeof(lzsa_arrival) * (NMATCHES_PER_ARRIVAL_V1 - n - 1));
                pDestArrival->cost = nCodingChoiceCost;
                pDestArrival->from_pos = i;
                pDestArrival->from_slot = j + 1;
                pDestArrival->match_offset = 0;
                pDestArrival->match_len = 0;
                pDestArrival->num_literals = nNumLiterals;
                pDestArrival->score = nScore;
                pDestArrival->rep_offset = arrival[(i << MATCHES_PER_ARRIVAL_SHIFT) + j].rep_offset;
                break;
             }
          }
       }
       /* Candidate matches found for position i; the list ends at the first entry with a zero length */
       const lzsa_match *match = pCompressor->match + ((i - nStartOffset) << MATCHES_PER_INDEX_SHIFT_V1);
       for (m = 0; m < NMATCHES_PER_INDEX_V1 && match[m].length; m++) {
          int nMatchLen = match[m].length;
          int nMatchOffsetCost = lzsa_get_offset_cost_v1(match[m].offset);
          int nStartingMatchLen, k;
          /* Clip the match so that it leaves LAST_LITERALS bytes before the end of the block */
          if ((i + nMatchLen) > (nEndOffset - LAST_LITERALS))
             nMatchLen = nEndOffset - LAST_LITERALS - i;
          /* Matches of at least LEAVE_ALONE_MATCH_SIZE are only tried at full length; shorter ones at every length from the minimum up */
          if (nMatchLen >= LEAVE_ALONE_MATCH_SIZE)
             nStartingMatchLen = nMatchLen;
          else
             nStartingMatchLen = nMinMatchSize;
          for (k = nStartingMatchLen; k <= nMatchLen; k++) {
             int nMatchLenCost = lzsa_get_match_varlen_size_v1(k - MIN_MATCH_SIZE_V1);
             /* Extend every occupied arrival at position i by this match of length k, landing at i + k */
             for (j = 0; j < NMATCHES_PER_ARRIVAL_V1 && arrival[(i << MATCHES_PER_ARRIVAL_SHIFT) + j].from_slot; j++) {
                int nPrevCost = arrival[(i << MATCHES_PER_ARRIVAL_SHIFT) + j].cost;
                int nCodingChoiceCost = nPrevCost + 8 /* token */ /* the actual cost of the literals themselves accumulates up the chain */ + nMatchOffsetCost + nMatchLenCost;
                int nScore = arrival[(i << MATCHES_PER_ARRIVAL_SHIFT) + j].score + 5;
                int exists = 0;
                /* Unless ratio is favored, penalize a match that immediately follows another match (no literals in between) */
                if (!nFavorRatio && !arrival[(i << MATCHES_PER_ARRIVAL_SHIFT) + j].num_literals)
                   nCodingChoiceCost += MODESWITCH_PENALTY;
                /* Skip this candidate if the destination already holds an arrival that is no more expensive and whose offset has the same encoding cost */
                for (n = 0;
                   n < NMATCHES_PER_ARRIVAL_V1 && arrival[((i + k) << MATCHES_PER_ARRIVAL_SHIFT) + n].from_slot && arrival[((i + k) << MATCHES_PER_ARRIVAL_SHIFT) + n].cost <= nCodingChoiceCost;
                   n++) {
                   if (lzsa_get_offset_cost_v1(arrival[((i + k) << MATCHES_PER_ARRIVAL_SHIFT) + n].rep_offset) == lzsa_get_offset_cost_v1(match[m].offset)) {
                      exists = 1;
                      break;
                   }
                }
                for (n = 0; !exists && n < NMATCHES_PER_ARRIVAL_V1 /* we only need the literals + short match cost + long match cost cases */; n++) {
                   lzsa_arrival *pDestArrival = &arrival[((i + k) << MATCHES_PER_ARRIVAL_SHIFT) + n];
                   if (pDestArrival->from_slot == 0 ||
                      nCodingChoiceCost < pDestArrival->cost ||
                      (nCodingChoiceCost == pDestArrival->cost && nScore < (pDestArrival->score + nDisableScore))) {
                      /* Insert at slot n: shift the following slots down by one, dropping the last one */
                      memmove(&arrival[((i + k) << MATCHES_PER_ARRIVAL_SHIFT) + n + 1],
                         &arrival[((i + k) << MATCHES_PER_ARRIVAL_SHIFT) + n],
                         sizeof(lzsa_arrival) * (NMATCHES_PER_ARRIVAL_V1 - n - 1));
                      pDestArrival->cost = nCodingChoiceCost;
                      pDestArrival->from_pos = i;
                      pDestArrival->from_slot = j + 1;
                      pDestArrival->match_offset = match[m].offset;
                      pDestArrival->match_len = k;
                      pDestArrival->num_literals = 0;
                      pDestArrival->score = nScore;
                      pDestArrival->rep_offset = match[m].offset;
                      break;
                   }
                }
             }
          }
       }
    }
    /* i == nEndOffset here: walk back from the first slot at the end of the block, recording the choice made at each position on the path */
    lzsa_arrival *end_arrival = &arrival[(i << MATCHES_PER_ARRIVAL_SHIFT) + 0];
    while (end_arrival->from_slot > 0 && end_arrival->from_pos >= 0) {
       if (end_arrival->from_pos >= nEndOffset) return;
       pBestMatch[end_arrival->from_pos].length = end_arrival->match_len;
       pBestMatch[end_arrival->from_pos].offset = end_arrival->match_offset;
       end_arrival = &arrival[(end_arrival->from_pos << MATCHES_PER_ARRIVAL_SHIFT) + (end_arrival->from_slot - 1)];
    }
 }
 /**
 * Attempt to minimize the number of commands issued in the compressed data block, in order to speed up decompression without
 * impacting the compression ratio
 *
 * Three rewrites are applied while scanning the parse from nStartOffset to nEndOffset:
 * - Merge: a literal directly followed by a match is folded into that match (extended backwards by one byte) when the byte
 *   is also reachable through the match offset and the match length encoding grows by at most 8 bits.
 * - Reduce: a short match (up to 9 bytes) that is not the last command is turned back into literals when doing so is not
 *   more expensive than keeping the match.
 * - Join: two back-to-back matches are combined into one using the first match's offset, when the second match's bytes are
 *   also found at that offset and the combined encoding is not larger.
 *
 * @param pCompressor compression context
 * @param pInWindow pointer to input data window (previously compressed bytes + bytes to compress)
 * @param pBestMatch optimal matches to emit, indexed by input window offset; modified in place
 * @param nStartOffset current offset in input window (typically the number of previously compressed bytes)
 * @param nEndOffset offset to end finding matches at (typically the size of the total input window in bytes)
 *
 * @return non-zero if a merge or a reduce was performed, 0 otherwise (a join alone does not set the result)
 */
 static int lzsa_optimize_command_count_v1(lzsa_compressor *pCompressor, const unsigned char *pInWindow, lzsa_match *pBestMatch, const int nStartOffset, const int nEndOffset) {
    int i;
    int nNumLiterals = 0;
    int nDidReduce = 0;
    for (i = nStartOffset; i < nEndOffset; ) {
       lzsa_match *pMatch = pBestMatch + i;
       /* Literal at i followed by a match at i + 1: check that the match can start one byte earlier with the same offset */
       if (pMatch->length == 0 &&
          (i + 1) < (nEndOffset - LAST_LITERALS) &&
          pBestMatch[i + 1].length >= MIN_MATCH_SIZE_V1 &&
          pBestMatch[i + 1].length < MAX_VARLEN &&
          pBestMatch[i + 1].offset &&
          i >= pBestMatch[i + 1].offset &&
          (i + pBestMatch[i + 1].length + 1) <= (nEndOffset - LAST_LITERALS) &&
          !memcmp(pInWindow + i - (pBestMatch[i + 1].offset), pInWindow + i, pBestMatch[i + 1].length + 1)) {
          int nCurLenSize = lzsa_get_match_varlen_size_v1(pBestMatch[i + 1].length - MIN_MATCH_SIZE_V1);
          int nReducedLenSize = lzsa_get_match_varlen_size_v1(pBestMatch[i + 1].length + 1 - MIN_MATCH_SIZE_V1);
          /* The literal costs 8 bits, so the merge never loses as long as the length encoding grows by no more than that */
          if ((nReducedLenSize - nCurLenSize) <= 8) {
             /* Merge */
             pBestMatch[i].length = pBestMatch[i + 1].length + 1;
             pBestMatch[i].offset = pBestMatch[i + 1].offset;
             pBestMatch[i + 1].length = 0;
             pBestMatch[i + 1].offset = 0;
             nDidReduce = 1;
             /* Re-examine position i, which now holds the merged match */
             continue;
          }
       }
       if (pMatch->length >= MIN_MATCH_SIZE_V1) {
          if (pMatch->length <= 9 /* Don't waste time considering large matches, they will always win over literals */ &&
             (i + pMatch->length) < nEndOffset /* Don't consider the last token in the block, we can only reduce a match inbetween other tokens */) {
             int nNextIndex = i + pMatch->length;
             int nNextLiterals = 0;
             while (nNextIndex < nEndOffset && pBestMatch[nNextIndex].length < MIN_MATCH_SIZE_V1) {
                nNextLiterals++;
                nNextIndex++;
             }
             /* This command is a match, is followed by 'nNextLiterals' literals and then by another match, or the end of the input. Calculate this command's current cost (excluding 'nNumLiterals' bytes) */
             if ((8 /* token */ + lzsa_get_literals_varlen_size_v1(nNumLiterals) + ((pMatch->offset <= 256) ? 8 : 16) /* match offset */ + lzsa_get_match_varlen_size_v1(pMatch->length - MIN_MATCH_SIZE_V1) +
                8 /* token */ + lzsa_get_literals_varlen_size_v1(nNextLiterals)) >=
                (8 /* token */ + (pMatch->length << 3) + lzsa_get_literals_varlen_size_v1(nNumLiterals + pMatch->length + nNextLiterals))) {
                /* Reduce */
                int nMatchLen = pMatch->length;
                int j;
                for (j = 0; j < nMatchLen; j++) {
                   pBestMatch[i + j].length = 0;
                }
                nDidReduce = 1;
                /* Re-examine position i, which is now a literal */
                continue;
             }
          }
          /* The bound below is strict: a match ending exactly at nEndOffset has no following entry, and pBestMatch[nEndOffset]
           * must not be read. A join was never possible in that case, as a following match could not fit before nEndOffset. */
          if ((i + pMatch->length) < nEndOffset && pMatch->offset > 0 && pMatch->length >= MIN_MATCH_SIZE_V1 &&
             pBestMatch[i + pMatch->length].offset > 0 &&
             pBestMatch[i + pMatch->length].length >= MIN_MATCH_SIZE_V1 &&
             (pMatch->length + pBestMatch[i + pMatch->length].length) >= LEAVE_ALONE_MATCH_SIZE &&
             (pMatch->length + pBestMatch[i + pMatch->length].length) <= MAX_VARLEN &&
             (i + pMatch->length) > pMatch->offset &&
             (i + pMatch->length) > pBestMatch[i + pMatch->length].offset &&
             (i + pMatch->length + pBestMatch[i + pMatch->length].length) <= nEndOffset &&
             !memcmp(pInWindow + i - pMatch->offset + pMatch->length,
                pInWindow + i + pMatch->length - pBestMatch[i + pMatch->length].offset,
                pBestMatch[i + pMatch->length].length)) {
             /* Cost in bits of what the join would remove (this match's length encoding plus the whole following command) versus the joined length encoding */
             int nCurPartialSize = lzsa_get_match_varlen_size_v1(pMatch->length - MIN_MATCH_SIZE_V1);
             nCurPartialSize += 8 /* token */ + lzsa_get_literals_varlen_size_v1(0) + ((pBestMatch[i + pMatch->length].offset <= 256) ? 8 : 16) /* match offset */ + lzsa_get_match_varlen_size_v1(pBestMatch[i + pMatch->length].length - MIN_MATCH_SIZE_V1);
             int nReducedPartialSize = lzsa_get_match_varlen_size_v1(pMatch->length + pBestMatch[i + pMatch->length].length - MIN_MATCH_SIZE_V1);
             if (nCurPartialSize >= nReducedPartialSize) {
                int nMatchLen = pMatch->length;
                /* Join */
                pMatch->length += pBestMatch[i + nMatchLen].length;
                pBestMatch[i + nMatchLen].offset = 0;
                pBestMatch[i + nMatchLen].length = -1;
                /* Re-examine position i, which now holds the joined match */
                continue;
             }
          }
          i += pMatch->length;
          nNumLiterals = 0;
       }
       else {
          nNumLiterals++;
          i++;
       }
    }
    return nDidReduce;
 }
 /**
 * Get compressed data block size
 *
 * Walks the parse the same way the block writer does and adds up the cost of every command: token, extra literals
 * length bytes, the literals themselves, the match offset and the extra match length bytes. The final command only
 * carries literals. When LZSA_FLAG_RAW_BLOCK is set, the 4-byte end-of-data marker is accounted for as well.
 *
 * @param pCompressor compression context
 * @param pBestMatch optimal matches to emit
 * @param nStartOffset current offset in input window (typically the number of previously compressed bytes)
 * @param nEndOffset offset to end finding matches at (typically the size of the total input window in bytes)
 *
 * @return size, in bits, of the compressed data that will be written to output buffer
 */
 static int lzsa_get_compressed_size_v1(lzsa_compressor *pCompressor, lzsa_match *pBestMatch, const int nStartOffset, const int nEndOffset) {
    int i;
    int nNumLiterals = 0;
    int nCompressedSize = 0;
    for (i = nStartOffset; i < nEndOffset; ) {
       const lzsa_match *pMatch = pBestMatch + i;
       if (pMatch->length >= MIN_MATCH_SIZE_V1) {
          const int nMatchOffset = pMatch->offset;
          const int nMatchLen = pMatch->length;
          const int nEncodedMatchLen = nMatchLen - MIN_MATCH_SIZE_V1;
          /* Command made of the pending literals followed by this match */
          nCompressedSize += 8 /* token */ + lzsa_get_literals_varlen_size_v1(nNumLiterals) + (nNumLiterals << 3) + ((nMatchOffset <= 256) ? 8 : 16) /* match offset */ + lzsa_get_match_varlen_size_v1(nEncodedMatchLen);
          nNumLiterals = 0;
          i += nMatchLen;
       }
       else {
          nNumLiterals++;
          i++;
       }
    }
    /* Final command: a token and whatever literals are still pending, without a match */
    nCompressedSize += 8 /* token */ + lzsa_get_literals_varlen_size_v1(nNumLiterals) + (nNumLiterals << 3);
    if (pCompressor->flags & LZSA_FLAG_RAW_BLOCK) {
       /* End-of-data marker: 4 bytes */
       nCompressedSize += 8 * 4;
    }
    return nCompressedSize;
 }
 /**
 * Emit block of compressed data
 *
 * Walks the parse and writes one command per match: token, extra literals length, pending literals, match offset
 * (stored negated, on one or two bytes) and extra match length. A last command carrying only the remaining literals
 * closes the block. When LZSA_FLAG_RAW_BLOCK is set, that last token uses a match length field of 0x0f and is
 * followed by the 4-byte end-of-data marker, and the compressor's safe distance is updated after every command.
 * Compression statistics and the command counter in the context are updated along the way.
 *
 * @param pCompressor compression context
 * @param pBestMatch optimal matches to emit
 * @param pInWindow pointer to input data window (previously compressed bytes + bytes to compress)
 * @param nStartOffset current offset in input window (typically the number of previously compressed bytes)
 * @param nEndOffset offset to end finding matches at (typically the size of the total input window in bytes)
 * @param pOutData pointer to output buffer
 * @param nMaxOutDataSize maximum size of output buffer, in bytes
 *
 * @return size of compressed data in output buffer, or -1 if it does not fit in the output buffer or a match offset is outside of MIN_OFFSET..MAX_OFFSET
 */
 static int lzsa_write_block_v1(lzsa_compressor *pCompressor, lzsa_match *pBestMatch, const unsigned char *pInWindow, const int nStartOffset, const int nEndOffset, unsigned char *pOutData, const int nMaxOutDataSize) {
    int nPos = nStartOffset;
    int nPendingLiterals = 0;
    int nLiteralsStart = 0;
    int nWritten = 0;
    while (nPos < nEndOffset) {
       const lzsa_match *pCur = &pBestMatch[nPos];
       if (pCur->length < MIN_MATCH_SIZE_V1) {
          /* Literal: remember where the run starts and keep accumulating */
          if (nPendingLiterals == 0)
             nLiteralsStart = nPos;
          nPendingLiterals++;
          nPos++;
          continue;
       }
       const int nOffset = pCur->offset;
       const int nLen = pCur->length;
       const int nEncodedLen = nLen - MIN_MATCH_SIZE_V1;
       const int nLiteralsField = (nPendingLiterals < LITERALS_RUN_LEN_V1) ? nPendingLiterals : LITERALS_RUN_LEN_V1;
       const int nMatchField = (nEncodedLen < MATCH_RUN_LEN_V1) ? nEncodedLen : MATCH_RUN_LEN_V1;
       const int nLongOffsetBit = (nOffset > 256) ? 0x80 : 0x00;
       /* Size of the whole command, in bits */
       int nCommandBits = 8 /* token */;
       nCommandBits += lzsa_get_literals_varlen_size_v1(nPendingLiterals);
       nCommandBits += (nPendingLiterals << 3);
       nCommandBits += (nLongOffsetBit ? 16 : 8) /* match offset */;
       nCommandBits += lzsa_get_match_varlen_size_v1(nEncodedLen);
       if ((nWritten + (nCommandBits >> 3)) > nMaxOutDataSize)
          return -1;
       if (nOffset < MIN_OFFSET || nOffset > MAX_OFFSET)
          return -1;
       /* Token, then the extra literals length bytes */
       pOutData[nWritten++] = nLongOffsetBit | (nLiteralsField << 4) | nMatchField;
       nWritten = lzsa_write_literals_varlen_v1(pOutData, nWritten, nPendingLiterals);
       /* Literals statistics */
       if (pCompressor->stats.min_literals == -1 || nPendingLiterals < pCompressor->stats.min_literals)
          pCompressor->stats.min_literals = nPendingLiterals;
       if (nPendingLiterals > pCompressor->stats.max_literals)
          pCompressor->stats.max_literals = nPendingLiterals;
       pCompressor->stats.total_literals += nPendingLiterals;
       pCompressor->stats.literals_divisor++;
       /* Literal bytes */
       if (nPendingLiterals != 0) {
          memcpy(pOutData + nWritten, pInWindow + nLiteralsStart, nPendingLiterals);
          nWritten += nPendingLiterals;
       }
       nPendingLiterals = 0;
       /* Negated match offset, low byte first, then the extra match length bytes */
       pOutData[nWritten++] = (-nOffset) & 0xff;
       if (nLongOffsetBit) {
          pOutData[nWritten++] = (-nOffset) >> 8;
       }
       nWritten = lzsa_write_match_varlen_v1(pOutData, nWritten, nEncodedLen);
       /* Offset statistics */
       if (pCompressor->stats.min_offset == -1 || nOffset < pCompressor->stats.min_offset)
          pCompressor->stats.min_offset = nOffset;
       if (nOffset > pCompressor->stats.max_offset)
          pCompressor->stats.max_offset = nOffset;
       pCompressor->stats.total_offsets += nOffset;
       /* Match length statistics */
       if (pCompressor->stats.min_match_len == -1 || nLen < pCompressor->stats.min_match_len)
          pCompressor->stats.min_match_len = nLen;
       if (nLen > pCompressor->stats.max_match_len)
          pCompressor->stats.max_match_len = nLen;
       pCompressor->stats.total_match_lens += nLen;
       pCompressor->stats.match_divisor++;
       /* Separate statistics for matches at offset 1 and offset 2 */
       switch (nOffset) {
       case 1:
          if (pCompressor->stats.min_rle1_len == -1 || nLen < pCompressor->stats.min_rle1_len)
             pCompressor->stats.min_rle1_len = nLen;
          if (nLen > pCompressor->stats.max_rle1_len)
             pCompressor->stats.max_rle1_len = nLen;
          pCompressor->stats.total_rle1_lens += nLen;
          pCompressor->stats.rle1_divisor++;
          break;
       case 2:
          if (pCompressor->stats.min_rle2_len == -1 || nLen < pCompressor->stats.min_rle2_len)
             pCompressor->stats.min_rle2_len = nLen;
          if (nLen > pCompressor->stats.max_rle2_len)
             pCompressor->stats.max_rle2_len = nLen;
          pCompressor->stats.total_rle2_lens += nLen;
          pCompressor->stats.rle2_divisor++;
          break;
       default:
          break;
       }
       nPos += nLen;
       if (pCompressor->flags & LZSA_FLAG_RAW_BLOCK) {
          /* Track the largest amount by which consumed input exceeds produced output */
          const int nSafeDist = (nPos - nStartOffset) - nWritten;
          if (nSafeDist >= 0 && pCompressor->safe_dist < nSafeDist)
             pCompressor->safe_dist = nSafeDist;
       }
       pCompressor->num_commands++;
    }
    /* Final command: remaining literals only */
    const int nFinalLiteralsField = (nPendingLiterals < LITERALS_RUN_LEN_V1) ? nPendingLiterals : LITERALS_RUN_LEN_V1;
    const int nFinalCommandBits = 8 /* token */ + lzsa_get_literals_varlen_size_v1(nPendingLiterals) + (nPendingLiterals << 3);
    if ((nWritten + (nFinalCommandBits >> 3)) > nMaxOutDataSize)
       return -1;
    pOutData[nWritten++] = (nFinalLiteralsField << 4) | ((pCompressor->flags & LZSA_FLAG_RAW_BLOCK) ? 0x0f : 0x00);
    nWritten = lzsa_write_literals_varlen_v1(pOutData, nWritten, nPendingLiterals);
    if (pCompressor->stats.min_literals == -1 || nPendingLiterals < pCompressor->stats.min_literals)
       pCompressor->stats.min_literals = nPendingLiterals;
    if (nPendingLiterals > pCompressor->stats.max_literals)
       pCompressor->stats.max_literals = nPendingLiterals;
    pCompressor->stats.total_literals += nPendingLiterals;
    pCompressor->stats.literals_divisor++;
    if (nPendingLiterals != 0) {
       memcpy(pOutData + nWritten, pInWindow + nLiteralsStart, nPendingLiterals);
       nWritten += nPendingLiterals;
    }
    if (pCompressor->flags & LZSA_FLAG_RAW_BLOCK) {
       const int nSafeDist = (nPos - nStartOffset) - nWritten;
       if (nSafeDist >= 0 && pCompressor->safe_dist < nSafeDist)
          pCompressor->safe_dist = nSafeDist;
    }
    pCompressor->num_commands++;
    if (pCompressor->flags & LZSA_FLAG_RAW_BLOCK) {
       /* Emit EOD marker for raw block */
       if ((nWritten + 4) > nMaxOutDataSize)
          return -1;
       pOutData[nWritten++] = 0;
       pOutData[nWritten++] = 238;
       pOutData[nWritten++] = 0;
       pOutData[nWritten++] = 0;
    }
    return nWritten;
 }
 /**
 * Emit raw block of uncompressible data
 *
 * The block is a single command holding every input byte as a literal (token with a match length field of 0x0f),
 * followed by the 4-byte end-of-data marker. The command counter in the context is reset to 1.
 *
 * @param pCompressor compression context
 * @param pInWindow pointer to input data window (previously compressed bytes + bytes to compress)
 * @param nStartOffset current offset in input window (typically the number of previously compressed bytes)
 * @param nEndOffset offset to end finding matches at (typically the size of the total input window in bytes)
 * @param pOutData pointer to output buffer
 * @param nMaxOutDataSize maximum size of output buffer, in bytes
 *
 * @return size of data written to the output buffer, or -1 if the output buffer is too small
 */
 static int lzsa_write_raw_uncompressed_block_v1(lzsa_compressor *pCompressor, const unsigned char *pInWindow, const int nStartOffset, const int nEndOffset, unsigned char *pOutData, const int nMaxOutDataSize) {
    int nNumLiterals = nEndOffset - nStartOffset;
    int nTokenLiteralsLen = (nNumLiterals >= LITERALS_RUN_LEN_V1) ? LITERALS_RUN_LEN_V1 : nNumLiterals;
    int nOutOffset = 0;
    /* Total size in bits. The EOD marker is 4 bytes, so it must be counted as 4 << 3 bits; counting it as 4 bits made it
     * vanish in the shift below and let the marker be written past the end of the output buffer. */
    int nCommandSize = 8 /* token */ + lzsa_get_literals_varlen_size_v1(nNumLiterals) + (nNumLiterals << 3) + (4 << 3) /* EOD marker */;
    if ((nOutOffset + (nCommandSize >> 3)) > nMaxOutDataSize)
       return -1;
    pCompressor->num_commands = 0;
    pOutData[nOutOffset++] = (nTokenLiteralsLen << 4) | 0x0f;
    nOutOffset = lzsa_write_literals_varlen_v1(pOutData, nOutOffset, nNumLiterals);
    if (nNumLiterals != 0) {
       memcpy(pOutData + nOutOffset, pInWindow + nStartOffset, nNumLiterals);
       nOutOffset += nNumLiterals;
    }
    pCompressor->num_commands++;
    /* Emit EOD marker for raw block */
    pOutData[nOutOffset++] = 0;
    pOutData[nOutOffset++] = 238;
    pOutData[nOutOffset++] = 0;
    pOutData[nOutOffset++] = 0;
    return nOutOffset;
 }
 /**
 * Select the most optimal matches, reduce the token count if possible, and then emit a block of compressed LZSA1 data
 *
 * Two parses are built: a base parse, and (for inputs below 65536 bytes) a second one that breaks cost ties in favor
 * of fewer tokens. Each goes through up to 20 command count reduction passes. The second parse is used when its
 * estimated size is not larger than the base one. If writing the block fails and LZSA_FLAG_RAW_BLOCK is set, the
 * input is stored as a raw uncompressed block instead.
 *
 * @param pCompressor compression context
 * @param pInWindow pointer to input data window (previously compressed bytes + bytes to compress)
 * @param nPreviousBlockSize number of previously compressed bytes (or 0 for none)
 * @param nInDataSize number of input bytes to compress
 * @param pOutData pointer to output buffer
 * @param nMaxOutDataSize maximum size of output buffer, in bytes
 *
 * @return size of compressed data in output buffer, or -1 if the data is uncompressible
 */
 int lzsa_optimize_and_write_block_v1(lzsa_compressor *pCompressor, const unsigned char *pInWindow, const int nPreviousBlockSize, const int nInDataSize, unsigned char *pOutData, const int nMaxOutDataSize) {
    const int nEndOffset = nPreviousBlockSize + nInDataSize;
    /* Match arrays are indexed with input window offsets, hence the bias by the previous block size */
    lzsa_match *pBaseParse = pCompressor->best_match - nPreviousBlockSize;
    lzsa_match *pChosenParse = pBaseParse;
    int nBaseSize, nPass, nWritten;
    /* Compress optimally without breaking ties in favor of less tokens */
    memset(pCompressor->best_match, 0, BLOCK_SIZE * sizeof(lzsa_match));
    lzsa_optimize_forward_v1(pCompressor, pBaseParse, nPreviousBlockSize, nEndOffset, 0 /* reduce */);
    for (nPass = 0; nPass < 20; nPass++) {
       if (!lzsa_optimize_command_count_v1(pCompressor, pInWindow, pBaseParse, nPreviousBlockSize, nEndOffset))
          break;
    }
    nBaseSize = lzsa_get_compressed_size_v1(pCompressor, pBaseParse, nPreviousBlockSize, nEndOffset);
    if (nBaseSize > 0 && nInDataSize < 65536) {
       lzsa_match *pReducedParse = pCompressor->improved_match - nPreviousBlockSize;
       int nReducedSize;
       /* Compress optimally and do break ties in favor of less tokens */
       memset(pCompressor->improved_match, 0, BLOCK_SIZE * sizeof(lzsa_match));
       lzsa_optimize_forward_v1(pCompressor, pReducedParse, nPreviousBlockSize, nEndOffset, 1 /* reduce */);
       for (nPass = 0; nPass < 20; nPass++) {
          if (!lzsa_optimize_command_count_v1(pCompressor, pInWindow, pReducedParse, nPreviousBlockSize, nEndOffset))
             break;
       }
       nReducedSize = lzsa_get_compressed_size_v1(pCompressor, pReducedParse, nPreviousBlockSize, nEndOffset);
       /* Pick the parse with the reduced number of tokens as long as it didn't negatively affect the size */
       if (nReducedSize > 0 && nReducedSize <= nBaseSize)
          pChosenParse = pReducedParse;
    }
    nWritten = lzsa_write_block_v1(pCompressor, pChosenParse, pInWindow, nPreviousBlockSize, nEndOffset, pOutData, nMaxOutDataSize);
    if (nWritten < 0 && (pCompressor->flags & LZSA_FLAG_RAW_BLOCK))
       nWritten = lzsa_write_raw_uncompressed_block_v1(pCompressor, pInWindow, nPreviousBlockSize, nEndOffset, pOutData, nMaxOutDataSize);
    return nWritten;
 }
--- a/Tools/unix/lzsa/src/shrink_block_v1.h
+++ b/Tools/unix/lzsa/src/shrink_block_v1.h
@ -0,0 +1,53 @@
 /*
 * shrink_block_v1.h - LZSA1 block compressor definitions
 *
 * Copyright (C) 2019 Emmanuel Marty
 *
 * This software is provided 'as-is', without any express or implied
 * warranty.  In no event will the authors be held liable for any damages
 * arising from the use of this software.
 *
 * Permission is granted to anyone to use this software for any purpose,
 * including commercial applications, and to alter it and redistribute it
 * freely, subject to the following restrictions:
 *
 * 1. The origin of this software must not be misrepresented; you must not
 *    claim that you wrote the original software. If you use this software
 *    in a product, an acknowledgment in the product documentation would be
 *    appreciated but is not required.
 * 2. Altered source versions must be plainly marked as such, and must not be
 *    misrepresented as being the original software.
 * 3. This notice may not be removed or altered from any source distribution.
 */
 /*
 * Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
 *
 * Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
 * With help, ideas, optimizations and speed measurements by spke <zxintrospec@gmail.com>
 * With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
 * Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
 *
 */
 #ifndef _SHRINK_BLOCK_V1_H
 #define _SHRINK_BLOCK_V1_H
 /* Forward declarations */
 typedef struct _lzsa_compressor lzsa_compressor;
 /**
 * Select the most optimal matches, reduce the token count if possible, and then emit a block of compressed LZSA1 data
 *
 * @param pCompressor compression context
 * @param pInWindow pointer to input data window (previously compressed bytes + bytes to compress)
 * @param nPreviousBlockSize number of previously compressed bytes (or 0 for none)
 * @param nInDataSize number of input bytes to compress
 * @param pOutData pointer to output buffer
 * @param nMaxOutDataSize maximum size of output buffer, in bytes
 *
 * @return size of compressed data in output buffer, or -1 if the data is uncompressible
 */
 int lzsa_optimize_and_write_block_v1(lzsa_compressor *pCompressor, const unsigned char *pInWindow, const int nPreviousBlockSize, const int nInDataSize, unsigned char *pOutData, const int nMaxOutDataSize);
 #endif /* _SHRINK_BLOCK_V1_H */
--- a/Tools/unix/lzsa/src/shrink_block_v2.c
+++ b/Tools/unix/lzsa/src/shrink_block_v2.c
--- a/Tools/unix/lzsa/src/shrink_block_v2.h
+++ b/Tools/unix/lzsa/src/shrink_block_v2.h
@ -0,0 +1,53 @@
 /*
 * shrink_block_v2.h - LZSA2 block compressor definitions
 *
 * Copyright (C) 2019 Emmanuel Marty
 *
 * This software is provided 'as-is', without any express or implied
 * warranty.  In no event will the authors be held liable for any damages
 * arising from the use of this software.
 *
 * Permission is granted to anyone to use this software for any purpose,
 * including commercial applications, and to alter it and redistribute it
 * freely, subject to the following restrictions:
 *
 * 1. The origin of this software must not be misrepresented; you must not
 *    claim that you wrote the original software. If you use this software
 *    in a product, an acknowledgment in the product documentation would be
 *    appreciated but is not required.
 * 2. Altered source versions must be plainly marked as such, and must not be
 *    misrepresented as being the original software.
 * 3. This notice may not be removed or altered from any source distribution.
 */
 /*
 * Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
 *
 * Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
 * With help, ideas, optimizations and speed measurements by spke <zxintrospec@gmail.com>
 * With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
 * Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
 *
 */
 #ifndef _SHRINK_BLOCK_V2_H
 #define _SHRINK_BLOCK_V2_H
 /* Forward declarations */
 typedef struct _lzsa_compressor lzsa_compressor;
 /**
 * Select the optimal matches, reduce the token count if possible, and then emit a block of compressed LZSA2 data
 *
 * @param pCompressor compression context
 * @param pInWindow pointer to input data window (previously compressed bytes + bytes to compress)
 * @param nPreviousBlockSize number of previously compressed bytes (or 0 for none)
 * @param nInDataSize number of input bytes to compress
 * @param pOutData pointer to output buffer
 * @param nMaxOutDataSize maximum size of output buffer, in bytes
 *
 * @return size of compressed data in output buffer, or -1 if the data is uncompressible
 */
 int lzsa_optimize_and_write_block_v2(lzsa_compressor *pCompressor, const unsigned char *pInWindow, const int nPreviousBlockSize, const int nInDataSize, unsigned char *pOutData, const int nMaxOutDataSize);
 #endif /* _SHRINK_BLOCK_V2_H */
--- a/Tools/unix/lzsa/src/shrink_context.c
+++ b/Tools/unix/lzsa/src/shrink_context.c
@ -0,0 +1,221 @@
 /*
 * shrink_context.c - compression context implementation
 *
 * Copyright (C) 2019 Emmanuel Marty
 *
 * This software is provided 'as-is', without any express or implied
 * warranty.  In no event will the authors be held liable for any damages
 * arising from the use of this software.
 *
 * Permission is granted to anyone to use this software for any purpose,
 * including commercial applications, and to alter it and redistribute it
 * freely, subject to the following restrictions:
 *
 * 1. The origin of this software must not be misrepresented; you must not
 *    claim that you wrote the original software. If you use this software
 *    in a product, an acknowledgment in the product documentation would be
 *    appreciated but is not required.
 * 2. Altered source versions must be plainly marked as such, and must not be
 *    misrepresented as being the original software.
 * 3. This notice may not be removed or altered from any source distribution.
 */
 /*
 * Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
 *
 * Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
 * With help, ideas, optimizations and speed measurements by spke <zxintrospec@gmail.com>
 * With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
 * Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
 *
 */
 #include <stdlib.h>
 #include <string.h>
 #include "shrink_context.h"
 #include "shrink_block_v1.h"
 #include "shrink_block_v2.h"
 #include "format.h"
 #include "matchfinder.h"
 #include "lib.h"
 /**
 * Set up a compression context: reset its state and allocate its work buffers
 *
 * @param pCompressor compression context to initialize
 * @param nMaxWindowSize maximum size of input data window (previously compressed bytes + bytes to compress)
 * @param nMinMatchSize requested minimum match size; raised to the format's minimum and capped at 5 (format 1) or 3 (otherwise)
 * @param nFormatVersion version of format to use (1-2)
 * @param nFlags compression flags
 *
 * @return 0 for success, non-zero (100) for failure; on failure the context has already been destroyed
 */
 int lzsa_compressor_init(lzsa_compressor *pCompressor, const int nMaxWindowSize, const int nMinMatchSize, const int nFormatVersion, const int nFlags) {
   const int nLowestMinMatch = (nFormatVersion == 1) ? MIN_MATCH_SIZE_V1 : MIN_MATCH_SIZE_V2;
   const int nHighestMinMatch = (nFormatVersion == 1) ? 5 : 3;
   const int nMatchesPerIndex = (nFormatVersion == 2) ? NMATCHES_PER_INDEX_V2 : NMATCHES_PER_INDEX_V1;
   const int nSortStatus = divsufsort_init(&pCompressor->divsufsort_context);
   int nEffectiveMinMatch = nMinMatchSize;

   /* Null every buffer first so that the failure path can destroy a partially built context */
   pCompressor->intervals = NULL;
   pCompressor->pos_data = NULL;
   pCompressor->open_intervals = NULL;
   pCompressor->match = NULL;
   pCompressor->best_match = NULL;
   pCompressor->improved_match = NULL;
   pCompressor->arrival = NULL;

   /* Keep the requested minimum match size inside the range allowed for this format */
   if (nEffectiveMinMatch < nLowestMinMatch)
      nEffectiveMinMatch = nLowestMinMatch;
   else if (nEffectiveMinMatch > nHighestMinMatch)
      nEffectiveMinMatch = nHighestMinMatch;
   pCompressor->min_match_size = nEffectiveMinMatch;

   pCompressor->format_version = nFormatVersion;
   pCompressor->flags = nFlags;
   pCompressor->safe_dist = 0;
   pCompressor->num_commands = 0;

   /* Totals and maxima start at 0, minima start at -1 */
   memset(&pCompressor->stats, 0, sizeof(pCompressor->stats));
   pCompressor->stats.min_literals = -1;
   pCompressor->stats.min_match_len = -1;
   pCompressor->stats.min_offset = -1;
   pCompressor->stats.min_rle1_len = -1;
   pCompressor->stats.min_rle2_len = -1;

   if (nSortStatus)
      goto init_failed;

   /* Allocate the work buffers one by one, bailing out at the first failure */
   pCompressor->intervals = (unsigned int *)malloc(nMaxWindowSize * sizeof(unsigned int));
   if (!pCompressor->intervals)
      goto init_failed;

   pCompressor->pos_data = (unsigned int *)malloc(nMaxWindowSize * sizeof(unsigned int));
   if (!pCompressor->pos_data)
      goto init_failed;

   pCompressor->open_intervals = (unsigned int *)malloc((LCP_AND_TAG_MAX + 1) * sizeof(unsigned int));
   if (!pCompressor->open_intervals)
      goto init_failed;

   pCompressor->arrival = (lzsa_arrival *)malloc(((BLOCK_SIZE + 1) << MATCHES_PER_ARRIVAL_SHIFT) * sizeof(lzsa_arrival));
   if (!pCompressor->arrival)
      goto init_failed;

   pCompressor->best_match = (lzsa_match *)malloc(BLOCK_SIZE * sizeof(lzsa_match));
   if (!pCompressor->best_match)
      goto init_failed;

   pCompressor->improved_match = (lzsa_match *)malloc(BLOCK_SIZE * sizeof(lzsa_match));
   if (!pCompressor->improved_match)
      goto init_failed;

   /* Format 2 stores more candidate matches per position than format 1 */
   pCompressor->match = (lzsa_match *)malloc(BLOCK_SIZE * nMatchesPerIndex * sizeof(lzsa_match));
   if (!pCompressor->match)
      goto init_failed;

   return 0;

 init_failed:
   lzsa_compressor_destroy(pCompressor);
   return 100;
 }
 /**
 * Release every resource owned by a compression context
 *
 * Each buffer pointer is reset to NULL once freed, so the function also works on a
 * context whose initialization stopped part-way through.
 *
 * @param pCompressor compression context to clean up
 */
 void lzsa_compressor_destroy(lzsa_compressor *pCompressor) {
   divsufsort_destroy(&pCompressor->divsufsort_context);

   /* free(NULL) is a no-op, so buffers that were never allocated need no guard */
   free(pCompressor->match);
   pCompressor->match = NULL;

   free(pCompressor->improved_match);
   pCompressor->improved_match = NULL;

   free(pCompressor->arrival);
   pCompressor->arrival = NULL;

   free(pCompressor->best_match);
   pCompressor->best_match = NULL;

   free(pCompressor->open_intervals);
   pCompressor->open_intervals = NULL;

   free(pCompressor->pos_data);
   pCompressor->pos_data = NULL;

   free(pCompressor->intervals);
   pCompressor->intervals = NULL;
 }
 /**
 * Compress a single block of input data
 *
 * When LZSA_FLAG_RAW_BACKWARD is set, the bytes to compress are reversed in place for the
 * duration of the call and restored before returning, and the compressed output is reversed too.
 *
 * @param pCompressor compression context
 * @param pInWindow pointer to input data window (previously compressed bytes + bytes to compress)
 * @param nPreviousBlockSize number of previously compressed bytes (or 0 for none)
 * @param nInDataSize number of input bytes to compress
 * @param pOutData pointer to output buffer
 * @param nMaxOutDataSize maximum size of output buffer, in bytes
 *
 * @return size of compressed data in output buffer, or -1 if the data is uncompressible
 */
 int lzsa_compressor_shrink_block(lzsa_compressor *pCompressor, unsigned char *pInWindow, const int nPreviousBlockSize, const int nInDataSize, unsigned char *pOutData, const int nMaxOutDataSize) {
   const int nWindowSize = nPreviousBlockSize + nInDataSize;
   int nCompressedSize = -1;

   if (pCompressor->flags & LZSA_FLAG_RAW_BACKWARD)
      lzsa_reverse_buffer(pInWindow + nPreviousBlockSize, nInDataSize);

   if (!lzsa_build_suffix_array(pCompressor, pInWindow, nWindowSize)) {
      /* Step over the already-compressed bytes, then collect matches for the new ones */
      if (nPreviousBlockSize)
         lzsa_skip_matches(pCompressor, 0, nPreviousBlockSize);
      lzsa_find_all_matches(pCompressor, (pCompressor->format_version == 2) ? NMATCHES_PER_INDEX_V2 : NMATCHES_PER_INDEX_V1, nPreviousBlockSize, nWindowSize);

      switch (pCompressor->format_version) {
      case 1:
         nCompressedSize = lzsa_optimize_and_write_block_v1(pCompressor, pInWindow, nPreviousBlockSize, nInDataSize, pOutData, nMaxOutDataSize);
         break;
      case 2:
         nCompressedSize = lzsa_optimize_and_write_block_v2(pCompressor, pInWindow, nPreviousBlockSize, nInDataSize, pOutData, nMaxOutDataSize);
         break;
      default:
         /* Unknown format version: report the block as uncompressible */
         nCompressedSize = -1;
         break;
      }

      /* Backward mode emits the compressed bytes in reverse order as well */
      if (nCompressedSize != -1 && (pCompressor->flags & LZSA_FLAG_RAW_BACKWARD))
         lzsa_reverse_buffer(pOutData, nCompressedSize);
   }

   /* Put the caller's input bytes back in their original order */
   if (pCompressor->flags & LZSA_FLAG_RAW_BACKWARD)
      lzsa_reverse_buffer(pInWindow + nPreviousBlockSize, nInDataSize);

   return nCompressedSize;
 }
 /**
 * Report how many compression commands the context has counted so far
 *
 * @param pCompressor compression context
 *
 * @return number of commands
 */
 int lzsa_compressor_get_command_count(lzsa_compressor *pCompressor) {
   const int nCommandCount = pCompressor->num_commands;

   return nCommandCount;
 }
--- a/Tools/unix/lzsa/src/shrink_context.h
+++ b/Tools/unix/lzsa/src/shrink_context.h
@ -0,0 +1,183 @@
 /*
 * shrink_context.h - compression context definitions
 *
 * Copyright (C) 2019 Emmanuel Marty
 *
 * This software is provided 'as-is', without any express or implied
 * warranty.  In no event will the authors be held liable for any damages
 * arising from the use of this software.
 *
 * Permission is granted to anyone to use this software for any purpose,
 * including commercial applications, and to alter it and redistribute it
 * freely, subject to the following restrictions:
 *
 * 1. The origin of this software must not be misrepresented; you must not
 *    claim that you wrote the original software. If you use this software
 *    in a product, an acknowledgment in the product documentation would be
 *    appreciated but is not required.
 * 2. Altered source versions must be plainly marked as such, and must not be
 *    misrepresented as being the original software.
 * 3. This notice may not be removed or altered from any source distribution.
 */
 /*
 * Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
 *
 * Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
 * With help, ideas, optimizations and speed measurements by spke <zxintrospec@gmail.com>
 * With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
 * Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
 *
 */
 #ifndef _SHRINK_CONTEXT_H
 #define _SHRINK_CONTEXT_H
 #include "divsufsort.h"
 #ifdef __cplusplus
 extern "C" {
 #endif
 /* Bit-field layout constants. NOTE(review): "LCP" presumably stands for longest common
  * prefix, as used by the suffix-array match finder; confirm against matchfinder.c. */
 #define LCP_BITS 14
 #define TAG_BITS 4
 /* (1 << 10) - 1 = 1023 */
 #define LCP_MAX ((1U<<(LCP_BITS - TAG_BITS)) - 1)
 /* 1 << 13 = 8192; lzsa_compressor_init() sizes open_intervals as LCP_AND_TAG_MAX + 1 entries */
 #define LCP_AND_TAG_MAX (1U<<(LCP_BITS - 1))
 /* 31 - 14 = 17 */
 #define LCP_SHIFT (31-LCP_BITS)
 /* LCP_BITS-wide field occupying bits 17..30 */
 #define LCP_MASK (((1U<<LCP_BITS) - 1) << LCP_SHIFT)
 /* Low 17 bits (bits 0..16) */
 #define POS_MASK ((1U<<LCP_SHIFT) - 1)
 /* Bit 31, and the mask of every bit except bit 31 */
 #define VISITED_FLAG 0x80000000
 #define EXCL_VISITED_MASK  0x7fffffff
 /* Arrival slot counts; 1 << MATCHES_PER_ARRIVAL_SHIFT equals NMATCHES_PER_ARRIVAL_V2_BIG (32).
  * lzsa_compressor_init() allocates (BLOCK_SIZE + 1) << MATCHES_PER_ARRIVAL_SHIFT arrival slots. */
 #define NMATCHES_PER_ARRIVAL_V1 8
 #define NMATCHES_PER_ARRIVAL_V2_SMALL 9
 #define NMATCHES_PER_ARRIVAL_V2_BIG 32
 #define MATCHES_PER_ARRIVAL_SHIFT 5
 /* Match entries stored per input position for each format; each count is 1 << its SHIFT.
  * lzsa_compressor_init() allocates BLOCK_SIZE * NMATCHES_PER_INDEX_Vx match entries. */
 #define NMATCHES_PER_INDEX_V1 8
 #define MATCHES_PER_INDEX_SHIFT_V1 3
 #define NMATCHES_PER_INDEX_V2 64
 #define MATCHES_PER_INDEX_SHIFT_V2 6
 /* NOTE(review): the four tuning constants below are consumed outside this header;
  * their exact meaning is not visible here - confirm against the block optimizers. */
 #define LEAVE_ALONE_MATCH_SIZE 300
 #define LEAVE_ALONE_MATCH_SIZE_SMALL 1000
 #define LAST_LITERALS 0
 #define MODESWITCH_PENALTY 3
 /**
 * One match
 *
 * Both fields are 16-bit. The compressor keeps arrays of these in lzsa_compressor
 * (match, best_match, improved_match).
 */
 typedef struct _lzsa_match {
   unsigned short length;  /* match length - presumably in bytes; TODO confirm */
   unsigned short offset;  /* match offset - presumably the distance back to the matched data; TODO confirm */
 } lzsa_match;
 /**
 * Forward arrival slot
 *
 * lzsa_compressor_init() allocates (BLOCK_SIZE + 1) << MATCHES_PER_ARRIVAL_SHIFT of these
 * for lzsa_compressor.arrival.
 *
 * NOTE(review): the fields are filled in by the block optimizers, which are not part of
 * this header; the per-field notes below are inferred from the names and must be confirmed.
 */
 typedef struct {
   int cost;                     /* presumably the accumulated cost of reaching this slot */
   unsigned short rep_offset;    /* presumably the repeat-match offset in effect */
   short from_slot;              /* presumably the slot index this arrival came from */
   int from_pos;                 /* presumably the position this arrival came from */
   unsigned short rep_len;       /* presumably the length associated with rep_offset */
   int rep_pos;                  /* presumably the position associated with rep_offset */
   int num_literals;             /* presumably the count of pending literals */
   int score;                    /* presumably a tie-breaking score */
   unsigned short match_offset;  /* presumably the offset of the match taken to arrive here */
   unsigned short match_len;     /* presumably the length of the match taken to arrive here */
 } lzsa_arrival;
 /** Compression statistics */
 typedef struct _lzsa_stats {
   int min_literals;
   int max_literals;
   int total_literals;
   int min_offset;
   int max_offset;
   int num_rep_offsets;
   int total_offsets;
   int min_match_len;
   int max_match_len;
   int total_match_lens;
   int min_rle1_len;
   int max_rle1_len;
   int total_rle1_lens;
   int min_rle2_len;
   int max_rle2_len;
   int total_rle2_lens;
   int literals_divisor;
   int match_divisor;
   int rle1_divisor;
   int rle2_divisor;
 } lzsa_stats;
 /**
 * Compression context
 *
 * Filled in by lzsa_compressor_init() and released by lzsa_compressor_destroy(), which frees
 * every buffer below and resets its pointer to NULL.
 */
 typedef struct _lzsa_compressor {
   divsufsort_ctx_t divsufsort_context;  /* set up by divsufsort_init(), released by divsufsort_destroy() */
   unsigned int *intervals;              /* nMaxWindowSize entries */
   unsigned int *pos_data;               /* nMaxWindowSize entries */
   unsigned int *open_intervals;         /* LCP_AND_TAG_MAX + 1 entries */
   lzsa_match *match;                    /* BLOCK_SIZE * NMATCHES_PER_INDEX_V2 entries for format 2, BLOCK_SIZE * NMATCHES_PER_INDEX_V1 otherwise */
   lzsa_match *best_match;               /* BLOCK_SIZE entries */
   lzsa_match *improved_match;           /* BLOCK_SIZE entries */
   lzsa_arrival *arrival;                /* (BLOCK_SIZE + 1) << MATCHES_PER_ARRIVAL_SHIFT entries */
   int min_match_size;                   /* requested minimum match size after clamping to the format's range */
   int format_version;                   /* format version passed to lzsa_compressor_init() */
   int flags;                            /* compression flags passed to lzsa_compressor_init() */
   int safe_dist;                        /* reset to 0 by init; presumably the safe distance for raw blocks - confirm */
   int num_commands;                     /* reset to 0 by init; returned by lzsa_compressor_get_command_count() */
   lzsa_stats stats;                     /* compression statistics */
 } lzsa_compressor;
 /**
 * Initialize compression context
 *
 * Resets the context's state and allocates its work buffers. If anything fails, the
 * context is destroyed again before the function returns.
 *
 * @param pCompressor compression context to initialize
 * @param nMaxWindowSize maximum size of input data window (previously compressed bytes + bytes to compress)
 * @param nMinMatchSize minimum match size; raised to MIN_MATCH_SIZE_V1 (format 1) or MIN_MATCH_SIZE_V2 (otherwise) if smaller, and capped at 5 (format 1) or 3 (otherwise)
 * @param nFormatVersion version of format to use (1-2)
 * @param nFlags compression flags
 *
 * @return 0 for success, non-zero for failure
 */
 int lzsa_compressor_init(lzsa_compressor *pCompressor, const int nMaxWindowSize, const int nMinMatchSize, const int nFormatVersion, const int nFlags);
 /**
 * Clean up compression context and free up any associated resources
 *
 * Every buffer pointer in the context is set to NULL after being freed.
 * lzsa_compressor_init() calls this itself when it fails.
 *
 * @param pCompressor compression context to clean up
 */
 void lzsa_compressor_destroy(lzsa_compressor *pCompressor);
 /**
 * Compress one block of data
 *
 * pInWindow is not const: when the context's flags include LZSA_FLAG_RAW_BACKWARD, the bytes
 * to compress are reversed in place during the call and restored before it returns; the
 * compressed output is reversed as well.
 *
 * @param pCompressor compression context
 * @param pInWindow pointer to input data window (previously compressed bytes + bytes to compress)
 * @param nPreviousBlockSize number of previously compressed bytes (or 0 for none)
 * @param nInDataSize number of input bytes to compress
 * @param pOutData pointer to output buffer
 * @param nMaxOutDataSize maximum size of output buffer, in bytes
 *
 * @return size of compressed data in output buffer, or -1 if the data is uncompressible
 *         (also -1 if building the suffix array fails or the format version is neither 1 nor 2)
 */
 int lzsa_compressor_shrink_block(lzsa_compressor *pCompressor, unsigned char *pInWindow, const int nPreviousBlockSize, const int nInDataSize, unsigned char *pOutData, const int nMaxOutDataSize);
 /**
 * Get the number of compression commands issued in compressed data blocks
 *
 * @param pCompressor compression context
 *
 * @return number of commands
 */
 int lzsa_compressor_get_command_count(lzsa_compressor *pCompressor);
 #ifdef __cplusplus
 }
 #endif
 #endif /* _SHRINK_CONTEXT_H */
--- a/Tools/unix/lzsa/src/shrink_inmem.c
+++ b/Tools/unix/lzsa/src/shrink_inmem.c
@ -0,0 +1,185 @@
 /*
 * shrink_inmem.c - in-memory compression implementation
 *
 * Copyright (C) 2019 Emmanuel Marty
 *
 * This software is provided 'as-is', without any express or implied
 * warranty.  In no event will the authors be held liable for any damages
 * arising from the use of this software.
 *
 * Permission is granted to anyone to use this software for any purpose,
 * including commercial applications, and to alter it and redistribute it
 * freely, subject to the following restrictions:
 *
 * 1. The origin of this software must not be misrepresented; you must not
 *    claim that you wrote the original software. If you use this software
 *    in a product, an acknowledgment in the product documentation would be
 *    appreciated but is not required.
 * 2. Altered source versions must be plainly marked as such, and must not be
 *    misrepresented as being the original software.
 * 3. This notice may not be removed or altered from any source distribution.
 */
 /*
 * Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
 *
 * Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
 * With help, ideas, optimizations and speed measurements by spke <zxintrospec@gmail.com>
 * With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
 * Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
 *
 */
 #include <stdlib.h>
 #include <string.h>
 #include "shrink_inmem.h"
 #include "shrink_context.h"
 #include "frame.h"
 #include "format.h"
 #include "lib.h"
 /**
 * Get maximum compressed size of input(source) data
 *
 * The bound is the stream header, one frame per block (the input is split into blocks of
 * at most BLOCK_SIZE bytes, rounded up), the input size itself, and one footer frame.
 *
 * @param nInputSize input(source) size in bytes
 *
 * @return maximum compressed size
 */
 size_t lzsa_get_max_compressed_size_inmem(size_t nInputSize) {
   /* Ceiling division by BLOCK_SIZE, so the block count follows BLOCK_SIZE instead of a hard-coded shift */
   const size_t nMaxBlocks = (nInputSize + (BLOCK_SIZE - 1)) / (size_t)(BLOCK_SIZE);

   return lzsa_get_header_size() + nMaxBlocks * lzsa_get_frame_size() + nInputSize + lzsa_get_frame_size() /* footer */;
 }
 /**
 * Narrow a byte count to int, capping it at BLOCK_SIZE
 *
 * lzsa_compress_inmem() never needs a size larger than one block, so capping while the
 * value is still a size_t keeps counts above INT_MAX from being truncated by the conversion.
 *
 * @param nByteCount byte count to narrow
 *
 * @return nByteCount if it does not exceed BLOCK_SIZE, BLOCK_SIZE otherwise
 */
 static int lzsa_inmem_cap_to_block(const size_t nByteCount) {
   return (nByteCount > (size_t)(BLOCK_SIZE)) ? (int)(BLOCK_SIZE) : (int)nByteCount;
 }
 /**
 * Compress memory
 *
 * The input is compressed block by block (at most BLOCK_SIZE bytes each). Unless
 * LZSA_FLAG_RAW_BLOCK is set, the output starts with a stream header, every block is
 * preceded by a frame, blocks that do not compress are stored as-is, and a footer frame
 * ends the stream. With LZSA_FLAG_RAW_BLOCK, a single headerless compressed block is
 * produced; more than one block of input or an uncompressible block is an error.
 *
 * @param pInputData pointer to input(source) data to compress
 * @param pOutBuffer buffer for compressed data
 * @param nInputSize input(source) size in bytes
 * @param nMaxOutBufferSize maximum capacity of compression buffer
 * @param nFlags compression flags (LZSA_FLAG_xxx)
 * @param nMinMatchSize minimum match size
 * @param nFormatVersion version of format to use (1-2)
 *
 * @return actual compressed size, or -1 (converted to size_t) for error
 */
 size_t lzsa_compress_inmem(unsigned char *pInputData, unsigned char *pOutBuffer, size_t nInputSize, size_t nMaxOutBufferSize,
                             const unsigned int nFlags, const int nMinMatchSize, const int nFormatVersion) {
   lzsa_compressor compressor;
   size_t nOriginalSize = 0;
   size_t nCompressedSize = 0L;
   int nResult;
   int nError = 0;
   nResult = lzsa_compressor_init(&compressor, BLOCK_SIZE * 2, nMinMatchSize, nFormatVersion, nFlags);
   if (nResult != 0) {
      return -1;
   }
   if ((nFlags & LZSA_FLAG_RAW_BLOCK) == 0) {
      int nHeaderSize = lzsa_encode_header(pOutBuffer, lzsa_inmem_cap_to_block(nMaxOutBufferSize), nFormatVersion);
      if (nHeaderSize < 0)
         nError = LZSA_ERROR_COMPRESSION;
      else {
         nCompressedSize += nHeaderSize;
      }
   }
   int nPreviousBlockSize = 0;
   int nNumBlocks = 0;
   while (nOriginalSize < nInputSize && !nError) {
      /* At least one byte remains here, so the capped size is always positive. Capping before
       * narrowing matters: a remainder of 2 GiB or more used to turn into zero or a negative
       * int, which made this loop spin forever without consuming input. */
      const int nInDataSize = lzsa_inmem_cap_to_block(nInputSize - nOriginalSize);
      if ((nFlags & LZSA_FLAG_RAW_BLOCK) != 0 && nNumBlocks) {
         /* Raw mode can only hold a single block */
         nError = LZSA_ERROR_RAW_TOOLARGE;
         break;
      }
      int nOutDataSize;
      int nFrameSize = lzsa_get_frame_size();
      /* Bytes already written, plus room that must stay free for this block's frame and the footer frame */
      size_t nReservedSize = nCompressedSize + (size_t)nFrameSize + (size_t)lzsa_get_frame_size() /* footer */;
      if ((nFlags & LZSA_FLAG_RAW_BLOCK) != 0) {
         nFrameSize = 0;
         nReservedSize = nCompressedSize;
      }
      /* Work out the free space in size_t so that a too-small buffer yields 0 instead of wrapping around */
      int nOutDataEnd = (nReservedSize < nMaxOutBufferSize) ? lzsa_inmem_cap_to_block(nMaxOutBufferSize - nReservedSize) : 0;
      nOutDataSize = lzsa_compressor_shrink_block(&compressor, pInputData + nOriginalSize - nPreviousBlockSize, nPreviousBlockSize, nInDataSize, pOutBuffer + nFrameSize + nCompressedSize, nOutDataEnd);
      if (nOutDataSize >= 0) {
         /* Write compressed block */
         if ((nFlags & LZSA_FLAG_RAW_BLOCK) == 0) {
            /* The block data is already in place after the frame; fill in the frame in front of it */
            int nBlockheaderSize = lzsa_encode_compressed_block_frame(pOutBuffer + nCompressedSize, lzsa_inmem_cap_to_block(nMaxOutBufferSize - nCompressedSize), nOutDataSize);
            if (nBlockheaderSize < 0)
               nError = LZSA_ERROR_COMPRESSION;
            else {
               nCompressedSize += nBlockheaderSize;
            }
         }
         if (!nError) {
            nOriginalSize += nInDataSize;
            nCompressedSize += nOutDataSize;
         }
      }
      else {
         /* Write uncompressible, literal block */
         if ((nFlags & LZSA_FLAG_RAW_BLOCK) != 0) {
            nError = LZSA_ERROR_RAW_UNCOMPRESSED;
            break;
         }
         int nBlockheaderSize = lzsa_encode_uncompressed_block_frame(pOutBuffer + nCompressedSize, lzsa_inmem_cap_to_block(nMaxOutBufferSize - nCompressedSize), nInDataSize);
         if (nBlockheaderSize < 0)
            nError = LZSA_ERROR_COMPRESSION;
         else {
            if ((size_t)nInDataSize > (nMaxOutBufferSize - (nCompressedSize + nBlockheaderSize)))
               nError = LZSA_ERROR_DST;
            else {
               memcpy(pOutBuffer + nBlockheaderSize + nCompressedSize, pInputData + nOriginalSize, nInDataSize);
               nOriginalSize += nInDataSize;
               nCompressedSize += nBlockheaderSize + nInDataSize;
            }
         }
      }
      /* The block just processed becomes the match window for the next one */
      nPreviousBlockSize = nInDataSize;
      nNumBlocks++;
   }
   if (!nError) {
      int nFooterSize;
      if ((nFlags & LZSA_FLAG_RAW_BLOCK) != 0) {
         nFooterSize = 0;
      }
      else {
         nFooterSize = lzsa_encode_footer_frame(pOutBuffer + nCompressedSize, lzsa_inmem_cap_to_block(nMaxOutBufferSize - nCompressedSize));
         if (nFooterSize < 0)
            nError = LZSA_ERROR_COMPRESSION;
      }
      nCompressedSize += nFooterSize;
   }
   lzsa_compressor_destroy(&compressor);
   if (nError) {
      return -1;
   }
   else {
      return nCompressedSize;
   }
 }
--- a/Tools/unix/lzsa/src/shrink_inmem.h
+++ b/Tools/unix/lzsa/src/shrink_inmem.h
@ -0,0 +1,71 @@
 /*
 * shrink_inmem.h - in-memory compression definitions
 *
 * Copyright (C) 2019 Emmanuel Marty
 *
 * This software is provided 'as-is', without any express or implied
 * warranty.  In no event will the authors be held liable for any damages
 * arising from the use of this software.
 *
 * Permission is granted to anyone to use this software for any purpose,
 * including commercial applications, and to alter it and redistribute it
 * freely, subject to the following restrictions:
 *
 * 1. The origin of this software must not be misrepresented; you must not
 *    claim that you wrote the original software. If you use this software
 *    in a product, an acknowledgment in the product documentation would be
 *    appreciated but is not required.
 * 2. Altered source versions must be plainly marked as such, and must not be
 *    misrepresented as being the original software.
 * 3. This notice may not be removed or altered from any source distribution.
 */
 /*
 * Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
 *
 * Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
 * With help, ideas, optimizations and speed measurements by spke <zxintrospec@gmail.com>
 * With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
 * Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
 *
 */
 #ifndef _SHRINK_INMEM_H
 #define _SHRINK_INMEM_H
 #include <stdlib.h>
 #ifdef __cplusplus
 extern "C" {
 #endif
 /**
 * Get maximum compressed size of input(source) data
 *
 * @param nInputSize input(source) size in bytes
 *
 * @return maximum compressed size
 */
 size_t lzsa_get_max_compressed_size_inmem(size_t nInputSize);
 /**
 * Compress memory
 *
 * pInputData is not const because it is handed to the block compressor, which reverses the
 * bytes of each block in place (and restores them) when LZSA_FLAG_RAW_BACKWARD is set.
 *
 * @param pInputData pointer to input(source) data to compress
 * @param pOutBuffer buffer for compressed data
 * @param nInputSize input(source) size in bytes
 * @param nMaxOutBufferSize maximum capacity of compression buffer
 * @param nFlags compression flags (LZSA_FLAG_xxx)
 * @param nMinMatchSize minimum match size
 * @param nFormatVersion version of format to use (1-2)
 *
 * @return actual compressed size, or -1 for error; the return type is size_t, so callers
 *         must compare against (size_t)-1 rather than test for a negative value
 */
 size_t lzsa_compress_inmem(unsigned char *pInputData, unsigned char *pOutBuffer, size_t nInputSize, size_t nMaxOutBufferSize,
   const unsigned int nFlags, const int nMinMatchSize, const int nFormatVersion);
 #ifdef __cplusplus
 }
 #endif
 #endif /* _SHRINK_INMEM_H */
--- a/Tools/unix/lzsa/src/shrink_streaming.c
+++ b/Tools/unix/lzsa/src/shrink_streaming.c
@ -0,0 +1,320 @@
 /*
 * shrink_streaming.c - streaming compression implementation
 *
 * Copyright (C) 2019 Emmanuel Marty
 *
 * This software is provided 'as-is', without any express or implied
 * warranty.  In no event will the authors be held liable for any damages
 * arising from the use of this software.
 *
 * Permission is granted to anyone to use this software for any purpose,
 * including commercial applications, and to alter it and redistribute it
 * freely, subject to the following restrictions:
 *
 * 1. The origin of this software must not be misrepresented; you must not
 *    claim that you wrote the original software. If you use this software
 *    in a product, an acknowledgment in the product documentation would be
 *    appreciated but is not required.
 * 2. Altered source versions must be plainly marked as such, and must not be
 *    misrepresented as being the original software.
 * 3. This notice may not be removed or altered from any source distribution.
 */
 /*
 * Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
 *
 * Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
 * With help, ideas, optimizations and speed measurements by spke <zxintrospec@gmail.com>
 * With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
 * Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
 *
 */
 #include <stdlib.h>
 #include <string.h>
 #include "shrink_streaming.h"
 #include "format.h"
 #include "frame.h"
 #include "lib.h"
 #ifdef _WIN32
 #include <windows.h>
 #else
 #include <stdio.h>
 #endif
 /**
 * Remove a file from disk, ignoring any failure
 *
 * @param pszInFilename name of file to delete
 */
 static void lzsa_delete_file(const char *pszInFilename) {
 #ifndef _WIN32
   /* Everything but Windows: C standard library call */
   remove(pszInFilename);
 #else
   /* Windows: native ANSI file API */
   DeleteFileA(pszInFilename);
 #endif
 }
 /*-------------- File API -------------- */
 /**
 * Compress a file into another file
 *
 * Opens both files, loads the optional dictionary, and hands the streams to
 * lzsa_compress_stream(). If the dictionary cannot be loaded or compression fails,
 * the output file is deleted.
 *
 * @param pszInFilename name of input(source) file to compress
 * @param pszOutFilename name of output(compressed) file to generate
 * @param pszDictionaryFilename name of dictionary file, or NULL for none
 * @param nFlags compression flags (LZSA_FLAG_xxx)
 * @param nMinMatchSize minimum match size
 * @param nFormatVersion version of format to use (1-2)
 * @param progress progress function, called after compressing each block, or NULL for none
 * @param pOriginalSize pointer to returned input(source) size, updated when this function is successful
 * @param pCompressedSize pointer to returned output(compressed) size, updated when this function is successful
 * @param pCommandCount pointer to returned token(compression commands) count, updated when this function is successful
 * @param pSafeDist pointer to return safe distance for raw blocks, updated when this function is successful
 * @param pStats pointer to compression stats that are filled if this function is successful, or NULL
 *
 * @return LZSA_OK for success, or an error value from lzsa_status_t
 */
 lzsa_status_t lzsa_compress_file(const char *pszInFilename, const char *pszOutFilename, const char *pszDictionaryFilename, const unsigned int nFlags, const int nMinMatchSize, const int nFormatVersion,
      void(*progress)(long long nOriginalSize, long long nCompressedSize), long long *pOriginalSize, long long *pCompressedSize, int *pCommandCount, int *pSafeDist, lzsa_stats *pStats) {
   lzsa_stream_t inStream, outStream;
   void *pDictionaryData = NULL;
   int nDictionaryDataSize = 0;
   lzsa_status_t nStatus;

   if (lzsa_filestream_open(&inStream, pszInFilename, "rb") < 0)
      return LZSA_ERROR_SRC;

   if (lzsa_filestream_open(&outStream, pszOutFilename, "wb") < 0) {
      inStream.close(&inStream);
      return LZSA_ERROR_DST;
   }

   /* Compress only if the dictionary loaded; the dictionary is released right after use */
   nStatus = lzsa_dictionary_load(pszDictionaryFilename, &pDictionaryData, &nDictionaryDataSize);
   if (!nStatus) {
      nStatus = lzsa_compress_stream(&inStream, &outStream, pDictionaryData, nDictionaryDataSize, nFlags, nMinMatchSize, nFormatVersion, progress, pOriginalSize, pCompressedSize, pCommandCount, pSafeDist, pStats);
      lzsa_dictionary_free(&pDictionaryData);
   }

   /* Shared tail: close both streams, then drop the output file if anything failed */
   outStream.close(&outStream);
   inStream.close(&inStream);
   if (nStatus)
      lzsa_delete_file(pszOutFilename);

   return nStatus;
 }
 /*-------------- Streaming API -------------- */
 /**
 * Compress stream
 *
 * @param pInStream input(source) stream to compress
 * @param pOutStream output(compressed) stream to write to
 * @param pDictionaryData dictionary contents, or NULL for none
 * @param nDictionaryDataSize size of dictionary contents, or 0
 * @param nFlags compression flags (LZSA_FLAG_xxx)
 * @param nMinMatchSize minimum match size
 * @param nFormatVersion version of format to use (1-2)
 * @param progress progress function, called after compressing each block, or NULL for none
 * @param pOriginalSize pointer to returned input(source) size, updated when this function is successful
 * @param pCompressedSize pointer to returned output(compressed) size, updated when this function is successful
 * @param pCommandCount pointer to returned token(compression commands) count, updated when this function is successful
 * @param pSafeDist pointer to return safe distance for raw blocks, updated when this function is successful
 * @param pStats pointer to compression stats that are filled if this function is successful, or NULL
 *
 * @return LZSA_OK for success, or an error value from lzsa_status_t
 */
 lzsa_status_t lzsa_compress_stream(lzsa_stream_t *pInStream, lzsa_stream_t *pOutStream, const void *pDictionaryData, int nDictionaryDataSize,
                                    const unsigned int nFlags, const int nMinMatchSize, const int nFormatVersion,
                                    void(*progress)(long long nOriginalSize, long long nCompressedSize), long long *pOriginalSize, long long *pCompressedSize, int *pCommandCount, int *pSafeDist, lzsa_stats *pStats) {
   unsigned char *pInData, *pOutData;
   lzsa_compressor compressor;
   long long nOriginalSize = 0LL, nCompressedSize = 0LL;
   int nResult;
   unsigned char cFrameData[16];
   int nError = 0;
   /* In raw mode the compressor is given 8 more output bytes than the input size
    * (presumably room for the end-of-data marker - TODO confirm against the block compressor) */
   int nRawPadding = (nFlags & LZSA_FLAG_RAW_BLOCK) ? 8 : 0;

   /* Input buffer layout: the second half receives the block being compressed, and the end of the
    * first half holds the history (previous block or dictionary) placed directly in front of it */
   pInData = (unsigned char*)malloc(BLOCK_SIZE * 2);
   if (!pInData) {
      return LZSA_ERROR_MEMORY;
   }
   memset(pInData, 0, BLOCK_SIZE * 2);

   pOutData = (unsigned char*)malloc(BLOCK_SIZE);
   if (!pOutData) {
      free(pInData);
      pInData = NULL;
      return LZSA_ERROR_MEMORY;
   }
   memset(pOutData, 0, BLOCK_SIZE);

   nResult = lzsa_compressor_init(&compressor, BLOCK_SIZE * 2, nMinMatchSize, nFormatVersion, nFlags);
   if (nResult != 0) {
      free(pOutData);
      pOutData = NULL;
      free(pInData);
      pInData = NULL;
      return LZSA_ERROR_MEMORY;
   }

   /* Raw output carries no stream header */
   if ((nFlags & LZSA_FLAG_RAW_BLOCK) == 0) {
      int nHeaderSize = lzsa_encode_header(cFrameData, 16, nFormatVersion);
      if (nHeaderSize < 0)
         nError = LZSA_ERROR_COMPRESSION;
      else {
         if (pOutStream->write(pOutStream, cFrameData, (size_t)nHeaderSize) != (size_t)nHeaderSize)
            nError = LZSA_ERROR_DST;
         nCompressedSize += (long long)nHeaderSize;
      }
   }

   int nPreviousBlockSize = 0;
   int nNumBlocks = 0;

   while (!pInStream->eof(pInStream) && !nError) {
      int nInDataSize;

      if (nPreviousBlockSize) {
         /* Slide the previous block in front of the new one so it can be referenced as history */
         memcpy(pInData + BLOCK_SIZE - nPreviousBlockSize, pInData + BLOCK_SIZE, nPreviousBlockSize);
      }
      else if (nDictionaryDataSize > 0 && pDictionaryData) {
         /* Only BLOCK_SIZE bytes fit in front of the current block. For a larger dictionary keep its
          * tail (the bytes nearest to the data) instead of writing before the start of pInData;
          * a negative size is ignored rather than being turned into a huge memcpy length. */
         const unsigned char *pDictionaryBytes = (const unsigned char *)pDictionaryData;

         if (nDictionaryDataSize > BLOCK_SIZE) {
            pDictionaryBytes += nDictionaryDataSize - BLOCK_SIZE;
            nDictionaryDataSize = BLOCK_SIZE;
         }
         nPreviousBlockSize = nDictionaryDataSize;
         memcpy(pInData + BLOCK_SIZE - nPreviousBlockSize, pDictionaryBytes, nPreviousBlockSize);
      }

      /* NOTE(review): a stream that keeps returning 0 here without ever reporting eof() makes this loop spin */
      nInDataSize = (int)pInStream->read(pInStream, pInData + BLOCK_SIZE, BLOCK_SIZE);
      if (nInDataSize > 0) {
         /* Raw mode produces exactly one block; a second non-empty read means the input is too large */
         if ((nFlags & LZSA_FLAG_RAW_BLOCK) != 0 && nNumBlocks) {
            nError = LZSA_ERROR_RAW_TOOLARGE;
            break;
         }
         /* The dictionary only seeds the first block */
         nDictionaryDataSize = 0;

         /* Output budget: input size (plus raw padding), capped at the size of pOutData */
         int nOutDataSize;
         nOutDataSize = lzsa_compressor_shrink_block(&compressor, pInData + BLOCK_SIZE - nPreviousBlockSize, nPreviousBlockSize, nInDataSize, pOutData, ((nInDataSize + nRawPadding) >= BLOCK_SIZE) ? BLOCK_SIZE : (nInDataSize + nRawPadding));
         if (nOutDataSize >= 0) {
            /* Write compressed block */
            if ((nFlags & LZSA_FLAG_RAW_BLOCK) == 0) {
               int nBlockheaderSize = lzsa_encode_compressed_block_frame(cFrameData, 16, nOutDataSize);
               if (nBlockheaderSize < 0)
                  nError = LZSA_ERROR_COMPRESSION;
               else {
                  nCompressedSize += (long long)nBlockheaderSize;
                  if (pOutStream->write(pOutStream, cFrameData, nBlockheaderSize) != (size_t)nBlockheaderSize) {
                     nError = LZSA_ERROR_DST;
                  }
               }
            }

            if (!nError) {
               if (pOutStream->write(pOutStream, pOutData, (size_t)nOutDataSize) != (size_t)nOutDataSize) {
                  nError = LZSA_ERROR_DST;
               }
               else {
                  nOriginalSize += (long long)nInDataSize;
                  nCompressedSize += (long long)nOutDataSize;
               }
            }
         }
         else {
            /* Write uncompressible, literal block */
            if ((nFlags & LZSA_FLAG_RAW_BLOCK) != 0) {
               /* Raw output has no block frames, so a stored block cannot be represented */
               nError = LZSA_ERROR_RAW_UNCOMPRESSED;
               break;
            }

            int nBlockheaderSize = lzsa_encode_uncompressed_block_frame(cFrameData, 16, nInDataSize);
            if (nBlockheaderSize < 0)
               nError = LZSA_ERROR_COMPRESSION;
            else {
               if (pOutStream->write(pOutStream, cFrameData, nBlockheaderSize) != (size_t)nBlockheaderSize) {
                  nError = LZSA_ERROR_DST;
               }
               else {
                  if (pOutStream->write(pOutStream, pInData + BLOCK_SIZE, (size_t)nInDataSize) != (size_t)nInDataSize) {
                     nError = LZSA_ERROR_DST;
                  }
                  else {
                     nOriginalSize += (long long)nInDataSize;
                     nCompressedSize += (long long)nBlockheaderSize + (long long)nInDataSize;
                  }
               }
            }
         }

         nPreviousBlockSize = nInDataSize;
         nNumBlocks++;
      }

      /* Intermediate progress report; the final one is issued after the loop */
      if (!nError && !pInStream->eof(pInStream)) {
         if (progress)
            progress(nOriginalSize, nCompressedSize);
      }
   }

   if (!nError) {
      int nFooterSize;

      if ((nFlags & LZSA_FLAG_RAW_BLOCK) != 0) {
         nFooterSize = 0;
      }
      else {
         nFooterSize = lzsa_encode_footer_frame(cFrameData, 16);
         if (nFooterSize < 0)
            nError = LZSA_ERROR_COMPRESSION;
      }

      /* Only write once the footer size is known to be valid: a negative size would be converted
       * to a huge size_t and make write() read far beyond cFrameData */
      if (!nError) {
         if (pOutStream->write(pOutStream, cFrameData, (size_t)nFooterSize) != (size_t)nFooterSize)
            nError = LZSA_ERROR_DST;
         nCompressedSize += (long long)nFooterSize;
      }
   }

   /* Final progress report, issued whether or not an error occurred */
   if (progress)
      progress(nOriginalSize, nCompressedSize);

   /* Read everything needed from the compressor before destroying it */
   int nCommandCount = lzsa_compressor_get_command_count(&compressor);
   int nSafeDist = compressor.safe_dist;

   if (pStats)
      *pStats = compressor.stats;

   lzsa_compressor_destroy(&compressor);

   free(pOutData);
   pOutData = NULL;

   free(pInData);
   pInData = NULL;

   if (nError) {
      return nError;
   }
   else {
      if (pOriginalSize)
         *pOriginalSize = nOriginalSize;
      if (pCompressedSize)
         *pCompressedSize = nCompressedSize;
      if (pCommandCount)
         *pCommandCount = nCommandCount;
      if (pSafeDist)
         *pSafeDist = nSafeDist;
      return LZSA_OK;
   }
 }
--- a/Tools/unix/lzsa/src/shrink_streaming.h
+++ b/Tools/unix/lzsa/src/shrink_streaming.h
@ -0,0 +1,99 @@
 /*
 * shrink_streaming.h - streaming compression definitions
 *
 * Copyright (C) 2019 Emmanuel Marty
 *
 * This software is provided 'as-is', without any express or implied
 * warranty.  In no event will the authors be held liable for any damages
 * arising from the use of this software.
 *
 * Permission is granted to anyone to use this software for any purpose,
 * including commercial applications, and to alter it and redistribute it
 * freely, subject to the following restrictions:
 *
 * 1. The origin of this software must not be misrepresented; you must not
 *    claim that you wrote the original software. If you use this software
 *    in a product, an acknowledgment in the product documentation would be
 *    appreciated but is not required.
 * 2. Altered source versions must be plainly marked as such, and must not be
 *    misrepresented as being the original software.
 * 3. This notice may not be removed or altered from any source distribution.
 */
 /*
 * Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
 *
 * Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
 * With help, ideas, optimizations and speed measurements by spke <zxintrospec@gmail.com>
 * With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
 * Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
 *
 */
 #ifndef _SHRINK_STREAMING_H
 #define _SHRINK_STREAMING_H
 #include "stream.h"
 #ifdef __cplusplus
 extern "C" {
 #endif
 /* Forward declaration */
 typedef enum _lzsa_status_t lzsa_status_t;
 typedef struct _lzsa_stats lzsa_stats;
 /*-------------- File API -------------- */
 /**
 * Compress file
 *
 * NOTE(review): the definition is not visible from this header; presumably it opens
 * file-backed streams and hands them to lzsa_compress_stream() - confirm in the implementation.
 *
 * @param pszInFilename name of input (source) file to compress
 * @param pszOutFilename name of output (compressed) file to generate
 * @param pszDictionaryFilename name of dictionary file, or NULL for none
 * @param nFlags compression flags (LZSA_FLAG_xxx)
 * @param nMinMatchSize minimum match size
 * @param nFormatVersion version of format to use (1-2)
 * @param progress progress function, called after compressing each block, or NULL for none
 * @param pOriginalSize pointer to returned input (source) size, updated when this function is successful
 * @param pCompressedSize pointer to returned output (compressed) size, updated when this function is successful
 * @param pCommandCount pointer to returned token (compression commands) count, updated when this function is successful
 * @param pSafeDist pointer to returned safe distance for raw blocks, updated when this function is successful
 * @param pStats pointer to compression stats that are filled if this function is successful, or NULL
 *
 * @return LZSA_OK for success, or an error value from lzsa_status_t
 */
 lzsa_status_t lzsa_compress_file(const char *pszInFilename, const char *pszOutFilename, const char *pszDictionaryFilename,
   const unsigned int nFlags, const int nMinMatchSize, const int nFormatVersion,
   void(*progress)(long long nOriginalSize, long long nCompressedSize), long long *pOriginalSize, long long *pCompressedSize, int *pCommandCount, int *pSafeDist, lzsa_stats *pStats);
 /*-------------- Streaming API -------------- */
 /**
 * Compress stream
 *
 * Reads the input in chunks of up to BLOCK_SIZE bytes until the input stream reports eof, and writes
 * each chunk either as a compressed block or, when it cannot be shrunk, as a stored (literal) block.
 * Unless LZSA_FLAG_RAW_BLOCK is set, the output is wrapped in a stream header, per-block frames and a footer.
 * With LZSA_FLAG_RAW_BLOCK only a single block may be produced and no framing is written.
 *
 * @param pInStream input (source) stream to compress
 * @param pOutStream output (compressed) stream to write to
 * @param pDictionaryData dictionary contents, or NULL for none; it is placed in front of the first block only
 * @param nDictionaryDataSize size of dictionary contents, or 0; callers should pass at most BLOCK_SIZE bytes
 * @param nFlags compression flags (LZSA_FLAG_xxx)
 * @param nMinMatchSize minimum match size
 * @param nFormatVersion version of format to use (1-2)
 * @param progress progress function, or NULL for none; called after each block while more input remains, and once more before returning (also when an error occurred)
 * @param pOriginalSize pointer to returned input (source) size, updated when this function is successful; may be NULL
 * @param pCompressedSize pointer to returned output (compressed) size, updated when this function is successful; may be NULL
 * @param pCommandCount pointer to returned token (compression commands) count, updated when this function is successful; may be NULL
 * @param pSafeDist pointer to returned safe distance for raw blocks, updated when this function is successful; may be NULL
 * @param pStats pointer to compression stats to fill, or NULL; the stats are copied out once the compressor has been set up, whether or not compression then succeeds
 *
 * @return LZSA_OK for success, or an error value from lzsa_status_t:
 *         LZSA_ERROR_MEMORY if the work buffers or the compressor cannot be set up,
 *         LZSA_ERROR_DST if the output stream accepts fewer bytes than requested,
 *         LZSA_ERROR_COMPRESSION if a header, block frame or footer cannot be encoded,
 *         LZSA_ERROR_RAW_TOOLARGE if raw mode is requested and the input spans more than one block,
 *         LZSA_ERROR_RAW_UNCOMPRESSED if raw mode is requested and the block cannot be compressed
 */
 lzsa_status_t lzsa_compress_stream(lzsa_stream_t *pInStream, lzsa_stream_t *pOutStream, const void *pDictionaryData, int nDictionaryDataSize,
   const unsigned int nFlags, const int nMinMatchSize, const int nFormatVersion,
   void(*progress)(long long nOriginalSize, long long nCompressedSize), long long *pOriginalSize, long long *pCompressedSize, int *pCommandCount, int *pSafeDist, lzsa_stats *pStats);
 #ifdef __cplusplus
 }
 #endif
 #endif /* _SHRINK_STREAMING_H */
--- a/Tools/unix/lzsa/src/stream.c
+++ b/Tools/unix/lzsa/src/stream.c
@ -0,0 +1,111 @@
 /*
 * stream.c - streaming I/O implementation
 *
 * Copyright (C) 2019 Emmanuel Marty
 *
 * This software is provided 'as-is', without any express or implied
 * warranty.  In no event will the authors be held liable for any damages
 * arising from the use of this software.
 *
 * Permission is granted to anyone to use this software for any purpose,
 * including commercial applications, and to alter it and redistribute it
 * freely, subject to the following restrictions:
 *
 * 1. The origin of this software must not be misrepresented; you must not
 *    claim that you wrote the original software. If you use this software
 *    in a product, an acknowledgment in the product documentation would be
 *    appreciated but is not required.
 * 2. Altered source versions must be plainly marked as such, and must not be
 *    misrepresented as being the original software.
 * 3. This notice may not be removed or altered from any source distribution.
 */
 /*
 * Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
 *
 * Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
 * With help, ideas, optimizations and speed measurements by spke <zxintrospec@gmail.com>
 * With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
 * Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
 *
 */
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include "stream.h"
 /**
 * Close a file-backed stream
 *
 * Closes the underlying file and detaches the stream by clearing its object
 * pointer and all of its callbacks. Does nothing if no file is attached.
 *
 * @param stream stream
 */
 static void lzsa_filestream_close(lzsa_stream_t *stream) {
   FILE *pFile = (FILE*)stream->obj;

   /* Nothing attached: leave the stream untouched */
   if (!pFile)
      return;

   fclose(pFile);

   stream->obj = NULL;
   stream->read = NULL;
   stream->write = NULL;
   stream->eof = NULL;
   stream->close = NULL;
 }
 /**
 * Read bytes from a file-backed stream
 *
 * @param stream stream
 * @param ptr buffer to read into
 * @param size number of bytes to read
 *
 * @return number of bytes actually read, as reported by fread()
 */
 static size_t lzsa_filestream_read(lzsa_stream_t *stream, void *ptr, size_t size) {
   FILE *pFile = (FILE*)stream->obj;
   size_t nBytesRead;

   /* Element size of 1 so that the return value is a byte count */
   nBytesRead = fread(ptr, 1, size, pFile);
   return nBytesRead;
 }
 /**
 * Write bytes to a file-backed stream
 *
 * @param stream stream
 * @param ptr buffer to write from
 * @param size number of bytes to write
 *
 * @return number of bytes actually written, as reported by fwrite()
 */
 static size_t lzsa_filestream_write(lzsa_stream_t *stream, void *ptr, size_t size) {
   FILE *pFile = (FILE*)stream->obj;
   size_t nBytesWritten;

   /* Element size of 1 so that the return value is a byte count */
   nBytesWritten = fwrite(ptr, 1, size, pFile);
   return nBytesWritten;
 }
 /**
 * Tell whether a file-backed stream has hit the end of its data
 *
 * @param stream stream
 *
 * @return nonzero if the end of the data has been reached, 0 if there is more data
 */
 static int lzsa_filestream_eof(lzsa_stream_t *stream) {
   FILE *pFile = (FILE*)stream->obj;

   return feof(pFile);
 }
 /**
 * Open file and create an I/O stream from it
 *
 * On success the stream's object pointer holds the opened file and its four
 * callbacks point at the file-backed implementations in this file. On failure
 * every member is set to NULL, the same detached state lzsa_filestream_close()
 * leaves behind, so the caller never sees indeterminate callback pointers.
 *
 * @param stream stream to fill out
 * @param pszInFilename filename
 * @param pszMode open mode, as with fopen()
 *
 * @return 0 for success, nonzero for failure
 */
 int lzsa_filestream_open(lzsa_stream_t *stream, const char *pszInFilename, const char *pszMode) {
   stream->obj = (void*)fopen(pszInFilename, pszMode);
   if (!stream->obj) {
      /* fopen() failed: make the whole structure well-defined before reporting the error */
      stream->read = NULL;
      stream->write = NULL;
      stream->eof = NULL;
      stream->close = NULL;
      return -1;
   }

   stream->read = lzsa_filestream_read;
   stream->write = lzsa_filestream_write;
   stream->eof = lzsa_filestream_eof;
   stream->close = lzsa_filestream_close;
   return 0;
 }
--- a/Tools/unix/lzsa/src/stream.h
+++ b/Tools/unix/lzsa/src/stream.h
@ -0,0 +1,103 @@
 /*
 * stream.h - streaming I/O definitions
 *
 * Copyright (C) 2019 Emmanuel Marty
 *
 * This software is provided 'as-is', without any express or implied
 * warranty.  In no event will the authors be held liable for any damages
 * arising from the use of this software.
 *
 * Permission is granted to anyone to use this software for any purpose,
 * including commercial applications, and to alter it and redistribute it
 * freely, subject to the following restrictions:
 *
 * 1. The origin of this software must not be misrepresented; you must not
 *    claim that you wrote the original software. If you use this software
 *    in a product, an acknowledgment in the product documentation would be
 *    appreciated but is not required.
 * 2. Altered source versions must be plainly marked as such, and must not be
 *    misrepresented as being the original software.
 * 3. This notice may not be removed or altered from any source distribution.
 */
 /*
 * Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
 *
 * Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
 * With help, ideas, optimizations and speed measurements by spke <zxintrospec@gmail.com>
 * With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
 * Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
 *
 */
 #ifndef _STREAM_H
 #define _STREAM_H
 #ifdef __cplusplus
 extern "C" {
 #endif
 /* Forward declaration */
 typedef struct _lzsa_stream_t lzsa_stream_t;
 /* I/O stream: an opaque object pointer plus the callbacks used to read, write, poll and close it */
 typedef struct _lzsa_stream_t {
   /** Opaque stream-specific pointer (the file-backed stream stores the handle returned by fopen() here) */
   void *obj;
   /**
    * Read from stream
    *
    * @param stream stream
    * @param ptr buffer to read into
    * @param size number of bytes to read
    *
    * @return number of bytes read; the streaming compressor skips a read that returns 0 and checks eof() again
    */
   size_t(*read)(lzsa_stream_t *stream, void *ptr, size_t size);
   /**
    * Write to stream
    *
    * @param stream stream
    * @param ptr buffer to write from
    * @param size number of bytes to write
    *
    * @return number of bytes written; the streaming compressor treats any value other than size as a write error
    */
   size_t(*write)(lzsa_stream_t *stream, void *ptr, size_t size);
   /**
    * Check if stream has reached the end of the data
    *
    * @param stream stream
    *
    * @return nonzero if the end of the data has been reached, 0 if there is more data
    */
   int(*eof)(lzsa_stream_t *stream);
   /**
    * Close stream
    *
    * The file-backed implementation also resets obj and every callback to NULL.
    *
    * @param stream stream
    */
   void(*close)(lzsa_stream_t *stream);
 } lzsa_stream_t;
 /**
 * Open file and create an I/O stream from it
 *
 * On success, obj and the read, write, eof and close callbacks of the stream are all filled in.
 * On failure the callbacks must not be used.
 *
 * @param stream stream to fill out
 * @param pszInFilename filename
 * @param pszMode open mode, as with fopen()
 *
 * @return 0 for success, nonzero (-1) if the file cannot be opened
 */
 int lzsa_filestream_open(lzsa_stream_t *stream, const char *pszInFilename, const char *pszMode);
 #ifdef __cplusplus
 }
 #endif
 #endif /* _STREAM_H */
	`@ -0,0 +1,3 @@`
					`The LZSA code is available under the Zlib license, except for src/matchfinder.c which is placed under the Creative Commons CC0 license.`

					`Please consult LICENSE.zlib.md and LICENSE.CC0.md for more information.`