forked from synopse/mORMot
-
Notifications
You must be signed in to change notification settings - Fork 0
/
SynLZO.pas
2000 lines (1893 loc) · 67.9 KB
/
SynLZO.pas
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
/// fast LZO Compression routines
// - licensed under a MPL/GPL/LGPL tri-license; version 1.13
unit SynLZO;
{
This file is part of Synopse LZO Compression.
Synopse LZO Compression. Copyright (C) 2017 Arnaud Bouchez
Synopse Informatique - https://synopse.info
*** BEGIN LICENSE BLOCK *****
Version: MPL 1.1/GPL 2.0/LGPL 2.1
The contents of this file are subject to the Mozilla Public License Version
1.1 (the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.mozilla.org/MPL
Software distributed under the License is distributed on an "AS IS" basis,
WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
for the specific language governing rights and limitations under the License.
The Original Code is Synopse LZO Compression.
The Initial Developer of the Original Code is Arnaud Bouchez.
Portions created by the Initial Developer are Copyright (C) 2017
the Initial Developer. All Rights Reserved.
Contributor(s):
Alternatively, the contents of this file may be used under the terms of
either the GNU General Public License Version 2 or later (the "GPL"), or
the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
in which case the provisions of the GPL or the LGPL are applicable instead
of those above. If you wish to allow use of your version of this file only
under the terms of either the GPL or the LGPL, and not to allow others to
use your version of this file under the terms of the MPL, indicate your
decision by deleting the provisions above and replace them with the notice
and other provisions required by the GPL or the LGPL. If you do not delete
the provisions above, a recipient may use your version of this file under
the terms of any one of the MPL, the GPL or the LGPL.
***** END LICENSE BLOCK *****
Pascal SynLZO Compression / Decompression library
=================================================
by Arnaud Bouchez http://bouchez.info
* SynLZO is a very FAST portable lossless data compression library
written in optimized pascal code for Delphi 3 up to Delphi 2009
with a tuned asm version available
* offers *extremely* fast compression and decompression
with good compression rate, in comparaison with its speed
* original LZO written in ANSI C - pascal+asm conversion by A.Bouchez
* simple but very fast direct file compression:
SynLZO compressed files read/write is faster than copying plain files!
SynLZO implements a number of algorithms with the following features:
* hashing+dictionary compression in one pass, with no huffman table
* in-memory compression (the dictionary is the input stream itself)
* Decompression is simple and *very* fast
* Algorithm is thread safe
* Algorithm is lossless
* supports overlapping compression and in-place decompression
* direct file compression/decompression using memory mapped files
LZO and the LZO algorithms and implementations are distributed under the terms
of the GNU General Public License (GPL)
(c)1996-2008 Markus F.X.J. Oberhumer http://www.oberhumer.com/opensource/lzo
Delphi/Pascal/asm code conversion (c)2008 Arnaud Bouchez http://bouchez.info
This unit is a full pascal conversion of the lzo algorigthm, by A. Bouchez.
Speed was the goal here, the code is very optimized but not stylish.
The porting was done from the original lzo files, rather than minilzo,
with some enhancements and adaptation for the Delphi pascal compiler.
A special version of this unit was optimized deep in asm code.
Conversion notes:
- this format is NOT stream compatible with any lzo* official format
=> use it internally in your application, not as exchange format
- very small code size (less than 1KB for both compressor/decompressor)
- the uncompressed data length is stored in the beginning of the stream
and can be retrieved easily for proper out_p memory allocation
- compression is 10 times faster than zip, decompression 4 times
- same compression ratio as original C-lzo, ie somewhat decent vs its speed
- this implementation is faster than the original lzo code
- please give correct data to the decompressor (i.e. first CRC in_p data)
=> we recommend our very fast Adler32 procedures from SynCrypto
- up to 3 bytes overflow may be written in the decompression out_p buffer
=> but the GetMem() allocation routine allows this directly
- the code is heavily commented, in order to understand the algorithm
=> it's not obvious to follow, but much easier than the original C code!
=> the unused M1 case was removed from decompressor
- tested and benchmarked with a lot of data types/sizes
=> use the asm code, which is very tuned ($define USEASM)
- the code may be compiled under 64 bits FPC (implemented but not tested)
- tested under Delphi 7, Delphi 2007 and Delphi 2009
- if you include it in your application, please give me some credits:
"use SynLZO compression by http://bouchez.info"
- use at your own risk!
Some benchmark are available at the end of the file (on my Turion TL-65 1.8Ghz)
The results show that our SynLZO implementation is often faster than original
LZO C implementation, especially for compression.
It's a must-have for compression speed, regarding ZIP (compression is
10 times faster, decompression 4 times, with lower compression rate) or others.
You can notice some speed enhancements in the compiler from Delphi 7 to 2009:
the D2009 optimizer deals better with register assignment and 8bits-32bits
conversion. Note the pascal code was optimized by looking at the generated
asm (via Alt-F2), and by proper profiling. D2009 use fpu fast move(), and my
D7 version use the FastCode SSE move() version (which may be blazzing fast
on the upcoming CPUs, with true 128bits memory access - until now, the
use of 128bits registers in the move() code were not faster than 64bits FPU).
=> for the first time, I consider leaving D7 for the new D2009 IDE... there's
a lot to refactor (string->ansistring, PChar->PAnsiChar) in the code,
but it may be worth it, since quite all my code is test-driven, so that I can
be sure the conversion is ok. My only negative impact is that I can't use the
language enhancements if I want to be compatible with CrossKylix, which I
would like to, in order to be able to compile for Linux target. But the new
D2009 IDE is great, even if it seems a bit slow compared to the old D7 one.
And don't speak about the MSHelp system used in Delphi 2009: it's a bulky
piece of software - such coders should be hang. :)
=> an optimized asm version, derivating from D2009 generated code, is provided
and is to be used - this asm version is 100% compatible with the pascal code
Revision history
Version 1.6
- first release, associated with the main Synopse SQLite3 framework
Version 1.13
- code modifications to compile with Delphi 5 compiler
- comment refactoring (mostly for inclusion in SynProject documentation)
- new CompressSynLZO function, for THttpSocket.RegisterCompress - those
functions will return 'synlzo' as ACCEPT-ENCODING: HTTP header parameter
}
interface
{$I Synopse.inc}
uses
Classes;
{.$define LZOFILE}
{ attempt to use in-place file compression using memory mapping mechanism
-> still not fully functional -> so do not define }
/// get maximum possible (worse) compressed size for out_p
function lzopas_compressdestlen(in_len: integer): integer;
/// compress in_p(in_len) into out_p
// - out_p must be at least lzopas_compressdestlen(in_len) bytes long
// - returns compressed size in out_p
function lzopas_compress(in_p: PAnsiChar; in_len: integer; out_p: PAnsiChar): integer;
/// get uncompressed size from lzo-compressed buffer (to reserve memory, e.g.)
function lzopas_decompressdestlen(in_p: PAnsiChar): integer;
/// uncompress in_p(in_len) into out_p (must be allocated before call), returns out_len
// - may write up to out_len+3 bytes in out_p
// - the decompression mode is "fast-unsafe" -> CRC/Adler32 in_p data before call
function lzopas_decompress(in_p: PAnsiChar; in_len: integer; out_p: PAnsiChar): Integer;
/// (de)compress a data content using the SynLZO algorithm
// - as expected by THttpSocket.RegisterCompress
// - will return 'synlzo' as ACCEPT-ENCODING: header parameter
// - will store a hash of both compressed and uncompressed stream: if the
// data is corrupted during transmission, will instantly return ''
function CompressSynLZO(var Data: AnsiString; Compress: boolean): AnsiString;
{$ifdef LZOFILE}
{$ifdef WIN32}
// file compression functions using fast SynLZO library (up to 2GB file size)
// - if you are dealing directly with file compression, this is where to begin
// - SynLZO compressed files read/write is faster than copying plain files :)
type
TCompressFunc = function(in_p: PAnsiChar; in_len: integer; out_p: PAnsiChar): integer;
TCompressLen = function(in_len: integer): integer;
function lzopas_compressfile(const srcFile, dstFile: AnsiString; dstSize: PInteger=nil;
methodLen: TCompressLen=nil; methodComp: TCompressFunc=nil): boolean;
// do the magic: srcFile is compressed into dstFile
// true if sucess; on error, return false and error code in GetLastError
function lzopas_decompressfile(const srcFile, dstFile: AnsiString): boolean;
// uncompress srcFile into dstFile (with checksum and file age restore)
// true if sucess; on error, return false and error code in GetLastError
function lzopas_decompressfilesize(const srcFile: AnsiString): integer;
// -1 if error, decompressed size otherwise
function lzopas_decompressfilecheck(const srcFile: AnsiString): boolean;
// true if file checksum is correct (no transmission fail, e.g.)
{$endif}
{$endif LZOFILE}
implementation
{$ifndef CPUARM}
{$ifndef CPUX64}
{$ifndef CPU64}
{$ifndef BSD}
{$define USEASM}
// if defined, a hand-tuned asm compression code (derivating from one generated
// by Delphi 2009) will be used instead of the slower Delphi3-2007 code
{$endif}
{$endif}
{$endif}
{$endif}
{$ifdef MSWINDOWS}
uses
Windows, SysUtils;
{$endif}
{$ifndef FPC}
type
{$ifdef CPU64}
PtrInt = NativeInt;
PtrUInt = NativeUInt;
{$else}
PtrInt = integer;
PtrUInt = cardinal;
{$endif}
{$ifdef DELPHI5OROLDER}
// Delphi 5 doesn't have those base types defined :(
PCardinal = ^Cardinal;
IntegerArray = array[0..$effffff] of Integer;
PIntegerArray = ^IntegerArray;
{$endif}
{$else}
{$ifdef CPU64}
// CPU64 note: integer must be 4 bytes, word 2 bytes; use of PtrInt for common integer
{$define USEOFFSET}
// the PAnsiPChar hashing trick used in the original lzo algo is not 64bits enabled
// -> define this in order to use 64bits-compatible hashing code (same speed)
// -> it will store offset from in_p beginning, instead of direct adress
// it doesn't change the lzo stream format, only internal hashing dictionary
{$endif}
{$endif}
// for Delphi 2009 compatibility, we use PAnsiChar instead of PChar
// (which uses WideChar in D2009) - this code remain backward compatible with D3-2007
{.$define WITHM1}
// this was an attempt to use M1 (ip<#16) for storing offset $C000..$FFFF
// => some confusion on decompression, and only matters with huge files
// => leave this flag undefined
const
// used for offset storage (M2..M4 patterns, M1 is deprecated)
M2_MAX_LEN = 8;
M3_MAX_LEN = 33;
M4_MAX_LEN = 9;
M4_MARKER = 16;
M3_MARKER = M4_MARKER+16;
M2_MARKER = M3_MARKER+32;
M2_MAX_OFFSET = $0800; // = 1 shl 11 (M2 stores offs0..11)
M3_MAX_OFFSET = $4000; // = 1 shl 14 (M3 stores offs0..14)
M4_MAX_OFFSET = $C000;
{$ifdef WITHM1}
MAX_OFFSET = $FFFF;
{$else}
MAX_OFFSET = M4_MAX_OFFSET-1;
{$endif}
M4_OFF_BITS = 11;
M4_MASK = 7 shl M4_OFF_BITS;
// used for hashing
D_BITS = 14;
D_MASK = (1 shl D_BITS) - 1;
D_HIGH = (D_MASK shr 1)+1;
D_MUL_SHIFT = 5;
D_MUL = (1 shl D_MUL_SHIFT)+1;
{.$define WT}
// if defined, some more test are introduced during compression
// i.e. in some rare cases, we should overlap bytes during decompression
// {$define WT} will check for t=m_off>* every time we test a m_pos[] value
// this is only necessary if move() is not byte-oriented
// if you get some errors (with FPC on CPU other than i386, e.g.), try enable it
// => the compression ratio will be somewhat lower, and decompression slower
// => with the present source code under Delphi, it's NOT necessary AT ALL
// => the movechars() procedure is called only when overlap is possible
// => another way for being 100% sure is to use the safe movechars() only
// => never change the movechars() procedure below
procedure movechars(s,d: PAnsiChar; t: PtrInt);
// very fast code for unaligned and overlapping (see {$define WT}) small blocks
// this code is sometimes used rather than system.Move() by decompress()
var i: PtrInt;
begin
for i := 1 to t do begin
d^ := s^;
inc(d);
inc(s);
end;
end;
function _lzo1x_1_do_compress(in_p: PAnsiChar; in_len: PtrInt;
out_p: PAnsiChar; out out_len: integer): integer;
{$ifdef USEASM}
// code below was extracted from the pascal code generated by Delphi 2009
// after some manual further optimization of the asm, here's something fast...
// faster than the original lzo asm or the most optimized C code generated I got
asm
push eax
mov eax, 16
@@1759: add esp, -4092
push eax
dec eax
jnz @@1759
mov eax, [ebp-4H]
add esp, -24
push ebx
push esi
push edi
mov [ebp-8H], ecx
mov [ebp-4H], edx
mov ebx, eax
mov eax, [ebp-4H]
add eax, ebx
mov [ebp-0CH], eax
mov eax, [ebp-0CH]
sub eax, 9
mov [ebp-10H], eax
mov [ebp-14H], ebx
add ebx, 4
mov eax, [ebp-8H]
mov [ebp-18H], eax
lea eax, [ebp-1001CH]
xor ecx, ecx
mov edx, 65536
call System.@FillChar
jmp @@1760
nop;nop;nop;nop;nop;nop;nop;nop;nop;nop;nop
@@1760: movzx eax, byte ptr [ebx+3]
movzx edx, byte ptr [ebx+2]
shl eax, 6
movzx ecx, byte ptr [ebx+1]
xor eax, edx
movzx edx, byte ptr [ebx]
shl eax, 5
xor eax, ecx
shl eax, 5
xor eax, edx
mov edx, eax
shl eax, 5
add eax, edx
shr eax, 5
and eax, 3FFFH
mov edx,[ebp+eax*4-1001CH]
test edx, edx
jnz @@1762
@@1761: mov [ebp+eax*4-1001CH], ebx
inc ebx
cmp ebx, [ebp-10H]
jc @@1760
jmp @@1788
nop;nop;nop;nop;nop;nop
@@1762: mov esi, ebx
mov edi, edx
sub esi, edx
cmp esi, 49151
jg @@1761
cmp esi, 2048
jle @@1763
mov cl, [ebx+3H]
cmp cl, [edi+3H]
jz @@1763
and eax, 7FFH
xor eax, 201FH
mov edx, [ebp+eax*4-1001CH]
test edx, edx
mov edi, edx
mov esi, ebx
jz @@1761
sub esi, edi
cmp esi, 49151
jg @@1761
cmp esi, 2048
jle @@1763
cmp cl, [edi+3H]
jnz @@1761
@@1763: mov edx, [edi]
cmp dx, word ptr [ebx]
jnz @@1761
shr edx,16
cmp dl, [ebx+2H]
jnz @@1761
mov [ebp+eax*4-1001CH], ebx
mov eax, ebx
sub eax, [ebp-14H]
je @@1768
cmp eax, 3
jg @@1764
mov ecx, [ebp-8H]
add [ebp-8H], eax
mov edx, [ebp-14H]
add [ebp-14H], eax
mov edx, [edx]
or [ecx-2], al
mov [ecx], edx
jmp @@1768
@@1764: cmp eax, 18
jg @@1765
mov ecx,[ebp-8H]
lea eax,[eax-3]
mov [ecx], al
mov edx,[ebp-14H]
inc ecx
push ebx
mov ebx,[edx]
mov [ecx],ebx
dec eax
lea edx,[edx+4]
lea ecx,[ecx+4]
jz @@0
@@1: mov bl,[edx]
mov [ecx],bl
dec eax
lea edx,[edx+1]
lea ecx,[ecx+1]
jnz @@1
@@0: pop ebx
mov [ebp-8H], ecx
mov [ebp-14H], edx
jmp @@1768
@@1765: mov edx, eax
sub edx, 18
mov [ebp-1CH], edx
mov edx, [ebp-8H]
mov byte ptr [edx], 0
inc dword ptr [ebp-8H]
cmp dword ptr [ebp-1CH], 255
jle @@1767
@@1766: sub dword ptr [ebp-1CH], 255
mov edx, [ebp-8H]
mov byte ptr [edx], 0
inc dword ptr [ebp-8H]
cmp dword ptr [ebp-1CH], 255
jg @@1766
@@1767: mov dl, [ebp-1CH]
mov ecx, [ebp-8H]
mov [ecx], dl
inc dword ptr [ebp-8H]
mov edx, [ebp-8H]
mov ecx, [ebp-14H]
xchg ecx, eax
call move
mov eax, ebx
sub eax, [ebp-14H]
add [ebp-8H], eax
add [ebp-14H], eax
@@1768: mov eax,[edi+3H]
mov ecx,[ebx+3H]
cmp al,cl
jne @@1784
cmp ah,ch
jne @@1783
shr eax,16
shr ecx,16
cmp al,cl
jne @@1782
cmp ah,ch
jne @@1781
mov ax,[edi+7H]
mov cx,[ebx+7H]
cmp al,cl
jne @@1780
cmp ah,ch
jne @@1779
add ebx,9
mov eax, [ebp-0CH]
add edi,9
cmp eax, ebx
jbe @@1771
mov dl, [edi]
cmp dl, [ebx]
jnz @@1771
@@1769: inc ebx
inc edi
cmp eax, ebx
jbe @@1771
mov dl, [edi]
cmp dl, [ebx]
jz @@1769
@@1771: mov eax, ebx
sub eax, [ebp-14H]
cmp esi, 16384
jg @@1773
dec esi
cmp eax, 33
jg @@1772
lea esi,[esi*4]
sub eax, 2
mov edx, [ebp-8H]
or eax, 20H
mov word ptr [edx+1], si
mov [edx], al
add edx, 3
mov [ebp-14H], ebx
cmp ebx, [ebp-10H]
mov [ebp-8H],edx
jc @@1760
jmp @@1788
@@1772: sub eax, 33
mov edx, [ebp-8H]
mov byte ptr [edx], 32
jmp @@1775
@@1773: sub esi, 16384
cmp eax, 9
jg @@1774
mov edx, esi
and edx, 4000H
shr edx, 11
or dl, 10H
sub al, 2
or dl, al
mov eax, [ebp-8H]
mov [eax], dl
inc dword ptr [ebp-8H]
jmp @@1778
@@1774: sub eax, 9
mov edx, esi
and edx, 4000H
shr edx, 11
or dl, 10H
mov ecx, [ebp-8H]
mov [ecx], dl
@@1775: inc dword ptr [ebp-8H]
cmp eax, 255
jle @@1777
@@1776: sub eax, 255
mov edx, [ebp-8H]
mov byte ptr [edx], 0
inc dword ptr [ebp-8H]
cmp eax, 255
jg @@1776
@@1777: mov edx, [ebp-8H]
mov [edx], al
inc dword ptr [ebp-8H]
@@1778: lea esi,[esi*4]
mov eax, [ebp-8H]
add dword ptr [ebp-8H], 2
mov word ptr [eax], si
mov [ebp-14H], ebx
cmp ebx, [ebp-10H]
jc @@1760
jmp @@1788
@@1779: inc ebx
@@1780: inc ebx
@@1781: inc ebx
@@1782: inc ebx
@@1783: inc ebx
@@1784: add ebx, 3
@@1785: cmp esi, 2048
jg @@1786
dec esi
mov edx, esi
shr esi, 3
mov ecx, [ebp-14H]
and edx, 07H
lea eax, [ebx-1]
shl esi, 8
sub eax, ecx
lea edx,[edx*4]
shl eax, 5
or eax, edx
mov edx, [ebp-8H]
or eax, esi
add dword ptr [ebp-8H], 2
mov [edx], ax
mov [ebp-14H], ebx
cmp ebx, [ebp-10H]
jc @@1760
jmp @@1788
@@1786: cmp esi, 16384
jg @@1787
lea eax, [ebx-2]
dec esi
sub eax, [ebp-14H]
shl esi, 10
or eax, 20H
mov edx, [ebp-8H]
or eax, esi
add dword ptr [ebp-8H], 3
mov [edx], eax
mov [ebp-14H], ebx
cmp ebx, [ebp-10H]
jc @@1760
jmp @@1788
@@1787: sub esi, 16384
lea eax, [ebx-2]
mov edx, esi
sub eax, [ebp-14H]
and edx, 4000H
or eax, 10H
shr edx, 11
or eax, edx
mov edx, [ebp-8H]
mov [edx], al
inc dword ptr [ebp-8H]
lea esi,[esi*4]
mov eax, [ebp-8H]
mov word ptr [eax], si
add dword ptr [ebp-8H], 2
mov [ebp-14H], ebx
cmp ebx, [ebp-10H]
jc @@1760
@@1788: mov eax, [ebp-8H]
sub eax, [ebp-18H]
mov edx, [ebp+8H]
mov [edx], eax
mov eax, [ebp-0CH]
sub eax, [ebp-14H]
pop edi
pop esi
pop ebx
mov esp, ebp
end;
{$else}
var in_end, ip_end, ii, end_p, m_pos, out_beg: PAnsiChar;
m_off, m_len, dindex, t, tt: PtrInt; // CPU register (32 or 64 bits wide)
{$ifdef USEOFFSET}
dict: array[0..D_MASK] of integer; // for 64bit CPU, store offset from in_p
ip_beg: PAnsiChar;
{$else}
dict: array[0..D_MASK] of PAnsiChar;
{$endif}
label lit, try_match, match, same4, m3_m4_len, m3_m4_offset, m1;
begin
in_end := in_p+in_len; // in_end is max source position
ip_end := in_end-9; // we read till in_p[8]
// use 9 instead of (M2_MAX_LEN-5); // ip_end=in_end-3 is max hashing position
{$ifdef USEOFFSET}
ip_beg := in_p;
{$endif}
ii := in_p;
inc(in_p,4);
out_beg := out_p;
FillChar(dict,sizeof(dict),0); // dict[] must be set to 0 before call
repeat
// 1. hash the p[0]..p[3] bytes
dindex := ((D_MUL * ((((((ord(in_p[3]) shl 6) xor ord(in_p[2])) shl 5)
xor ord(in_p[1])) shl 5) xor ord(in_p[0]))) shr D_MUL_SHIFT) and D_MASK;
// 2. check if already hashed p[0]..p[3] sequence
// 2.1 not hashed -> add hash position in dict[], and continue
{$ifdef USEOFFSET}
if dict[dindex]=0 then begin
lit: dict[dindex] := in_p-ip_beg;
{$else}
if dict[dindex]=nil then begin
lit: dict[dindex] := in_p;
{$endif}
inc(in_p);
if in_p<ip_end then
continue else break;
end else
// 2.2 hashed -> values in m_pos, offset in m_off and test if of some interrest
{$ifdef USEOFFSET}
m_pos := @ip_beg[dict[dindex]]; {$else}
m_pos := dict[dindex];
{$endif}
m_off := in_p-m_pos;
if {$ifdef WT}(m_off<3)or{$endif} (m_off>MAX_OFFSET) then
goto lit else
if (m_off<=M2_MAX_OFFSET) or (m_pos[3]=in_p[3]) then
goto try_match;
// 3. the first hash was not interesting -> try 2nd hash
dindex := (dindex and (D_MASK and $7ff)) xor (D_HIGH or $1f);
// 3.1 not hashed -> add hash position in dict[], and continue
{$ifdef USEOFFSET}
if dict[dindex]=0 then
goto lit else
m_pos := @ip_beg[dict[dindex]];
{$else}
if dict[dindex]=nil then
goto lit else
m_pos := dict[dindex];
{$endif}
// 3.2 hashed -> values in m_pos, offset in m_off and test if of some interrest
m_off := in_p-m_pos;
if {$ifdef WT}(m_off<3)or{$endif} (m_off>MAX_OFFSET) then
goto lit else
if (m_off<=M2_MAX_OFFSET) or (m_pos[3]=in_p[3]) then
goto try_match else
goto lit;
// 4. if of some interrest -> try exact match of in_p[0..2]
try_match:
if (pWord(m_pos)^<>pWord(in_p)^) or (m_pos[2]<>in_p[2]) then
goto lit;
// 5. we have a 3 chars match
match:
// 5.1 update dict[]
{$ifdef USEOFFSET}
dict[dindex] := in_p-ip_beg; {$else}
dict[dindex] := in_p;
{$endif}
// 5.2 store input stream till current position
t := in_p-ii;
if t<>0 then begin
if t<=3 then begin
PByte(out_p-2)^ := PByte(out_p-2)^ or t;
pInteger(out_p)^ := pInteger(ii)^;
inc(out_p,t);
inc(ii,t);
end else
if t<=18 then begin
out_p^ := ansichar(t-3);
inc(out_p);
movechars(ii,out_p,t);
inc(out_p,in_p-ii);
inc(ii,in_p-ii);
end else begin
tt := t-18;
out_p^ := #0; inc(out_p);
while tt>255 do begin // size > 255 are stored as #0
dec(tt,255);
out_p^ := #0;
inc(out_p);
end;
out_p^ := ansichar(tt);
inc(out_p);
// we recommend using FastCode (with SSE2) (included in D2009)
system.move(ii^,out_p^,t);
inc(out_p,in_p-ii);
inc(ii,in_p-ii);
end;
end;
// 5.3 test longest common sequence from in_p[] into m_pos[]
{$ifdef WT}
t := m_off;
{$endif}
// if (pInteger(@m_pos[3])^=pInteger(@in_p[3])^){$ifdef WT}and (t>6){$endif} then goto same4 else
if (m_pos[3]=in_p[3]) {$ifdef WT}and (t>3){$endif} then
if (m_pos[4]=in_p[4]) {$ifdef WT}and (t>4){$endif} then
if (m_pos[5]=in_p[5]) {$ifdef WT}and (t>5){$endif} then
if (m_pos[6]=in_p[6]) {$ifdef WT}and (t>6){$endif} then
same4: if (m_pos[7]=in_p[7]) {$ifdef WT}and (t>7){$endif} then
if (m_pos[8]=in_p[8]) {$ifdef WT}and (t>8){$endif} then begin
// 5.3.1 longest sequence is 9 chars or more
inc(in_p,9);
end_p := in_end;
// if in_p>=end_p then // since in_p<ip_end=in_end-3 we have m_len>=3
// in_p := end_p else this is very slow => done in decompress()
inc(m_pos,M2_MAX_LEN+1);
// get sequence length in m_len
{$ifdef WT}dec(t,9);{$endif}
while (in_p<end_p) and (m_pos^=in_p^) {$ifdef WT}and (t>0){$endif} do begin
inc(in_p);
inc(m_pos);
{$ifdef WT}dec(t);{$endif}
end;
m_len := in_p-ii;
// store m_off + m_len into out_p
// M2 is not possible here, since len>9
if m_off<=M3_MAX_OFFSET then begin
// B0: 0..4=len-2(<32) 5=M3_MARKER
// B1..n-1 #0 = inc(len,255), or len-33
// Wn: 0..1=new 2..15=offs0..13
// M3_MAX_OFFSET = $4000 = 1 shl 14 (offs0..13)
dec(m_off);
if m_len<=33 then begin // M2_MARKER=M3_MARKER+32 -> 1..31 ok
out_p^ := ansichar(integer(M3_MARKER or (m_len-2)));
inc(out_p);
pWord(out_p)^ := m_off shl 2; // Wn: 0..1=new 2..15=offs0..13
inc(out_p,2);
ii := in_p;
if in_p<ip_end then continue else break;
end else begin
dec(m_len,33);
out_p^ := ansichar(M3_MARKER); // 0 -> big len is #0-stored
goto m3_m4_len;
end;
end else
{$ifdef WITHM1} if m_off<M4_MAX_OFFSET then {$endif}
begin // M3_MARKER = M4_MARKER+16 -> 1..15 ok
// B0: 0..2=len-2 3=off14 4=M4_MARKER
// B1..Bn-1 #0 = inc(len,255), or len-9
// Wn: 0..1=new 2..15=offs0..13
// M3=0..$4000 M4=$4000..$bfff (2 pages of offs0..13)
dec(m_off,M3_MAX_OFFSET); // m_off=0..M4_MAX_OFFSET-M3_MAX_OFFSET=$7fff
if (m_len<=M4_MAX_LEN) then begin
out_p^ := ansichar(integer(M4_MARKER or
((m_off and M3_MAX_OFFSET)shr M4_OFF_BITS) or // off14 bit
(m_len-2))); // 3 bits for m_len
inc(out_p);
end else begin
dec(m_len,M4_MAX_LEN);
out_p^ := ansichar(integer(M4_MARKER or
((m_off and M3_MAX_OFFSET)shr M4_OFF_BITS))); // off14 bit
// we store m_len-1=0 -> len is #0-stored
{$ifdef WITHM1} goto m3_m4_len;
end;
end else begin
// B0: 0..2=len-2 (M1 is <16)
// B1..Bn-1 #0 = inc(len,255), or len-9
// Wn: 0..1=new 2..15=offs0..13
// M1=$C000..$FFFF
dec(m_off,M4_MAX_OFFSET);
if (m_len<=M4_MAX_LEN) then begin
out_p^ := ansichar(m_len-2);
inc(out_p);
end else begin
dec(m_len,M4_MAX_LEN);
out_p^ := #0; {$endif}
m3_m4_len: inc(out_p);
while (m_len>255) do begin // size > 255 are stored as #0
dec(m_len,255);
out_p^ := #0;
inc(out_p);
end;
out_p^ := ansichar(m_len);
inc(out_p);
end;
end;
pWord(out_p)^ := m_off shl 2; // Wn: 0..1=new 2..15=offs0..14
inc(out_p,2);
ii := in_p;
if in_p<ip_end then continue else break;
end else inc(in_p,8) // if (m_pos[8]=in_p[8])
else inc(in_p,7)
else inc(in_p,6)
else inc(in_p,5)
else inc(in_p,4)
else inc(in_p,3);
// 5.3.2 one of the (m_pos[*]=in_p[0]) was false -> store m_off + m_len=in_p-ii
// here, we have always m_len=in_p-ii >=3 and <9
if m_off<=M2_MAX_OFFSET then begin // M2_MAX_OFFSET = 1 shl 11 (offs0..10)
// M2 is for len <8, storing offset on 11 bits
// B0: 0..1=new 2..4=offs0..2 5..7=len-1
// B1: 0..7=offs3..10
// len-1>=2 therefore t>=(2 shl 5)=64=M2_MARKER
dec(m_off);
pWord(out_p)^ := integer(((in_p-ii-1)shl 5) or ((m_off and 7)shl 2) or ((m_off shr 3) shl 8));
// out_p[0] := ansichar(((in_p-ii-1)shl 5) or ((m_off and 7)shl 2));
// out_p[1] := ansichar(m_off shr 3);
inc(out_p,2);
ii := in_p;
if in_p<ip_end then continue else break;
end else
if m_off<=M3_MAX_OFFSET then begin // M3_MAX_OFFSET = 1 shl 14 (offs0..13)
// M3 is for every len, storing offset on 14 bits
// B: 0..4=len-2(<32) 5=M3_MARKER
// W: 0..1=new 2..15=offs0..13
dec(m_off);
pInteger(out_p)^ := integer(M3_MARKER or (in_p-ii-2) or (m_off shl 10));
inc(out_p,3);
ii := in_p;
if in_p<ip_end then continue else break;
end else
{$ifdef WITHM1} if m_off<M4_MAX_OFFSET then {$endif}
begin // M3_MARKER = M4_MARKER+16 -> 1..15 ok
// M3 is for every len, storing (offset-M3_MAX_OFFSET) on 15 bits
// B: 0..2=len-2 3=off14 4=M4_MARKER
// W: 0..1=new 2..15=offs0..13
dec(m_off,M3_MAX_OFFSET);
out_p^ := ansichar(integer(M4_MARKER or (in_p-ii-2) or // len-2
((m_off and M3_MAX_OFFSET)shr M4_OFF_BITS))); // off14 bit
m1: inc(out_p);
pWord(out_p)^ := m_off shl 2; // off0..13 bits
inc(out_p,2);
ii := in_p;
if in_p<ip_end then continue else break;
{$ifdef WITHM1}
end else begin
// B: 0..2=len-2 (M1 is <16)
// W: 0..1=new 2..15=offs0..13
dec(m_off,M4_MAX_OFFSET);
out_p^ := ansichar(in_p-ii-2);
goto m1;
{$endif}
end;
until false;
// 6. finished -> store out_len and number of bytes left to store
out_len := out_p-out_beg;
result := in_end-ii; // returns source left bytes
end;
{$endif USEASM}
function lzopas_compressdestlen(in_len: integer): integer;
// get maximum possible (worse) compressed size for out_p
begin
result := in_len+(in_Len shr 3)+(64+7);
// an incompressed block is store by one #0 for each 255 bytes -> shr 3 is good
end;
function lzopas_compress(in_p: PAnsiChar; in_len: integer; out_p: PAnsiChar): integer;
// compress in_p(in_len) into out_p
// out_p must be at least lzopas_compressdestlen(in_len) bytes long
// returns compressed size in out_p
var out_beg: PAnsiChar;
t, tt: PtrInt;
label mov;
begin
out_beg := out_p;
// 1. store in_len
if in_len>=$8000 then begin
pWord(out_p)^ := $8000 or (in_len and $7fff);
pWord(out_p+2)^ := in_len shr 15;
inc(out_p,4);
end else begin
pWord(out_p)^ := in_len; // in_len<32768 -> stored as word, otherwise as integer
if in_len=0 then begin
result := 2;
exit;
end;
inc(out_p,2);
end;
// 2. compress
if in_len<=M2_MAX_LEN+5 then begin // M2_MAX_LEN+5=13
// 2.1 source is not big enough to be hashed -> direct copy
t := in_len;
out_p^ := ansichar(t+17); // out_p=op_beg -> avoid PByte(out_p-2)^ access
goto mov;
end else begin
// 2.2 compress using lzo hashing
t := _lzo1x_1_do_compress(in_p, in_len, out_p, result);
inc(out_p,result);
end;
// 3. store remaining t bytes
if t>0 then begin
if t<=3 then
inc(out_p[-2],t) else
if t<=18 then begin
out_p^ := ansichar(t-3);
inc(out_p);
end else begin
tt := t-18;
out_p^ := #0;
inc(out_p);
while tt>255 do begin // size > 255 are stored as #0
dec(tt,255);
out_p^ := #0;
inc(out_p);
end;
out_p^ := ansichar(tt);
mov: inc(out_p);
end;
// we recommend using FastCode (with SSE2) (included in D2009)
system.move((in_p+in_len-t)^,out_p^,t);
inc(out_p,t);
end;
result := out_p-out_beg;
end;
function lzopas_decompressdestlen(in_p: PAnsiChar): integer;
// get uncompressed size from lzo-compressed buffer (to reserve memory, e.g.)
begin
result := pWord(in_p)^;
inc(in_p,2);
if result and $8000<>0 then
result := (result and $7fff) or (integer(pWord(in_p)^) shl 15);
end;
function lzopas_decompress(in_p: PAnsiChar; in_len: integer; out_p: PAnsiChar): Integer;
// uncompress in_p(in_len) into out_p, returns out_len
// may write up to out_len+3 bytes in out_p
// the decompression mode is "fast-unsafe" -> CRC/Adler32 in_p data before call
{$ifdef USEASM}
// code below was extracted from the pascal code generated by Delphi 2009
// after some manual further optimization of the asm, here's something fast...
asm
push ebx
push esi
push edi
push ebp
add esp, -16
mov edi, ecx
mov [esp], edx
mov esi, eax
mov eax, [esp]
add eax, esi
mov [esp+8H], eax
movzx eax, word ptr [esi]
test eax,eax
mov [esp+4H], eax
je @@1829
add esi, 2
test byte ptr [esp+5H], 80H
jz @@1806