arm64-expand code for nrv* EOF at de-compress is not quite uniform

WINDOWS_BACK requirement also interferes.
	modified:   ../misc/testsuite/upx_testsuite_1-expected_sha256sums.sh
	modified:   stub/src/arch/arm64/v8/nrv2d_d32.S
	modified:   stub/src/arch/arm64/v8/nrv2e_d32.S
	modified:   stub/src/arm64-expand.S
           plus generated *.h, *.map
This commit is contained in:
John Reiser
2024-06-27 14:39:14 -07:00
parent e561da33bd
commit 2c4e5c7a47
7 changed files with 1732 additions and 1709 deletions
+824 -821
View File
File diff suppressed because it is too large Load Diff
+870 -866
View File
File diff suppressed because it is too large Load Diff
+3 -3
View File
@@ -82,7 +82,7 @@ ucl_nrv2d_decompress_32: .globl ucl_nrv2d_decompress_32 // ARM mode
Actual decompressed length is stored through plen_dst.
For SAFE mode: at call, *plen_dst must be the allowed length of the output buffer.
*/
PUSH3(x2,x3, lr)
PUSH3(x2,x3, lr) // MATCH_93
#define sp_DST0 0 /* stack offset of original dst */
add srclim,src,len,uxtw // srclim= eof_src;
#if 1==SAFE /*{*/
@@ -95,7 +95,7 @@ ucl_nrv2d_decompress_32: .globl ucl_nrv2d_decompress_32 // ARM mode
#if defined(WINDOWS_BACK) || !defined(DAISY_CHAIN) //{
eof_n2d:
POP2(x3,x4) // r3= orig_dst; r4= plen_dst
POP2(x3,x4) // MATCH_93 r3= orig_dst; r4= plen_dst
SUB2(src,srclim) // 0 if actual src length equals expected length
SUB2(dst,x3) // actual dst length
str dstw,[x4]
@@ -108,7 +108,7 @@ eof_n2d:
#endif
mov x0,x5 // result value
POP1(lr)
POP1(lr) // MATCH_93
ret
#endif // WINDOWS_BACK || ! DAISY_CHAIN }
+4 -4
View File
@@ -83,8 +83,8 @@ ucl_nrv2e_decompress_32: .globl ucl_nrv2e_decompress_32 // ARM mode
Actual decompressed length is stored through plen_dst.
For SAFE mode: at call, *plen_dst must be the allowed length of the output buffer.
*/
PUSH1(lr)
PUSH2(x2,x3)
PUSH1(lr) // MATCH_91
PUSH2(x2,x3) // MATCH_92
#define sp_DST0 0 /* stack offset of original dst */
add srclim,src,len,uxtw // srclim= eof_src;
#if 1==SAFE /*{*/
@@ -104,7 +104,7 @@ bad_src_n2e: # return value will be 1
#if defined(WINDOWS_BACK) || !defined(DAISY_CHAIN) //{
eof_n2e:
POP2(x3,x4) // r3= orig_dst; r4= plen_dst
POP2(x3,x4) // MATCH_92 r3= orig_dst; r4= plen_dst
SUB2(src,srclim) // 0 if actual src length equals expected length
SUB2(dst,x3) // actual dst length
str dstw,[x4]
@@ -118,7 +118,7 @@ cache_n2e: // unused label
#endif
mov x0,x5 // result value
POP1(lr)
POP1(lr) // MATCH_91
ret
#endif // WINDOWS_BACK || ! DAISY_CHAIN }
+22 -6
View File
@@ -97,10 +97,11 @@ decompress: // (src *, cpr_len, dst *, &dstlen);
src .req x0
dst .req x2
dstw .req w2
.globl eof
eof_nrv: // .globl eof_nrv .type eof_nrv,%function
eof_n2e: // .globl eof_n2e .type eof_n2e,%function
eof_n2d: // .globl eof_n2d .type eof_n2d,%function
// sync_cache is done in tail of f_expand, after possible unfilter
// NYI: eof_n2b, eof_n2d, eof_n2e should be unified.
eof_n2b: // .globl eof_n2b .type eof_n2b,%function
eof: // MATCH_90 end of a compressed extent; need sync_cache after unfilter
ldr x3,[sp],#NBPW // &input_eof
@@ -112,6 +113,24 @@ eof: // MATCH_90 end of a compressed extent; need sync_cache after unfilter
ldr lr,[sp],#NBPW
ret
// Shared end-of-stream exit for the NRV2D and NRV2E decompressors:
// the labels eof_n2d and eof_n2e both mark the same code below.
// WINDOWS_BACK compatibility seems to be broken
// if POP3 replaces POP2+POP1 (MATCH_92, MATCH_91)
srclim .req x7 // NOTE(review): presumably the end-of-source pointer set by the decompressor ("srclim= eof_src") — confirm x7 matches the decompressor's own alias
eof_n2d: // .globl eof_n2d
eof_n2e: // .globl eof_n2e
POP2(x3,x4) // MATCH_92 x3= orig_dst; x4= plen_dst
SUB2(src,srclim) // 0 if actual src length equals expected length
SUB2(dst,x3) // actual dst length
str dstw,[x4] // store the 32-bit actual dst length through plen_dst
POP1(lr) // MATCH_91
ret // NOTE(review): src is aliased to x0, so the src difference computed above is presumably the return value — confirm
// Drop the register aliases used by the code above.
.unreq src
.unreq dst
.unreq dstw
.unreq srclim
eof_lzma: .globl elf_lzma
mov sp,fp
mov x5,x0 // save result value
@@ -124,9 +143,6 @@ eof_lzma: .globl elf_lzma
mov x0,x5 // result value
ret
.unreq src
.unreq dst
#define M_NRV2B_LE32 2
#define M_NRV2B_8 3
#define M_NRV2D_LE32 5
+2 -2
View File
@@ -57,8 +57,8 @@ EXP_HEAD 0x0000000000000000 0x104
EXP_HEAD 0x0000000000000000 0x104 tmp/arm64-expand.o
0x0000000000000000 f_expand
EXP_TAIL 0x0000000000000000 0xa8
EXP_TAIL 0x0000000000000000 0xa8 tmp/arm64-expand.o
EXP_TAIL 0x0000000000000000 0xc0
EXP_TAIL 0x0000000000000000 0xc0 tmp/arm64-expand.o
0x0000000000000000 eof
NRV2E 0x0000000000000000 0xf4