# Patch summary (two stub sources: arch/i386/nrv2b_d32-easy.S and i386-expand.S).
# Every single-byte load into %dl -- `movb (%esi),%dl` / `movb (%eax),%dl` --
# becomes a zero-extending `movzbl ...,%edx`: the prefetch at top_n2b, the
# GETBITp macro, refill, and the three byte loads in the copy routine.
# In nrv2b_d32-easy.S the `movzbl %dl,%edx` that preceded `orl %edx,off` is
# removed, so the offset merge now depends on the upper 24 bits of %edx
# still being zero from the most recent prefetch.
# NOTE(review): the instructions between top_n2b and that orl are only partly
# visible in these hunks -- confirm none of them writes %edx without
# zero-extending.
# NOTE(review): the hunks below appear with their line breaks collapsed;
# restore the original line structure before applying this patch.
diff --git a/src/stub/src/arch/i386/nrv2b_d32-easy.S b/src/stub/src/arch/i386/nrv2b_d32-easy.S index 6b37583f..cd3569d0 100644 --- a/src/stub/src/arch/i386/nrv2b_d32-easy.S +++ b/src/stub/src/arch/i386/nrv2b_d32-easy.S @@ -38,7 +38,7 @@ lit_n2b: incl %esi; movb %dl,(%edi) incl %edi top_n2b: - movb (%esi),%dl # prefetch: literal, or bottom 8 bits of offset + movzbl (%esi),%edx # prefetch: literal, or bottom 8 bits of offset jnextb1yp lit_n2b push $1; pop off offmore_n2b: @@ -46,7 +46,7 @@ offmore_n2b: jnextb0np offmore_n2b subl $ 3,off; jc len_n2b # use previous offset - shll $ 8,off; movzbl %dl,%edx + shll $ 8,off orl %edx,off; incl %esi xorl $~0,off; jz eof movl off,disp diff --git a/src/stub/src/i386-expand.S b/src/stub/src/i386-expand.S index 0098fab7..c6c780dc 100644 --- a/src/stub/src/i386-expand.S +++ b/src/stub/src/i386-expand.S @@ -51,7 +51,7 @@ NBPW= 4 #define GETBITp \ addl bits,bits; jnz 0f; \ movl (%esi),bits; sub $-4,%esi; \ - adcl bits,bits; movb (%esi),%dl; \ + adcl bits,bits; movzbl (%esi),%edx; \ 0: /* Same, but without prefetch (not useful for length of match.) 
*/ #define jnextb0n jnextb0y @@ -125,7 +125,7 @@ __clear_cache: .globl __clear_cache refill: movl (%esi),bits; sub $-4,%esi // next 32 bits; set Carry adcl bits,bits // LSB= 1 (CarryIn); CarryOut= next bit - movb (%esi),%dl // pre-fetch: literal, or bottom 8 bits of offset + movzbl (%esi),%edx // pre-fetch: literal, or bottom 8 bits of offset rep; ret getbit: addl bits,bits; jz refill // Carry= next bit @@ -133,16 +133,16 @@ getbit: copy: // In: len, %edi, dispq; Out: 0==len, %edi, dispq; trashes %eax, %edx lea (%edi,dispq),%eax; cmpl $5,len // <=3 is forced - movb (%eax),%dl; jbe copy1 // <=5 for better branch predict + movzbl (%eax),%edx; jbe copy1 // <=5 for better branch predict cmpl $-4,displ; ja copy1 // 4-byte chunks would overlap subl $4,len // adjust for termination cases copy4: movl (%eax),%edx; add $4, %eax; subl $4,len movl %edx,(%edi); lea 4(%edi),%edi; jnc copy4 - addl $4,len; movb (%eax),%dl; jz copy0 + addl $4,len; movzbl (%eax),%edx; jz copy0 copy1: inc %eax; movb %dl,(%edi); dec len - movb (%eax),%dl + movzbl (%eax),%edx lea 1(%edi),%edi; jnz copy1 copy0: rep; ret