Prefetching does not help when decoding the length of a match
l_lx_elf64amd.S amd_d_nrv2b.S amd_d_nrv2e.S committer: jreiser <jreiser> 1132290120 +0000
This commit is contained in:
@@ -35,11 +35,11 @@ lit_n2b:
|
||||
incq %rdi
|
||||
top_n2b:
|
||||
movb (%rsi),%dl # speculate: literal, or bottom 8 bits of offset
|
||||
jnextb1y lit_n2b
|
||||
jnextb1yp lit_n2b
|
||||
lea 1(lenq),off # [len= 0] off= 1
|
||||
offmore_n2b:
|
||||
getnextb(off)
|
||||
jnextb0n offmore_n2b
|
||||
getnextbp(off)
|
||||
jnextb0np offmore_n2b
|
||||
|
||||
subl $ 3,off; jc len_n2b # use previous offset
|
||||
shll $ 8,off; movzbl %dl,%edx
|
||||
|
||||
@@ -35,16 +35,16 @@ lit_n2e:
|
||||
incq %rdi
|
||||
top_n2e:
|
||||
movb (%rsi),%dl # speculate: literal, or bottom 8 bits of offset
|
||||
jnextb1y lit_n2e
|
||||
jnextb1yp lit_n2e
|
||||
lea 1(lenq),off # [len= 0] off= 1
|
||||
jmp getoff_n2e
|
||||
|
||||
off_n2e:
|
||||
dec off
|
||||
getnextb(off)
|
||||
getnextbp(off)
|
||||
getoff_n2e:
|
||||
getnextb(off)
|
||||
jnextb0n off_n2e
|
||||
getnextbp(off)
|
||||
jnextb0np off_n2e
|
||||
|
||||
subl $ 3,off; jc offprev_n2e
|
||||
shll $ 8,off; movzbl %dl,%edx
|
||||
|
||||
@@ -102,6 +102,17 @@ ra_setup:
|
||||
*/
|
||||
/* jump on next bit {0,1} with prediction {y==>likely, n==>unlikely} */
|
||||
/* Prediction omitted for now. */
|
||||
/* On refill: prefetch next byte, for latency reduction on literals and offsets. */
|
||||
#define jnextb0np jnextb0yp
|
||||
#define jnextb0yp GETBITp; jnc
|
||||
#define jnextb1np jnextb1yp
|
||||
#define jnextb1yp GETBITp; jc
|
||||
#define GETBITp \
|
||||
addl bits,bits; jnz 0f; \
|
||||
movl (%rsi),bits; subq $-4,%rsi; \
|
||||
adcl bits,bits; movb (%rsi),%dl; \
|
||||
0:
|
||||
/* Same, but without prefetch (prefetching is not useful when decoding the length of a match). */
|
||||
#define jnextb0n jnextb0y
|
||||
#define jnextb0y GETBIT; jnc
|
||||
#define jnextb1n jnextb1y
|
||||
@@ -109,11 +120,12 @@ ra_setup:
|
||||
#define GETBIT \
|
||||
addl bits,bits; jnz 0f; \
|
||||
movl (%rsi),bits; subq $-4,%rsi; \
|
||||
adcl bits,bits; movb (%rsi),%dl; \
|
||||
adcl bits,bits; \
|
||||
0:
|
||||
|
||||
/* rotate next bit into bottom bit of reg */
|
||||
#define getnextb(reg) call *%r11; adcl reg,reg
|
||||
#define getnextbp(reg) call *%r11; adcl reg,reg
|
||||
#define getnextb(reg) getnextbp(reg)
|
||||
|
||||
ALIGN(1<<3)
|
||||
getbit:
|
||||
|
||||
Reference in New Issue
Block a user