1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
1.2 +++ b/patches/glibc/ports-2.10.1/540-alpha-fix-memchr.patch Fri Apr 08 00:59:32 2011 +0200
1.3 @@ -0,0 +1,121 @@
1.4 +2009-07-27 Aurelien Jarno <aurelien@aurel32.net>
1.5 +
1.6 + * sysdeps/alpha/memchr.S: Use prefetch load.
1.7 + * sysdeps/alpha/alphaev6/memchr.S: Likewise.
1.8 +
1.9 +diff -durN glibc-2.10.1.orig/glibc-ports-2.10.1/sysdeps/alpha/alphaev6/memchr.S glibc-2.10.1/glibc-ports-2.10.1/sysdeps/alpha/alphaev6/memchr.S
1.10 +--- glibc-2.10.1.orig/glibc-ports-2.10.1/sysdeps/alpha/alphaev6/memchr.S 2009-05-16 10:36:20.000000000 +0200
1.11 ++++ glibc-2.10.1/glibc-ports-2.10.1/sysdeps/alpha/alphaev6/memchr.S 2009-11-13 00:51:15.000000000 +0100
1.12 +@@ -127,7 +127,7 @@
1.13 + cmpbge $31, $1, $2 # E :
1.14 + bne $2, $found_it # U :
1.15 + # At least one byte left to process.
1.16 +- ldq $1, 8($0) # L :
1.17 ++ ldq $31, 8($0) # L : prefetch next quad
1.18 + subq $5, 1, $18 # E : U L U L
1.19 +
1.20 + addq $0, 8, $0 # E :
1.21 +@@ -143,38 +143,38 @@
1.22 + and $4, 8, $4 # E : odd number of quads?
1.23 + bne $4, $odd_quad_count # U :
1.24 + # At least three quads remain to be accessed
1.25 +- mov $1, $4 # E : L U L U : move prefetched value to correct reg
1.26 ++ nop # E : L U L U : fills the slot of the removed mov
1.27 +
1.28 + .align 4
1.29 + $unrolled_loop:
1.30 +- ldq $1, 8($0) # L : prefetch $1
1.31 +- xor $17, $4, $2 # E :
1.32 +- cmpbge $31, $2, $2 # E :
1.33 +- bne $2, $found_it # U : U L U L
1.34 ++ ldq $1, 0($0) # L : load quad
1.35 ++ xor $17, $1, $2 # E :
1.36 ++ ldq $31, 8($0) # L : prefetch next quad
1.37 ++ cmpbge $31, $2, $2 # E : U L U L
1.38 +
1.39 ++ bne $2, $found_it # U :
1.40 + addq $0, 8, $0 # E :
1.41 + nop # E :
1.42 + nop # E :
1.43 +- nop # E :
1.44 +
1.45 + $odd_quad_count:
1.46 ++ ldq $1, 0($0) # L : load quad
1.47 + xor $17, $1, $2 # E :
1.48 +- ldq $4, 8($0) # L : prefetch $4
1.49 ++ ldq $31, 8($0) # L : prefetch next quad
1.50 + cmpbge $31, $2, $2 # E :
1.51 +- addq $0, 8, $6 # E :
1.52 +
1.53 ++ addq $0, 8, $6 # E :
1.54 + bne $2, $found_it # U :
1.55 + cmpult $6, $18, $6 # E :
1.56 + addq $0, 8, $0 # E :
1.57 +- nop # E :
1.58 +
1.59 + bne $6, $unrolled_loop # U :
1.60 +- mov $4, $1 # E : move prefetched value into $1
1.61 + nop # E :
1.62 + nop # E :
1.63 +-
1.64 +-$final: subq $5, $0, $18 # E : $18 <- number of bytes left to do
1.65 + nop # E :
1.66 ++
1.67 ++$final: ldq $1, 0($0) # L : load last quad
1.68 ++ subq $5, $0, $18 # E : $18 <- number of bytes left to do
1.69 + nop # E :
1.70 + bne $18, $last_quad # U :
1.71 +
1.72 +diff -durN glibc-2.10.1.orig/glibc-ports-2.10.1/sysdeps/alpha/memchr.S glibc-2.10.1/glibc-ports-2.10.1/sysdeps/alpha/memchr.S
1.73 +--- glibc-2.10.1.orig/glibc-ports-2.10.1/sysdeps/alpha/memchr.S 2009-05-16 10:36:20.000000000 +0200
1.74 ++++ glibc-2.10.1/glibc-ports-2.10.1/sysdeps/alpha/memchr.S 2009-11-13 00:51:15.000000000 +0100
1.75 +@@ -119,7 +119,7 @@
1.76 +
1.77 + # At least one byte left to process.
1.78 +
1.79 +- ldq t0, 8(v0) # e0 :
1.80 ++ ldq zero, 8(v0) # e0 : prefetch next quad (data discarded)
1.81 + subq t4, 1, a2 # .. e1 :
1.82 + addq v0, 8, v0 #-e0 :
1.83 +
1.84 +@@ -138,19 +138,19 @@
1.85 +
1.86 + # At least three quads remain to be accessed
1.87 +
1.88 +- mov t0, t3 # e0 : move prefetched value to correct reg
1.89 +-
1.90 + .align 4
1.91 + $unrolled_loop:
1.92 +- ldq t0, 8(v0) #-e0 : prefetch t0
1.93 +- xor a1, t3, t1 # .. e1 :
1.94 +- cmpbge zero, t1, t1 # e0 :
1.95 +- bne t1, $found_it # .. e1 :
1.96 ++ ldq t0, 0(v0) # e0 : load quad
1.97 ++ xor a1, t0, t1 # .. e1 :
1.98 ++ ldq zero, 8(v0) # e0 : prefetch next quad
1.99 ++ cmpbge zero, t1, t1 # .. e1 :
1.100 ++ bne t1, $found_it # e0 :
1.101 +
1.102 +- addq v0, 8, v0 #-e0 :
1.103 ++ addq v0, 8, v0 # e1 :
1.104 + $odd_quad_count:
1.105 ++ ldq t0, 0(v0) # e0 : load quad
1.106 + xor a1, t0, t1 # .. e1 :
1.107 +- ldq t3, 8(v0) # e0 : prefetch t3
1.108 ++ ldq zero, 8(v0) # e0 : prefetch next quad
1.109 + cmpbge zero, t1, t1 # .. e1 :
1.110 + addq v0, 8, t5 #-e0 :
1.111 + bne t1, $found_it # .. e1 :
1.112 +@@ -159,8 +159,8 @@
1.113 + addq v0, 8, v0 # .. e1 :
1.114 + bne t5, $unrolled_loop #-e1 :
1.115 +
1.116 +- mov t3, t0 # e0 : move prefetched value into t0
1.117 +-$final: subq t4, v0, a2 # .. e1 : a2 <- number of bytes left to do
1.118 ++$final: ldq t0, 0(v0) # e0 : load last quad (replaces move from prefetch reg)
1.119 ++ subq t4, v0, a2 # .. e1 : a2 <- number of bytes left to do
1.120 + bne a2, $last_quad # e1 :
1.121 +
1.122 + $not_found:
1.123 +diff -durN glibc-2.10.1.orig/ports/sysdeps/alpha/alphaev6/memchr.S glibc-2.10.1/ports/sysdeps/alpha/alphaev6/memchr.S
1.124 +diff -durN glibc-2.10.1.orig/ports/sysdeps/alpha/memchr.S glibc-2.10.1/ports/sysdeps/alpha/memchr.S