yann@1625
|
1 |
2009-07-27 Aurelien Jarno <aurelien@aurel32.net>
|
yann@1625
|
2 |
|
yann@1625
|
3 |
* sysdeps/alpha/memchr.S: Use prefetch load.
|
yann@1625
|
4 |
* sysdeps/alpha/alphaev6/memchr.S: Likewise.
|
yann@1625
|
5 |
|
yann@1625
|
6 |
diff -durN glibc-2.10.1.orig/glibc-ports-2.10.1/sysdeps/alpha/alphaev6/memchr.S glibc-2.10.1/glibc-ports-2.10.1/sysdeps/alpha/alphaev6/memchr.S
|
yann@1625
|
7 |
--- glibc-2.10.1.orig/glibc-ports-2.10.1/sysdeps/alpha/alphaev6/memchr.S 2009-05-16 10:36:20.000000000 +0200
|
yann@1625
|
8 |
+++ glibc-2.10.1/glibc-ports-2.10.1/sysdeps/alpha/alphaev6/memchr.S 2009-11-13 00:51:15.000000000 +0100
|
yann@1625
|
9 |
@@ -127,7 +127,7 @@
|
yann@1625
|
10 |
cmpbge $31, $1, $2 # E :
|
yann@1625
|
11 |
bne $2, $found_it # U :
|
yann@1625
|
12 |
# At least one byte left to process.
|
yann@1625
|
13 |
- ldq $1, 8($0) # L :
|
yann@1625
|
14 |
+ ldq $31, 8($0) # L : prefetch next quad
|
yann@1625
|
15 |
subq $5, 1, $18 # E : U L U L
|
yann@1625
|
16 |
|
yann@1625
|
17 |
addq $0, 8, $0 # E :
|
yann@1625
|
18 |
@@ -143,38 +143,38 @@
|
yann@1625
|
19 |
and $4, 8, $4 # E : odd number of quads?
|
yann@1625
|
20 |
bne $4, $odd_quad_count # U :
|
yann@1625
|
21 |
# At least three quads remain to be accessed
|
yann@1625
|
22 |
- mov $1, $4 # E : L U L U : move prefetched value to correct reg
|
yann@1625
|
23 |
+ nop # E : L U L U : placeholder for the removed mov; the prefetch now targets $31, so no value needs moving
|
yann@1625
|
24 |
|
yann@1625
|
25 |
.align 4
|
yann@1625
|
26 |
$unrolled_loop:
|
yann@1625
|
27 |
- ldq $1, 8($0) # L : prefetch $1
|
yann@1625
|
28 |
- xor $17, $4, $2 # E :
|
yann@1625
|
29 |
- cmpbge $31, $2, $2 # E :
|
yann@1625
|
30 |
- bne $2, $found_it # U : U L U L
|
yann@1625
|
31 |
+ ldq $1, 0($0) # L : load quad
|
yann@1625
|
32 |
+ xor $17, $1, $2 # E :
|
yann@1625
|
33 |
+ ldq $31, 8($0) # L : prefetch next quad
|
yann@1625
|
34 |
+ cmpbge $31, $2, $2 # E : U L U L
|
yann@1625
|
35 |
|
yann@1625
|
36 |
+ bne $2, $found_it # U :
|
yann@1625
|
37 |
addq $0, 8, $0 # E :
|
yann@1625
|
38 |
nop # E :
|
yann@1625
|
39 |
nop # E :
|
yann@1625
|
40 |
- nop # E :
|
yann@1625
|
41 |
|
yann@1625
|
42 |
$odd_quad_count:
|
yann@1625
|
43 |
+ ldq $1, 0($0) # L : load quad
|
yann@1625
|
44 |
xor $17, $1, $2 # E :
|
yann@1625
|
45 |
- ldq $4, 8($0) # L : prefetch $4
|
yann@1625
|
46 |
+ ldq $31, 8($0) # L : prefetch next quad
|
yann@1625
|
47 |
cmpbge $31, $2, $2 # E :
|
yann@1625
|
48 |
- addq $0, 8, $6 # E :
|
yann@1625
|
49 |
|
yann@1625
|
50 |
+ addq $0, 8, $6 # E :
|
yann@1625
|
51 |
bne $2, $found_it # U :
|
yann@1625
|
52 |
cmpult $6, $18, $6 # E :
|
yann@1625
|
53 |
addq $0, 8, $0 # E :
|
yann@1625
|
54 |
- nop # E :
|
yann@1625
|
55 |
|
yann@1625
|
56 |
bne $6, $unrolled_loop # U :
|
yann@1625
|
57 |
- mov $4, $1 # E : move prefetched value into $1
|
yann@1625
|
58 |
nop # E :
|
yann@1625
|
59 |
nop # E :
|
yann@1625
|
60 |
-
|
yann@1625
|
61 |
-$final: subq $5, $0, $18 # E : $18 <- number of bytes left to do
|
yann@1625
|
62 |
nop # E :
|
yann@1625
|
63 |
+
|
yann@1625
|
64 |
+$final: ldq $1, 0($0) # L : load last quad
|
yann@1625
|
65 |
+ subq $5, $0, $18 # E : $18 <- number of bytes left to do
|
yann@1625
|
66 |
nop # E :
|
yann@1625
|
67 |
bne $18, $last_quad # U :
|
yann@1625
|
68 |
|
yann@1625
|
69 |
diff -durN glibc-2.10.1.orig/glibc-ports-2.10.1/sysdeps/alpha/memchr.S glibc-2.10.1/glibc-ports-2.10.1/sysdeps/alpha/memchr.S
|
yann@1625
|
70 |
--- glibc-2.10.1.orig/glibc-ports-2.10.1/sysdeps/alpha/memchr.S 2009-05-16 10:36:20.000000000 +0200
|
yann@1625
|
71 |
+++ glibc-2.10.1/glibc-ports-2.10.1/sysdeps/alpha/memchr.S 2009-11-13 00:51:15.000000000 +0100
|
yann@1625
|
72 |
@@ -119,7 +119,7 @@
|
yann@1625
|
73 |
|
yann@1625
|
74 |
# At least one byte left to process.
|
yann@1625
|
75 |
|
yann@1625
|
76 |
- ldq t0, 8(v0) # e0 :
|
yann@1625
|
77 |
+ ldq zero, 8(v0) # e0 : prefetch next quad
|
yann@1625
|
78 |
subq t4, 1, a2 # .. e1 :
|
yann@1625
|
79 |
addq v0, 8, v0 #-e0 :
|
yann@1625
|
80 |
|
yann@1625
|
81 |
@@ -138,19 +138,19 @@
|
yann@1625
|
82 |
|
yann@1625
|
83 |
# At least three quads remain to be accessed
|
yann@1625
|
84 |
|
yann@1625
|
85 |
- mov t0, t3 # e0 : move prefetched value to correct reg
|
yann@1625
|
86 |
-
|
yann@1625
|
87 |
.align 4
|
yann@1625
|
88 |
$unrolled_loop:
|
yann@1625
|
89 |
- ldq t0, 8(v0) #-e0 : prefetch t0
|
yann@1625
|
90 |
- xor a1, t3, t1 # .. e1 :
|
yann@1625
|
91 |
- cmpbge zero, t1, t1 # e0 :
|
yann@1625
|
92 |
- bne t1, $found_it # .. e1 :
|
yann@1625
|
93 |
+ ldq t0, 0(v0) # e0 : load quad
|
yann@1625
|
94 |
+ xor a1, t0, t1 # .. e1 :
|
yann@1625
|
95 |
+ ldq zero, 8(v0) # e0 : prefetch next quad
|
yann@1625
|
96 |
+ cmpbge zero, t1, t1 # .. e1:
|
yann@1625
|
97 |
+ bne t1, $found_it # e0 :
|
yann@1625
|
98 |
|
yann@1625
|
99 |
- addq v0, 8, v0 #-e0 :
|
yann@1625
|
100 |
+ addq v0, 8, v0 # e1 :
|
yann@1625
|
101 |
$odd_quad_count:
|
yann@1625
|
102 |
+ ldq t0, 0(v0) # e0 : load quad
|
yann@1625
|
103 |
xor a1, t0, t1 # .. e1 :
|
yann@1625
|
104 |
- ldq t3, 8(v0) # e0 : prefetch t3
|
yann@1625
|
105 |
+ ldq zero, 8(v0) # e0 : prefetch next quad
|
yann@1625
|
106 |
cmpbge zero, t1, t1 # .. e1 :
|
yann@1625
|
107 |
addq v0, 8, t5 #-e0 :
|
yann@1625
|
108 |
bne t1, $found_it # .. e1 :
|
yann@1625
|
109 |
@@ -159,8 +159,8 @@
|
yann@1625
|
110 |
addq v0, 8, v0 # .. e1 :
|
yann@1625
|
111 |
bne t5, $unrolled_loop #-e1 :
|
yann@1625
|
112 |
|
yann@1625
|
113 |
- mov t3, t0 # e0 : move prefetched value into t0
|
yann@1625
|
114 |
-$final: subq t4, v0, a2 # .. e1 : a2 <- number of bytes left to do
|
yann@1625
|
115 |
+$final: ldq t0, 0(v0) # e0 : load last quad
|
yann@1625
|
116 |
+ subq t4, v0, a2 # .. e1 : a2 <- number of bytes left to do
|
yann@1625
|
117 |
bne a2, $last_quad # e1 :
|
yann@1625
|
118 |
|
yann@1625
|
119 |
$not_found:
|
yann@1625
|
120 |
diff -durN glibc-2.10.1.orig/ports/sysdeps/alpha/alphaev6/memchr.S glibc-2.10.1/ports/sysdeps/alpha/alphaev6/memchr.S
|
yann@1625
|
121 |
diff -durN glibc-2.10.1.orig/ports/sysdeps/alpha/memchr.S glibc-2.10.1/ports/sysdeps/alpha/memchr.S
|