Very often you post code, good code of course, it is so sophisticated, that I can't understand.
I want to make a search string and have thought to the following :
String to search
PhilippeI begin to make a mask with 8 'P' :
PPPPPPPPI substact the mask to the first 8 characters
I make XOR 0 to get a bit mask
I make a BSF and I have the first position of the first mask character into the string.
And after I continue to do the same thing.
It's just the beguinning but I try to understand how string search are made , not reading any code.
Quote
7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 Last Bit Avancer
0 00000000 0 0 0 0 0 0 0 0 _ _ _ _ _ _ _ _ 0 8
1 00000001 0 0 0 0 0 0 0 1 _ _ _ _ _ _ _ P 0 7
2 00000010 0 0 0 0 0 0 1 0 _ _ _ _ _ _ P _ 1 6
3 00000011 0 0 0 0 0 0 1 1 _ _ _ _ _ _ P P 0 7
4 00000100 0 0 0 0 0 1 0 0 _ _ _ _ _ P _ _ 2 5
5 00000101 0 0 0 0 0 1 0 1 _ _ _ _ _ P _ P 0 7
6 00000110 0 0 0 0 0 1 1 0 _ _ _ _ _ P P _ 1 6
7 00000111 0 0 0 0 0 1 1 1 _ _ _ _ _ P P P 0 7
8 00001000 0 0 0 0 1 0 0 0 _ _ _ _ P _ _ _ 3 4
9 00001001 0 0 0 0 1 0 0 1 _ _ _ _ P _ _ P 0 7
10 00001010 0 0 0 0 1 0 1 0 _ _ _ _ P _ P _ 1 6
11 00001011 0 0 0 0 1 0 1 1 _ _ _ _ P _ P P 0 7
12 00001100 0 0 0 0 1 1 0 0 _ _ _ _ P P _ _ 2 5
13 00001101 0 0 0 0 1 1 0 1 _ _ _ _ P P _ P 0 7
14 00001110 0 0 0 0 1 1 1 0 _ _ _ _ P P P _ 1 6
15 00001111 0 0 0 0 1 1 1 1 _ _ _ _ P P P P 0 7
16 00010000 0 0 0 1 0 0 0 0 _ _ _ P _ _ _ _ 4 3
17 00010001 0 0 0 1 0 0 0 1 _ _ _ P _ _ _ P 0 7
18 00010010 0 0 0 1 0 0 1 0 _ _ _ P _ _ P _ 1 6
19 00010011 0 0 0 1 0 0 1 1 _ _ _ P _ _ P P 0 7
20 00010100 0 0 0 1 0 1 0 0 _ _ _ P _ P _ _ 2 5
21 00010101 0 0 0 1 0 1 0 1 _ _ _ P _ P _ P 0 7
22 00010110 0 0 0 1 0 1 1 0 _ _ _ P _ P P _ 1 6
23 00010111 0 0 0 1 0 1 1 1 _ _ _ P _ P P P 0 7
24 00011000 0 0 0 1 1 0 0 0 _ _ _ P P _ _ _ 3 4
25 00011001 0 0 0 1 1 0 0 1 _ _ _ P P _ _ P 0 7
26 00011010 0 0 0 1 1 0 1 0 _ _ _ P P _ P _ 1 6
27 00011011 0 0 0 1 1 0 1 1 _ _ _ P P _ P P 0 7
28 00011100 0 0 0 1 1 1 0 0 _ _ _ P P P _ _ 2 5
29 00011101 0 0 0 1 1 1 0 1 _ _ _ P P P _ P 0 7
30 00011110 0 0 0 1 1 1 1 0 _ _ _ P P P P _ 1 6
31 00011111 0 0 0 1 1 1 1 1 _ _ _ P P P P P 0 7
32 00100000 0 0 1 0 0 0 0 0 _ _ P _ _ _ _ _ 5 2
33 00100001 0 0 1 0 0 0 0 1 _ _ P _ _ _ _ P 0 7
34 00100010 0 0 1 0 0 0 1 0 _ _ P _ _ _ P _ 1 6
35 00100011 0 0 1 0 0 0 1 1 _ _ P _ _ _ P P 0 7
36 00100100 0 0 1 0 0 1 0 0 _ _ P _ _ P _ _ 2 5
37 00100101 0 0 1 0 0 1 0 1 _ _ P _ _ P _ P 0 7
38 00100110 0 0 1 0 0 1 1 0 _ _ P _ _ P P _ 1 6
39 00100111 0 0 1 0 0 1 1 1 _ _ P _ _ P P P 0 7
40 00101000 0 0 1 0 1 0 0 0 _ _ P _ P _ _ _ 3 4
41 00101001 0 0 1 0 1 0 0 1 _ _ P _ P _ _ P 0 7
42 00101010 0 0 1 0 1 0 1 0 _ _ P _ P _ P _ 1 6
43 00101011 0 0 1 0 1 0 1 1 _ _ P _ P _ P P 0 7
44 00101100 0 0 1 0 1 1 0 0 _ _ P _ P P _ _ 2 5
45 00101101 0 0 1 0 1 1 0 1 _ _ P _ P P _ P 0 7
46 00101110 0 0 1 0 1 1 1 0 _ _ P _ P P P _ 1 6
47 00101111 0 0 1 0 1 1 1 1 _ _ P _ P P P P 0 7
48 00110000 0 0 1 1 0 0 0 0 _ _ P P _ _ _ _ 4 3
49 00110001 0 0 1 1 0 0 0 1 _ _ P P _ _ _ P 0 7
50 00110010 0 0 1 1 0 0 1 0 _ _ P P _ _ P _ 1 6
51 00110011 0 0 1 1 0 0 1 1 _ _ P P _ _ P P 0 7
52 00110100 0 0 1 1 0 1 0 0 _ _ P P _ P _ _ 2 5
53 00110101 0 0 1 1 0 1 0 1 _ _ P P _ P _ P 0 7
54 00110110 0 0 1 1 0 1 1 0 _ _ P P _ P P _ 1 6
55 00110111 0 0 1 1 0 1 1 1 _ _ P P _ P P P 0 7
56 00111000 0 0 1 1 1 0 0 0 _ _ P P P _ _ _ 3 4
57 00111001 0 0 1 1 1 0 0 1 _ _ P P P _ _ P 0 7
58 00111010 0 0 1 1 1 0 1 0 _ _ P P P _ P _ 1 6
59 00111011 0 0 1 1 1 0 1 1 _ _ P P P _ P P 0 7
60 00111100 0 0 1 1 1 1 0 0 _ _ P P P P _ _ 2 5
61 00111101 0 0 1 1 1 1 0 1 _ _ P P P P _ P 0 7
62 00111110 0 0 1 1 1 1 1 0 _ _ P P P P P _ 1 6
63 00111111 0 0 1 1 1 1 1 1 _ _ P P P P P P 0 7
64 01000000 0 1 0 0 0 0 0 0 _ P _ _ _ _ _ _ 6 1
65 01000001 0 1 0 0 0 0 0 1 _ P _ _ _ _ _ P 0 7
66 01000010 0 1 0 0 0 0 1 0 _ P _ _ _ _ P _ 1 6
67 01000011 0 1 0 0 0 0 1 1 _ P _ _ _ _ P P 0 7
68 01000100 0 1 0 0 0 1 0 0 _ P _ _ _ P _ _ 2 5
69 01000101 0 1 0 0 0 1 0 1 _ P _ _ _ P _ P 0 7
70 01000110 0 1 0 0 0 1 1 0 _ P _ _ _ P P _ 1 6
71 01000111 0 1 0 0 0 1 1 1 _ P _ _ _ P P P 0 7
72 01001000 0 1 0 0 1 0 0 0 _ P _ _ P _ _ _ 3 4
73 01001001 0 1 0 0 1 0 0 1 _ P _ _ P _ _ P 0 7
74 01001010 0 1 0 0 1 0 1 0 _ P _ _ P _ P _ 1 6
75 01001011 0 1 0 0 1 0 1 1 _ P _ _ P _ P P 0 7
76 01001100 0 1 0 0 1 1 0 0 _ P _ _ P P _ _ 2 5
77 01001101 0 1 0 0 1 1 0 1 _ P _ _ P P _ P 0 7
78 01001110 0 1 0 0 1 1 1 0 _ P _ _ P P P _ 1 6
79 01001111 0 1 0 0 1 1 1 1 _ P _ _ P P P P 0 7
80 01010000 0 1 0 1 0 0 0 0 _ P _ P _ _ _ _ 4 3
81 01010001 0 1 0 1 0 0 0 1 _ P _ P _ _ _ P 0 7
82 01010010 0 1 0 1 0 0 1 0 _ P _ P _ _ P _ 1 6
83 01010011 0 1 0 1 0 0 1 1 _ P _ P _ _ P P 0 7
84 01010100 0 1 0 1 0 1 0 0 _ P _ P _ P _ _ 2 5
85 01010101 0 1 0 1 0 1 0 1 _ P _ P _ P _ P 0 7
86 01010110 0 1 0 1 0 1 1 0 _ P _ P _ P P _ 1 6
87 01010111 0 1 0 1 0 1 1 1 _ P _ P _ P P P 0 7
88 01011000 0 1 0 1 1 0 0 0 _ P _ P P _ _ _ 3 4
89 01011001 0 1 0 1 1 0 0 1 _ P _ P P _ _ P 0 7
90 01011010 0 1 0 1 1 0 1 0 _ P _ P P _ P _ 1 6
91 01011011 0 1 0 1 1 0 1 1 _ P _ P P _ P P 0 7
92 01011100 0 1 0 1 1 1 0 0 _ P _ P P P _ _ 2 5
93 01011101 0 1 0 1 1 1 0 1 _ P _ P P P _ P 0 7
94 01011110 0 1 0 1 1 1 1 0 _ P _ P P P P _ 1 6
95 01011111 0 1 0 1 1 1 1 1 _ P _ P P P P P 0 7
96 01100000 0 1 1 0 0 0 0 0 _ P P _ _ _ _ _ 5 2
97 01100001 0 1 1 0 0 0 0 1 _ P P _ _ _ _ P 0 7
98 01100010 0 1 1 0 0 0 1 0 _ P P _ _ _ P _ 1 6
99 01100011 0 1 1 0 0 0 1 1 _ P P _ _ _ P P 0 7
100 01100100 0 1 1 0 0 1 0 0 _ P P _ _ P _ _ 2 5
101 01100101 0 1 1 0 0 1 0 1 _ P P _ _ P _ P 0 7
102 01100110 0 1 1 0 0 1 1 0 _ P P _ _ P P _ 1 6
103 01100111 0 1 1 0 0 1 1 1 _ P P _ _ P P P 0 7
104 01101000 0 1 1 0 1 0 0 0 _ P P _ P _ _ _ 3 4
105 01101001 0 1 1 0 1 0 0 1 _ P P _ P _ _ P 0 7
106 01101010 0 1 1 0 1 0 1 0 _ P P _ P _ P _ 1 6
107 01101011 0 1 1 0 1 0 1 1 _ P P _ P _ P P 0 7
108 01101100 0 1 1 0 1 1 0 0 _ P P _ P P _ _ 2 5
109 01101101 0 1 1 0 1 1 0 1 _ P P _ P P _ P 0 7
110 01101110 0 1 1 0 1 1 1 0 _ P P _ P P P _ 1 6
111 01101111 0 1 1 0 1 1 1 1 _ P P _ P P P P 0 7
112 01110000 0 1 1 1 0 0 0 0 _ P P P _ _ _ _ 4 3
113 01110001 0 1 1 1 0 0 0 1 _ P P P _ _ _ P 0 7
114 01110010 0 1 1 1 0 0 1 0 _ P P P _ _ P _ 1 6
115 01110011 0 1 1 1 0 0 1 1 _ P P P _ _ P P 0 7
116 01110100 0 1 1 1 0 1 0 0 _ P P P _ P _ _ 2 5
117 01110101 0 1 1 1 0 1 0 1 _ P P P _ P _ P 0 7
118 01110110 0 1 1 1 0 1 1 0 _ P P P _ P P _ 1 6
119 01110111 0 1 1 1 0 1 1 1 _ P P P _ P P P 0 7
120 01111000 0 1 1 1 1 0 0 0 _ P P P P _ _ _ 3 4
121 01111001 0 1 1 1 1 0 0 1 _ P P P P _ _ P 0 7
122 01111010 0 1 1 1 1 0 1 0 _ P P P P _ P _ 1 6
123 01111011 0 1 1 1 1 0 1 1 _ P P P P _ P P 0 7
124 01111100 0 1 1 1 1 1 0 0 _ P P P P P _ _ 2 5
125 01111101 0 1 1 1 1 1 0 1 _ P P P P P _ P 0 7
126 01111110 0 1 1 1 1 1 1 0 _ P P P P P P _ 1 6
127 01111111 0 1 1 1 1 1 1 1 _ P P P P P P P 0 7
128 10000000 1 0 0 0 0 0 0 0 P _ _ _ _ _ _ _ 7 0
129 10000001 1 0 0 0 0 0 0 1 P _ _ _ _ _ _ P 0 7
130 10000010 1 0 0 0 0 0 1 0 P _ _ _ _ _ P _ 1 6
131 10000011 1 0 0 0 0 0 1 1 P _ _ _ _ _ P P 0 7
132 10000100 1 0 0 0 0 1 0 0 P _ _ _ _ P _ _ 2 5
133 10000101 1 0 0 0 0 1 0 1 P _ _ _ _ P _ P 0 7
134 10000110 1 0 0 0 0 1 1 0 P _ _ _ _ P P _ 1 6
135 10000111 1 0 0 0 0 1 1 1 P _ _ _ _ P P P 0 7
136 10001000 1 0 0 0 1 0 0 0 P _ _ _ P _ _ _ 3 4
137 10001001 1 0 0 0 1 0 0 1 P _ _ _ P _ _ P 0 7
138 10001010 1 0 0 0 1 0 1 0 P _ _ _ P _ P _ 1 6
139 10001011 1 0 0 0 1 0 1 1 P _ _ _ P _ P P 0 7
140 10001100 1 0 0 0 1 1 0 0 P _ _ _ P P _ _ 2 5
141 10001101 1 0 0 0 1 1 0 1 P _ _ _ P P _ P 0 7
142 10001110 1 0 0 0 1 1 1 0 P _ _ _ P P P _ 1 6
143 10001111 1 0 0 0 1 1 1 1 P _ _ _ P P P P 0 7
144 10010000 1 0 0 1 0 0 0 0 P _ _ P _ _ _ _ 4 3
145 10010001 1 0 0 1 0 0 0 1 P _ _ P _ _ _ P 0 7
146 10010010 1 0 0 1 0 0 1 0 P _ _ P _ _ P _ 1 6
147 10010011 1 0 0 1 0 0 1 1 P _ _ P _ _ P P 0 7
148 10010100 1 0 0 1 0 1 0 0 P _ _ P _ P _ _ 2 5
149 10010101 1 0 0 1 0 1 0 1 P _ _ P _ P _ P 0 7
150 10010110 1 0 0 1 0 1 1 0 P _ _ P _ P P _ 1 6
151 10010111 1 0 0 1 0 1 1 1 P _ _ P _ P P P 0 7
152 10011000 1 0 0 1 1 0 0 0 P _ _ P P _ _ _ 3 4
153 10011001 1 0 0 1 1 0 0 1 P _ _ P P _ _ P 0 7
154 10011010 1 0 0 1 1 0 1 0 P _ _ P P _ P _ 1 6
155 10011011 1 0 0 1 1 0 1 1 P _ _ P P _ P P 0 7
156 10011100 1 0 0 1 1 1 0 0 P _ _ P P P _ _ 2 5
157 10011101 1 0 0 1 1 1 0 1 P _ _ P P P _ P 0 7
158 10011110 1 0 0 1 1 1 1 0 P _ _ P P P P _ 1 6
159 10011111 1 0 0 1 1 1 1 1 P _ _ P P P P P 0 7
160 10100000 1 0 1 0 0 0 0 0 P _ P _ _ _ _ _ 5 2
161 10100001 1 0 1 0 0 0 0 1 P _ P _ _ _ _ P 0 7
162 10100010 1 0 1 0 0 0 1 0 P _ P _ _ _ P _ 1 6
163 10100011 1 0 1 0 0 0 1 1 P _ P _ _ _ P P 0 7
164 10100100 1 0 1 0 0 1 0 0 P _ P _ _ P _ _ 2 5
165 10100101 1 0 1 0 0 1 0 1 P _ P _ _ P _ P 0 7
166 10100110 1 0 1 0 0 1 1 0 P _ P _ _ P P _ 1 6
167 10100111 1 0 1 0 0 1 1 1 P _ P _ _ P P P 0 7
168 10101000 1 0 1 0 1 0 0 0 P _ P _ P _ _ _ 3 4
169 10101001 1 0 1 0 1 0 0 1 P _ P _ P _ _ P 0 7
170 10101010 1 0 1 0 1 0 1 0 P _ P _ P _ P _ 1 6
171 10101011 1 0 1 0 1 0 1 1 P _ P _ P _ P P 0 7
172 10101100 1 0 1 0 1 1 0 0 P _ P _ P P _ _ 2 5
173 10101101 1 0 1 0 1 1 0 1 P _ P _ P P _ P 0 7
174 10101110 1 0 1 0 1 1 1 0 P _ P _ P P P _ 1 6
175 10101111 1 0 1 0 1 1 1 1 P _ P _ P P P P 0 7
176 10110000 1 0 1 1 0 0 0 0 P _ P P _ _ _ _ 4 3
177 10110001 1 0 1 1 0 0 0 1 P _ P P _ _ _ P 0 7
178 10110010 1 0 1 1 0 0 1 0 P _ P P _ _ P _ 1 6
179 10110011 1 0 1 1 0 0 1 1 P _ P P _ _ P P 0 7
180 10110100 1 0 1 1 0 1 0 0 P _ P P _ P _ _ 2 5
181 10110101 1 0 1 1 0 1 0 1 P _ P P _ P _ P 0 7
182 10110110 1 0 1 1 0 1 1 0 P _ P P _ P P _ 1 6
183 10110111 1 0 1 1 0 1 1 1 P _ P P _ P P P 0 7
184 10111000 1 0 1 1 1 0 0 0 P _ P P P _ _ _ 3 4
185 10111001 1 0 1 1 1 0 0 1 P _ P P P _ _ P 0 7
186 10111010 1 0 1 1 1 0 1 0 P _ P P P _ P _ 1 6
187 10111011 1 0 1 1 1 0 1 1 P _ P P P _ P P 0 7
188 10111100 1 0 1 1 1 1 0 0 P _ P P P P _ _ 2 5
189 10111101 1 0 1 1 1 1 0 1 P _ P P P P _ P 0 7
190 10111110 1 0 1 1 1 1 1 0 P _ P P P P P _ 1 6
191 10111111 1 0 1 1 1 1 1 1 P _ P P P P P P 0 7
192 11000000 1 1 0 0 0 0 0 0 P P _ _ _ _ _ _ 6 1
193 11000001 1 1 0 0 0 0 0 1 P P _ _ _ _ _ P 0 7
194 11000010 1 1 0 0 0 0 1 0 P P _ _ _ _ P _ 1 6
195 11000011 1 1 0 0 0 0 1 1 P P _ _ _ _ P P 0 7
196 11000100 1 1 0 0 0 1 0 0 P P _ _ _ P _ _ 2 5
197 11000101 1 1 0 0 0 1 0 1 P P _ _ _ P _ P 0 7
198 11000110 1 1 0 0 0 1 1 0 P P _ _ _ P P _ 1 6
199 11000111 1 1 0 0 0 1 1 1 P P _ _ _ P P P 0 7
200 11001000 1 1 0 0 1 0 0 0 P P _ _ P _ _ _ 3 4
201 11001001 1 1 0 0 1 0 0 1 P P _ _ P _ _ P 0 7
202 11001010 1 1 0 0 1 0 1 0 P P _ _ P _ P _ 1 6
203 11001011 1 1 0 0 1 0 1 1 P P _ _ P _ P P 0 7
204 11001100 1 1 0 0 1 1 0 0 P P _ _ P P _ _ 2 5
205 11001101 1 1 0 0 1 1 0 1 P P _ _ P P _ P 0 7
206 11001110 1 1 0 0 1 1 1 0 P P _ _ P P P _ 1 6
207 11001111 1 1 0 0 1 1 1 1 P P _ _ P P P P 0 7
208 11010000 1 1 0 1 0 0 0 0 P P _ P _ _ _ _ 4 3
209 11010001 1 1 0 1 0 0 0 1 P P _ P _ _ _ P 0 7
210 11010010 1 1 0 1 0 0 1 0 P P _ P _ _ P _ 1 6
211 11010011 1 1 0 1 0 0 1 1 P P _ P _ _ P P 0 7
212 11010100 1 1 0 1 0 1 0 0 P P _ P _ P _ _ 2 5
213 11010101 1 1 0 1 0 1 0 1 P P _ P _ P _ P 0 7
214 11010110 1 1 0 1 0 1 1 0 P P _ P _ P P _ 1 6
215 11010111 1 1 0 1 0 1 1 1 P P _ P _ P P P 0 7
216 11011000 1 1 0 1 1 0 0 0 P P _ P P _ _ _ 3 4
217 11011001 1 1 0 1 1 0 0 1 P P _ P P _ _ P 0 7
218 11011010 1 1 0 1 1 0 1 0 P P _ P P _ P _ 1 6
219 11011011 1 1 0 1 1 0 1 1 P P _ P P _ P P 0 7
220 11011100 1 1 0 1 1 1 0 0 P P _ P P P _ _ 2 5
221 11011101 1 1 0 1 1 1 0 1 P P _ P P P _ P 0 7
222 11011110 1 1 0 1 1 1 1 0 P P _ P P P P _ 1 6
223 11011111 1 1 0 1 1 1 1 1 P P _ P P P P P 0 7
224 11100000 1 1 1 0 0 0 0 0 P P P _ _ _ _ _ 5 2
225 11100001 1 1 1 0 0 0 0 1 P P P _ _ _ _ P 0 7
226 11100010 1 1 1 0 0 0 1 0 P P P _ _ _ P _ 1 6
227 11100011 1 1 1 0 0 0 1 1 P P P _ _ _ P P 0 7
228 11100100 1 1 1 0 0 1 0 0 P P P _ _ P _ _ 2 5
229 11100101 1 1 1 0 0 1 0 1 P P P _ _ P _ P 0 7
230 11100110 1 1 1 0 0 1 1 0 P P P _ _ P P _ 1 6
231 11100111 1 1 1 0 0 1 1 1 P P P _ _ P P P 0 7
232 11101000 1 1 1 0 1 0 0 0 P P P _ P _ _ _ 3 4
233 11101001 1 1 1 0 1 0 0 1 P P P _ P _ _ P 0 7
234 11101010 1 1 1 0 1 0 1 0 P P P _ P _ P _ 1 6
235 11101011 1 1 1 0 1 0 1 1 P P P _ P _ P P 0 7
236 11101100 1 1 1 0 1 1 0 0 P P P _ P P _ _ 2 5
237 11101101 1 1 1 0 1 1 0 1 P P P _ P P _ P 0 7
238 11101110 1 1 1 0 1 1 1 0 P P P _ P P P _ 1 6
239 11101111 1 1 1 0 1 1 1 1 P P P _ P P P P 0 7
240 11110000 1 1 1 1 0 0 0 0 P P P P _ _ _ _ 4 3
241 11110001 1 1 1 1 0 0 0 1 P P P P _ _ _ P 0 7
242 11110010 1 1 1 1 0 0 1 0 P P P P _ _ P _ 1 6
243 11110011 1 1 1 1 0 0 1 1 P P P P _ _ P P 0 7
244 11110100 1 1 1 1 0 1 0 0 P P P P _ P _ _ 2 5
245 11110101 1 1 1 1 0 1 0 1 P P P P _ P _ P 0 7
246 11110110 1 1 1 1 0 1 1 0 P P P P _ P P _ 1 6
247 11110111 1 1 1 1 0 1 1 1 P P P P _ P P P 0 7
248 11111000 1 1 1 1 1 0 0 0 P P P P P _ _ _ 3 4
249 11111001 1 1 1 1 1 0 0 1 P P P P P _ _ P 0 7
250 11111010 1 1 1 1 1 0 1 0 P P P P P _ P _ 1 6
251 11111011 1 1 1 1 1 0 1 1 P P P P P _ P P 0 7
252 11111100 1 1 1 1 1 1 0 0 P P P P P P _ _ 2 5
253 11111101 1 1 1 1 1 1 0 1 P P P P P P _ P 0 7
254 11111110 1 1 1 1 1 1 1 0 P P P P P P P _ 1 6
255 11111111 1 1 1 1 1 1 1 1 P P P P P P P P 0 7
Here are all the possibilities.
;_________________________1_________2
;______________01234567|89012345|67890123|45
;______________01234567|01234567|01234567|01
szString Byte "Ordinate|ur de Ph|ilippe R|IO"
;______________00000000|00000010|0
;_____________________________10|000000xx|
You maybe want to read this
http://stackoverflow.com/questions/3183582/what-is-the-fastest-substring-search-algorithm
http://www.sciencedirect.com/science/article/pii/S1570866706001067
About KMP, i port it some years ago, but i don´t remember on which file i used to test. At the time it was faster then BM.
Quote from: guga on January 04, 2016, 03:42:37 AM
You maybe want to read this
http://stackoverflow.com/questions/3183582/what-is-the-fastest-substring-search-algorithm
http://www.sciencedirect.com/science/article/pii/S1570866706001067
About KMP, i port it some years ago, but i don´t remember on which file i used to test. At the time it was faster then BM.
Lots of theory, no DLL to download and compare 8)
We've beaten Instr() to death, see e.g.
http://masm32.com/board/index.php?topic=4631.msg50092#msg50092
http://masm32.com/board/index.php?topic=2998.msg31226#msg31226
Thanks for the articles I go to read.
Quote
C:\Users\Grincheux\Downloads\InstrTimings0803>InstrTimings
AMD Athlon(tm) II X2 250 Processor (SSE3)
++++++++++++++++++++
Testing if [comment * -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=...] contains TXFS_RM_FLAG_DO_NOT_RESET_RM_AT_NEXT_START
30383 kCycles for 3 * MB Instr_
46519 kCycles for 3 * InString
45985 kCycles for 3 * crt_strstr
21128 kCycles for 3 * BMHBinSearch, using len
8158 kCycles for 3 * BMHBinsearch, known length
10222 kCycles for 3 * InstrJJ
32337 kCycles for 3 * InstrFicko
4684 kCycles for 3 * RevInstrFicko
4775 kCycles for 3 * MB Rinstr
30354 kCycles for 3 * MB Instr_
46375 kCycles for 3 * InString
46558 kCycles for 3 * crt_strstr
22403 kCycles for 3 * BMHBinSearch, using len
8081 kCycles for 3 * BMHBinsearch, known length
10132 kCycles for 3 * InstrJJ
32249 kCycles for 3 * InstrFicko
4988 kCycles for 3 * RevInstrFicko
4812 kCycles for 3 * MB Rinstr
2026935 = eax MB Instr_
2026935 = eax InString
2026935 = eax crt_strstr
2026935 = eax BMHBinSearch, using len
2026935 = eax BMHBinsearch, known length
2026935 = eax InstrJJ
2026935 = eax InstrFicko
2026935 = eax RevInstrFicko
2026935 = eax MB Rinstr
Testing if [comment * -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=...] contains TXFS_
269 kCycles for 3 * MB Instr_
7496 kCycles for 3 * InString
425 kCycles for 3 * crt_strstr
11442 kCycles for 3 * BMHBinSearch, using len
129 kCycles for 3 * BMHBinsearch, known length
58 kCycles for 3 * InstrJJ
4858 kCycles for 3 * InstrFicko
4868 kCycles for 3 * RevInstrFicko
4996 kCycles for 3 * MB Rinstr
251 kCycles for 3 * MB Instr_
7559 kCycles for 3 * InString
376 kCycles for 3 * crt_strstr
11574 kCycles for 3 * BMHBinSearch, using len
128 kCycles for 3 * BMHBinsearch, known length
58 kCycles for 3 * InstrJJ
4608 kCycles for 3 * InstrFicko
4736 kCycles for 3 * RevInstrFicko
4699 kCycles for 3 * MB Rinstr
18057 = eax MB Instr_
18057 = eax InString
18057 = eax crt_strstr
18057 = eax BMHBinSearch, using len
18057 = eax BMHBinsearch, known length
18057 = eax InstrJJ
18057 = eax InstrFicko
2028751 = eax RevInstrFicko
2028751 = eax MB Rinstr
Testing if [comment * -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=...] contains No such string
32500 kCycles for 3 * MB Instr_
45404 kCycles for 3 * InString
45875 kCycles for 3 * crt_strstr
31196 kCycles for 3 * BMHBinSearch, using len
18401 kCycles for 3 * BMHBinsearch, known length
9474 kCycles for 3 * InstrJJ
30341 kCycles for 3 * InstrFicko
21088 kCycles for 3 * RevInstrFicko
32649 kCycles for 3 * MB Rinstr
32691 kCycles for 3 * MB Instr_
45563 kCycles for 3 * InString
45542 kCycles for 3 * crt_strstr
32065 kCycles for 3 * BMHBinSearch, using len
18438 kCycles for 3 * BMHBinsearch, known length
9485 kCycles for 3 * InstrJJ
30437 kCycles for 3 * InstrFicko
20919 kCycles for 3 * RevInstrFicko
32988 kCycles for 3 * MB Rinstr
0 = eax MB Instr_
0 = eax InString
0 = eax crt_strstr
0 = eax BMHBinSearch, using len
0 = eax BMHBinsearch, known length
0 = eax InstrJJ
0 = eax InstrFicko
0 = eax RevInstrFicko
0 = eax MB Rinstr
Testing if [This is a simple string which has at the...] contains Dupli
3235 kCycles for 10000 * MB Instr_
4160 kCycles for 10000 * InString
3738 kCycles for 10000 * crt_strstr
5507 kCycles for 10000 * BMHBinSearch, using len
3556 kCycles for 10000 * BMHBinsearch, known length
973 kCycles for 10000 * InstrJJ
2938 kCycles for 10000 * InstrFicko
1634 kCycles for 10000 * RevInstrFicko
1422 kCycles for 10000 * MB Rinstr
3193 kCycles for 10000 * MB Instr_
4206 kCycles for 10000 * InString
4026 kCycles for 10000 * crt_strstr
5064 kCycles for 10000 * BMHBinSearch, using len
3709 kCycles for 10000 * BMHBinsearch, known length
742 kCycles for 10000 * InstrJJ
2873 kCycles for 10000 * InstrFicko
1296 kCycles for 10000 * RevInstrFicko
1168 kCycles for 10000 * MB Rinstr
70 = eax MB Instr_
70 = eax InString
70 = eax crt_strstr
70 = eax BMHBinSearch, using len
70 = eax BMHBinsearch, known length
70 = eax InstrJJ
70 = eax InstrFicko
70 = eax RevInstrFicko
70 = eax MB Rinstr
--- ok ---
I adopt JJ2007's string search. I don't understand what it does but it seems to to quicker.
I have downloaded the articles you indicate, when I see how short the function (in C) are and how long JJ's algo is, I am not sure they are very good.
If Guga could test these codes with its profiler that would be interesting.
I will test them :t.
But before, i need to finish a few more functions to include on the library. I need to make sure the updates i´m making did not break anything :greensml:
I want to use JJ2007's function so I insert his code from the zip, the program crashes. I make no call to his function. Just include the source!
.MMX
.XMM
; -------------------------------------------------------------------------------------------------------------
.Code
; -------------------------------------------------------------------------------------------------------------
; ==================================================================================
ALIGN 4
; ==================================================================================
OPTION PROLOGUE:NONE
OPTION EPILOGUE:NONE
UseLodsb= 1 ; strangely enough, lodsb is faster on Celeron M and AMD Athlon (>10%)
UsePushad = 0 ; 1=6 bytes less, no speed difference on P4
align 16
TestF_s:
InstrJJ proc StartPos:DWORD, lpSource:DWORD, lpPattern:DWORD, sMode:DWORD ; TestF proc
; sMode is reserved for a future implementation of a "ignore case of first character" mode
if UsePushad
EspOff equ esp+8*4
pushad
else
EspOff equ esp+6*4
push esi
push edi
push ebx ; all registers preserved, except eax = return value
push ebp ; pushad/popad would cost overall ca. 12 cycles on P4
push ecx
push edx
endif
mov edx, [EspOff+3*4] ; lpPattern
movzx ecx, byte ptr [edx] ; 3 cycles to fill xmm0 with first byte
mov edi, [edx+1] ; next 4 bytes of pattern
imul ecx, 01010101h ; propagate lobyte
movd xmm0, ecx
pshufd xmm0, xmm0, 0 ; xmm0 holds first byte of pattern
mov eax, edi
or ebx, -1
.if !al ; 7917, 786, 305b
inc ebx ; clr ebx, pattern byte 2 is zero
mov edi, ebx ; clear edi
.elseif !ah
movzx ebx, bl ; byte 3 is zero (= and ebx, 0ffh)
and edi, ebx
.else
bswap eax ; one byte shorter, and a bit faster than shr eax, 16
test ah, ah ; must come first; if not zero, test new al (highest byte before swap)
.if Zero?
movzx ebx, bx ; byte 4 is zero (= and ebx, 0ffffh)
.else
test al, al ; highest byte before swap
.if Zero?
shr ebx, 8 ; byte 5 is zero (= ebx, 0ffffffh but faster and shorter)
.endif
.endif
.endif
mov esi, [EspOff+2*4] ; lpSource
and edi, ebx ; apply mask for pattern bytes 2-5
test esi, 15 ; aligned?
je L0 ; if yes, clear ebp and go directly into the main loop
lea ebp, [esi+16] ; save unaligned address+16
movdqu xmm2, [esi] ; load 16 bytes from current unaligned address
and esi, -16 ; align esi downwards
jmp Lu
Repeat 0 ; 0 is fastest
nop ; 4=1666, 0=1672
Endm
L0: xor ebp, ebp ; ----------------------------- clear the alignment flag -----------------------------------------
L1: movdqa xmm2, [esi]
Lu: movdqa xmm1, xmm0 ; xmm0 has first byte of pattern
lea esi, [esi+16] ; position in mainstring (moving up/down lea or add costs cycles; add esi, 16 is 5 cycles slower on CM, 8 on P4)
pcmpeqb xmm1, xmm2 ; compare memory to pattern byte
pmovmskb edx, xmm1 ; set byte mask in edx for search pattern byte
if 1 ; for testing only - don't use zero if there is no match
pcmpeqb xmm1, xmm2 ; look for zero byte (xmm1 is either 0 or ff)
pmovmskb eax, xmm1 ; set byte mask in eax for zero delimiter byte
test eax, eax
jne ZBF ; zero byte found? check/clear ebp, then ChkNull
endif
REPEAT 0 ; unrolling has negative effects
test edx, edx
jne @F ; pattern byte found? no=go back, yes=repeat the exercise with a word
movdqa xmm2, [esi]
movdqa xmm1, xmm0 ; xmm0 has first byte of pattern
lea esi, [esi+16] ; position in mainstring (moving up/down lea or add costs cycles; add esi, 16 is 5 cycles slower on CM, 8 on P4)
pcmpeqb xmm1, xmm2 ; compare memory to pattern byte
pmovmskb edx, xmm1 ; set byte mask in edx for search pattern byte
pcmpeqb xmm1, xmm2 ; look for zero byte (xmm1 is either 0 or ff)
pmovmskb eax, xmm1 ; set byte mask in eax for zero delimiter byte
test eax, eax ; db 02eh ; branch hint: not taken, no effect
jne ZBF ; zero byte found? check/clear ebp, then ChkNull
ENDM
test edx, edx
je L1 ; pattern byte found? no=go back, yes=repeat the exercise with a word
@@: test ebp, ebp ; 0=aligned from the beginning, or at least second loop
jne EbpChk ; rare case, once per algo and only if Mainstring is unaligned - so try not to branch
@@:
REPEAT 3 ; 4 same as 3, 1+5 worse: 1=+300, 2=+30
bsf ecx, edx ; ----------------------------- bit scan for the index ----------------------------------------------------
btr edx, ecx ; clear bit ecx in edx (no add ecx, ebp here: 0.5*171=+85 cycles)
mov eax, [esi+ecx-14-1] ; first unaligned chunk contains match; this loop 171 times for TestSubX
and eax, ebx ; apply mask
cmp eax, edi
je FoundPattern
test edx, edx
je L1 ; mysteriously, jmp L0 was consistently ca. 10 cycles faster than jmp L1...
ENDM
jmp @B ; 0=no more hits in these 16 bytes, go back searching (this order is here a lot faster)
EbpChk:
sub ebp, esi ; ebp>0, might be first unaligned loop
.if Sign?
xor ebp, ebp ; ebp<esi: we are at least in the second loop, i.e. 16-byte aligned
.endif
@@: bsf ecx, edx ; bit scan with misalignment offset in ebp -----------------------
btr edx, ecx ; clear bit ecx in edx
add ecx, ebp ; ebp is the misalignment offset, needed for the whole bitscan loop
mov eax, [esi+ecx-15] ; first unaligned chunk contains match
and eax, ebx ; apply mask
cmp eax, edi
je FoundPatternSubEcx ; corrects increased ecx
BadLuck:
test edx, edx
je L0 ; unaligned bit scan end, time to clear ebp in L0 -------------
jmp @B ; first character was OK but next one wasn't, so do the bsf
ZBF: sub ebp, esi ; ebp>0, might be first unaligned loop
jns ChkNull
xor ebp, ebp ; ebp<esi: we are at least in the second loop, i.e. 16-byte aligned
ChkNull: bsf ebx, eax ; nullbyte index to ebx; this branch only if a nullbyte was found in Mainstring
xor eax, eax ; default: 0=no match
test edx, edx ; edx holds pattern index
je NoMatch
bsf ecx, edx ; pattern word index in ecx
cmp ebx, ecx ; null before pattern word: get outta here
jb NoMatch
add ecx, ebp ; jmp FoundPattern is slower (same size)
; 0 16 32 48 51
;SmSrcU db "123456789012a4X67890?2345b78901234X67890c234567890X6789012345", 0 ;, "X67890"
;SmPat7 db "X678901", 0
FoundPatternSubEcx:
sub ecx, ebp
FoundPattern: ; we need to check the rest of the string here
cmp edi, 0ffffffh ; 3 bytes = 5-byte pattern, OK
jbe Match
push esi
push ebx
lea ebx, [esi+ecx-16+4] ; Mainstring: 1 byte covered by inner loop, 4 by edi, so we check from byte 6 onwards
mov esi, [EspOff+3*4+4+4] ; Pattern
add ebx, ebp
add esi, 5
@@:
if UseLodsb
lodsb ; sometimes a few cycles faster (but difficult to prove)
test al, al ; this cannot be shifted below because of the rare case of equality after the zero delimiter:
je @F ; db "Mainstr", 0, "abc" ... db "str", 0, "abx" would crash
else
movzx eax, byte ptr [esi] ; esi=Substr (mov al is a few cycles slower)
test eax, eax
je @F
inc esi
endif
inc ebx
cmp al, [ebx] ; ebx=Mainstr
je @B
@@: pop ebx
pop esi
jne BadLuck ; test al, al not needed, flags still valid
Match: sub esi, [EspOff+2*4] ; lpSource: subtract original src pointer
lea eax, [esi+ecx-15] ; and adjust for the index
add eax, ebp ; ebp = offset in first unaligned chunk
NoMatch:
if UsePushad
mov [esp+32-4], eax ; popped eax will contain current eax
popad
else
pop edx ; 6 registers
pop ecx
pop ebp
pop ebx
pop edi
pop esi
endif
ret 4*DWORD ; 4 arguments
InstrJJ endp
Itested with pushad = 0 and 1
I appreciate some help
Have a good day/night
Quote from: Grincheux on January 04, 2016, 05:32:26 PM
I want to use JJ2007's function so I insert his code from the zip, the program crashes. I make no call to his function. Just include the source!
OPTION PROLOGUE:NONE
OPTION EPILOGUE:NONE
...
ret 4*DWORD ; 4 arguments
InstrJJ endp
Mysteries of Masm ;)
here is he source I use
.MMX
.XMM
; -------------------------------------------------------------------------------------------------------------
.Code
; -------------------------------------------------------------------------------------------------------------
; ==================================================================================
ALIGN 4
; ==================================================================================
OPTION PROLOGUE:NONE
OPTION EPILOGUE:NONE
UseLodsb= 1 ; strangely enough, lodsb is faster on Celeron M and AMD Athlon (>10%)
UsePushad = 0 ; 1=6 bytes less, no speed difference on P4
align 16
TestF_s:
InstrJJ proc StartPos:DWORD, lpSource:DWORD, lpPattern:DWORD, sMode:DWORD ; TestF proc
; sMode is reserved for a future implementation of a "ignore case of first character" mode
if UsePushad
EspOff equ esp+8*4
pushad
else
EspOff equ esp+6*4
push esi
push edi
push ebx ; all registers preserved, except eax = return value
push ebp ; pushad/popad would cost overall ca. 12 cycles on P4
push ecx
push edx
endif
mov edx, [EspOff+3*4] ; lpPattern
movzx ecx, byte ptr [edx] ; 3 cycles to fill xmm0 with first byte
mov edi, [edx+1] ; next 4 bytes of pattern
imul ecx, 01010101h ; propagate lobyte
movd xmm0, ecx
pshufd xmm0, xmm0, 0 ; xmm0 holds first byte of pattern
mov eax, edi
or ebx, -1
.if !al ; 7917, 786, 305b
inc ebx ; clr ebx, pattern byte 2 is zero
mov edi, ebx ; clear edi
.elseif !ah
movzx ebx, bl ; byte 3 is zero (= and ebx, 0ffh)
and edi, ebx
.else
bswap eax ; one byte shorter, and a bit faster than shr eax, 16
test ah, ah ; must come first; if not zero, test new al (highest byte before swap)
.if Zero?
movzx ebx, bx ; byte 4 is zero (= and ebx, 0ffffh)
.else
test al, al ; highest byte before swap
.if Zero?
shr ebx, 8 ; byte 5 is zero (= ebx, 0ffffffh but faster and shorter)
.endif
.endif
.endif
mov esi, [EspOff+2*4] ; lpSource
and edi, ebx ; apply mask for pattern bytes 2-5
test esi, 15 ; aligned?
je L0 ; if yes, clear ebp and go directly into the main loop
lea ebp, [esi+16] ; save unaligned address+16
movdqu xmm2, [esi] ; load 16 bytes from current unaligned address
and esi, -16 ; align esi downwards
jmp Lu
Repeat 0 ; 0 is fastest
nop ; 4=1666, 0=1672
Endm
L0: xor ebp, ebp ; ----------------------------- clear the alignment flag -----------------------------------------
L1: movdqa xmm2, [esi]
Lu: movdqa xmm1, xmm0 ; xmm0 has first byte of pattern
lea esi, [esi+16] ; position in mainstring (moving up/down lea or add costs cycles; add esi, 16 is 5 cycles slower on CM, 8 on P4)
pcmpeqb xmm1, xmm2 ; compare memory to pattern byte
pmovmskb edx, xmm1 ; set byte mask in edx for search pattern byte
if 1 ; for testing only - don't use zero if there is no match
pcmpeqb xmm1, xmm2 ; look for zero byte (xmm1 is either 0 or ff)
pmovmskb eax, xmm1 ; set byte mask in eax for zero delimiter byte
test eax, eax
jne ZBF ; zero byte found? check/clear ebp, then ChkNull
endif
REPEAT 0 ; unrolling has negative effects
test edx, edx
jne @F ; pattern byte found? no=go back, yes=repeat the exercise with a word
movdqa xmm2, [esi]
movdqa xmm1, xmm0 ; xmm0 has first byte of pattern
lea esi, [esi+16] ; position in mainstring (moving up/down lea or add costs cycles; add esi, 16 is 5 cycles slower on CM, 8 on P4)
pcmpeqb xmm1, xmm2 ; compare memory to pattern byte
pmovmskb edx, xmm1 ; set byte mask in edx for search pattern byte
pcmpeqb xmm1, xmm2 ; look for zero byte (xmm1 is either 0 or ff)
pmovmskb eax, xmm1 ; set byte mask in eax for zero delimiter byte
test eax, eax ; db 02eh ; branch hint: not taken, no effect
jne ZBF ; zero byte found? check/clear ebp, then ChkNull
ENDM
test edx, edx
je L1 ; pattern byte found? no=go back, yes=repeat the exercise with a word
@@: test ebp, ebp ; 0=aligned from the beginning, or at least second loop
jne EbpChk ; rare case, once per algo and only if Mainstring is unaligned - so try not to branch
@@:
REPEAT 3 ; 4 same as 3, 1+5 worse: 1=+300, 2=+30
bsf ecx, edx ; ----------------------------- bit scan for the index ----------------------------------------------------
btr edx, ecx ; clear bit ecx in edx (no add ecx, ebp here: 0.5*171=+85 cycles)
mov eax, [esi+ecx-14-1] ; first unaligned chunk contains match; this loop 171 times for TestSubX
and eax, ebx ; apply mask
cmp eax, edi
je FoundPattern
test edx, edx
je L1 ; mysteriously, jmp L0 was consistently ca. 10 cycles faster than jmp L1...
ENDM
jmp @B ; 0=no more hits in these 16 bytes, go back searching (this order is here a lot faster)
EbpChk:
sub ebp, esi ; ebp>0, might be first unaligned loop
.if Sign?
xor ebp, ebp ; ebp<esi: we are at least in the second loop, i.e. 16-byte aligned
.endif
@@: bsf ecx, edx ; bit scan with misalignment offset in ebp -----------------------
btr edx, ecx ; clear bit ecx in edx
add ecx, ebp ; ebp is the misalignment offset, needed for the whole bitscan loop
mov eax, [esi+ecx-15] ; first unaligned chunk contains match
and eax, ebx ; apply mask
cmp eax, edi
je FoundPatternSubEcx ; corrects increased ecx
BadLuck:
test edx, edx
je L0 ; unaligned bit scan end, time to clear ebp in L0 -------------
jmp @B ; first character was OK but next one wasn't, so do the bsf
ZBF: sub ebp, esi ; ebp>0, might be first unaligned loop
jns ChkNull
xor ebp, ebp ; ebp<esi: we are at least in the second loop, i.e. 16-byte aligned
ChkNull: bsf ebx, eax ; nullbyte index to ebx; this branch only if a nullbyte was found in Mainstring
xor eax, eax ; default: 0=no match
test edx, edx ; edx holds pattern index
je NoMatch
bsf ecx, edx ; pattern word index in ecx
cmp ebx, ecx ; null before pattern word: get outta here
jb NoMatch
add ecx, ebp ; jmp FoundPattern is slower (same size)
; 0 16 32 48 51
;SmSrcU db "123456789012a4X67890?2345b78901234X67890c234567890X6789012345", 0 ;, "X67890"
;SmPat7 db "X678901", 0
FoundPatternSubEcx:
sub ecx, ebp
FoundPattern: ; we need to check the rest of the string here
cmp edi, 0ffffffh ; 3 bytes = 5-byte pattern, OK
jbe Match
push esi
push ebx
lea ebx, [esi+ecx-16+4] ; Mainstring: 1 byte covered by inner loop, 4 by edi, so we check from byte 6 onwards
mov esi, [EspOff+3*4+4+4] ; Pattern
add ebx, ebp
add esi, 5
@@:
if UseLodsb
lodsb ; sometimes a few cycles faster (but difficult to prove)
test al, al ; this cannot be shifted below because of the rare case of equality after the zero delimiter:
je @F ; db "Mainstr", 0, "abc" ... db "str", 0, "abx" would crash
else
movzx eax, byte ptr [esi] ; esi=Substr (mov al is a few cycles slower)
test eax, eax
je @F
inc esi
endif
inc ebx
cmp al, [ebx] ; ebx=Mainstr
je @B
@@: pop ebx
pop esi
jne BadLuck ; test al, al not needed, flags still valid
Match: sub esi, [EspOff+2*4] ; lpSource: subtract original src pointer
lea eax, [esi+ecx-15] ; and adjust for the index
add eax, ebp ; ebp = offset in first unaligned chunk
NoMatch:
if UsePushad
mov [esp+32-4], eax ; popped eax will contain current eax
popad
else
pop edx ; 6 registers
pop ecx
pop ebp
pop ebx
pop edi
pop esi
endif
ret 4*DWORD ; 4 arguments
InstrJJ endp
But that does not explain why the program crashes without calling the function!
Probably because it is set to use no prologue or epilogues, which JJ has highlighted in his post in red. And if you just included the file early on in your own project all your other functions will assume that behaviour as well and thus crash.
Prob best restore them with
OPTION PROLOGUE:PrologueDef
OPTION EPILOGUE:EpilogueDef
at the end of the file your have included (After the ENDP)
I think that is the answer. Thx