… | |
… | |
39 | #endif |
39 | #endif |
40 | } |
40 | } |
41 | |
41 | |
42 | =item ecb_noinline |
42 | =item ecb_noinline |
43 | |
43 | |
44 | Prevent a function from being inlined - it might be optimised away, but |
44 | Prevent a function from being inlined - it might be optimised away, but |
45 | not inlined into other functions. This is useful if you know your function |
45 | not inlined into other functions. This is useful if you know your function |
46 | is rarely called and large enough for inlining not to be helpful. |
46 | is rarely called and large enough for inlining not to be helpful. |
47 | |
47 | |
48 | =item ecb_noreturn |
48 | =item ecb_noreturn |
49 | |
49 | |
… | |
… | |
61 | |
61 | |
62 | =head2 OPTIMISATION HINTS |
62 | =head2 OPTIMISATION HINTS |
63 | |
63 | |
64 | =over 4 |
64 | =over 4 |
65 | |
65 | |
66 | =item bool ecb_is_constant(expr) |
66 | =item bool ecb_is_constant(expr) [MACRO] |
67 | |
67 | |
68 | Returns true iff the expression can be deduced to be a compile-time |
68 | Returns true iff the expression can be deduced to be a compile-time |
69 | constant, and false otherwise. |
69 | constant, and false otherwise. |
70 | |
70 | |
71 | For example, when you have a C<rndm16> function that returns a 16 bit |
71 | For example, when you have a C<rndm16> function that returns a 16 bit |
… | |
… | |
89 | return is_constant (n) && !(n & (n - 1)) |
89 | return is_constant (n) && !(n & (n - 1)) |
90 | ? rndm16 () & (num - 1) |
90 | ? rndm16 () & (num - 1) |
91 | : (n * (uint32_t)rndm16 ()) >> 16; |
91 | : (n * (uint32_t)rndm16 ()) >> 16; |
92 | } |
92 | } |
93 | |
93 | |
94 | =item bool ecb_expect (expr, value) |
94 | =item bool ecb_expect (expr, value) [MACRO] |
95 | |
95 | |
96 | Evaluates C<expr> and returns it. In addition, it tells the compiler that |
96 | Evaluates C<expr> and returns it. In addition, it tells the compiler that |
97 | the C<expr> evaluates to C<value> a lot, which can be used for static |
97 | the C<expr> evaluates to C<value> a lot, which can be used for static |
98 | branch optimisations. |
98 | branch optimisations. |
99 | |
99 | |
100 | Usually, you want to use the more intuitive C<ecb_likely> and |
100 | Usually, you want to use the more intuitive C<ecb_likely> and |
101 | C<ecb_unlikely> functions instead. |
101 | C<ecb_unlikely> functions instead. |
102 | |
102 | |
103 | =item bool ecb_likely (bool) |
103 | =item bool ecb_likely (bool) [MACRO] |
104 | |
104 | |
105 | =item bool ecb_unlikely (bool) |
105 | =item bool ecb_unlikely (bool) [MACRO] |
106 | |
106 | |
107 | These two functions expect an expression that is true or false and return |
107 | These two functions expect an expression that is true or false and return |
108 | C<1> or C<0>, respectively, so when used in the condition of an C<if> or |
108 | C<1> or C<0>, respectively, so when used in the condition of an C<if> or |
109 | other conditional statement, it will not change the program: |
109 | other conditional statement, it will not change the program: |
110 | |
110 | |
111 | /* these two do the same thing */ |
111 | /* these two do the same thing */ |
112 | if (some_condition) ...; |
112 | if (some_condition) ...; |
113 | if (ecb_likely (some_condition)) ...; |
113 | if (ecb_likely (some_condition)) ...; |
114 | |
114 | |
115 | However, by using C<ecb_likely>, you tell the compiler that the condition |
115 | However, by using C<ecb_likely>, you tell the compiler that the condition |
116 | is likely to be true (and for C<ecb_unlikely>, that it is unlikely to be |
116 | is likely to be true (and for C<ecb_unlikely>, that it is unlikely to be |
117 | true). |
117 | true). |
118 | |
118 | |
119 | For example, when you check for a 0-ptr and expect this to be a rare, |
119 | For example, when you check for a null pointer and expect this to be a |
120 | exceptional, case, then use C<ecb_unlikely>: |
120 | rare, exceptional, case, then use C<ecb_unlikely>: |
121 | |
121 | |
122 | void my_free (void *ptr) |
122 | void my_free (void *ptr) |
123 | { |
123 | { |
124 | if (ecb_unlikely (ptr == 0)) |
124 | if (ecb_unlikely (ptr == 0)) |
125 | return; |
125 | return; |
… | |
… | |
129 | tell the compiler what the hot path through a function is can increase |
129 | tell the compiler what the hot path through a function is can increase |
130 | performance considerably. |
130 | performance considerably. |
131 | |
131 | |
132 | A very good example is in a function that reserves more space for some |
132 | A very good example is in a function that reserves more space for some |
133 | memory block (for example, inside an implementation of a string stream) - |
133 | memory block (for example, inside an implementation of a string stream) - |
134 | each time something is added, you have to check for a buffer overrun, but |
134 | each time something is added, you have to check for a buffer overrun, but |
135 | you expect that most checks will turn out to be false: |
135 | you expect that most checks will turn out to be false: |
136 | |
136 | |
137 | /* make sure we have "size" extra room in our buffer */ |
137 | /* make sure we have "size" extra room in our buffer */ |
138 | ecb_inline void |
138 | ecb_inline void |
139 | reserve (int size) |
139 | reserve (int size) |
140 | { |
140 | { |
141 | if (ecb_unlikely (current + size > end)) |
141 | if (ecb_unlikely (current + size > end)) |
142 | real_reserve_method (size); /* presumably noinline */ |
142 | real_reserve_method (size); /* presumably noinline */ |
143 | } |
143 | } |
144 | |
144 | |
145 | =item bool ecb_assume (cond) |
145 | =item bool ecb_assume (cond) [MACRO] |
146 | |
146 | |
147 | Try to tell the compiler that some condition is true, even if it's not |
147 | Try to tell the compiler that some condition is true, even if it's not |
148 | obvious. |
148 | obvious. |
149 | |
149 | |
150 | This can be used to teach the compiler about invariants or other |
150 | This can be used to teach the compiler about invariants or other |
… | |
… | |
173 | call will never be executed. |
173 | call will never be executed. |
174 | |
174 | |
175 | =item bool ecb_unreachable () |
175 | =item bool ecb_unreachable () |
176 | |
176 | |
177 | This function does nothing itself, except tell the compiler that it will |
177 | This function does nothing itself, except tell the compiler that it will |
178 | never be executed. Apart from suppressing a warning in some cases, this |
178 | never be executed. Apart from suppressing a warning in some cases, this |
179 | function can be used to implement C<ecb_assume> or similar functions. |
179 | function can be used to implement C<ecb_assume> or similar functions. |
180 | |
180 | |
181 | =item bool ecb_prefetch (addr, rw, locality) |
181 | =item bool ecb_prefetch (addr, rw, locality) [MACRO] |
182 | |
182 | |
183 | Tells the compiler to try to prefetch memory at the given C<addr>ess |
183 | Tells the compiler to try to prefetch memory at the given C<addr>ess |
184 | for either reading (C<rw> = 0) or writing (C<rw> = 1). A C<locality> of |
184 | for either reading (C<rw> = 0) or writing (C<rw> = 1). A C<locality> of |
185 | C<0> means that there will only be one access later, C<3> means that |
185 | C<0> means that there will only be one access later, C<3> means that |
186 | the data will likely be accessed very often, and values in between mean |
186 | the data will likely be accessed very often, and values in between mean |
187 | something... in between. The memory pointed to by the address does not |
187 | something... in between. The memory pointed to by the address does not |
188 | need to be accessible (it could be a null pointer for example), but C<rw> |
188 | need to be accessible (it could be a null pointer for example), but C<rw> |
189 | and C<locality> must be compile-time constants. |
189 | and C<locality> must be compile-time constants. |
190 | |
190 | |
191 | An obvious way to use this is to prefetch some data far away, in a big |
191 | An obvious way to use this is to prefetch some data far away, in a big |
192 | array you loop over. This prefetches memory some 128 array elements later, |
192 | array you loop over. This prefetches memory some 128 array elements later, |
193 | in the hope that it will be ready when the CPU arrives at that location. |
193 | in the hope that it will be ready when the CPU arrives at that location. |
194 | |
194 | |
195 | int sum = 0; |
195 | int sum = 0; |
196 | |
196 | |
197 | for (i = 0; i < N; ++i) |
197 | for (i = 0; i < N; ++i) |
… | |
… | |
222 | |
222 | |
223 | =item bool ecb_big_endian () |
223 | =item bool ecb_big_endian () |
224 | |
224 | |
225 | =item bool ecb_little_endian () |
225 | =item bool ecb_little_endian () |
226 | |
226 | |
|
|
227 | These two functions return true if the byte order is big endian |
|
|
228 | (most-significant byte first) or little endian (least-significant byte |
|
|
229 | first) respectively. |
|
|
230 | |
227 | =item int ecb_ctz32 (uint32_t x) |
231 | =item int ecb_ctz32 (uint32_t x) |
228 | |
232 | |
|
|
233 | Returns the index of the least significant bit set in C<x> (or |
|
|
234 | equivalently the number of bits set to 0 before the least significant |
|
|
235 | bit set), starting from 0. If C<x> is 0 the result is undefined. A |
|
|
236 | common use case is to compute the binary logarithm of a power of two, i.e., |
|
|
237 | floor(log2(x)). For example: |
|
|
238 | |
|
|
239 | ecb_ctz32(3) = 0 |
|
|
240 | ecb_ctz32(6) = 1 |
|
|
241 | |
229 | =item int ecb_popcount32 (uint32_t x) |
242 | =item int ecb_popcount32 (uint32_t x) |
230 | |
243 | |
|
|
244 | Returns the number of bits set to 1 in C<x>. For example: |
|
|
245 | |
|
|
246 | ecb_popcount32(7) = 3 |
|
|
247 | ecb_popcount32(255) = 8 |
|
|
248 | |
|
|
249 | =item uint32_t ecb_bswap16 (uint32_t x) |
|
|
250 | |
231 | =item uint32_t ecb_bswap32 (uint32_t x) |
251 | =item uint32_t ecb_bswap32 (uint32_t x) |
232 | |
252 | |
233 | =item uint32_t ecb_bswap16 (uint32_t x) |
253 | These two functions return the value of the 16-bit (32-bit) variable |
|
|
254 | C<x> after reversing the order of bytes. |
234 | |
255 | |
235 | =item uint32_t ecb_rotr32 (uint32_t x, unsigned int count) |
256 | =item uint32_t ecb_rotr32 (uint32_t x, unsigned int count) |
236 | |
257 | |
237 | =item uint32_t ecb_rotl32 (uint32_t x, unsigned int count) |
258 | =item uint32_t ecb_rotl32 (uint32_t x, unsigned int count) |
238 | |
259 | |
|
|
260 | These two functions return the value of C<x> after rotating all the bits |
|
|
261 | by C<count> positions to the right or left respectively. |
|
|
262 | |
239 | =back |
263 | =back |
240 | |
264 | |
241 | =head2 ARITHMETIC |
265 | =head2 ARITHMETIC |
242 | |
266 | |
243 | =over 4 |
267 | =over 4 |
244 | |
268 | |
245 | =item x = ecb_mod (m, n) [MACRO] |
269 | =item x = ecb_mod (m, n) [MACRO] |
246 | |
270 | |
|
|
271 | Returns the non-negative remainder of the modulo operation between C<m> |
|
|
272 | and C<n>. |
|
|
273 | |
247 | =back |
274 | =back |
248 | |
275 | |
249 | =head2 UTILITY |
276 | =head2 UTILITY |
250 | |
277 | |
251 | =over 4 |
278 | =over 4 |
252 | |
279 | |
253 | =item ecb_array_length (name) [MACRO] |
280 | =item element_count = ecb_array_length (name) [MACRO] |
254 | |
281 | |
255 | =back |
282 | Returns the number of elements in the array C<name>. For example: |
256 | |
283 | |
|
|
284 | int primes[] = { 2, 3, 5, 7, 11 }; |
|
|
285 | int sum = 0; |
257 | |
286 | |
|
|
287 | for (i = 0; i < ecb_array_length (primes); i++) |
|
|
288 | sum += primes [i]; |
|
|
289 | |
|
|
290 | =back |
|
|
291 | |
|
|
292 | |