Commit | Line | Data |
---|---|---|
80f575fc DM |
1 | |
2 | /* autogenerated from motiondetectorc.orc */ | |
3 | ||
4 | #ifdef HAVE_CONFIG_H | |
5 | #include "config.h" | |
6 | #endif | |
7 | ||
/*
 * Fixed-width integer typedefs and small multi-view unions used by the
 * generated kernels.  The whole section is skipped when
 * _ORC_INTEGER_TYPEDEFS_ is already defined.
 */
#ifndef _ORC_INTEGER_TYPEDEFS_
#define _ORC_INTEGER_TYPEDEFS_
#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
/* C99 or newer: take the exact-width types from <stdint.h>. */
#include <stdint.h>
typedef int8_t orc_int8;
typedef int16_t orc_int16;
typedef int32_t orc_int32;
typedef int64_t orc_int64;
typedef uint8_t orc_uint8;
typedef uint16_t orc_uint16;
typedef uint32_t orc_uint32;
typedef uint64_t orc_uint64;
#define ORC_UINT64_C(x) UINT64_C(x)
#elif defined(_MSC_VER)
/* Microsoft compiler not reporting C99: use its sized __intN types. */
typedef signed __int8 orc_int8;
typedef signed __int16 orc_int16;
typedef signed __int32 orc_int32;
typedef signed __int64 orc_int64;
typedef unsigned __int8 orc_uint8;
typedef unsigned __int16 orc_uint16;
typedef unsigned __int32 orc_uint32;
typedef unsigned __int64 orc_uint64;
#define ORC_UINT64_C(x) (x##Ui64)
/* Spell `inline` as the compiler's __inline keyword. */
#define inline __inline
#else
/* Any other compiler: fall back to the basic C integer types. */
#include <limits.h>
typedef signed char orc_int8;
typedef short orc_int16;
typedef int orc_int32;
typedef unsigned char orc_uint8;
typedef unsigned short orc_uint16;
typedef unsigned int orc_uint32;
#if INT_MAX == LONG_MAX
/* long has the same maximum as int here, so the 64-bit names use long long. */
typedef long long orc_int64;
typedef unsigned long long orc_uint64;
#define ORC_UINT64_C(x) (x##ULL)
#else
/* long has a different maximum than int; it is used for the 64-bit names. */
typedef long orc_int64;
typedef unsigned long orc_uint64;
#define ORC_UINT64_C(x) (x##UL)
#endif
#endif
/* Unions exposing the same storage as one integer, as float/double, or as smaller lanes. */
typedef union { orc_int16 i; orc_int8 x2[2]; } orc_union16;
typedef union { orc_int32 i; float f; orc_int16 x2[2]; orc_int8 x4[4]; } orc_union32;
typedef union { orc_int64 i; double f; orc_int32 x2[2]; float x2f[2]; orc_int16 x4[4]; } orc_union64;
#endif
/*
 * ORC_RESTRICT: the no-alias pointer qualifier in whichever spelling the
 * compiler accepts (C99 `restrict`, GCC >= 4 `__restrict__`), or nothing
 * when neither applies.  Skipped if ORC_RESTRICT is already defined.
 */
#ifndef ORC_RESTRICT
#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
#define ORC_RESTRICT restrict
#elif defined(__GNUC__) && __GNUC__ >= 4
#define ORC_RESTRICT __restrict__
#else
#define ORC_RESTRICT
#endif
#endif
63 | ||
64 | #ifndef DISABLE_ORC | |
65 | #include <orc/orc.h> | |
66 | #endif | |
/*
 * Public kernels.  In every function n is the number of bytes processed per
 * row, m (where present) is the number of rows, and *_stride is the byte
 * offset between the starts of consecutive rows.  The result is stored in *a1.
 */
/* Sum of |s1 - s2| over every byte (read as unsigned) of an n x m block. */
void image_difference_optimized (orc_uint32 * ORC_RESTRICT a1, const orc_uint8 * ORC_RESTRICT s1, int s1_stride, const orc_uint8 * ORC_RESTRICT s2, int s2_stride, int n, int m);
/* Sum of |s1 - s2| over a single run of n bytes (read as unsigned). */
void image_line_difference_optimized (orc_uint32 * ORC_RESTRICT a1, const orc_uint8 * ORC_RESTRICT s1, const orc_uint8 * ORC_RESTRICT s2, int n);
/* Sum of every byte (read as unsigned) of an n x m block. */
void image_sum_optimized (int * ORC_RESTRICT a1, const orc_uint8 * ORC_RESTRICT s1, int s1_stride, int n, int m);
/* Sum of |s1 - p2| over an n x m block; p2 is narrowed to a single byte and compared as unsigned. */
void image_variance_optimized (int * ORC_RESTRICT a1, const orc_uint8 * ORC_RESTRICT s1, int s1_stride, int p2, int n, int m);
71 | ||
72 | ||
/* begin Orc C target preamble */
/* Generic helpers.  These are macros, so an argument may be evaluated more than once. */
#define ORC_CLAMP(x,a,b) ((x)<(a) ? (a) : ((x)>(b) ? (b) : (x)))
#define ORC_ABS(a) ((a)<0 ? -(a) : (a))
#define ORC_MIN(a,b) ((a)<(b) ? (a) : (b))
#define ORC_MAX(a,b) ((a)>(b) ? (a) : (b))
/* Limits of signed/unsigned byte (B), word (W) and long (L) values. */
#define ORC_SB_MAX 127
#define ORC_SB_MIN (-1-ORC_SB_MAX)
#define ORC_UB_MAX 255
#define ORC_UB_MIN 0
#define ORC_SW_MAX 32767
#define ORC_SW_MIN (-1-ORC_SW_MAX)
#define ORC_UW_MAX 65535
#define ORC_UW_MIN 0
#define ORC_SL_MAX 2147483647
#define ORC_SL_MIN (-1-ORC_SL_MAX)
#define ORC_UL_MAX 4294967295U
#define ORC_UL_MIN 0
/* Clamp x into the range given by the limits above. */
#define ORC_CLAMP_SB(x) ORC_CLAMP(x,ORC_SB_MIN,ORC_SB_MAX)
#define ORC_CLAMP_UB(x) ORC_CLAMP(x,ORC_UB_MIN,ORC_UB_MAX)
#define ORC_CLAMP_SW(x) ORC_CLAMP(x,ORC_SW_MIN,ORC_SW_MAX)
#define ORC_CLAMP_UW(x) ORC_CLAMP(x,ORC_UW_MIN,ORC_UW_MAX)
#define ORC_CLAMP_SL(x) ORC_CLAMP(x,ORC_SL_MIN,ORC_SL_MAX)
#define ORC_CLAMP_UL(x) ORC_CLAMP(x,ORC_UL_MIN,ORC_UL_MAX)
/* Reverse the byte order of a 16-bit (W), 32-bit (L) or 64-bit (Q) value. */
#define ORC_SWAP_W(x) ((((x)&0xff)<<8) | (((x)&0xff00)>>8))
#define ORC_SWAP_L(x) ((((x)&0xff)<<24) | (((x)&0xff00)<<8) | (((x)&0xff0000)>>8) | (((x)&0xff000000)>>24))
#define ORC_SWAP_Q(x) ((((x)&ORC_UINT64_C(0xff))<<56) | (((x)&ORC_UINT64_C(0xff00))<<40) | (((x)&ORC_UINT64_C(0xff0000))<<24) | (((x)&ORC_UINT64_C(0xff000000))<<8) | (((x)&ORC_UINT64_C(0xff00000000))>>8) | (((x)&ORC_UINT64_C(0xff0000000000))>>24) | (((x)&ORC_UINT64_C(0xff000000000000))>>40) | (((x)&ORC_UINT64_C(0xff00000000000000))>>56))
/* ptr advanced by `offset` bytes, returned as void *. */
#define ORC_PTR_OFFSET(ptr,offset) ((void *)(((unsigned char *)(ptr)) + (offset)))
/*
 * Helpers working on the integer bit pattern of a floating-point value.
 * The masks used match the exponent and mantissa fields of 32-bit and 64-bit
 * IEEE-754 formats.  ORC_DENORMAL* keep only the sign and exponent bits when
 * the exponent field is zero; ORC_ISNAN* test for an all-ones exponent field
 * with a non-zero mantissa.
 */
#define ORC_DENORMAL(x) ((x) & ((((x)&0x7f800000) == 0) ? 0xff800000 : 0xffffffff))
#define ORC_ISNAN(x) ((((x)&0x7f800000) == 0x7f800000) && (((x)&0x007fffff) != 0))
#define ORC_DENORMAL_DOUBLE(x) ((x) & ((((x)&ORC_UINT64_C(0x7ff0000000000000)) == 0) ? ORC_UINT64_C(0xfff0000000000000) : ORC_UINT64_C(0xffffffffffffffff)))
#define ORC_ISNAN_DOUBLE(x) ((((x)&ORC_UINT64_C(0x7ff0000000000000)) == ORC_UINT64_C(0x7ff0000000000000)) && (((x)&ORC_UINT64_C(0x000fffffffffffff)) != 0))
/* Fallback definition of ORC_RESTRICT; has no effect when the macro is already defined. */
#ifndef ORC_RESTRICT
#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
#define ORC_RESTRICT restrict
#elif defined(__GNUC__) && __GNUC__ >= 4
#define ORC_RESTRICT __restrict__
#else
#define ORC_RESTRICT
#endif
#endif
/* end Orc C target preamble */
114 | ||
115 | ||
116 | ||
117 | /* image_difference_optimized */ | |
118 | #ifdef DISABLE_ORC | |
119 | void | |
120 | image_difference_optimized (orc_uint32 * ORC_RESTRICT a1, const orc_uint8 * ORC_RESTRICT s1, int s1_stride, const orc_uint8 * ORC_RESTRICT s2, int s2_stride, int n, int m){ | |
121 | int i; | |
122 | int j; | |
123 | const orc_int8 * ORC_RESTRICT ptr4; | |
124 | const orc_int8 * ORC_RESTRICT ptr5; | |
125 | orc_union32 var12 = { 0 }; | |
126 | orc_int8 var32; | |
127 | orc_int8 var33; | |
128 | ||
129 | for (j = 0; j < m; j++) { | |
130 | ptr4 = ORC_PTR_OFFSET(s1, s1_stride * j); | |
131 | ptr5 = ORC_PTR_OFFSET(s2, s2_stride * j); | |
132 | ||
133 | ||
134 | for (i = 0; i < n; i++) { | |
135 | /* 0: loadb */ | |
136 | var32 = ptr4[i]; | |
137 | /* 1: loadb */ | |
138 | var33 = ptr5[i]; | |
139 | /* 2: accsadubl */ | |
140 | var12.i = var12.i + ORC_ABS((orc_int32)(orc_uint8)var32 - (orc_int32)(orc_uint8)var33); | |
141 | } | |
142 | } | |
143 | *a1 = var12.i; | |
144 | ||
145 | } | |
146 | ||
147 | #else | |
148 | static void | |
149 | _backup_image_difference_optimized (OrcExecutor * ORC_RESTRICT ex) | |
150 | { | |
151 | int i; | |
152 | int j; | |
153 | int n = ex->n; | |
154 | int m = ex->params[ORC_VAR_A1]; | |
155 | const orc_int8 * ORC_RESTRICT ptr4; | |
156 | const orc_int8 * ORC_RESTRICT ptr5; | |
157 | orc_union32 var12 = { 0 }; | |
158 | orc_int8 var32; | |
159 | orc_int8 var33; | |
160 | ||
161 | for (j = 0; j < m; j++) { | |
162 | ptr4 = ORC_PTR_OFFSET(ex->arrays[4], ex->params[4] * j); | |
163 | ptr5 = ORC_PTR_OFFSET(ex->arrays[5], ex->params[5] * j); | |
164 | ||
165 | ||
166 | for (i = 0; i < n; i++) { | |
167 | /* 0: loadb */ | |
168 | var32 = ptr4[i]; | |
169 | /* 1: loadb */ | |
170 | var33 = ptr5[i]; | |
171 | /* 2: accsadubl */ | |
172 | var12.i = var12.i + ORC_ABS((orc_int32)(orc_uint8)var32 - (orc_int32)(orc_uint8)var33); | |
173 | } | |
174 | } | |
175 | ex->accumulators[0] = var12.i; | |
176 | ||
177 | } | |
178 | ||
179 | void | |
180 | image_difference_optimized (orc_uint32 * ORC_RESTRICT a1, const orc_uint8 * ORC_RESTRICT s1, int s1_stride, const orc_uint8 * ORC_RESTRICT s2, int s2_stride, int n, int m) | |
181 | { | |
182 | OrcExecutor _ex, *ex = &_ex; | |
183 | static int p_inited = 0; | |
184 | static OrcCode *c = 0; | |
185 | void (*func) (OrcExecutor *); | |
186 | ||
187 | if (!p_inited) { | |
188 | orc_once_mutex_lock (); | |
189 | if (!p_inited) { | |
190 | OrcProgram *p; | |
191 | ||
192 | p = orc_program_new (); | |
193 | orc_program_set_2d (p); | |
194 | orc_program_set_name (p, "image_difference_optimized"); | |
195 | orc_program_set_backup_function (p, _backup_image_difference_optimized); | |
196 | orc_program_add_source (p, 1, "s1"); | |
197 | orc_program_add_source (p, 1, "s2"); | |
198 | orc_program_add_accumulator (p, 4, "a1"); | |
199 | ||
200 | orc_program_append_2 (p, "accsadubl", 0, ORC_VAR_A1, ORC_VAR_S1, ORC_VAR_S2, ORC_VAR_D1); | |
201 | ||
202 | orc_program_compile (p); | |
203 | c = orc_program_take_code (p); | |
204 | orc_program_free (p); | |
205 | } | |
206 | p_inited = TRUE; | |
207 | orc_once_mutex_unlock (); | |
208 | } | |
209 | ex->arrays[ORC_VAR_A2] = c; | |
210 | ex->program = 0; | |
211 | ||
212 | ex->n = n; | |
213 | ORC_EXECUTOR_M(ex) = m; | |
214 | ex->arrays[ORC_VAR_S1] = (void *)s1; | |
215 | ex->params[ORC_VAR_S1] = s1_stride; | |
216 | ex->arrays[ORC_VAR_S2] = (void *)s2; | |
217 | ex->params[ORC_VAR_S2] = s2_stride; | |
218 | ||
219 | func = c->exec; | |
220 | func (ex); | |
221 | *a1 = orc_executor_get_accumulator (ex, ORC_VAR_A1); | |
222 | } | |
223 | #endif | |
224 | ||
225 | ||
226 | /* image_line_difference_optimized */ | |
227 | #ifdef DISABLE_ORC | |
228 | void | |
229 | image_line_difference_optimized (orc_uint32 * ORC_RESTRICT a1, const orc_uint8 * ORC_RESTRICT s1, const orc_uint8 * ORC_RESTRICT s2, int n){ | |
230 | int i; | |
231 | const orc_int8 * ORC_RESTRICT ptr4; | |
232 | const orc_int8 * ORC_RESTRICT ptr5; | |
233 | orc_union32 var12 = { 0 }; | |
234 | orc_int8 var32; | |
235 | orc_int8 var33; | |
236 | ||
237 | ptr4 = (orc_int8 *)s1; | |
238 | ptr5 = (orc_int8 *)s2; | |
239 | ||
240 | ||
241 | for (i = 0; i < n; i++) { | |
242 | /* 0: loadb */ | |
243 | var32 = ptr4[i]; | |
244 | /* 1: loadb */ | |
245 | var33 = ptr5[i]; | |
246 | /* 2: accsadubl */ | |
247 | var12.i = var12.i + ORC_ABS((orc_int32)(orc_uint8)var32 - (orc_int32)(orc_uint8)var33); | |
248 | } | |
249 | *a1 = var12.i; | |
250 | ||
251 | } | |
252 | ||
253 | #else | |
254 | static void | |
255 | _backup_image_line_difference_optimized (OrcExecutor * ORC_RESTRICT ex) | |
256 | { | |
257 | int i; | |
258 | int n = ex->n; | |
259 | const orc_int8 * ORC_RESTRICT ptr4; | |
260 | const orc_int8 * ORC_RESTRICT ptr5; | |
261 | orc_union32 var12 = { 0 }; | |
262 | orc_int8 var32; | |
263 | orc_int8 var33; | |
264 | ||
265 | ptr4 = (orc_int8 *)ex->arrays[4]; | |
266 | ptr5 = (orc_int8 *)ex->arrays[5]; | |
267 | ||
268 | ||
269 | for (i = 0; i < n; i++) { | |
270 | /* 0: loadb */ | |
271 | var32 = ptr4[i]; | |
272 | /* 1: loadb */ | |
273 | var33 = ptr5[i]; | |
274 | /* 2: accsadubl */ | |
275 | var12.i = var12.i + ORC_ABS((orc_int32)(orc_uint8)var32 - (orc_int32)(orc_uint8)var33); | |
276 | } | |
277 | ex->accumulators[0] = var12.i; | |
278 | ||
279 | } | |
280 | ||
281 | void | |
282 | image_line_difference_optimized (orc_uint32 * ORC_RESTRICT a1, const orc_uint8 * ORC_RESTRICT s1, const orc_uint8 * ORC_RESTRICT s2, int n) | |
283 | { | |
284 | OrcExecutor _ex, *ex = &_ex; | |
285 | static int p_inited = 0; | |
286 | static OrcCode *c = 0; | |
287 | void (*func) (OrcExecutor *); | |
288 | ||
289 | if (!p_inited) { | |
290 | orc_once_mutex_lock (); | |
291 | if (!p_inited) { | |
292 | OrcProgram *p; | |
293 | ||
294 | p = orc_program_new (); | |
295 | orc_program_set_name (p, "image_line_difference_optimized"); | |
296 | orc_program_set_backup_function (p, _backup_image_line_difference_optimized); | |
297 | orc_program_add_source (p, 1, "s1"); | |
298 | orc_program_add_source (p, 1, "s2"); | |
299 | orc_program_add_accumulator (p, 4, "a1"); | |
300 | ||
301 | orc_program_append_2 (p, "accsadubl", 0, ORC_VAR_A1, ORC_VAR_S1, ORC_VAR_S2, ORC_VAR_D1); | |
302 | ||
303 | orc_program_compile (p); | |
304 | c = orc_program_take_code (p); | |
305 | orc_program_free (p); | |
306 | } | |
307 | p_inited = TRUE; | |
308 | orc_once_mutex_unlock (); | |
309 | } | |
310 | ex->arrays[ORC_VAR_A2] = c; | |
311 | ex->program = 0; | |
312 | ||
313 | ex->n = n; | |
314 | ex->arrays[ORC_VAR_S1] = (void *)s1; | |
315 | ex->arrays[ORC_VAR_S2] = (void *)s2; | |
316 | ||
317 | func = c->exec; | |
318 | func (ex); | |
319 | *a1 = orc_executor_get_accumulator (ex, ORC_VAR_A1); | |
320 | } | |
321 | #endif | |
322 | ||
323 | ||
324 | /* image_sum_optimized */ | |
325 | #ifdef DISABLE_ORC | |
326 | void | |
327 | image_sum_optimized (int * ORC_RESTRICT a1, const orc_uint8 * ORC_RESTRICT s1, int s1_stride, int n, int m){ | |
328 | int i; | |
329 | int j; | |
330 | const orc_int8 * ORC_RESTRICT ptr4; | |
331 | orc_union32 var12 = { 0 }; | |
332 | orc_int8 var34; | |
333 | orc_union16 var35; | |
334 | orc_union32 var36; | |
335 | ||
336 | for (j = 0; j < m; j++) { | |
337 | ptr4 = ORC_PTR_OFFSET(s1, s1_stride * j); | |
338 | ||
339 | ||
340 | for (i = 0; i < n; i++) { | |
341 | /* 0: loadb */ | |
342 | var34 = ptr4[i]; | |
343 | /* 1: convubw */ | |
344 | var35.i = (orc_uint8)var34; | |
345 | /* 2: convuwl */ | |
346 | var36.i = (orc_uint16)var35.i; | |
347 | /* 3: accl */ | |
348 | var12.i = var12.i + var36.i; | |
349 | } | |
350 | } | |
351 | *a1 = var12.i; | |
352 | ||
353 | } | |
354 | ||
355 | #else | |
356 | static void | |
357 | _backup_image_sum_optimized (OrcExecutor * ORC_RESTRICT ex) | |
358 | { | |
359 | int i; | |
360 | int j; | |
361 | int n = ex->n; | |
362 | int m = ex->params[ORC_VAR_A1]; | |
363 | const orc_int8 * ORC_RESTRICT ptr4; | |
364 | orc_union32 var12 = { 0 }; | |
365 | orc_int8 var34; | |
366 | orc_union16 var35; | |
367 | orc_union32 var36; | |
368 | ||
369 | for (j = 0; j < m; j++) { | |
370 | ptr4 = ORC_PTR_OFFSET(ex->arrays[4], ex->params[4] * j); | |
371 | ||
372 | ||
373 | for (i = 0; i < n; i++) { | |
374 | /* 0: loadb */ | |
375 | var34 = ptr4[i]; | |
376 | /* 1: convubw */ | |
377 | var35.i = (orc_uint8)var34; | |
378 | /* 2: convuwl */ | |
379 | var36.i = (orc_uint16)var35.i; | |
380 | /* 3: accl */ | |
381 | var12.i = var12.i + var36.i; | |
382 | } | |
383 | } | |
384 | ex->accumulators[0] = var12.i; | |
385 | ||
386 | } | |
387 | ||
388 | void | |
389 | image_sum_optimized (int * ORC_RESTRICT a1, const orc_uint8 * ORC_RESTRICT s1, int s1_stride, int n, int m) | |
390 | { | |
391 | OrcExecutor _ex, *ex = &_ex; | |
392 | static int p_inited = 0; | |
393 | static OrcCode *c = 0; | |
394 | void (*func) (OrcExecutor *); | |
395 | ||
396 | if (!p_inited) { | |
397 | orc_once_mutex_lock (); | |
398 | if (!p_inited) { | |
399 | OrcProgram *p; | |
400 | ||
401 | p = orc_program_new (); | |
402 | orc_program_set_2d (p); | |
403 | orc_program_set_name (p, "image_sum_optimized"); | |
404 | orc_program_set_backup_function (p, _backup_image_sum_optimized); | |
405 | orc_program_add_source (p, 1, "s1"); | |
406 | orc_program_add_accumulator (p, 4, "a1"); | |
407 | orc_program_add_temporary (p, 2, "t1"); | |
408 | orc_program_add_temporary (p, 4, "t2"); | |
409 | ||
410 | orc_program_append_2 (p, "convubw", 0, ORC_VAR_T1, ORC_VAR_S1, ORC_VAR_D1, ORC_VAR_D1); | |
411 | orc_program_append_2 (p, "convuwl", 0, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1); | |
412 | orc_program_append_2 (p, "accl", 0, ORC_VAR_A1, ORC_VAR_T2, ORC_VAR_D1, ORC_VAR_D1); | |
413 | ||
414 | orc_program_compile (p); | |
415 | c = orc_program_take_code (p); | |
416 | orc_program_free (p); | |
417 | } | |
418 | p_inited = TRUE; | |
419 | orc_once_mutex_unlock (); | |
420 | } | |
421 | ex->arrays[ORC_VAR_A2] = c; | |
422 | ex->program = 0; | |
423 | ||
424 | ex->n = n; | |
425 | ORC_EXECUTOR_M(ex) = m; | |
426 | ex->arrays[ORC_VAR_S1] = (void *)s1; | |
427 | ex->params[ORC_VAR_S1] = s1_stride; | |
428 | ||
429 | func = c->exec; | |
430 | func (ex); | |
431 | *a1 = orc_executor_get_accumulator (ex, ORC_VAR_A1); | |
432 | } | |
433 | #endif | |
434 | ||
435 | ||
436 | /* image_variance_optimized */ | |
437 | #ifdef DISABLE_ORC | |
438 | void | |
439 | image_variance_optimized (int * ORC_RESTRICT a1, const orc_uint8 * ORC_RESTRICT s1, int s1_stride, int p2, int n, int m){ | |
440 | int i; | |
441 | int j; | |
442 | const orc_int8 * ORC_RESTRICT ptr4; | |
443 | orc_union32 var12 = { 0 }; | |
444 | orc_int8 var32; | |
445 | orc_int8 var33; | |
446 | ||
447 | for (j = 0; j < m; j++) { | |
448 | ptr4 = ORC_PTR_OFFSET(s1, s1_stride * j); | |
449 | ||
450 | /* 1: loadpb */ | |
451 | var33 = p2; | |
452 | ||
453 | for (i = 0; i < n; i++) { | |
454 | /* 0: loadb */ | |
455 | var32 = ptr4[i]; | |
456 | /* 2: accsadubl */ | |
457 | var12.i = var12.i + ORC_ABS((orc_int32)(orc_uint8)var32 - (orc_int32)(orc_uint8)var33); | |
458 | } | |
459 | } | |
460 | *a1 = var12.i; | |
461 | ||
462 | } | |
463 | ||
464 | #else | |
465 | static void | |
466 | _backup_image_variance_optimized (OrcExecutor * ORC_RESTRICT ex) | |
467 | { | |
468 | int i; | |
469 | int j; | |
470 | int n = ex->n; | |
471 | int m = ex->params[ORC_VAR_A1]; | |
472 | const orc_int8 * ORC_RESTRICT ptr4; | |
473 | orc_union32 var12 = { 0 }; | |
474 | orc_int8 var32; | |
475 | orc_int8 var33; | |
476 | ||
477 | for (j = 0; j < m; j++) { | |
478 | ptr4 = ORC_PTR_OFFSET(ex->arrays[4], ex->params[4] * j); | |
479 | ||
480 | /* 1: loadpb */ | |
481 | var33 = ex->params[25]; | |
482 | ||
483 | for (i = 0; i < n; i++) { | |
484 | /* 0: loadb */ | |
485 | var32 = ptr4[i]; | |
486 | /* 2: accsadubl */ | |
487 | var12.i = var12.i + ORC_ABS((orc_int32)(orc_uint8)var32 - (orc_int32)(orc_uint8)var33); | |
488 | } | |
489 | } | |
490 | ex->accumulators[0] = var12.i; | |
491 | ||
492 | } | |
493 | ||
494 | void | |
495 | image_variance_optimized (int * ORC_RESTRICT a1, const orc_uint8 * ORC_RESTRICT s1, int s1_stride, int p2, int n, int m) | |
496 | { | |
497 | OrcExecutor _ex, *ex = &_ex; | |
498 | static int p_inited = 0; | |
499 | static OrcCode *c = 0; | |
500 | void (*func) (OrcExecutor *); | |
501 | ||
502 | if (!p_inited) { | |
503 | orc_once_mutex_lock (); | |
504 | if (!p_inited) { | |
505 | OrcProgram *p; | |
506 | ||
507 | p = orc_program_new (); | |
508 | orc_program_set_2d (p); | |
509 | orc_program_set_name (p, "image_variance_optimized"); | |
510 | orc_program_set_backup_function (p, _backup_image_variance_optimized); | |
511 | orc_program_add_source (p, 1, "s1"); | |
512 | orc_program_add_accumulator (p, 4, "a1"); | |
513 | orc_program_add_parameter (p, 1, "p2"); | |
514 | ||
515 | orc_program_append_2 (p, "accsadubl", 0, ORC_VAR_A1, ORC_VAR_S1, ORC_VAR_P2, ORC_VAR_D1); | |
516 | ||
517 | orc_program_compile (p); | |
518 | c = orc_program_take_code (p); | |
519 | orc_program_free (p); | |
520 | } | |
521 | p_inited = TRUE; | |
522 | orc_once_mutex_unlock (); | |
523 | } | |
524 | ex->arrays[ORC_VAR_A2] = c; | |
525 | ex->program = 0; | |
526 | ||
527 | ex->n = n; | |
528 | ORC_EXECUTOR_M(ex) = m; | |
529 | ex->arrays[ORC_VAR_S1] = (void *)s1; | |
530 | ex->params[ORC_VAR_S1] = s1_stride; | |
531 | ex->params[ORC_VAR_P2] = p2; | |
532 | ||
533 | func = c->exec; | |
534 | func (ex); | |
535 | *a1 = orc_executor_get_accumulator (ex, ORC_VAR_A1); | |
536 | } | |
537 | #endif | |
538 | ||
539 |