Commit | Line | Data |
---|---|---|
72b9787e JB |
1 | /***************************************************************************** |
2 | * Copyright (C) 2013 x265 project | |
3 | * | |
4 | * Authors: Steve Borho <steve@borho.org> | |
5 | * | |
6 | * This program is free software; you can redistribute it and/or modify | |
7 | * it under the terms of the GNU General Public License as published by | |
8 | * the Free Software Foundation; either version 2 of the License, or | |
9 | * (at your option) any later version. | |
10 | * | |
11 | * This program is distributed in the hope that it will be useful, | |
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
14 | * GNU General Public License for more details. | |
15 | * | |
16 | * You should have received a copy of the GNU General Public License | |
17 | * along with this program; if not, write to the Free Software | |
18 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA. | |
19 | * | |
20 | * This program is also available under a commercial proprietary license. | |
21 | * For more information, contact us at license @ x265.com. | |
22 | *****************************************************************************/ | |
23 | ||
24 | #ifndef _TESTHARNESS_H_ | |
25 | #define _TESTHARNESS_H_ 1 | |
26 | ||
27 | #include "common.h" | |
28 | #include "primitives.h" | |
29 | ||
#ifdef _MSC_VER
#pragma warning(disable: 4324) // structure was padded due to __declspec(align())
#endif

/* Sample bit depth used by the test bench: 10 bits when HIGH_BIT_DEPTH is
 * enabled, 8 bits otherwise. */
#if HIGH_BIT_DEPTH
#define BIT_DEPTH 10
#else
#define BIT_DEPTH 8
#endif

/* Value ranges used by the tests.  Every replacement list that is more than a
 * single literal is parenthesized so it expands safely inside any expression.
 * NOTE(review): SHORT_MIN is -32767 rather than -32768; presumably deliberate
 * so that the signed range is symmetric - confirm before changing it. */
#define PIXEL_MAX ((1 << BIT_DEPTH) - 1)
#define PIXEL_MIN 0
#define SHORT_MAX 32767
#define SHORT_MIN (-32767)
#define UNSIGNED_SHORT_MAX 65535
44 | ||
// NOTE(review): this using-directive sits at header scope, so every file that
// includes this header also gets the whole x265 namespace imported.
using namespace x265;

// String tables defined in another translation unit.  lumaPartStr holds
// NUM_LUMA_PARTITIONS C strings; chromaPartStr holds X265_CSP_COUNT pointers,
// each to an array of C strings (presumably printable partition names for
// each color space - confirm against the definition).
extern const char* lumaPartStr[NUM_LUMA_PARTITIONS];
extern const char* const* chromaPartStr[X265_CSP_COUNT];
49 | ||
50 | class TestHarness | |
51 | { | |
52 | public: | |
53 | ||
54 | TestHarness() {} | |
55 | ||
56 | virtual ~TestHarness() {} | |
57 | ||
58 | virtual bool testCorrectness(const EncoderPrimitives& ref, const EncoderPrimitives& opt) = 0; | |
59 | ||
60 | virtual void measureSpeed(const EncoderPrimitives& ref, const EncoderPrimitives& opt) = 0; | |
61 | ||
62 | virtual const char *getName() const = 0; | |
63 | ||
64 | protected: | |
65 | ||
66 | /* Temporary variables for stack checks */ | |
67 | int m_ok; | |
68 | ||
69 | uint64_t m_rand; | |
70 | }; | |
71 | ||
#if defined(_MSC_VER)
#include <intrin.h>
#elif HAVE_RDTSC
#include <intrin.h>
#elif defined(__GNUC__)
/* Stand-in for the __rdtsc intrinsic on older GCC/MinGW toolchains.  Executes
 * the RDTSC instruction and returns only the value left in EAX (the low 32
 * bits of the counter); EDX is listed as clobbered and its contents dropped. */
static inline uint32_t __rdtsc(void)
{
    uint32_t lowWord = 0;

    __asm__ __volatile__ ("rdtsc" : "=a" (lowWord) : : "edx");
    return lowWord;
}

#endif // _MSC_VER / HAVE_RDTSC / __GNUC__
87 | ||
#define BENCH_RUNS 1000

// Adapted from checkasm.c.  RUNOPT(args) is called once as a warm-up and then
// timed in BENCH_RUNS batches of four calls, each batch bracketed by __rdtsc().
// A batch is accepted only if its time is no more than four times the running
// average of the batches accepted so far (the first batch is always dropped).
// RUNREF(args) is measured the same way with BENCH_RUNS / 4 batches.  Finally
// the macro prints the ref/opt ratio followed by both averages, expressed as
// ten times the average cycle count of a single call.
//
// The accumulators and the outlier comparison use 64-bit arithmetic: with
// 32-bit operands both "t1 * runs" and "cycles * 4" wrap around for slow
// primitives, which made the filter accept or reject batches arbitrarily.
//
// The macro expands to a brace-enclosed block and relies on __rdtsc(),
// x265_emms() and printf() being visible at the point of use.
#define REPORT_SPEEDUP(RUNOPT, RUNREF, ...) \
    { \
        uint64_t cycles = 0; int runs = 0; \
        RUNOPT(__VA_ARGS__); \
        for (int ti = 0; ti < BENCH_RUNS; ti++) { \
            uint32_t t0 = (uint32_t)__rdtsc(); \
            RUNOPT(__VA_ARGS__); \
            RUNOPT(__VA_ARGS__); \
            RUNOPT(__VA_ARGS__); \
            RUNOPT(__VA_ARGS__); \
            uint32_t t1 = (uint32_t)__rdtsc() - t0; \
            if ((uint64_t)t1 * (uint64_t)runs <= cycles * 4 && ti > 0) { cycles += t1; runs++; } \
        } \
        uint64_t refcycles = 0; int refruns = 0; \
        RUNREF(__VA_ARGS__); \
        for (int ti = 0; ti < BENCH_RUNS / 4; ti++) { \
            uint32_t t0 = (uint32_t)__rdtsc(); \
            RUNREF(__VA_ARGS__); \
            RUNREF(__VA_ARGS__); \
            RUNREF(__VA_ARGS__); \
            RUNREF(__VA_ARGS__); \
            uint32_t t1 = (uint32_t)__rdtsc() - t0; \
            if ((uint64_t)t1 * (uint64_t)refruns <= refcycles * 4 && ti > 0) { refcycles += t1; refruns++; } \
        } \
        x265_emms(); \
        float optperf = (10.0f * cycles / runs) / 4; \
        float refperf = (10.0f * refcycles / refruns) / 4; \
        printf("\t%3.2fx ", refperf / optperf); \
        printf("\t %-8.2lf \t %-8.2lf\n", optperf, refperf); \
    }
123 | ||
extern "C" {
#if X265_ARCH_X86
/* Implemented outside this header.
 * NOTE(review): presumably runs func on a stack aligned to 'align' bytes -
 * confirm against the implementation. */
int x265_stack_pagealign(int (*func)(), int align);

/* detect when callee-saved regs aren't saved
 * needs an explicit asm check because it only sometimes crashes in normal use. */
intptr_t x265_checkasm_call(intptr_t (*func)(), int *ok, ...);
float x265_checkasm_call_float(float (*func)(), int *ok, ...);
#else
/* No x86 helper available: call func directly and ignore the alignment. */
#define x265_stack_pagealign(func, align) func()
#endif

/* checked(), checked_float() and reportfail() read and write m_ok / m_rand,
 * so they can only be used where those names are in scope (the members of
 * TestHarness).  In every configuration checked() and checked_float() are
 * plain expressions that yield the callee's return value. */
#if X86_64

/* Evil hack: detect incorrect assumptions that 32-bit ints are zero-extended to 64-bit.
 * This is done by clobbering the stack with junk around the stack pointer and calling the
 * assembly function through x265_checkasm_call with added dummy arguments, which forces all
 * real arguments to be passed on the stack and not in registers. For a 32-bit argument the
 * upper half of the 64-bit register location on the stack will now contain junk. Note that
 * this is dependent on compiler behavior and that interrupts etc. at the wrong time may
 * overwrite the junk written to the stack, so there's no guarantee that it will always
 * detect all functions that assume zero-extension.
 */
void x265_checkasm_stack_clobber(uint64_t clobber, ...);
#define checked(func, ...) ( \
        m_ok = 1, m_rand = (rand() & 0xffff) * 0x0001000100010001ULL, \
        x265_checkasm_stack_clobber(m_rand, m_rand, m_rand, m_rand, m_rand, m_rand, m_rand, m_rand, \
                                    m_rand, m_rand, m_rand, m_rand, m_rand, m_rand, m_rand, m_rand, \
                                    m_rand, m_rand, m_rand, m_rand, m_rand), /* max_args+6 */ \
        x265_checkasm_call((intptr_t(*)())(func), &m_ok, 0, 0, 0, 0, __VA_ARGS__))

#define checked_float(func, ...) ( \
        m_ok = 1, m_rand = (rand() & 0xffff) * 0x0001000100010001ULL, \
        x265_checkasm_stack_clobber(m_rand, m_rand, m_rand, m_rand, m_rand, m_rand, m_rand, m_rand, \
                                    m_rand, m_rand, m_rand, m_rand, m_rand, m_rand, m_rand, m_rand, \
                                    m_rand, m_rand, m_rand, m_rand, m_rand), /* max_args+6 */ \
        x265_checkasm_call_float((float(*)())(func), &m_ok, 0, 0, 0, 0, __VA_ARGS__))
#define reportfail() if (!m_ok) { fprintf(stderr, "stack clobber check failed at %s:%d", __FILE__, __LINE__); abort(); }
#elif ARCH_X86
/* 32-bit x86: no stack clobbering, but the call still goes through the checkasm
 * wrapper.  m_ok is reset before each call so a stale failure is never reported,
 * the macros carry no trailing ';' so they remain usable inside expressions, and
 * reportfail() is defined so callers compile in this configuration as well.
 * NOTE(review): this branch tests ARCH_X86 while the declarations above are
 * guarded by X265_ARCH_X86 - confirm which macro the build system defines. */
#define checked(func, ...) ( \
        m_ok = 1, \
        x265_checkasm_call((intptr_t(*)())(func), &m_ok, __VA_ARGS__))
#define checked_float(func, ...) ( \
        m_ok = 1, \
        x265_checkasm_call_float((float(*)())(func), &m_ok, __VA_ARGS__))
#define reportfail() if (!m_ok) { fprintf(stderr, "stack clobber check failed at %s:%d", __FILE__, __LINE__); abort(); }

#else // if X86_64
/* Other targets: call the function directly; there is nothing to report. */
#define checked(func, ...) func(__VA_ARGS__)
#define checked_float(func, ...) func(__VA_ARGS__)
#define reportfail()
#endif // if X86_64
}
172 | ||
173 | #endif // ifndef _TESTHARNESS_H_ |