Commit | Line | Data |
---|---|---|
72b9787e JB |
1 | /***************************************************************************** |
2 | * Copyright (C) 2013 x265 project | |
3 | * | |
4 | * Authors: Steve Borho <steve@borho.org> | |
5 | * | |
6 | * This program is free software; you can redistribute it and/or modify | |
7 | * it under the terms of the GNU General Public License as published by | |
8 | * the Free Software Foundation; either version 2 of the License, or | |
9 | * (at your option) any later version. | |
10 | * | |
11 | * This program is distributed in the hope that it will be useful, | |
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
14 | * GNU General Public License for more details. | |
15 | * | |
16 | * You should have received a copy of the GNU General Public License | |
17 | * along with this program; if not, write to the Free Software | |
18 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA. | |
19 | * | |
20 | * This program is also available under a commercial proprietary license. | |
21 | * For more information, contact us at license @ x265.com | |
22 | *****************************************************************************/ | |
23 | ||
24 | #include "threadpool.h" | |
25 | #include "threading.h" | |
26 | #include "wavefront.h" | |
27 | #include "common.h" | |
28 | ||
29 | namespace x265 { | |
30 | // x265 private namespace | |
31 | ||
32 | bool WaveFront::init(int numRows) | |
33 | { | |
34 | m_numRows = numRows; | |
35 | ||
36 | m_numWords = (numRows + 63) >> 6; | |
37 | m_internalDependencyBitmap = X265_MALLOC(uint64_t, m_numWords); | |
38 | if (m_internalDependencyBitmap) | |
39 | memset((void*)m_internalDependencyBitmap, 0, sizeof(uint64_t) * m_numWords); | |
40 | ||
41 | m_externalDependencyBitmap = X265_MALLOC(uint64_t, m_numWords); | |
42 | if (m_externalDependencyBitmap) | |
43 | memset((void*)m_externalDependencyBitmap, 0, sizeof(uint64_t) * m_numWords); | |
44 | ||
45 | return m_internalDependencyBitmap && m_externalDependencyBitmap; | |
46 | } | |
47 | ||
48 | WaveFront::~WaveFront() | |
49 | { | |
50 | x265_free((void*)m_internalDependencyBitmap); | |
51 | x265_free((void*)m_externalDependencyBitmap); | |
52 | } | |
53 | ||
54 | void WaveFront::clearEnabledRowMask() | |
55 | { | |
56 | memset((void*)m_externalDependencyBitmap, 0, sizeof(uint64_t) * m_numWords); | |
57 | } | |
58 | ||
59 | void WaveFront::enqueueRow(int row) | |
60 | { | |
61 | // thread safe | |
62 | uint64_t bit = 1LL << (row & 63); | |
63 | ||
64 | X265_CHECK(row < m_numRows, "invalid row\n"); | |
65 | ATOMIC_OR(&m_internalDependencyBitmap[row >> 6], bit); | |
66 | if (m_pool) m_pool->pokeIdleThread(); | |
67 | } | |
68 | ||
69 | void WaveFront::enableRow(int row) | |
70 | { | |
71 | // thread safe | |
72 | uint64_t bit = 1LL << (row & 63); | |
73 | ||
74 | X265_CHECK(row < m_numRows, "invalid row\n"); | |
75 | ATOMIC_OR(&m_externalDependencyBitmap[row >> 6], bit); | |
76 | } | |
77 | ||
78 | void WaveFront::enableAllRows() | |
79 | { | |
80 | memset((void*)m_externalDependencyBitmap, ~0, sizeof(uint64_t) * m_numWords); | |
81 | } | |
82 | ||
83 | bool WaveFront::checkHigherPriorityRow(int curRow) | |
84 | { | |
85 | int fullwords = curRow >> 6; | |
86 | uint64_t mask = (1LL << (curRow & 63)) - 1; | |
87 | ||
88 | // Check full bitmap words before curRow | |
89 | for (int i = 0; i < fullwords; i++) | |
90 | { | |
91 | if (m_internalDependencyBitmap[i] & m_externalDependencyBitmap[i]) | |
92 | return true; | |
93 | } | |
94 | ||
95 | // check the partially masked bitmap word of curRow | |
96 | if (m_internalDependencyBitmap[fullwords] & m_externalDependencyBitmap[fullwords] & mask) | |
97 | return true; | |
98 | return false; | |
99 | } | |
100 | ||
101 | bool WaveFront::dequeueRow(int row) | |
102 | { | |
103 | uint64_t oldval, newval; | |
104 | ||
105 | oldval = m_internalDependencyBitmap[row >> 6]; | |
106 | newval = oldval & ~(1LL << (row & 63)); | |
107 | return ATOMIC_CAS(&m_internalDependencyBitmap[row >> 6], oldval, newval) == oldval; | |
108 | } | |
109 | ||
110 | bool WaveFront::findJob(int threadId) | |
111 | { | |
112 | unsigned long id; | |
113 | ||
114 | // thread safe | |
115 | for (int w = 0; w < m_numWords; w++) | |
116 | { | |
117 | uint64_t oldval = m_internalDependencyBitmap[w]; | |
118 | while (oldval & m_externalDependencyBitmap[w]) | |
119 | { | |
120 | uint64_t mask = oldval & m_externalDependencyBitmap[w]; | |
121 | ||
122 | CTZ64(id, mask); | |
123 | ||
124 | uint64_t newval = oldval & ~(1LL << id); | |
125 | if (ATOMIC_CAS(&m_internalDependencyBitmap[w], oldval, newval) == oldval) | |
126 | { | |
127 | // we cleared the bit, process row | |
128 | processRow(w * 64 + id, threadId); | |
129 | return true; | |
130 | } | |
131 | // some other thread cleared the bit, try another bit | |
132 | oldval = m_internalDependencyBitmap[w]; | |
133 | } | |
134 | } | |
135 | ||
136 | // made it through the bitmap without finding any enqueued rows | |
137 | return false; | |
138 | } | |
139 | } |