Commit | Line | Data |
---|---|---|
2ba45a60 DM |
1 | /* |
2 | * Copyright (C) 2009 David Conrad | |
3 | * | |
4 | * This file is part of FFmpeg. | |
5 | * | |
6 | * FFmpeg is free software; you can redistribute it and/or | |
7 | * modify it under the terms of the GNU Lesser General Public | |
8 | * License as published by the Free Software Foundation; either | |
9 | * version 2.1 of the License, or (at your option) any later version. | |
10 | * | |
11 | * FFmpeg is distributed in the hope that it will be useful, | |
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
14 | * Lesser General Public License for more details. | |
15 | * | |
16 | * You should have received a copy of the GNU Lesser General Public | |
17 | * License along with FFmpeg; if not, write to the Free Software | |
18 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |
19 | */ | |
20 | ||
21 | #include "avcodec.h" | |
22 | #include "diracdsp.h" | |
23 | #include "libavcodec/x86/diracdsp_mmx.h" | |
24 | ||
/*
 * 8-tap symmetric interpolation kernel evaluated half-way between
 * src[0] and src[stride]. Tap weights are (-1, 3, -7, 21, 21, -7, 3, -1),
 * the sum is rounded (+16) and scaled down by 32. The result is NOT clipped.
 */
#define FILTER(src, stride)                                      \
    ((21 * ((src)[ 0 * (stride)] + (src)[1 * (stride)])          \
      - 7 * ((src)[-1 * (stride)] + (src)[2 * (stride)])         \
      + 3 * ((src)[-2 * (stride)] + (src)[3 * (stride)])         \
      - 1 * ((src)[-3 * (stride)] + (src)[4 * (stride)]) + 16) >> 5)

/**
 * Build the three interpolated planes for a source plane.
 *
 * @param dsth   output: FILTER applied along a row (step 1) of src
 * @param dstv   output: FILTER applied along a column (step stride) of src
 * @param dstc   output: FILTER applied along a row of the dstv output
 * @param src    input plane; rows -3..+4 around each processed row and
 *               columns -3..width+4 are read
 * @param stride distance in bytes between rows, shared by all four planes
 * @param width  number of columns written to dsth and dstc
 * @param height number of rows processed
 */
static void dirac_hpel_filter(uint8_t *dsth, uint8_t *dstv, uint8_t *dstc, const uint8_t *src,
                              int stride, int width, int height)
{
    int row;

    for (row = 0; row < height; row++) {
        int col;

        /*
         * The column-direction pass covers columns [-3, width + 5) so that
         * the row-direction pass over dstv below has every neighbour it reads.
         */
        for (col = -3; col < width + 5; col++)
            dstv[col] = av_clip_uint8(FILTER(src + col, stride));

        /* Row-direction kernel over the already filtered dstv row. */
        for (col = 0; col < width; col++)
            dstc[col] = av_clip_uint8(FILTER(dstv + col, 1));

        /* Row-direction kernel over the untouched source row. */
        for (col = 0; col < width; col++)
            dsth[col] = av_clip_uint8(FILTER(src + col, 1));

        /* All four planes advance by the same row pitch. */
        src  += stride;
        dsth += stride;
        dstv += stride;
        dstc += stride;
    }
}
52 | ||
/* Store operators plugged into PIXOP_BILINEAR: overwrite, or rounded average with dst. */
#define OP_PUT(dst, val) (dst) = (val)
#define OP_AVG(dst, val) (dst) = (((dst) + (val) + 1)>>1)

/*
 * Generates ff_<PFX>_dirac_pixels<WIDTH>_bilinear_c().
 *
 * src[0..3] point to four reference blocks, src[4] points to four weights.
 * Each output pixel is the weighted sum of the four co-located reference
 * pixels, rounded (+8) and shifted right by 4, then stored through OP.
 * dst and all four references advance by `stride` per row; `h` rows are
 * processed.
 */
#define PIXOP_BILINEAR(PFX, OP, WIDTH)                                               \
static void ff_ ## PFX ## _dirac_pixels ## WIDTH ## _bilinear_c(uint8_t *dst,       \
        const uint8_t *src[5], int stride, int h)                                    \
{                                                                                    \
    const uint8_t *const coef = src[4];                                              \
    const uint8_t *ref0 = src[0];                                                    \
    const uint8_t *ref1 = src[1];                                                    \
    const uint8_t *ref2 = src[2];                                                    \
    const uint8_t *ref3 = src[3];                                                    \
                                                                                     \
    for (; h; h--) {                                                                 \
        int col;                                                                     \
                                                                                     \
        for (col = 0; col < WIDTH; col++) {                                          \
            const int blend = (ref0[col] * coef[0] + ref1[col] * coef[1] +           \
                               ref2[col] * coef[2] + ref3[col] * coef[3] + 8) >> 4;  \
            OP(dst[col], blend);                                                     \
        }                                                                            \
                                                                                     \
        dst  += stride;                                                              \
        ref0 += stride;                                                              \
        ref1 += stride;                                                              \
        ref2 += stride;                                                              \
        ref3 += stride;                                                              \
    }                                                                                \
}

PIXOP_BILINEAR(put, OP_PUT, 8)
PIXOP_BILINEAR(put, OP_PUT, 16)
PIXOP_BILINEAR(put, OP_PUT, 32)
PIXOP_BILINEAR(avg, OP_AVG, 8)
PIXOP_BILINEAR(avg, OP_AVG, 16)
PIXOP_BILINEAR(avg, OP_AVG, 32)
85 | ||
/*
 * Rounding term added before the final right shift by log2_denom.
 * With log2_denom == 0 there is no shift and therefore nothing to round;
 * handling that case explicitly also avoids the undefined shift 1 << -1.
 */
#define DIRAC_WEIGHT_RND(log2_denom) ((log2_denom) > 0 ? 1 << ((log2_denom) - 1) : 0)

/*
 * Per-pixel weighting helpers. They expect the enclosing function to provide
 * block (or dst/src), weight (or weightd/weights), log2_denom and rnd.
 */
#define op_scale1(x)  block[x] = av_clip_uint8( (block[x]*weight + rnd) >> log2_denom)
#define op_scale2(x)  dst[x] = av_clip_uint8( (src[x]*weights + dst[x]*weightd + rnd) >> log2_denom)

/*
 * Generates weight_dirac_pixels<W>_c() and biweight_dirac_pixels<W>_c().
 *
 * weight:   block[x] = clip8((block[x] * weight + rnd) >> log2_denom)
 * biweight: dst[x]   = clip8((src[x] * weights + dst[x] * weightd + rnd) >> log2_denom)
 *
 * Both operate in place on W columns and h rows, advancing by `stride`
 * per row. The inner loop handles two pixels per iteration and therefore
 * steps by 2, so each pixel is scaled exactly once and no column beyond
 * W - 1 is touched.
 */
#define DIRAC_WEIGHT(W)                                                                        \
    static void weight_dirac_pixels ## W ## _c(uint8_t *block, int stride, int log2_denom,     \
                                               int weight, int h) {                            \
        const int rnd = DIRAC_WEIGHT_RND(log2_denom);                                          \
        int x;                                                                                 \
        while (h--) {                                                                          \
            for (x = 0; x < W; x += 2) {                                                       \
                op_scale1(x);                                                                  \
                op_scale1(x+1);                                                                \
            }                                                                                  \
            block += stride;                                                                   \
        }                                                                                      \
    }                                                                                          \
    static void biweight_dirac_pixels ## W ## _c(uint8_t *dst, const uint8_t *src, int stride, int log2_denom, \
                                                 int weightd, int weights, int h) {            \
        const int rnd = DIRAC_WEIGHT_RND(log2_denom);                                          \
        int x;                                                                                 \
        while (h--) {                                                                          \
            for (x = 0; x < W; x += 2) {                                                       \
                op_scale2(x);                                                                  \
                op_scale2(x+1);                                                                \
            }                                                                                  \
            dst += stride;                                                                     \
            src += stride;                                                                     \
        }                                                                                      \
    }

DIRAC_WEIGHT(8)
DIRAC_WEIGHT(16)
DIRAC_WEIGHT(32)
117 | ||
/*
 * Generates add_obmc<xblen>_c(): accumulates src pixels multiplied by the
 * co-located weights into a 16-bit destination.
 *
 * dst and src advance by `stride` elements per row, while the weight table
 * advances by a fixed 32 entries per row regardless of xblen. `yblen` rows
 * are processed. xblen is even for every instantiation below.
 */
#define ADD_OBMC(xblen)                                                              \
    static void add_obmc ## xblen ## _c(uint16_t *dst, const uint8_t *src, int stride, \
                                        const uint8_t *obmc_weight, int yblen)       \
    {                                                                                \
        for (; yblen; yblen--) {                                                     \
            int col;                                                                 \
                                                                                     \
            for (col = 0; col < xblen; col++)                                        \
                dst[col] += src[col] * obmc_weight[col];                             \
                                                                                     \
            dst         += stride;                                                   \
            src         += stride;                                                   \
            obmc_weight += 32;                                                       \
        }                                                                            \
    }

ADD_OBMC(8)
ADD_OBMC(16)
ADD_OBMC(32)
137 | ||
/**
 * Convert a rectangle of signed 16-bit samples to unsigned 8-bit pixels by
 * adding 128 and clipping to 0..255.
 *
 * Columns are handled in groups of four, so up to three columns past
 * `width` are read and written when width is not a multiple of 4.
 *
 * @param dst        output pixels
 * @param dst_stride distance between output rows, in elements
 * @param src        signed input samples
 * @param src_stride distance between input rows, in elements
 * @param width      number of columns (rounded up to a multiple of 4)
 * @param height     number of rows
 */
static void put_signed_rect_clamped_c(uint8_t *dst, int dst_stride, const int16_t *src, int src_stride, int width, int height)
{
    int row, col, k;

    for (row = 0; row < height; row++) {
        for (col = 0; col < width; col += 4) {
            for (k = 0; k < 4; k++)
                dst[col + k] = av_clip_uint8(src[col + k] + 128);
        }
        dst += dst_stride;
        src += src_stride;
    }
}
152 | ||
/**
 * Combine a 16-bit accumulator plane with a signed 16-bit plane into 8-bit
 * pixels: dst = clip8(((src + 32) >> 6) + idwt).
 *
 * Columns are handled in pairs, so one column past `width` is read and
 * written when width is odd.
 *
 * @param dst         output pixels
 * @param src         unsigned 16-bit accumulator, scaled down by 64 with rounding
 * @param stride      distance between rows of both dst and src, in elements
 * @param idwt        signed samples added after the scaling
 * @param idwt_stride distance between idwt rows, in elements
 * @param width       number of columns (rounded up to a multiple of 2)
 * @param height      number of rows
 */
static void add_rect_clamped_c(uint8_t *dst, const uint16_t *src, int stride,
                               const int16_t *idwt, int idwt_stride,
                               int width, int height)
{
    int row, col, k;

    for (row = 0; row < height; row++) {
        for (col = 0; col < width; col += 2) {
            for (k = 0; k < 2; k++)
                dst[col + k] = av_clip_uint8(((src[col + k] + 32) >> 6) + idwt[col + k]);
        }
        dst  += stride;
        src  += stride;
        idwt += idwt_stride;
    }
}
169 | ||
170 | #define PIXFUNC(PFX, WIDTH) \ | |
171 | c->PFX ## _dirac_pixels_tab[WIDTH>>4][0] = ff_ ## PFX ## _dirac_pixels ## WIDTH ## _c; \ | |
172 | c->PFX ## _dirac_pixels_tab[WIDTH>>4][1] = ff_ ## PFX ## _dirac_pixels ## WIDTH ## _l2_c; \ | |
173 | c->PFX ## _dirac_pixels_tab[WIDTH>>4][2] = ff_ ## PFX ## _dirac_pixels ## WIDTH ## _l4_c; \ | |
174 | c->PFX ## _dirac_pixels_tab[WIDTH>>4][3] = ff_ ## PFX ## _dirac_pixels ## WIDTH ## _bilinear_c | |
175 | ||
176 | void ff_diracdsp_init(DiracDSPContext *c) | |
177 | { | |
178 | c->dirac_hpel_filter = dirac_hpel_filter; | |
179 | c->add_rect_clamped = add_rect_clamped_c; | |
180 | c->put_signed_rect_clamped = put_signed_rect_clamped_c; | |
181 | ||
182 | c->add_dirac_obmc[0] = add_obmc8_c; | |
183 | c->add_dirac_obmc[1] = add_obmc16_c; | |
184 | c->add_dirac_obmc[2] = add_obmc32_c; | |
185 | ||
186 | c->weight_dirac_pixels_tab[0] = weight_dirac_pixels8_c; | |
187 | c->weight_dirac_pixels_tab[1] = weight_dirac_pixels16_c; | |
188 | c->weight_dirac_pixels_tab[2] = weight_dirac_pixels32_c; | |
189 | c->biweight_dirac_pixels_tab[0] = biweight_dirac_pixels8_c; | |
190 | c->biweight_dirac_pixels_tab[1] = biweight_dirac_pixels16_c; | |
191 | c->biweight_dirac_pixels_tab[2] = biweight_dirac_pixels32_c; | |
192 | ||
193 | PIXFUNC(put, 8); | |
194 | PIXFUNC(put, 16); | |
195 | PIXFUNC(put, 32); | |
196 | PIXFUNC(avg, 8); | |
197 | PIXFUNC(avg, 16); | |
198 | PIXFUNC(avg, 32); | |
199 | ||
200 | if (HAVE_MMX && HAVE_YASM) ff_diracdsp_init_mmx(c); | |
201 | } |