Commit | Line | Data |
---|---|---|
2ba45a60 DM |
1 | /* |
2 | * Copyright (c) 2006 Paul Richards <paul.richards@gmail.com> | |
3 | * | |
4 | * This file is part of FFmpeg. | |
5 | * | |
6 | * FFmpeg is free software; you can redistribute it and/or | |
7 | * modify it under the terms of the GNU Lesser General Public | |
8 | * License as published by the Free Software Foundation; either | |
9 | * version 2.1 of the License, or (at your option) any later version. | |
10 | * | |
11 | * FFmpeg is distributed in the hope that it will be useful, | |
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
14 | * Lesser General Public License for more details. | |
15 | * | |
16 | * You should have received a copy of the GNU Lesser General Public | |
17 | * License along with FFmpeg; if not, write to the Free Software | |
18 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |
19 | */ | |
20 | ||
21 | /** | |
22 | * @file | |
23 | * @brief Theora encoder using libtheora. | |
24 | * @author Paul Richards <paul.richards@gmail.com> | |
25 | * | |
26 | * A lot of this is copy / paste from other output codecs in | |
27 | * libavcodec or pure guesswork (or both). | |
28 | * | |
29 | * I have used t_ prefixes on variables which are libtheora types | |
30 | * and o_ prefixes on variables which are libogg types. | |
31 | */ | |
32 | ||
33 | /* FFmpeg includes */ | |
34 | #include "libavutil/common.h" | |
35 | #include "libavutil/intreadwrite.h" | |
36 | #include "libavutil/pixdesc.h" | |
37 | #include "libavutil/log.h" | |
38 | #include "libavutil/base64.h" | |
39 | #include "avcodec.h" | |
40 | #include "internal.h" | |
41 | ||
42 | /* libtheora includes */ | |
43 | #include <theora/theoraenc.h> | |
44 | ||
45 | typedef struct TheoraContext { | |
46 | th_enc_ctx *t_state; | |
47 | uint8_t *stats; | |
48 | int stats_size; | |
49 | int stats_offset; | |
50 | int uv_hshift; | |
51 | int uv_vshift; | |
52 | int keyframe_mask; | |
53 | } TheoraContext; | |
54 | ||
55 | /** Concatenate an ogg_packet into the extradata. */ | |
56 | static int concatenate_packet(unsigned int* offset, | |
57 | AVCodecContext* avc_context, | |
58 | const ogg_packet* packet) | |
59 | { | |
60 | const char* message = NULL; | |
61 | int newsize = avc_context->extradata_size + 2 + packet->bytes; | |
62 | int err = AVERROR_INVALIDDATA; | |
63 | ||
64 | if (packet->bytes < 0) { | |
65 | message = "ogg_packet has negative size"; | |
66 | } else if (packet->bytes > 0xffff) { | |
67 | message = "ogg_packet is larger than 65535 bytes"; | |
68 | } else if (newsize < avc_context->extradata_size) { | |
69 | message = "extradata_size would overflow"; | |
70 | } else { | |
71 | if ((err = av_reallocp(&avc_context->extradata, newsize)) < 0) { | |
72 | avc_context->extradata_size = 0; | |
73 | message = "av_realloc failed"; | |
74 | } | |
75 | } | |
76 | if (message) { | |
77 | av_log(avc_context, AV_LOG_ERROR, "concatenate_packet failed: %s\n", message); | |
78 | return err; | |
79 | } | |
80 | ||
81 | avc_context->extradata_size = newsize; | |
82 | AV_WB16(avc_context->extradata + (*offset), packet->bytes); | |
83 | *offset += 2; | |
84 | memcpy(avc_context->extradata + (*offset), packet->packet, packet->bytes); | |
85 | (*offset) += packet->bytes; | |
86 | return 0; | |
87 | } | |
88 | ||
89 | static int get_stats(AVCodecContext *avctx, int eos) | |
90 | { | |
91 | #ifdef TH_ENCCTL_2PASS_OUT | |
92 | TheoraContext *h = avctx->priv_data; | |
93 | uint8_t *buf; | |
94 | int bytes; | |
95 | ||
96 | bytes = th_encode_ctl(h->t_state, TH_ENCCTL_2PASS_OUT, &buf, sizeof(buf)); | |
97 | if (bytes < 0) { | |
98 | av_log(avctx, AV_LOG_ERROR, "Error getting first pass stats\n"); | |
99 | return AVERROR_EXTERNAL; | |
100 | } | |
101 | if (!eos) { | |
102 | h->stats = av_fast_realloc(h->stats, &h->stats_size, | |
103 | h->stats_offset + bytes); | |
104 | memcpy(h->stats + h->stats_offset, buf, bytes); | |
105 | h->stats_offset += bytes; | |
106 | } else { | |
107 | int b64_size = AV_BASE64_SIZE(h->stats_offset); | |
108 | // libtheora generates a summary header at the end | |
109 | memcpy(h->stats, buf, bytes); | |
110 | avctx->stats_out = av_malloc(b64_size); | |
111 | av_base64_encode(avctx->stats_out, b64_size, h->stats, h->stats_offset); | |
112 | } | |
113 | return 0; | |
114 | #else | |
115 | av_log(avctx, AV_LOG_ERROR, "libtheora too old to support 2pass\n"); | |
116 | return AVERROR(ENOSUP); | |
117 | #endif | |
118 | } | |
119 | ||
120 | // libtheora won't read the entire buffer we give it at once, so we have to | |
121 | // repeatedly submit it... | |
122 | static int submit_stats(AVCodecContext *avctx) | |
123 | { | |
124 | #ifdef TH_ENCCTL_2PASS_IN | |
125 | TheoraContext *h = avctx->priv_data; | |
126 | int bytes; | |
127 | if (!h->stats) { | |
128 | if (!avctx->stats_in) { | |
129 | av_log(avctx, AV_LOG_ERROR, "No statsfile for second pass\n"); | |
130 | return AVERROR(EINVAL); | |
131 | } | |
132 | h->stats_size = strlen(avctx->stats_in) * 3/4; | |
133 | h->stats = av_malloc(h->stats_size); | |
134 | h->stats_size = av_base64_decode(h->stats, avctx->stats_in, h->stats_size); | |
135 | } | |
136 | while (h->stats_size - h->stats_offset > 0) { | |
137 | bytes = th_encode_ctl(h->t_state, TH_ENCCTL_2PASS_IN, | |
138 | h->stats + h->stats_offset, | |
139 | h->stats_size - h->stats_offset); | |
140 | if (bytes < 0) { | |
141 | av_log(avctx, AV_LOG_ERROR, "Error submitting stats\n"); | |
142 | return AVERROR_EXTERNAL; | |
143 | } | |
144 | if (!bytes) | |
145 | return 0; | |
146 | h->stats_offset += bytes; | |
147 | } | |
148 | return 0; | |
149 | #else | |
150 | av_log(avctx, AV_LOG_ERROR, "libtheora too old to support 2pass\n"); | |
151 | return AVERROR(ENOSUP); | |
152 | #endif | |
153 | } | |
154 | ||
155 | static av_cold int encode_init(AVCodecContext* avc_context) | |
156 | { | |
157 | th_info t_info; | |
158 | th_comment t_comment; | |
159 | ogg_packet o_packet; | |
160 | unsigned int offset; | |
161 | TheoraContext *h = avc_context->priv_data; | |
162 | uint32_t gop_size = avc_context->gop_size; | |
163 | int ret; | |
164 | ||
165 | /* Set up the theora_info struct */ | |
166 | th_info_init(&t_info); | |
167 | t_info.frame_width = FFALIGN(avc_context->width, 16); | |
168 | t_info.frame_height = FFALIGN(avc_context->height, 16); | |
169 | t_info.pic_width = avc_context->width; | |
170 | t_info.pic_height = avc_context->height; | |
171 | t_info.pic_x = 0; | |
172 | t_info.pic_y = 0; | |
173 | /* Swap numerator and denominator as time_base in AVCodecContext gives the | |
174 | * time period between frames, but theora_info needs the framerate. */ | |
175 | t_info.fps_numerator = avc_context->time_base.den; | |
176 | t_info.fps_denominator = avc_context->time_base.num; | |
177 | if (avc_context->sample_aspect_ratio.num) { | |
178 | t_info.aspect_numerator = avc_context->sample_aspect_ratio.num; | |
179 | t_info.aspect_denominator = avc_context->sample_aspect_ratio.den; | |
180 | } else { | |
181 | t_info.aspect_numerator = 1; | |
182 | t_info.aspect_denominator = 1; | |
183 | } | |
184 | ||
185 | if (avc_context->color_primaries == AVCOL_PRI_BT470M) | |
186 | t_info.colorspace = TH_CS_ITU_REC_470M; | |
187 | else if (avc_context->color_primaries == AVCOL_PRI_BT470BG) | |
188 | t_info.colorspace = TH_CS_ITU_REC_470BG; | |
189 | else | |
190 | t_info.colorspace = TH_CS_UNSPECIFIED; | |
191 | ||
192 | if (avc_context->pix_fmt == AV_PIX_FMT_YUV420P) | |
193 | t_info.pixel_fmt = TH_PF_420; | |
194 | else if (avc_context->pix_fmt == AV_PIX_FMT_YUV422P) | |
195 | t_info.pixel_fmt = TH_PF_422; | |
196 | else if (avc_context->pix_fmt == AV_PIX_FMT_YUV444P) | |
197 | t_info.pixel_fmt = TH_PF_444; | |
198 | else { | |
199 | av_log(avc_context, AV_LOG_ERROR, "Unsupported pix_fmt\n"); | |
200 | return AVERROR(EINVAL); | |
201 | } | |
202 | avcodec_get_chroma_sub_sample(avc_context->pix_fmt, &h->uv_hshift, &h->uv_vshift); | |
203 | ||
204 | if (avc_context->flags & CODEC_FLAG_QSCALE) { | |
205 | /* Clip global_quality in QP units to the [0 - 10] range | |
206 | to be consistent with the libvorbis implementation. | |
207 | Theora accepts a quality parameter which is an int value in | |
208 | the [0 - 63] range. | |
209 | */ | |
210 | t_info.quality = av_clipf(avc_context->global_quality / (float)FF_QP2LAMBDA, 0, 10) * 6.3; | |
211 | t_info.target_bitrate = 0; | |
212 | } else { | |
213 | t_info.target_bitrate = avc_context->bit_rate; | |
214 | t_info.quality = 0; | |
215 | } | |
216 | ||
217 | /* Now initialise libtheora */ | |
218 | h->t_state = th_encode_alloc(&t_info); | |
219 | if (!h->t_state) { | |
220 | av_log(avc_context, AV_LOG_ERROR, "theora_encode_init failed\n"); | |
221 | return AVERROR_EXTERNAL; | |
222 | } | |
223 | ||
224 | h->keyframe_mask = (1 << t_info.keyframe_granule_shift) - 1; | |
225 | /* Clear up theora_info struct */ | |
226 | th_info_clear(&t_info); | |
227 | ||
228 | if (th_encode_ctl(h->t_state, TH_ENCCTL_SET_KEYFRAME_FREQUENCY_FORCE, | |
229 | &gop_size, sizeof(gop_size))) { | |
230 | av_log(avc_context, AV_LOG_ERROR, "Error setting GOP size\n"); | |
231 | return AVERROR_EXTERNAL; | |
232 | } | |
233 | ||
234 | // need to enable 2 pass (via TH_ENCCTL_2PASS_) before encoding headers | |
235 | if (avc_context->flags & CODEC_FLAG_PASS1) { | |
236 | if ((ret = get_stats(avc_context, 0)) < 0) | |
237 | return ret; | |
238 | } else if (avc_context->flags & CODEC_FLAG_PASS2) { | |
239 | if ((ret = submit_stats(avc_context)) < 0) | |
240 | return ret; | |
241 | } | |
242 | ||
243 | /* | |
244 | Output first header packet consisting of theora | |
245 | header, comment, and tables. | |
246 | ||
247 | Each one is prefixed with a 16bit size, then they | |
248 | are concatenated together into libavcodec's extradata. | |
249 | */ | |
250 | offset = 0; | |
251 | ||
252 | /* Headers */ | |
253 | th_comment_init(&t_comment); | |
254 | ||
255 | while (th_encode_flushheader(h->t_state, &t_comment, &o_packet)) | |
256 | if ((ret = concatenate_packet(&offset, avc_context, &o_packet)) < 0) | |
257 | return ret; | |
258 | ||
259 | th_comment_clear(&t_comment); | |
260 | ||
261 | /* Set up the output AVFrame */ | |
262 | avc_context->coded_frame = av_frame_alloc(); | |
263 | ||
264 | return 0; | |
265 | } | |
266 | ||
267 | static int encode_frame(AVCodecContext* avc_context, AVPacket *pkt, | |
268 | const AVFrame *frame, int *got_packet) | |
269 | { | |
270 | th_ycbcr_buffer t_yuv_buffer; | |
271 | TheoraContext *h = avc_context->priv_data; | |
272 | ogg_packet o_packet; | |
273 | int result, i, ret; | |
274 | ||
275 | // EOS, finish and get 1st pass stats if applicable | |
276 | if (!frame) { | |
277 | th_encode_packetout(h->t_state, 1, &o_packet); | |
278 | if (avc_context->flags & CODEC_FLAG_PASS1) | |
279 | if ((ret = get_stats(avc_context, 1)) < 0) | |
280 | return ret; | |
281 | return 0; | |
282 | } | |
283 | ||
284 | /* Copy planes to the theora yuv_buffer */ | |
285 | for (i = 0; i < 3; i++) { | |
286 | t_yuv_buffer[i].width = FFALIGN(avc_context->width, 16) >> (i && h->uv_hshift); | |
287 | t_yuv_buffer[i].height = FFALIGN(avc_context->height, 16) >> (i && h->uv_vshift); | |
288 | t_yuv_buffer[i].stride = frame->linesize[i]; | |
289 | t_yuv_buffer[i].data = frame->data[i]; | |
290 | } | |
291 | ||
292 | if (avc_context->flags & CODEC_FLAG_PASS2) | |
293 | if ((ret = submit_stats(avc_context)) < 0) | |
294 | return ret; | |
295 | ||
296 | /* Now call into theora_encode_YUVin */ | |
297 | result = th_encode_ycbcr_in(h->t_state, t_yuv_buffer); | |
298 | if (result) { | |
299 | const char* message; | |
300 | switch (result) { | |
301 | case -1: | |
302 | message = "differing frame sizes"; | |
303 | break; | |
304 | case TH_EINVAL: | |
305 | message = "encoder is not ready or is finished"; | |
306 | break; | |
307 | default: | |
308 | message = "unknown reason"; | |
309 | break; | |
310 | } | |
311 | av_log(avc_context, AV_LOG_ERROR, "theora_encode_YUVin failed (%s) [%d]\n", message, result); | |
312 | return AVERROR_EXTERNAL; | |
313 | } | |
314 | ||
315 | if (avc_context->flags & CODEC_FLAG_PASS1) | |
316 | if ((ret = get_stats(avc_context, 0)) < 0) | |
317 | return ret; | |
318 | ||
319 | /* Pick up returned ogg_packet */ | |
320 | result = th_encode_packetout(h->t_state, 0, &o_packet); | |
321 | switch (result) { | |
322 | case 0: | |
323 | /* No packet is ready */ | |
324 | return 0; | |
325 | case 1: | |
326 | /* Success, we have a packet */ | |
327 | break; | |
328 | default: | |
329 | av_log(avc_context, AV_LOG_ERROR, "theora_encode_packetout failed [%d]\n", result); | |
330 | return AVERROR_EXTERNAL; | |
331 | } | |
332 | ||
333 | /* Copy ogg_packet content out to buffer */ | |
334 | if ((ret = ff_alloc_packet2(avc_context, pkt, o_packet.bytes)) < 0) | |
335 | return ret; | |
336 | memcpy(pkt->data, o_packet.packet, o_packet.bytes); | |
337 | ||
338 | // HACK: assumes no encoder delay, this is true until libtheora becomes | |
339 | // multithreaded (which will be disabled unless explicitly requested) | |
340 | pkt->pts = pkt->dts = frame->pts; | |
341 | avc_context->coded_frame->key_frame = !(o_packet.granulepos & h->keyframe_mask); | |
342 | if (avc_context->coded_frame->key_frame) | |
343 | pkt->flags |= AV_PKT_FLAG_KEY; | |
344 | *got_packet = 1; | |
345 | ||
346 | return 0; | |
347 | } | |
348 | ||
349 | static av_cold int encode_close(AVCodecContext* avc_context) | |
350 | { | |
351 | TheoraContext *h = avc_context->priv_data; | |
352 | ||
353 | th_encode_free(h->t_state); | |
354 | av_freep(&h->stats); | |
355 | av_freep(&avc_context->coded_frame); | |
356 | av_freep(&avc_context->stats_out); | |
357 | av_freep(&avc_context->extradata); | |
358 | avc_context->extradata_size = 0; | |
359 | ||
360 | return 0; | |
361 | } | |
362 | ||
363 | /** AVCodec struct exposed to libavcodec */ | |
364 | AVCodec ff_libtheora_encoder = { | |
365 | .name = "libtheora", | |
366 | .long_name = NULL_IF_CONFIG_SMALL("libtheora Theora"), | |
367 | .type = AVMEDIA_TYPE_VIDEO, | |
368 | .id = AV_CODEC_ID_THEORA, | |
369 | .priv_data_size = sizeof(TheoraContext), | |
370 | .init = encode_init, | |
371 | .close = encode_close, | |
372 | .encode2 = encode_frame, | |
373 | .capabilities = CODEC_CAP_DELAY, // needed to get the statsfile summary | |
374 | .pix_fmts = (const enum AVPixelFormat[]){ | |
375 | AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV422P, AV_PIX_FMT_YUV444P, AV_PIX_FMT_NONE | |
376 | }, | |
377 | }; |