| 1 | ============================================= |
| 2 | Snow Video Codec Specification Draft 20080110 |
| 3 | ============================================= |
| 4 | |
| 5 | Introduction: |
| 6 | ============= |
| 7 | This specification describes the Snow bitstream syntax and semantics as |
| 8 | well as the formal Snow decoding process. |
| 9 | |
| 10 | The decoding process is described precisely and any compliant decoder |
| 11 | MUST produce the exact same output for a spec-conformant Snow stream. |
| 12 | For encoding, though, any process which generates a stream compliant to |
| 13 | the syntactical and semantic requirements and which is decodable by |
| 14 | the process described in this spec shall be considered a conformant |
| 15 | Snow encoder. |
| 16 | |
| 17 | Definitions: |
| 18 | ============ |
| 19 | |
| 20 | MUST the specific part must be done to conform to this standard |
| 21 | SHOULD it is recommended to be done that way, but not strictly required |
| 22 | |
| 23 | ilog2(x) is the rounded down logarithm of x with basis 2 |
| 24 | ilog2(0) = 0 |
| 25 | |
| 26 | Type definitions: |
| 27 | ================= |
| 28 | |
| 29 | b 1-bit range coded |
| 30 | u unsigned scalar value range coded |
| 31 | s signed scalar value range coded |
| 32 | |
| 33 | |
| 34 | Bitstream syntax: |
| 35 | ================= |
| 36 | |
| 37 | frame: |
| 38 | header |
| 39 | prediction |
| 40 | residual |
| 41 | |
| 42 | header: |
| 43 | keyframe b MID_STATE |
| 44 | if(keyframe || always_reset) |
| 45 | reset_contexts |
| 46 | if(keyframe){ |
| 47 | version u header_state |
| 48 | always_reset b header_state |
| 49 | temporal_decomposition_type u header_state |
| 50 | temporal_decomposition_count u header_state |
| 51 | spatial_decomposition_count u header_state |
| 52 | colorspace_type u header_state |
| 53 | if (nb_planes > 2) { |
| 54 | chroma_h_shift u header_state |
| 55 | chroma_v_shift u header_state |
| 56 | } |
| 57 | spatial_scalability b header_state |
| 58 | max_ref_frames-1 u header_state |
| 59 | qlogs |
| 60 | } |
| 61 | if(!keyframe){ |
| 62 | update_mc b header_state |
| 63 | if(update_mc){ |
| 64 | for(plane=0; plane<nb_plane_types; plane++){ |
| 65 | diag_mc b header_state |
| 66 | htaps/2-1 u header_state |
| 67 | for(i= p->htaps/2; i; i--) |
| 68 | |hcoeff[i]| u header_state |
| 69 | } |
| 70 | } |
| 71 | update_qlogs b header_state |
| 72 | if(update_qlogs){ |
| 73 | spatial_decomposition_count u header_state |
| 74 | qlogs |
| 75 | } |
| 76 | } |
| 77 | |
| 78 | spatial_decomposition_type s header_state |
| 79 | qlog s header_state |
| 80 | mv_scale s header_state |
| 81 | qbias s header_state |
| 82 | block_max_depth s header_state |
| 83 | |
| 84 | qlogs: |
| 85 | for(plane=0; plane<nb_plane_types; plane++){ |
| 86 | quant_table[plane][0][0] s header_state |
| 87 | for(level=0; level < spatial_decomposition_count; level++){ |
| 88 | quant_table[plane][level][1]s header_state |
| 89 | quant_table[plane][level][3]s header_state |
| 90 | } |
| 91 | } |
| 92 | |
| 93 | reset_contexts |
| 94 | *_state[*]= MID_STATE |
| 95 | |
| 96 | prediction: |
| 97 | for(y=0; y<block_count_vertical; y++) |
| 98 | for(x=0; x<block_count_horizontal; x++) |
| 99 | block(0) |
| 100 | |
| 101 | block(level): |
| 102 | mvx_diff=mvy_diff=y_diff=cb_diff=cr_diff=0 |
| 103 | if(keyframe){ |
| 104 | intra=1 |
| 105 | }else{ |
| 106 | if(level!=max_block_depth){ |
| 107 | s_context= 2*left->level + 2*top->level + topleft->level + topright->level |
| 108 | leaf b block_state[4 + s_context] |
| 109 | } |
| 110 | if(level==max_block_depth || leaf){ |
| 111 | intra b block_state[1 + left->intra + top->intra] |
| 112 | if(intra){ |
| 113 | y_diff s block_state[32] |
| 114 | cb_diff s block_state[64] |
| 115 | cr_diff s block_state[96] |
| 116 | }else{ |
| 117 | ref_context= ilog2(2*left->ref) + ilog2(2*top->ref) |
| 118 | if(ref_frames > 1) |
| 119 | ref u block_state[128 + 1024 + 32*ref_context] |
| 120 | mx_context= ilog2(2*abs(left->mx - top->mx)) |
| 121 | my_context= ilog2(2*abs(left->my - top->my)) |
| 122 | mvx_diff s block_state[128 + 32*(mx_context + 16*!!ref)] |
| 123 | mvy_diff s block_state[128 + 32*(my_context + 16*!!ref)] |
| 124 | } |
| 125 | }else{ |
| 126 | block(level+1) |
| 127 | block(level+1) |
| 128 | block(level+1) |
| 129 | block(level+1) |
| 130 | } |
| 131 | } |
| 132 | |
| 133 | |
| 134 | residual: |
| 135 | residual2(luma) |
| 136 | if (nb_planes > 2) { |
| 137 | residual2(chroma_cr) |
| 138 | residual2(chroma_cb) |
| 139 | } |
| 140 | |
| 141 | residual2: |
| 142 | for(level=0; level<spatial_decomposition_count; level++){ |
| 143 | if(level==0) |
| 144 | subband(LL, 0) |
| 145 | subband(HL, level) |
| 146 | subband(LH, level) |
| 147 | subband(HH, level) |
| 148 | } |
| 149 | |
| 150 | subband: |
| 151 | FIXME |
| 152 | |
| 153 | nb_plane_types = gray ? 1 : 2; |
| 154 | |
| 155 | Tag description: |
| 156 | ---------------- |
| 157 | |
| 158 | version |
| 159 | 0 |
| 160 | this MUST NOT change within a bitstream |
| 161 | |
| 162 | always_reset |
| 163 | if 1 then the range coder contexts will be reset after each frame |
| 164 | |
| 165 | temporal_decomposition_type |
| 166 | 0 |
| 167 | |
| 168 | temporal_decomposition_count |
| 169 | 0 |
| 170 | |
| 171 | spatial_decomposition_count |
| 172 | FIXME |
| 173 | |
| 174 | colorspace_type |
| 175 | 0 unspecified YcbCr |
| 176 | 1 Gray |
| 177 | 2 Gray + Alpha |
| 178 | 3 GBR |
| 179 | 4 GBRA |
| 180 | this MUST NOT change within a bitstream |
| 181 | |
| 182 | chroma_h_shift |
| 183 | log2(luma.width / chroma.width) |
| 184 | this MUST NOT change within a bitstream |
| 185 | |
| 186 | chroma_v_shift |
| 187 | log2(luma.height / chroma.height) |
| 188 | this MUST NOT change within a bitstream |
| 189 | |
| 190 | spatial_scalability |
| 191 | 0 |
| 192 | |
| 193 | max_ref_frames |
| 194 | maximum number of reference frames |
| 195 | this MUST NOT change within a bitstream |
| 196 | |
| 197 | update_mc |
| 198 | indicates that motion compensation filter parameters are stored in the |
| 199 | header |
| 200 | |
| 201 | diag_mc |
| 202 | flag to enable faster diagonal interpolation |
| 203 | this SHOULD be 1 unless it turns out to be covered by a valid patent |
| 204 | |
| 205 | htaps |
| 206 | number of half pel interpolation filter taps, MUST be even, >0 and <10 |
| 207 | |
| 208 | hcoeff |
| 209 | half pel interpolation filter coefficients, hcoeff[0] are the 2 middle |
| 210 | coefficients [1] are the next outer ones and so on, resulting in a filter |
| 211 | like: ...eff[2], hcoeff[1], hcoeff[0], hcoeff[0], hcoeff[1], hcoeff[2] ... |
| 212 | the sign of the coefficients is not explicitly stored but alternates |
| 213 | after each coeff and coeff[0] is positive, so ...,+,-,+,-,+,+,-,+,-,+,... |
| 214 | hcoeff[0] is not explicitly stored but found by subtracting the sum |
| 215 | of all stored coefficients with signs from 32 |
| 216 | hcoeff[0]= 32 - hcoeff[1] - hcoeff[2] - ... |
| 217 | a good choice for hcoeff and htaps is |
| 218 | htaps= 6 |
| 219 | hcoeff={40,-10,2} |
| 220 | an alternative which requires more computations at both encoder and |
| 221 | decoder side and may or may not be better is |
| 222 | htaps= 8 |
| 223 | hcoeff={42,-14,6,-2} |
| 224 | |
| 225 | |
| 226 | ref_frames |
| 227 | minimum of the number of available reference frames and max_ref_frames |
| 228 | for example the first frame after a key frame always has ref_frames=1 |
| 229 | |
| 230 | spatial_decomposition_type |
| 231 | wavelet type |
| 232 | 0 is a 9/7 symmetric compact integer wavelet |
| 233 | 1 is a 5/3 symmetric compact integer wavelet |
| 234 | others are reserved |
| 235 | stored as delta from last, last is reset to 0 if always_reset || keyframe |
| 236 | |
| 237 | qlog |
| 238 | quality (logarthmic quantizer scale) |
| 239 | stored as delta from last, last is reset to 0 if always_reset || keyframe |
| 240 | |
| 241 | mv_scale |
| 242 | stored as delta from last, last is reset to 0 if always_reset || keyframe |
| 243 | FIXME check that everything works fine if this changes between frames |
| 244 | |
| 245 | qbias |
| 246 | dequantization bias |
| 247 | stored as delta from last, last is reset to 0 if always_reset || keyframe |
| 248 | |
| 249 | block_max_depth |
| 250 | maximum depth of the block tree |
| 251 | stored as delta from last, last is reset to 0 if always_reset || keyframe |
| 252 | |
| 253 | quant_table |
| 254 | quantiztation table |
| 255 | |
| 256 | |
| 257 | Highlevel bitstream structure: |
| 258 | ============================= |
| 259 | -------------------------------------------- |
| 260 | | Header | |
| 261 | -------------------------------------------- |
| 262 | | ------------------------------------ | |
| 263 | | | Block0 | | |
| 264 | | | split? | | |
| 265 | | | yes no | | |
| 266 | | | ......... intra? | | |
| 267 | | | : Block01 : yes no | | |
| 268 | | | : Block02 : ....... .......... | | |
| 269 | | | : Block03 : : y DC : : ref index: | | |
| 270 | | | : Block04 : : cb DC : : motion x : | | |
| 271 | | | ......... : cr DC : : motion y : | | |
| 272 | | | ....... .......... | | |
| 273 | | ------------------------------------ | |
| 274 | | ------------------------------------ | |
| 275 | | | Block1 | | |
| 276 | | ... | |
| 277 | -------------------------------------------- |
| 278 | | ------------ ------------ ------------ | |
| 279 | || Y subbands | | Cb subbands| | Cr subbands|| |
| 280 | || --- --- | | --- --- | | --- --- || |
| 281 | || |LL0||HL0| | | |LL0||HL0| | | |LL0||HL0| || |
| 282 | || --- --- | | --- --- | | --- --- || |
| 283 | || --- --- | | --- --- | | --- --- || |
| 284 | || |LH0||HH0| | | |LH0||HH0| | | |LH0||HH0| || |
| 285 | || --- --- | | --- --- | | --- --- || |
| 286 | || --- --- | | --- --- | | --- --- || |
| 287 | || |HL1||LH1| | | |HL1||LH1| | | |HL1||LH1| || |
| 288 | || --- --- | | --- --- | | --- --- || |
| 289 | || --- --- | | --- --- | | --- --- || |
| 290 | || |HH1||HL2| | | |HH1||HL2| | | |HH1||HL2| || |
| 291 | || ... | | ... | | ... || |
| 292 | | ------------ ------------ ------------ | |
| 293 | -------------------------------------------- |
| 294 | |
| 295 | Decoding process: |
| 296 | ================= |
| 297 | |
| 298 | ------------ |
| 299 | | | |
| 300 | | Subbands | |
| 301 | ------------ | | |
| 302 | | | ------------ |
| 303 | | Intra DC | | |
| 304 | | | LL0 subband prediction |
| 305 | ------------ | |
| 306 | \ Dequantizaton |
| 307 | ------------------- \ | |
| 308 | | Reference frames | \ IDWT |
| 309 | | ------- ------- | Motion \ | |
| 310 | ||Frame 0| |Frame 1|| Compensation . OBMC v ------- |
| 311 | | ------- ------- | --------------. \------> + --->|Frame n|-->output |
| 312 | | ------- ------- | ------- |
| 313 | ||Frame 2| |Frame 3||<----------------------------------/ |
| 314 | | ... | |
| 315 | ------------------- |
| 316 | |
| 317 | |
| 318 | Range Coder: |
| 319 | ============ |
| 320 | |
| 321 | Binary Range Coder: |
| 322 | ------------------- |
| 323 | The implemented range coder is an adapted version based upon "Range encoding: |
| 324 | an algorithm for removing redundancy from a digitised message." by G. N. N. |
| 325 | Martin. |
| 326 | The symbols encoded by the Snow range coder are bits (0|1). The |
| 327 | associated probabilities are not fix but change depending on the symbol mix |
| 328 | seen so far. |
| 329 | |
| 330 | |
| 331 | bit seen | new state |
| 332 | ---------+----------------------------------------------- |
| 333 | 0 | 256 - state_transition_table[256 - old_state]; |
| 334 | 1 | state_transition_table[ old_state]; |
| 335 | |
| 336 | state_transition_table = { |
| 337 | 0, 0, 0, 0, 0, 0, 0, 0, 20, 21, 22, 23, 24, 25, 26, 27, |
| 338 | 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 37, 38, 39, 40, 41, 42, |
| 339 | 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 56, 57, |
| 340 | 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, |
| 341 | 74, 75, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, |
| 342 | 89, 90, 91, 92, 93, 94, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, |
| 343 | 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 114, 115, 116, 117, 118, |
| 344 | 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 133, |
| 345 | 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, |
| 346 | 150, 151, 152, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, |
| 347 | 165, 166, 167, 168, 169, 170, 171, 171, 172, 173, 174, 175, 176, 177, 178, 179, |
| 348 | 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 190, 191, 192, 194, 194, |
| 349 | 195, 196, 197, 198, 199, 200, 201, 202, 202, 204, 205, 206, 207, 208, 209, 209, |
| 350 | 210, 211, 212, 213, 215, 215, 216, 217, 218, 219, 220, 220, 222, 223, 224, 225, |
| 351 | 226, 227, 227, 229, 229, 230, 231, 232, 234, 234, 235, 236, 237, 238, 239, 240, |
| 352 | 241, 242, 243, 244, 245, 246, 247, 248, 248, 0, 0, 0, 0, 0, 0, 0}; |
| 353 | |
| 354 | FIXME |
| 355 | |
| 356 | |
| 357 | Range Coding of integers: |
| 358 | ------------------------- |
| 359 | FIXME |
| 360 | |
| 361 | |
| 362 | Neighboring Blocks: |
| 363 | =================== |
| 364 | left and top are set to the respective blocks unless they are outside of |
| 365 | the image in which case they are set to the Null block |
| 366 | |
| 367 | top-left is set to the top left block unless it is outside of the image in |
| 368 | which case it is set to the left block |
| 369 | |
| 370 | if this block has no larger parent block or it is at the left side of its |
| 371 | parent block and the top right block is not outside of the image then the |
| 372 | top right block is used for top-right else the top-left block is used |
| 373 | |
| 374 | Null block |
| 375 | y,cb,cr are 128 |
| 376 | level, ref, mx and my are 0 |
| 377 | |
| 378 | |
| 379 | Motion Vector Prediction: |
| 380 | ========================= |
| 381 | 1. the motion vectors of all the neighboring blocks are scaled to |
| 382 | compensate for the difference of reference frames |
| 383 | |
| 384 | scaled_mv= (mv * (256 * (current_reference+1) / (mv.reference+1)) + 128)>>8 |
| 385 | |
| 386 | 2. the median of the scaled left, top and top-right vectors is used as |
| 387 | motion vector prediction |
| 388 | |
| 389 | 3. the used motion vector is the sum of the predictor and |
| 390 | (mvx_diff, mvy_diff)*mv_scale |
| 391 | |
| 392 | |
| 393 | Intra DC Predicton: |
| 394 | ====================== |
| 395 | the luma and chroma values of the left block are used as predictors |
| 396 | |
| 397 | the used luma and chroma is the sum of the predictor and y_diff, cb_diff, cr_diff |
| 398 | to reverse this in the decoder apply the following: |
| 399 | block[y][x].dc[0] = block[y][x-1].dc[0] + y_diff; |
| 400 | block[y][x].dc[1] = block[y][x-1].dc[1] + cb_diff; |
| 401 | block[y][x].dc[2] = block[y][x-1].dc[2] + cr_diff; |
| 402 | block[*][-1].dc[*]= 128; |
| 403 | |
| 404 | |
| 405 | Motion Compensation: |
| 406 | ==================== |
| 407 | |
| 408 | Halfpel interpolation: |
| 409 | ---------------------- |
| 410 | halfpel interpolation is done by convolution with the halfpel filter stored |
| 411 | in the header: |
| 412 | |
| 413 | horizontal halfpel samples are found by |
| 414 | H1[y][x] = hcoeff[0]*(F[y][x ] + F[y][x+1]) |
| 415 | + hcoeff[1]*(F[y][x-1] + F[y][x+2]) |
| 416 | + hcoeff[2]*(F[y][x-2] + F[y][x+3]) |
| 417 | + ... |
| 418 | h1[y][x] = (H1[y][x] + 32)>>6; |
| 419 | |
| 420 | vertical halfpel samples are found by |
| 421 | H2[y][x] = hcoeff[0]*(F[y ][x] + F[y+1][x]) |
| 422 | + hcoeff[1]*(F[y-1][x] + F[y+2][x]) |
| 423 | + ... |
| 424 | h2[y][x] = (H2[y][x] + 32)>>6; |
| 425 | |
| 426 | vertical+horizontal halfpel samples are found by |
| 427 | H3[y][x] = hcoeff[0]*(H2[y][x ] + H2[y][x+1]) |
| 428 | + hcoeff[1]*(H2[y][x-1] + H2[y][x+2]) |
| 429 | + ... |
| 430 | H3[y][x] = hcoeff[0]*(H1[y ][x] + H1[y+1][x]) |
| 431 | + hcoeff[1]*(H1[y+1][x] + H1[y+2][x]) |
| 432 | + ... |
| 433 | h3[y][x] = (H3[y][x] + 2048)>>12; |
| 434 | |
| 435 | |
| 436 | F H1 F |
| 437 | | | | |
| 438 | | | | |
| 439 | | | | |
| 440 | F H1 F |
| 441 | | | | |
| 442 | | | | |
| 443 | | | | |
| 444 | F-------F-------F-> H1<-F-------F-------F |
| 445 | v v v |
| 446 | H2 H3 H2 |
| 447 | ^ ^ ^ |
| 448 | F-------F-------F-> H1<-F-------F-------F |
| 449 | | | | |
| 450 | | | | |
| 451 | | | | |
| 452 | F H1 F |
| 453 | | | | |
| 454 | | | | |
| 455 | | | | |
| 456 | F H1 F |
| 457 | |
| 458 | |
| 459 | unavailable fullpel samples (outside the picture for example) shall be equal |
| 460 | to the closest available fullpel sample |
| 461 | |
| 462 | |
| 463 | Smaller pel interpolation: |
| 464 | -------------------------- |
| 465 | if diag_mc is set then points which lie on a line between 2 vertically, |
| 466 | horiziontally or diagonally adjacent halfpel points shall be interpolated |
| 467 | linearls with rounding to nearest and halfway values rounded up. |
| 468 | points which lie on 2 diagonals at the same time should only use the one |
| 469 | diagonal not containing the fullpel point |
| 470 | |
| 471 | |
| 472 | |
| 473 | F-->O---q---O<--h1->O---q---O<--F |
| 474 | v \ / v \ / v |
| 475 | O O O O O O O |
| 476 | | / | \ | |
| 477 | q q q q q |
| 478 | | / | \ | |
| 479 | O O O O O O O |
| 480 | ^ / \ ^ / \ ^ |
| 481 | h2-->O---q---O<--h3->O---q---O<--h2 |
| 482 | v \ / v \ / v |
| 483 | O O O O O O O |
| 484 | | \ | / | |
| 485 | q q q q q |
| 486 | | \ | / | |
| 487 | O O O O O O O |
| 488 | ^ / \ ^ / \ ^ |
| 489 | F-->O---q---O<--h1->O---q---O<--F |
| 490 | |
| 491 | |
| 492 | |
| 493 | the remaining points shall be bilinearly interpolated from the |
| 494 | up to 4 surrounding halfpel and fullpel points, again rounding should be to |
| 495 | nearest and halfway values rounded up |
| 496 | |
| 497 | compliant Snow decoders MUST support 1-1/8 pel luma and 1/2-1/16 pel chroma |
| 498 | interpolation at least |
| 499 | |
| 500 | |
| 501 | Overlapped block motion compensation: |
| 502 | ------------------------------------- |
| 503 | FIXME |
| 504 | |
| 505 | LL band prediction: |
| 506 | =================== |
| 507 | Each sample in the LL0 subband is predicted by the median of the left, top and |
| 508 | left+top-topleft samples, samples outside the subband shall be considered to |
| 509 | be 0. To reverse this prediction in the decoder apply the following. |
| 510 | for(y=0; y<height; y++){ |
| 511 | for(x=0; x<width; x++){ |
| 512 | sample[y][x] += median(sample[y-1][x], |
| 513 | sample[y][x-1], |
| 514 | sample[y-1][x]+sample[y][x-1]-sample[y-1][x-1]); |
| 515 | } |
| 516 | } |
| 517 | sample[-1][*]=sample[*][-1]= 0; |
| 518 | width,height here are the width and height of the LL0 subband not of the final |
| 519 | video |
| 520 | |
| 521 | |
| 522 | Dequantizaton: |
| 523 | ============== |
| 524 | FIXME |
| 525 | |
| 526 | Wavelet Transform: |
| 527 | ================== |
| 528 | |
| 529 | Snow supports 2 wavelet transforms, the symmetric biorthogonal 5/3 integer |
| 530 | transform and a integer approximation of the symmetric biorthogonal 9/7 |
| 531 | daubechies wavelet. |
| 532 | |
| 533 | 2D IDWT (inverse discrete wavelet transform) |
| 534 | -------------------------------------------- |
| 535 | The 2D IDWT applies a 2D filter recursively, each time combining the |
| 536 | 4 lowest frequency subbands into a single subband until only 1 subband |
| 537 | remains. |
| 538 | The 2D filter is done by first applying a 1D filter in the vertical direction |
| 539 | and then applying it in the horizontal one. |
| 540 | --------------- --------------- --------------- --------------- |
| 541 | |LL0|HL0| | | | | | | | | | | | |
| 542 | |---+---| HL1 | | L0|H0 | HL1 | | LL1 | HL1 | | | | |
| 543 | |LH0|HH0| | | | | | | | | | | | |
| 544 | |-------+-------|->|-------+-------|->|-------+-------|->| L1 | H1 |->... |
| 545 | | | | | | | | | | | | | |
| 546 | | LH1 | HH1 | | LH1 | HH1 | | LH1 | HH1 | | | | |
| 547 | | | | | | | | | | | | | |
| 548 | --------------- --------------- --------------- --------------- |
| 549 | |
| 550 | |
| 551 | 1D Filter: |
| 552 | ---------- |
| 553 | 1. interleave the samples of the low and high frequency subbands like |
| 554 | s={L0, H0, L1, H1, L2, H2, L3, H3, ... } |
| 555 | note, this can end with a L or a H, the number of elements shall be w |
| 556 | s[-1] shall be considered equivalent to s[1 ] |
| 557 | s[w ] shall be considered equivalent to s[w-2] |
| 558 | |
| 559 | 2. perform the lifting steps in order as described below |
| 560 | |
| 561 | 5/3 Integer filter: |
| 562 | 1. s[i] -= (s[i-1] + s[i+1] + 2)>>2; for all even i < w |
| 563 | 2. s[i] += (s[i-1] + s[i+1] )>>1; for all odd i < w |
| 564 | |
| 565 | \ | /|\ | /|\ | /|\ | /|\ |
| 566 | \|/ | \|/ | \|/ | \|/ | |
| 567 | + | + | + | + | -1/4 |
| 568 | /|\ | /|\ | /|\ | /|\ | |
| 569 | / | \|/ | \|/ | \|/ | \|/ |
| 570 | | + | + | + | + +1/2 |
| 571 | |
| 572 | |
| 573 | Snow's 9/7 Integer filter: |
| 574 | 1. s[i] -= (3*(s[i-1] + s[i+1]) + 4)>>3; for all even i < w |
| 575 | 2. s[i] -= s[i-1] + s[i+1] ; for all odd i < w |
| 576 | 3. s[i] += ( s[i-1] + s[i+1] + 4*s[i] + 8)>>4; for all even i < w |
| 577 | 4. s[i] += (3*(s[i-1] + s[i+1]) )>>1; for all odd i < w |
| 578 | |
| 579 | \ | /|\ | /|\ | /|\ | /|\ |
| 580 | \|/ | \|/ | \|/ | \|/ | |
| 581 | + | + | + | + | -3/8 |
| 582 | /|\ | /|\ | /|\ | /|\ | |
| 583 | / | \|/ | \|/ | \|/ | \|/ |
| 584 | (| + (| + (| + (| + -1 |
| 585 | \ + /|\ + /|\ + /|\ + /|\ +1/4 |
| 586 | \|/ | \|/ | \|/ | \|/ | |
| 587 | + | + | + | + | +1/16 |
| 588 | /|\ | /|\ | /|\ | /|\ | |
| 589 | / | \|/ | \|/ | \|/ | \|/ |
| 590 | | + | + | + | + +3/2 |
| 591 | |
| 592 | optimization tips: |
| 593 | following are exactly identical |
| 594 | (3a)>>1 == a + (a>>1) |
| 595 | (a + 4b + 8)>>4 == ((a>>2) + b + 2)>>2 |
| 596 | |
| 597 | 16bit implementation note: |
| 598 | The IDWT can be implemented with 16bits, but this requires some care to |
| 599 | prevent overflows, the following list, lists the minimum number of bits needed |
| 600 | for some terms |
| 601 | 1. lifting step |
| 602 | A= s[i-1] + s[i+1] 16bit |
| 603 | 3*A + 4 18bit |
| 604 | A + (A>>1) + 2 17bit |
| 605 | |
| 606 | 3. lifting step |
| 607 | s[i-1] + s[i+1] 17bit |
| 608 | |
| 609 | 4. lifiting step |
| 610 | 3*(s[i-1] + s[i+1]) 17bit |
| 611 | |
| 612 | |
| 613 | TODO: |
| 614 | ===== |
| 615 | Important: |
| 616 | finetune initial contexts |
| 617 | flip wavelet? |
| 618 | try to use the wavelet transformed predicted image (motion compensated image) as context for coding the residual coefficients |
| 619 | try the MV length as context for coding the residual coefficients |
| 620 | use extradata for stuff which is in the keyframes now? |
| 621 | implement per picture halfpel interpolation |
| 622 | try different range coder state transition tables for different contexts |
| 623 | |
| 624 | Not Important: |
| 625 | compare the 6 tap and 8 tap hpel filters (psnr/bitrate and subjective quality) |
| 626 | spatial_scalability b vs u (!= 0 breaks syntax anyway so we can add a u later) |
| 627 | |
| 628 | |
| 629 | Credits: |
| 630 | ======== |
| 631 | Michael Niedermayer |
| 632 | Loren Merritt |
| 633 | |
| 634 | |
| 635 | Copyright: |
| 636 | ========== |
| 637 | GPL + GFDL + whatever is needed to make this a RFC |