Imported Upstream version 1.4+222+hg5f9f7194267b

[deb_x265.git] / ChangeLog
diff --git a/ChangeLog b/ChangeLog

index 80323fb0af5d53bc4dad85351013eaf339c1546c..8aa1413e5f154dd974e85ee2469bf2a05adad0b8 100644 (file)
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,1639 @@
+2014-12-23  Satoshi Nakagawa  <nakagawa424@oki.com>
+
+       * source/encoder/rdcost.h, source/encoder/search.cpp:
+       rdcost: unify scaleChromaDist*()
+       [5f9f7194267b] [tip]
+
+2014-12-23  Gopu Govindaswamy  <gopu@multicorewareinc.com>
+
+       * source/encoder/encoder.cpp:
+       encoder: allocate memory for inter and intra analysis data based on
+       slicetype
+       [9fdab427a191]
+
+       * source/encoder/analysis.cpp, source/encoder/analysis.h:
+       analysis: remove redundant argument in compressIntraCU
+       [c4ec3f22846b]
+
+2014-12-20  Satoshi Nakagawa  <nakagawa424@oki.com>
+
+       * source/encoder/search.cpp:
+       fix 4:4:4 rd<=1
+       [8d2f418829c8]
+
+2014-12-18  David T Yuen  <dtyx265@gmail.com>
+
+       * source/common/x86/asm-primitives.cpp, source/common/x86/dct8.asm,
+       source/common/x86/dct8.h:
+       asm: idct[8x8] sse2 12232 -> 3500 over c code 3550 -> 3500 over
+       intrinsic
+       [7b816fdb393d]
+
+2014-12-17  Steve Borho  <steve@borho.org>
+
+       * source/PPA/ppaCPUEvents.h, source/encoder/frameencoder.cpp:
+       ppa: emit one event per CTU for more clarity, disable frame threads
+       events
+
+       The frame threads are generally uninteresting when WPP is in use
+       [78ae7996a1ce]
+
+       * source/PPA/ppaCPUEvents.h, source/encoder/frameencoder.cpp,
+       source/encoder/framefilter.cpp, source/encoder/slicetype.cpp,
+       source/x265.cpp:
+       ppa: refine event names
+
+       Drop the unused names, remove uninteresting events. Try to cover the
+       main thread pool tasks and the frame encoder times.
+       [6cbd7d26b2a1]
+
+       * source/PPA/ppa.cpp, source/PPA/ppa.h, source/PPA/ppaApi.h:
+       ppa: simplify interfaces, enforce coding style
+       [952a2a361fcb]
+
+       * source/common/common.h, source/encoder/analysis.cpp,
+       source/encoder/frameencoder.cpp, source/encoder/framefilter.cpp,
+       source/x265.cpp:
+       ppa: minimize code foot-print of profiling events
+
+       This will allow us to add support for more profiling systems without
+       littering the code
+       [3315d6c0ced1]
+
+       * doc/reST/cli.rst, source/x265.h:
+       doc: improve documentation for --stats and multi-pass in general
+       [42fb030a4c43]
+
+2014-12-16  Min Chen  <chenm003@163.com>
+
+       * source/encoder/nal.cpp:
+       fix: output wrong WppEntryOffset when emulating start code at end of
+       WPP row
+       [295d033cb091]
+
+2014-12-16  Aasaipriya Chandran  <aasaipriya@multicorewareinc.com>
+
+       * source/common/x86/asm-primitives.cpp,
+       source/common/x86/ipfilter8.asm:
+       asm: chroma_hpp[16x16] for colorspace i420 in avx2 improve
+       1540c->969c
+       [775ebb4694ad]
+
+       * source/common/x86/asm-primitives.cpp,
+       source/common/x86/ipfilter8.asm:
+       asm: chroma_hpp[32x32] for colorspace i420 in avx2 improve
+       6189c->3537c
+       [619c0e654f5b]
+
+2014-12-13  Steve Borho  <steve@borho.org>
+
+       * source/encoder/api.cpp, source/encoder/encoder.cpp,
+       source/encoder/encoder.h:
+       encoder: combine create() and init() functions
+
+       They were always called back-to-back and their functionality was
+       non-distinct. It also now checks for abort errors at startup and
+       returns a NULL from the encoder open function (early aborts are
+       usually malloc failures)
+       [6ba7be7b1697]
+
+       * source/CMakeLists.txt, source/cmake/CMakeASM_YASMInformation.cmake:
+       cmake: eoln and white-space fixes, slight refactor
+       [ee36b6311aaf]
+
+2014-12-12  Steve Borho  <steve@borho.org>
+
+       * source/encoder/analysis.h:
+       analysis: typo
+       [d00a5b93c07e]
+
+       * source/CMakeLists.txt, source/cmake/CMakeASM_YASMInformation.cmake:
+       cmake: allow position independent code to be generally configurable
+       (fixes #91)
+
+       Allow the builder to over-ride the default
+       [afdcb68dace4]
+
+2014-12-11  Steve Borho  <steve@borho.org>
+
+       * source/encoder/entropy.cpp, source/encoder/entropy.h:
+       entropy: add methods to estimate CU mode decision costs
+       [e0374c37e745]
+
+2014-12-12  Steve Borho  <steve@borho.org>
+
+       * source/common/pixel.cpp:
+       pixel: nits
+       [750839e8e0cf]
+
+       * doc/reST/cli.rst, source/common/param.cpp, source/x265.h:
+       api: change default AQ mode to 1
+
+       We've received a lot of feedback that AQ mode 2 is often
+       problematic, but AQ mode 1 is generally safe and useful.
+       [cbf5cad2e12b]
+
+2014-12-12  Divya Manivannan  <divya@multicorewareinc.com>
+
+       * source/common/x86/asm-primitives.cpp,
+       source/common/x86/ipfilter8.asm:
+       asm: luma_vps[4x4] in avx2: improve 337c->219c
+       [6f770a6b24f0]
+
+2014-12-11  Steve Borho  <steve@borho.org>
+
+       * build/README.txt:
+       build: update README to not be so specific about yasm 1.2.0
+       [b1c2ef980dfe]
+
+2014-12-10  Steve Borho  <steve@borho.org>
+
+       * source/encoder/reference.cpp:
+       reference: avoid weighting pixels when plane is unweighted
+
+       Just because the luma plane is weighted does not mean either of the
+       chroma planes are also weighted. If the weight parameters for a
+       given plane are not present, then just directly use the un-weighted
+       reference plane.
+       [ae50be4c3a6e]
+
+2014-12-11  Aasaipriya Chandran  <aasaipriya@multicorewareinc.com>
+
+       * source/common/x86/asm-primitives.cpp,
+       source/common/x86/ipfilter8.asm:
+       asm: chroma_hpp[4x4] for colorspace i420 in avx2 improve 217c->192c
+       [667e4ea0899f]
+
+2014-12-10  Steve Borho  <steve@borho.org>
+
+       * doc/reST/cli.rst:
+       doc: describe what happens when psy-rd is too high for bitrate
+       [9c3b478a60b2]
+
+2014-12-10  Divya Manivannan  <divya@multicorewareinc.com>
+
+       * source/common/x86/asm-primitives.cpp,
+       source/common/x86/ipfilter8.asm:
+       asm: chroma_vpp[32x32] for colorspace i420 in avx2: improve
+       3881c->1933c
+       [04d145864dd6]
+
+2014-12-10  Steve Borho  <steve@borho.org>
+
+       * source/encoder/analysis.cpp:
+       analysis: avoid redundant MC work
+       [9e244ebe21d2]
+
+       * source/encoder/analysis.cpp:
+       analysis: fix chroma predictions for 2Nx2N bidir at zero mv
+
+       Valgrind discovered that the chroma predictions were not in fact
+       predicted
+       [0dc816f49c01]
+
+       * source/x265.h:
+       api: add some blank lines
+       [ab1e1e0ca75c]
+
+2014-12-09  Divya Manivannan  <divya@multicorewareinc.com>
+
+       * source/common/x86/asm-primitives.cpp, source/common/x86/ipfilter8.h:
+       asm: chroma_vpp[4x4] for colorspace i422 in avx2: improve 228c->184c
+       [5f16dc82652a]
+
+2014-12-10  Steve Borho  <steve@borho.org>
+
+       * source/common/lowres.cpp, source/common/lowres.h,
+       source/encoder/frameencoder.cpp, source/encoder/motion.cpp,
+       source/encoder/reference.cpp, source/encoder/reference.h,
+       source/encoder/slicetype.cpp:
+       reference: weight chroma planes of reference pictures if using
+       chroma satd
+       [6c32c8d4e0a1]
+
+2014-12-08  Steve Borho  <steve@borho.org>
+
+       * doc/reST/cli.rst, source/encoder/analysis.cpp,
+       source/encoder/frameencoder.cpp, source/encoder/motion.cpp,
+       source/encoder/motion.h, source/encoder/search.cpp,
+       source/encoder/slicetype.cpp:
+       motion: chroma ME [CHANGES OUTPUTS]
+
+       include chroma distortion in satd decisions when --subme > 2 and
+       chroma blocks are multiples of 4x4
+
+       This required making the MotionEstimate class more aware of PicYuv
+       and its indexing scheme so that it could find the correct chroma
+       pixels to interpolate. This allowed me to merge the setSourcePlane()
+       method into the lookahead's version of setSourcePU.
+
+       This requires further work. The Reference class needs to generate
+       weighted chroma planes if subpel refine will use chroma residual
+       cost. Until this is fixed, the chroma subpel steps will use
+       unweighted reference pixels.
+       [afd5620c77a4]
+
+2014-12-09  Steve Borho  <steve@borho.org>
+
+       * source/common/pixel.cpp, source/common/primitives.cpp:
+       primitives: use NULL chroma satd func pointers for blocks not
+       capable of satd
+
+       If the block is not a multiple of 4x4, then chroma satd measurements
+       are not possible, so we will disable chroma residual measurements
+       for these block sizes (and thus only measure luma residual)
+       [4c97d85c8488]
+
+       * source/common/primitives.cpp:
+       primitives: use luma satd functions for chroma, where applicable
+
+       The commented lines should be considered TODO items for the assembly
+       team
+       [29489f2fc2c7]
+
+       * source/common/pixel.cpp, source/common/primitives.h:
+       primitives: add a chroma satd table that is indexed by luma
+       partition
+
+       There are a number of chroma partitions that have dimensions of 2 or
+       6 and those cannot use satd (which is 4x4 based), so we degrade them
+       down to SAD which makes me unhappy.
+       [47c490836fd8]
+
+2014-12-08  Steve Borho  <steve@borho.org>
+
+       * source/common/lowres.h, source/encoder/reference.cpp,
+       source/encoder/reference.h:
+       reference: move reconPic pointer to base class so it is available to
+       ME
+       [dd55fd39745c]
+
+       * source/encoder/motion.cpp:
+       motion: sync argument names between the header and the cpp file
+       [e2b958539e6a]
+
+       * source/common/yuv.cpp:
+       yuv: fix size check in copyFromYuv
+
+       The target buffer needs to be as large as or larger than the source.
+       The fact that this check has never failed tells me all users of this
+       function have equal sized arguments.
+       [15be837edb36]
+
+       * source/encoder/search.cpp:
+       search: rename index variable to puIdx for consistency
+       [1cab6a4c0ab8]
+
+       * source/common/yuv.cpp, source/common/yuv.h,
+       source/encoder/analysis.cpp, source/encoder/motion.cpp,
+       source/encoder/motion.h, source/encoder/search.cpp:
+       motion: add a version of setSourcePU which can accept fenc from
+       another Yuv
+
+       The analysis code has already gone through the trouble of loading
+       the CU's fenc pixels from the source picture into a much smaller Yuv
+       buffer with small strides. This allows us to avoid accessing the
+       fenc PicYuv in a performance critical portion of the encoder.
+
+       We utilize the Yuv class to copy the PU, since it already has logic
+       for calculating part offsets for luma and chroma
+       [1d1f803a3eec]
+
+       * source/encoder/motion.cpp, source/encoder/motion.h,
+       source/encoder/search.cpp, source/encoder/slicetype.cpp,
+       source/encoder/slicetype.h:
+       motion: use Yuv instance to hold fenc PU pixels (preparing for
+       chroma ME)
+
+       This required making an init function which accepts the encoder
+       color space. We use 4:0:0 for lookahead since it does not keep
+       chroma planes. Note that I explicitly renamed this Yuv instance
+       fencPUYuv to make sure people understand it is not a duplicate of
+       the fencYuv kept by the Analysis structure; it will often be a sub-
+       partition of the CU fenc yuv.
+       [e640c8461495]
+
+       * source/encoder/slicetype.cpp:
+       slicetype: cleanups - use bufSATD method where applicable
+       [b5b05c94ae7c]
+
+       * source/common/yuv.cpp:
+       yuv: plumb in support for mono-chrome YUV buffers
+
+       The need for this will be obvious in the next commit
+       [5a44d694ed9b]
+
+2014-12-09  Aasaipriya Chandran  <aasaipriya@multicorewareinc.com>
+
+       * source/common/x86/asm-primitives.cpp,
+       source/common/x86/ipfilter8.asm:
+       asm: chroma_hpp[8x8] for colorspace i420 in avx2 improve 530c->373c
+       [88498ec9b10b]
+
+2014-12-08  Steve Borho  <steve@borho.org>
+
+       * source/common/x86/asm-primitives.cpp:
+       asm: fix x86 link errors
+       [b376435b31c1]
+
+2014-12-09  Deepthi Nandakumar  <deepthi@multicorewareinc.com>
+
+       * source/common/x86/asm-primitives.cpp,
+       source/common/x86/ipfilter8.asm:
+       asm: chroma_vpp[16x16] for colorspace i420 in avx2: improve
+       998c->978c
+       [d042d1ea2d69]
+
+2014-12-05  Divya Manivannan  <divya@multicorewareinc.com>
+
+       * source/common/x86/asm-primitives.cpp,
+       source/common/x86/ipfilter8.asm:
+       asm: chroma_vpp[8x8] for colorspace i420 in avx2: improve 338c->269c
+       [fee9fb1f9762]
+
+2014-12-06  Satoshi Nakagawa  <nakagawa424@oki.com>
+
+       * source/common/cudata.h, source/encoder/analysis.cpp,
+       source/encoder/entropy.cpp, source/encoder/entropy.h,
+       source/encoder/search.cpp, source/encoder/search.h:
+       refine tuDepth related
+       [53f7efef5ebd]
+
+2014-12-05  Steve Borho  <steve@borho.org>
+
+       * source/cmake/version.cmake:
+       cmake: do not use a cache string for version found in hg_archive.txt
+       (refs #84)
+
+       This was not passing the tagged version number to version.cpp
+       [35d086074bb5]
+
+2014-12-04  Aarthi Thirumalai  <Aarthi Thirumalai>
+
+       * source/encoder/ratecontrol.cpp:
+       rc : fix bug in deciding qp for first frame in CRF
+       [1458ad34157c]
+
+       * source/encoder/rdcost.h, source/encoder/sao.cpp:
+       rc: fix chroma qp and chroma lambda derivations.
+
+       fix the chroma qp values for Main10 profile, derive chroma qp from
+       luma qp values according to the HEVC spec. improves quality at high
+       qps.
+       [a1e76461c0d4]
+
+2014-12-05  Deepthi Nandakumar  <deepthi@multicorewareinc.com>
+
+       * source/encoder/analysis.cpp:
+       analysis: comments
+       [4ae9691c1a23]
+
+2014-12-05  Satoshi Nakagawa  <nakagawa424@oki.com>
+
+       * source/encoder/analysis.cpp:
+       fix chroma distortion for 4:2:2
+       [42df5c8bdb25]
+
+2014-12-04  Steve Borho  <steve@borho.org>
+
+       * source/encoder/CMakeLists.txt:
+       cmake: disable idiotic uninitialized local variable warnings from VC
+
+       If the compiler is not going to make any minimal attempt to figure
+       out if a variable was initialized, I am not going to make any
+       attempt to look at their stupid warnings.
+       [c9fd35f97e6d]
+
+2014-12-04  Divya Manivannan  <divya@multicorewareinc.com>
+
+       * source/common/x86/asm-primitives.cpp,
+       source/common/x86/ipfilter8.asm, source/common/x86/ipfilter8.h:
+       asm: chroma_vpp[4x4] for colorspace i420 in avx2: improve 228c->184c
+       [23e637065aec]
+
+2014-12-04  Steve Borho  <steve@borho.org>
+
+       * source/encoder/analysis.cpp, source/encoder/analysis.h:
+       analysis: cache m_bChromaSa8d and reduce redundant work
+
+       Renamed some 'part' variables to 'puIdx' to avoid variable shadow
+       warnings and for consistency with search.cpp
+       [cc327e846dac]
+
+2014-12-04  Deepthi Nandakumar  <deepthi@multicorewareinc.com>
+
+       * source/encoder/analysis.cpp:
+       analysis: add chroma distortion to rdLevels 3 and 4
+
+       At these rdLevels, inter/bidir and merge candidate decisions were
+       being taken based on luma sa8dCost only. This will increase bitrate
+       and lower ssim slightly, with better subjective quality.
+
+       Also fixed some naming nits.
+       [1d2a11f6a33f]
+
+       * doc/reST/cli.rst, source/CMakeLists.txt, source/common/param.cpp,
+       source/encoder/frameencoder.cpp, source/encoder/search.cpp,
+       source/x265.cpp, source/x265.h:
+       noiseReduction: allow separate strengths to be specified for intra
+       and inter CUs
+       [ec06f5878e8b]
+
+2014-12-04  Aarthi Thirumalai  <Aarthi Thirumalai>
+
+       * source/common/x86/asm-primitives.cpp:
+       primitives: fix build error in refactor of chroma p2s primitive.
+       [511dde5ac1de]
+
+2014-12-03  Steve Borho  <steve@borho.org>
+
+       * source/common/ipfilter.cpp, source/common/lowres.cpp,
+       source/common/pixel.cpp, source/common/predict.cpp,
+       source/common/primitives.cpp, source/common/primitives.h,
+       source/common/quant.cpp, source/common/shortyuv.cpp,
+       source/common/x86/asm-primitives.cpp, source/common/yuv.cpp,
+       source/encoder/search.cpp, source/test/ipfilterharness.cpp,
+       source/test/pixelharness.cpp:
+       primitives: cleanup EncoderPrimitives, refactor chroma p2s primitive
+
+       No behavior changes
+       [b1b5f06fe9ce]
+
+       * source/common/pixel.cpp, source/common/primitives.h:
+       primitives: remove unused chroma lowres primitive
+       [bfeee4ac5463]
+
+       * source/encoder/search.cpp:
+       search: avoid AMVP selection if both MVs are the same
+
+       This is a simple work avoidance optimization, should have no effect
+       on outputs
+       [2f66c3284c35]
+
+       * source/common/CMakeLists.txt, source/common/primitives.cpp:
+       cmake: remove buggy workarounds for partial SIMD support (fixes #92)
+
+       In the past, there were a number of primitives written in SIMD
+       intrinsics that could work without compiling with YASM. Most of
+       those are now gone, and we generally require YASM for SIMD support.
+       This commit removes support for using the few remaining SIMD
+       intrinsics without having YASM to provide implementations of
+       x265_emms(), x265_cpu_cpuid(), etc. Fixing a bug in the process.
+       [d7b5e73fc91a]
+
+       * doc/reST/cli.rst:
+       doc: fix typo (closes #83)
+       [7192725cbb0a]
+
+       * doc/reST/cli.rst, source/common/param.cpp, source/x265.cpp,
+       source/x265.h:
+       param: allow NR values from 1..99, clarify docs (closes #87)
+       [21b869f9f706]
+
+       * doc/reST/Makefile, doc/reST/conf.py, doc/reST/x265.rst:
+       doc: add support for reST generated man-pages (closes #89)
+
+       This patch was attached to issue #89 by djcj
+       [ff08fd9b294c]
+
+       * source/common/constants.cpp:
+       constants: adjust lambda tables for 10bit encodes (fixes #55)
+
+       Since samples are 10bits, where two bits of extra resolution has
+       been added to add more granularity, distortion also has two extra
+       bits. A typical resolution for this problem is to down-shift
+       distortion by 2 bits everywhere, before adding lambda * bits to
+       calculate RD cost. Instead, we multiply lambda by 4 (essentially
+       shift it up by two bits) so distortion and lambda * bits are both at
+       the higher scale.
+
+       lambda2 uses the square of the up-shifted lambda, so it has the
+       doubled up-shift same as the squared distortion values used for RDO.
+
+       Example output change: ./x265
+       /Volumes/video/sintel_trailer_2k_480p24.y4m o.bin --ssim --no-info
+
+       Main: 195.67 kb/s, SSIM Mean Y: 0.9833338 (17.782 dB) Main10 before:
+       363.49 kb/s, SSIM Mean Y: 0.9888182 (19.515 dB) Main10 after: 206.54
+       kb/s, SSIM Mean Y: 0.9855121 (18.390 dB)
+       [014a1e0fb58b]
+
+2014-12-03  Gopu Govindaswamy  <gopu@multicorewareinc.com>
+
+       * source/encoder/encoder.cpp:
+       encoder: fix binary mismatch for analysis load vs save with same
+       bitrate
+       [50d2b92ecc89]
+
+2014-12-02  Steve Borho  <steve@borho.org>
+
+       * Merge
+       [de54cffaecf2]
+
+2014-11-27  Divya Manivannan  <divya@multicorewareinc.com>
+
+       * source/common/x86/asm-primitives.cpp,
+       source/common/x86/ipfilter8.asm:
+       asm: luma_vpp[8x16, 8x32] in avx2: improve 1139c->774c, 1968c->1452c
+       [2e055cbc9046]
+
+       * source/common/x86/asm-primitives.cpp,
+       source/common/x86/ipfilter8.asm:
+       asm: luma_vpp[12x16] in avx2: improve 1977c->1418c
+       [ef4ca8474f5c]
+
+       * source/common/x86/asm-primitives.cpp,
+       source/common/x86/ipfilter8.asm:
+       asm: luma_vpp[24x32] in avx2: improve 5637c->3695c
+       [8aeeaf6950f7]
+
+       * source/common/x86/asm-primitives.cpp,
+       source/common/x86/ipfilter8.asm:
+       asm: luma_vpp[48x64] in avx2: improve 21298c->14696c
+       [d97b1c9f5106]
+
+2014-12-02  Deepthi Nandakumar  <deepthi@multicorewareinc.com>
+
+       * source/x265.cpp:
+       x265: add ratetol to command line help
+       [f636a0aadd68]
+
+2014-12-01  Deepthi Nandakumar  <deepthi@multicorewareinc.com>
+
+       * source/CMakeLists.txt, source/encoder/encoder.cpp, source/x265.h:
+       encoder: free csv file name
+
+       Since strdup is used uniformly for filenames, csvfn cannot be const.
+       [bde1753de250]
+
+2014-11-27  Divya Manivannan  <divya@multicorewareinc.com>
+
+       * source/common/x86/asm-primitives.cpp,
+       source/common/x86/ipfilter8.asm:
+       asm: luma_vpp[8x16, 8x32] in avx2: improve 1139c->774c, 1968c->1452c
+       [5ee693e4b5fa]
+
+       * source/common/x86/asm-primitives.cpp,
+       source/common/x86/ipfilter8.asm:
+       asm: luma_vpp[12x16] in avx2: improve 1977c->1418c
+       [e280ce2e5076]
+
+       * source/common/x86/asm-primitives.cpp,
+       source/common/x86/ipfilter8.asm:
+       asm: luma_vpp[24x32] in avx2: improve 5637c->3695c
+       [e1ca311bbb5b]
+
+       * source/common/x86/asm-primitives.cpp,
+       source/common/x86/ipfilter8.asm:
+       asm: luma_vpp[48x64] in avx2: improve 21298c->14696c
+       [984271a3aae9]
+
+2014-11-30  Deepthi Nandakumar  <deepthi@multicorewareinc.com>
+
+       * source/x265.cpp:
+       x265: remove validateFanout
+       [d9f835ddd112]
+
+2014-11-27  Satoshi Nakagawa  <nakagawa424@oki.com>
+
+       * source/common/pixel.cpp, source/common/primitives.h,
+       source/common/quant.cpp, source/common/x86/asm-primitives.cpp,
+       source/common/x86/blockcopy8.asm, source/common/x86/blockcopy8.h,
+       source/test/pixelharness.cpp, source/test/pixelharness.h:
+       primitives: refactor tskip related
+       [90401d77a05d]
+
+2014-11-28  Satoshi Nakagawa  <nakagawa424@oki.com>
+
+       * source/common/dct.cpp, source/common/quant.h,
+       source/common/x86/dct8.asm, source/common/x86/dct8.h,
+       source/encoder/search.cpp:
+       nits
+       [e2db5f3c6df8]
+
+2014-11-28  Deepthi Nandakumar  <deepthi@multicorewareinc.com>
+
+       * source/common/param.cpp:
+       param: disable b-intra in B frames when tune grain is true.
+       [d32249002258]
+
+2014-11-25  Gopu Govindaswamy  <gopu@multicorewareinc.com>
+
+       * source/encoder/encoder.h:
+       encoder: make all member fields public
+       [af6b68f0feaa]
+
+2014-11-26  Steve Borho  <steve@borho.org>
+
+       * doc/reST/cli.rst, doc/reST/presets.rst:
+       doc: restructure documentation with better grouping, improve cross-
+       refs
+       [dfe0803ae6be]
+
+       * doc/reST/introduction.rst:
+       doc: fix a sphinx build warning
+       [f488b394693b]
+
+       * doc/reST/presets.rst:
+       doc: improve readability of film grain section
+       [03bd64057e72]
+
+       * doc/reST/cli.rst, doc/reST/presets.rst:
+       doc: add cbr to the list of tunings, add helpful cross-refs
+       [071dbe651364]
+
+2014-11-27  Aarthi Thirumalai  <Aarthi Thirumalai>
+
+       * source/CMakeLists.txt, source/common/param.cpp,
+       source/encoder/ratecontrol.cpp, source/x265.cpp, source/x265.h:
+       rc: introduce cli option to tune for cbr.
+       [8e602ed5ca4c]
+
+2014-11-25  Aarthi Thirumalai  <Aarthi Thirumalai>
+
+       * source/encoder/ratecontrol.cpp:
+       rc: improve the frame size planning with respect to vbv buffer
+       occupancy and the lookahead window.
+       [2870269cdd60]
+
+       * source/encoder/ratecontrol.cpp, source/encoder/ratecontrol.h:
+       rc: adjust qp for B frames from ABR feedback in case of CBR.
+
+       limits the bitrate fluctuation for CBR with respect to the target
+       bitrate set.
+       [576c675adf92]
+
+       * source/encoder/ratecontrol.cpp, source/encoder/ratecontrol.h:
+       rc: limit bit amortization in ABR to longer sequences
+       [11342c8376dd]
+
+2014-11-26  Steve Borho  <steve@borho.org>
+
+       * source/encoder/ratecontrol.cpp:
+       rc: use c-style typecasts
+       [c67b4f3a5e3c]
+
+2014-11-19  Aarthi Thirumalai  <Aarthi Thirumalai>
+
+       * source/encoder/ratecontrol.cpp:
+       rc: tune midframe vbv logic for B frames
+       [8f5fa9538e13]
+
+2014-11-21  Aarthi Thirumalai  <Aarthi Thirumalai>
+
+       * source/encoder/slicetype.cpp:
+       slicetype: fix vbv lookahead data collection for all frames within
+       the lookahead window.
+       [52246e09727d]
+
+2014-11-26  Divya Manivannan  <divya@multicorewareinc.com>
+
+       * source/common/x86/asm-primitives.cpp,
+       source/common/x86/ipfilter8.asm:
+       asm: luma_hpp[8x8, 8x16, 8x32] in avx2: improve 623c->523c,
+       1384c->1083c, 2555c->2058c
+       [01d82aa06285]
+
+2014-11-26  Aasaipriya Chandran  <aasaipriya@multicorewareinc.com>
+
+       * source/common/x86/asm-primitives.cpp,
+       source/common/x86/ipfilter8.asm:
+       Luma_hpp[48x64] avx2 asm code : improved 25053c->17882c
+       [bb7303bb00d1]
+
+2014-11-26  Divya Manivannan  <divya@multicorewareinc.com>
+
+       * source/common/x86/asm-primitives.cpp,
+       source/common/x86/ipfilter8.asm:
+       asm: luma_hpp[8x4] in avx2: improve 357c->261c
+       [a88ddc970748]
+
+2014-11-26  Aasaipriya Chandran  <aasaipriya@multicorewareinc.com>
+
+       * source/common/x86/asm-primitives.cpp,
+       source/common/x86/ipfilter8.asm:
+       Luma_hpp[32x8 , 32x16 , 32x24 , 32x32 , 32x64] avx2 asm code:
+       improved 2032c->1556c, 4238c->3014c, 6696c->4801c, 8697c->6433c,
+       16823c->12297c
+       [b0153f354186]
+
+2014-11-26  Divya Manivannan  <divya@multicorewareinc.com>
+
+       * source/common/x86/asm-primitives.cpp,
+       source/common/x86/ipfilter8.asm:
+       asm: luma_vpp[64x16] in avx2: improve 7245c->4910c
+       [5700875b428f]
+
+       * source/common/x86/asm-primitives.cpp,
+       source/common/x86/ipfilter8.asm:
+       asm: luma_vpp[64x32, 64x48, 64x64] in avx2: improve 14150c->9810c,
+       21132c->14684c, 28663c->19616c
+       [db518f7c8474]
+
+2014-11-25  Divya Manivannan  <divya@multicorewareinc.com>
+
+       * source/common/x86/asm-primitives.cpp,
+       source/common/x86/ipfilter8.asm:
+       asm: luma_vpp[32x8] in avx2: improve 2047c->1472c
+       [d57c28a3010b]
+
+       * source/common/x86/asm-primitives.cpp,
+       source/common/x86/ipfilter8.asm:
+       asm: luma_vpp[32x24] in avx2: improve 5562c->3899c
+       [dedc5a8589a6]
+
+       * source/common/x86/asm-primitives.cpp,
+       source/common/x86/ipfilter8.asm:
+       asm: luma_vpp[32x16] in avx2: improve 3808c->2491c
+       [3db00b06aea6]
+
+       * source/common/x86/asm-primitives.cpp,
+       source/common/x86/ipfilter8.asm:
+       asm: luma_vpp[32x32, 32x64] in avx2: improve 7247c->4909c,
+       14365c->9774c
+       [adf15e303c37]
+
+       * source/common/x86/asm-primitives.cpp,
+       source/common/x86/ipfilter8.asm:
+       asm: luma_vpp[16x32, 16x64] in avx2: improve 3875c->2463c,
+       7499c->4894c
+       [45456cd145d8]
+
+2014-11-25  Aasaipriya Chandran  <aasaipriya@multicorewareinc.com>
+
+       * source/common/x86/asm-primitives.cpp,
+       source/common/x86/ipfilter8.asm:
+       asm: avx2 for Luma_hpp[16x4, 16x8, 16x12, 16x16 , 16x32, 16x64]
+
+       619c->458c, 1174c->812c, 1694c->1112c, 2291c->1535c, 4846c->3207c,
+       9294c->6104c
+       [d11d3120361f]
+
+       * source/common/x86/asm-primitives.cpp,
+       source/common/x86/ipfilter8.asm:
+       asm: avx2 for luma_hpp[64x64, 64x48, 64x32, 64x16]
+
+       33137c->22606c , 24826c->17202c , 16726c->11560c , 7830c->5534c
+       [1e8a0f1e0889]
+
+2014-11-22  Steve Borho  <steve@borho.org>
+
+       * source/encoder/frameencoder.cpp, source/encoder/frameencoder.h:
+       frameencoder: do not use bitmaps for framefilter if not WPP
+
+       The non-WPP row loop wants to do frame filter work in between each
+       row, with a m_filterRowDelay lag. If we use the functions which
+       update the bitmap, it would allow a worker thread to process a
+       filter row before it was ready. In short, the non-WPP path was never
+       intended to work in the presence of a thread pool. This was causing
+       crashes when --no-wpp --pmode||--pme was used.
+       [8011e2a68b88]
+
+2014-11-24  Steve Borho  <steve@borho.org>
+
+       * source/encoder/frameencoder.cpp:
+       frameencoder: release row lock while waiting during VBV restarts
+
+       This fixes what appears to have been an old deadlock bug that has
+       just recently become very reproducible
+       [82f6e4847d57]
+
+2014-11-21  Divya Manivannan  <divya@multicorewareinc.com>
+
+       * source/common/x86/asm-primitives.cpp,
+       source/common/x86/ipfilter8.asm:
+       asm: luma_vpp[16x4] in avx2: improve 734c->497c
+       [3c6f703f94ea]
+
+       * source/common/x86/asm-primitives.cpp,
+       source/common/x86/ipfilter8.asm:
+       asm: luma_vpp[16x8] in avx2: improve 1195c->745c
+       [fc83cf5299ae]
+
+       * source/common/x86/asm-primitives.cpp,
+       source/common/x86/ipfilter8.asm:
+       asm: luma_vpp[16x12] in avx2: improve 1644c->1018c
+       [65017182318c]
+
+2014-11-21  Praveen Tiwari  <Praveen Tiwari>
+
+       * source/common/dct.cpp:
+       idct32_c: C code optimization
+       [346fccbba4de]
+
+       * source/common/dct.cpp:
+       idct16_c: optimization
+       [388c893d3825]
+
+       * source/common/dct.cpp:
+       idct8_c: optimization
+       [f7d7c480b85d]
+
+       * source/common/dct.cpp:
+       idct4_c: optimization
+       [69a472a77b49]
+
+       * source/common/dct.cpp:
+       dct32_c: optimization
+       [a60dfb900169]
+
+       * source/common/dct.cpp:
+       dct16_c: optimization
+       [7e94ea285179]
+
+       * source/common/dct.cpp:
+       dct8_c: optimization
+       [d426e93e240c]
+
+       * source/common/dct.cpp:
+       dct4_c: C code optimization
+       [d4376e113855]
+
+       * source/common/dct.cpp:
+       idst4_c: optimization
+       [8f373c20bc41]
+
+       * source/common/dct.cpp:
+       dst4_c: optimization
+       [49b66c57972d]
+
+2014-11-21  Satoshi Nakagawa  <nakagawa424@oki.com>
+
+       * source/common/pixel.cpp, source/common/x86/asm-primitives.cpp,
+       source/common/x86/blockcopy8.asm, source/common/x86/blockcopy8.h:
+       fix copy16to16_shl
+       [5a8da9cb52e8]
+
+2014-11-20  Steve Borho  <steve@borho.org>
+
+       * source/encoder/analysis.cpp, source/encoder/analysis.h,
+       source/encoder/search.cpp, source/encoder/search.h:
+       analysis: explicit locking for pmode and pme parameters
+
+       We've found a repro case involving --no-wpp --pmode --pme --preset
+       slower where time-starved worker threads get stuck in the findJob()
+       routine and are pushed off the CPU; in the meantime the master thread
+       moves on to another CU. This caused very hard to reproduce crashes.
+       [2f8df4c972b9]
+
+2014-11-20  David T Yuen  <dtyx265@gmail.com>
+
+       * source/common/vec/dct-sse3.cpp:
+       Updated intrinsic of idct8 sse3 for new input format
+       [2abf89f5c4f2]
+
+2014-11-20  Divya Manivannan  <divya@multicorewareinc.com>
+
+       * source/common/x86/asm-primitives.cpp,
+       source/common/x86/ipfilter8.asm:
+       asm: luma_vpp[16x16] in avx2: improve 2141c->1284c
+       [2a2142982602]
+
+       * source/common/x86/asm-primitives.cpp,
+       source/common/x86/ipfilter8.asm:
+       asm: luma_vpp[8x4] in avx2: improve 498c->257c
+       [c2fd1b7d5d99]
+
+       * source/common/x86/asm-primitives.cpp,
+       source/common/x86/ipfilter8.asm:
+       asm: luma_vpp[8x8] in avx2: improve 701c->387c
+       [562c43f738e4]
+
+2014-11-20  Steve Borho  <steve@borho.org>
+
+       * source/encoder/encoder.cpp:
+       encoder: nits and alloc zero fix
+
+       intraData needs to be zeroed on allocation else if one of the later
+       allocs failed some of the pointers will be uninitialized and passed
+       to X265_FREE()
+       [80dcd3dfb805]
+
+2014-11-20  Praveen Tiwari  <Praveen Tiwari>
+
+       * source/common/dct.cpp:
+       Fix for C code mismatch
+
+       This patch fixes the binary mismatch in encoded output introduced
+       during refactorization of the transform/quant path. Basically it is
+       the original version of the code, to make sure all valid inputs are
+       copied into the input buffer. On the other hand, it is not fully
+       optimized code, but this patch is a quick fix for the problem and
+       allows us to optimize one function at a time.
+       [1d17ec0cb954]
+
+2014-11-20  Satoshi Nakagawa  <nakagawa424@oki.com>
+
+       * source/common/pixel.cpp:
+       fix for old gcc
+       [ed587d360b97]
+
+2014-11-20  Deepthi Nandakumar  <deepthi@multicorewareinc.com>
+
+       * build/icl32/build-all.bat, build/icl32/make-makefile.bat,
+       build/icl64/build-all.bat, build/icl64/make-makefile.bat:
+       build: remove icl32 and icl64 scripts
+
+       Typical Windows ICL users link with Visual Studio
+       [3649fabf90d3]
+
+2014-11-20  Praveen Tiwari  <Praveen Tiwari>
+
+       * source/common/x86/ipfilter8.asm:
+       luma_hpp[4x4]: AVX2 asm code bug fix
+       [4b637cb9b792]
+
+2014-11-20  Gopu Govindaswamy  <gopu@multicorewareinc.com>
+
+       * source/encoder/encoder.cpp:
+       encoder: fix analysis file read
+       [0c25a6eac0ca]
+
+2014-11-20  Satoshi Nakagawa  <nakagawa424@oki.com>
+
+       * source/encoder/analysis.cpp:
+       fix for rd=0
+       [b33cbe130c63]
+
+       * source/common/cudata.cpp, source/common/cudata.h,
+       source/encoder/analysis.cpp, source/encoder/frameencoder.cpp,
+       source/encoder/search.cpp:
+       replace char with int8_t, where it should be signed char
+       [14a8bb7bbcab]
+
+2014-11-19  Praveen Tiwari  <Praveen Tiwari>
+
+       * source/common/x86/asm-primitives.cpp:
+       disable denoiseDct asm code until fixed for Mac OS
+       [f236adb703f5]
+
+2014-11-16  Satoshi Nakagawa  <nakagawa424@oki.com>
+
+       * source/common/dct.cpp, source/common/ipfilter.cpp,
+       source/common/picyuv.h, source/common/pixel.cpp,
+       source/common/predict.cpp, source/common/primitives.h,
+       source/common/quant.cpp, source/common/quant.h,
+       source/common/shortyuv.cpp, source/common/vec/dct-sse3.cpp,
+       source/common/vec/dct-ssse3.cpp, source/common/x86/blockcopy8.h,
+       source/common/x86/dct8.h, source/common/x86/ipfilter8.h,
+       source/common/x86/mc.h, source/common/x86/pixel-util.h,
+       source/common/x86/pixel.h, source/common/yuv.cpp,
+       source/encoder/analysis.cpp, source/encoder/rdcost.h,
+       source/encoder/search.cpp:
+       primitives: clarify constness
+       [99b5cebf8193]
+
+2014-11-18  Steve Borho  <steve@borho.org>
+
+       * source/common/dct.cpp:
+       dct: fix gcc warnings
+       [34cb58c53859]
+
+2014-11-18  Praveen Tiwari  <Praveen Tiwari>
+
+       * source/common/dct.cpp, source/common/pixel.cpp,
+       source/common/primitives.h, source/common/quant.cpp,
+       source/common/quant.h, source/common/vec/dct-sse3.cpp,
+       source/common/vec/dct-sse41.cpp, source/common/vec/dct-ssse3.cpp,
+       source/common/x86/asm-primitives.cpp,
+       source/common/x86/blockcopy8.asm, source/common/x86/blockcopy8.h,
+       source/common/x86/dct8.asm, source/common/x86/dct8.h,
+       source/common/x86/pixel-util.h, source/common/x86/pixel-util8.asm,
+       source/test/mbdstharness.cpp, source/test/mbdstharness.h,
+       source/test/pixelharness.cpp, source/test/pixelharness.h:
+       refactorization of the transform/quant path.
+
+       This patch involves scaling down the DCT/IDCT coefficients from
+       int32_t to int16_t, as they can be accommodated in int16_t without
+       introducing any encode error. This allows us to clean up lots of
+       DCT/IDCT intermediate buffers, optimize encode efficiency for
+       different cli options including noise reduction by reducing data
+       movement operations, and accommodate a larger number of coefficients
+       in a single register for SIMD operations. This patch includes all
+       necessary changes for the transform/quant path including unit test
+       code.
+       [8bee552a1964]
+
+2014-11-19  Satoshi Nakagawa  <nakagawa424@oki.com>
+
+       * source/common/common.h:
+       fseeko for mingw32
+       [cb9bb697fcaa]
+
+2014-11-19  Steve Borho  <steve@borho.org>
+
+       * source/common/threading.h:
+       threading: fixes for VC11 Win32 includes, prune two unused functions
+       [2b830f08d948]
+
+2014-11-18  Steve Borho  <steve@borho.org>
+
+       * source/common/wavefront.cpp:
+       wavefront: fix msvc warning
+
+       warning C4800: 'unsigned long' : forcing value to bool 'true' or
+       'false' (performance warning)
+       [e29c618cd9a7]
+
+       * source/common/param.cpp, source/common/quant.cpp,
+       source/common/threading.h, source/common/threadpool.cpp,
+       source/common/wavefront.cpp, source/common/wavefront.h,
+       source/common/winxp.h, source/encoder/entropy.cpp,
+       source/encoder/slicetype.cpp:
+       threading: use 32bit atomic integer operations exclusively
+
+       The 32bit operations have better portability and have less onerous
+       alignment restrictions.
+       [814b687db30e]
+
+       * source/common/constants.cpp, source/common/constants.h,
+       source/common/primitives.cpp, source/encoder/api.cpp,
+       source/test/intrapredharness.cpp:
+       constants: remove init/destroyROM functions
+       [d3389bb9efd0]
+
+       * source/x265.h:
+       api: fix range limit docs for RQT limit params
+       [d059cfa88f1a]
+
+       * source/encoder/frameencoder.cpp:
+       frameencoder: white-space nits
+       [29a374b62920]
+
+       * source/encoder/analysis.cpp:
+       analysis: drop MATCH_NON_PMODE macro
+
+       this was a debugging feature, it's not being tested which means it
+       will get broken and so it's best just to keep the code clean
+       [dc61091d5cc4]
+
+       * source/common/threading.cpp:
+       threading: don't use this->
+
+       We don't do this anywhere else
+       [3731d9bc7b88]
+
+       * source/common/threading.cpp, source/common/threading.h,
+       source/common/threadpool.cpp, source/common/threadpool.h:
+       threading: copyright comment format nits
+
+       be consistent with our other files
+       [a7b9b90e1bdd]
+
+       * source/common/param.cpp:
+       param: use strdup() on input strings uniformly
+       [ad532c30bc95]
+
+2014-11-18  Deepthi Nandakumar  <deepthi@multicorewareinc.com>
+
+       * source/encoder/encoder.cpp:
+       encoder: init filename to NULL
+       [2f0062f0791b]
+
+2014-11-17  Gopu Govindaswamy  <gopu@multicorewareinc.com>
+
+       * source/common/common.h, source/encoder/analysis.cpp,
+       source/encoder/search.cpp:
+       search: fix binary mismatch and inconsistent crash for share inter
+       information
+       [854fcbb50220]
+
+       * source/encoder/encoder.cpp:
+       encoder: force slicetype using analysis file
+       [05d824463602]
+
+2014-11-17  Satoshi Nakagawa  <nakagawa424@oki.com>
+
+       * source/common/cudata.cpp, source/common/lowres.h,
+       source/common/mv.h, source/encoder/bitcost.h,
+       source/encoder/motion.cpp, source/encoder/motion.h,
+       source/encoder/slicetype.cpp:
+       modify MV default constructor to do nothing
+       [7a1ec67bd004]
+
+2014-11-17  Aarthi Thirumalai  <Aarthi Thirumalai>
+
+       * source/encoder/ratecontrol.cpp:
+       vbv: tune vbv predictors for better mapping of predicted bits to
+       encoded bits
+       [27d36c4b4a27]
+
+2014-11-16  Deepthi Nandakumar  <deepthi@multicorewareinc.com>
+
+       * source/encoder/analysis.cpp, source/encoder/analysis.h:
+       analysis: cleanups, init pointers, variable names are made self-
+       explanatory
+       [ed2ba7a90567]
+
+2014-11-12  Gopu Govindaswamy  <gopu@multicorewareinc.com>
+
+       * source/encoder/analysis.cpp:
+       analysis: fix binary mismatch for share intra save and load mode
+       with same cli
+       [10b8d3fbe408]
+
+2014-11-14  Steve Borho  <steve@borho.org>
+
+       * source/x265.cpp:
+       cli: fix analysis filename argument
+
+       This showed up as a GCC warning about an unused variable, but having
+       the arg handled here prevented the arg from being passed to
+       x265_param_parse()
+       [8191e0d02455]
+
+       * source/encoder/encoder.cpp:
+       encoder: add prefix to FREAD and FWRITE macros to avoid MacOSX macro
+       conflict
+
+       /Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform
+       /Developer/SDKs/MacOSX10.10.sdk/usr/include/sys/fcntl.h:111:9: note:
+       previous definition is here #define FWRITE 0x0002
+       [b617dca5ce12]
+
+       * source/common/common.h, source/common/frame.h,
+       source/encoder/encoder.h:
+       common: move analysis reuse structs to common.h
+
+       files in common/ shouldn't include encoder.h
+       [72f1222903a3]
+
+2014-11-14  Satoshi Nakagawa  <nakagawa424@oki.com>
+
+       * source/encoder/analysis.cpp:
+       analysis: encodeResidue() directly write to reconPic
+       [c3096034934f]
+
+2014-11-14  Deepthi Nandakumar  <deepthi@multicorewareinc.com>
+
+       * source/CMakeLists.txt, source/common/common.h,
+       source/common/frame.h, source/common/param.cpp,
+       source/encoder/analysis.cpp, source/encoder/analysis.h,
+       source/encoder/api.cpp, source/encoder/encoder.cpp,
+       source/encoder/encoder.h, source/x265.cpp, source/x265.def.in,
+       source/x265.h:
+       analysis save/load: refactor full implementation
+
+       1. Move analysis inter/intra data into encoder 2. Encoder allocates
+       and frees memory for x265 analysis, remove api calls 3. Inter and
+       intra data allocated based on sliceType only 4. frame record size is
+       now variable
+       [58c2e06c2e4a]
+
+2014-11-13  Satoshi Nakagawa  <nakagawa424@oki.com>
+
+       * source/encoder/analysis.cpp:
+       analysis: don't add the cost of picture boundary CU to avgCost
+       [CHANGES OUTPUT]
+       [64314f8061f1]
+
+2014-11-13  Steve Borho  <steve@borho.org>
+
+       * source/cmake/FindVLD.cmake:
+       cmake: hack to avoid escaping problems in cmake 3.1 parser
+
+       Fix suggested by Mario *LigH* Rohkrämer
+       [17f2fb0996db]
+
+2014-11-13  Satoshi Nakagawa  <nakagawa424@oki.com>
+
+       * source/common/cudata.cpp, source/encoder/analysis.cpp,
+       source/encoder/entropy.cpp, source/encoder/entropy.h,
+       source/encoder/sao.cpp:
+       nits
+       [03974d78f241]
+
+2014-11-12  Steve Borho  <steve@borho.org>
+
+       * source/encoder/rdcost.h:
+       rdcost: lower the psy-rd scale factor for I slices to 96/256
+
+       Based on Santhoshini's testing, this is better at preventing
+       artifacts
+       [18aefbde72ab]
+
+2014-11-11  Satoshi Nakagawa  <nakagawa424@oki.com>
+
+       * source/common/cudata.cpp, source/common/cudata.h,
+       source/encoder/frameencoder.cpp, source/encoder/frameencoder.h:
+       refine initializeGeoms()
+       [98fb658f3229]
+
+2014-11-11  Steve Borho  <steve@borho.org>
+
+       * source/encoder/analysis.cpp:
+       analysis: fix bidir non-determinism in --pmode --rd 5
+       [306ef9782a30]
+
+       * source/common/param.cpp, source/encoder/analysis.cpp,
+       source/encoder/search.cpp, source/encoder/search.h:
+       Merge
+       [fa2fedd97ff2]
+
+2014-11-10  Steve Borho  <steve@borho.org>
+
+       * source/common/quant.cpp:
+       quant: allow --nr in all slice types evenly
+       [38fa64a5c51c]
+
+2014-11-06  Deepthi Nandakumar  <deepthi@multicorewareinc.com>
+
+       * source/common/common.h, source/common/quant.cpp,
+       source/common/quant.h:
+       noiseReduction: apply only for I and P, move NoiseReduction to
+       quant.h
+
+       This doubles the number of quant nr categories; intra blocks now use
+       the lower half.
+       [ed89e58b44e8]
+
+2014-11-10  Steve Borho  <steve@borho.org>
+
+       * doc/reST/cli.rst, source/common/param.cpp:
+       param: raise --nr limit to 2000
+       [27f293dd9eee]
+
+       * doc/reST/presets.rst, source/common/param.cpp:
+       param: remove --b-intra from --tune grain, document rdoq restriction
+       [64ccc616be33]
+
+2014-11-09  Steve Borho  <steve@borho.org>
+
+       * source/encoder/rdcost.h:
+       rdcost: experimental slice-type based psy-rd scale factor
+       [4f3fd7ab8868]
+
+2014-11-08  Steve Borho  <steve@borho.org>
+
+       * source/encoder/analysis.cpp, source/encoder/analysis.h,
+       source/encoder/search.cpp:
+       analysis: RDO based BIDIR decisions
+
+       At RD 0, 1, and 2, this changes 2Nx2N bidir from a SATD decision to
+       an SA8D decision.
+
+       At RD 3 and 4, if the bidir SA8D cost is within 17/16 of the best
+       inter cost, then it makes an RDO decision between bestInter and
+       Bidir (allowing psy-rd to influence the decision, which is the whole
+       point)
+
+       At RD 5 and 6, 2Nx2N BIDIR is yet another RD choice at the same
+       level as 2Nx2N inter and rect and amp. (psy) RDO picks the best mode
+       for each block.
+       [4c6c28cc93d9]
+
+2014-11-11  Deepthi Nandakumar  <deepthi@multicorewareinc.com>
+
+       * source/x265.cpp:
+       x265: more meaningful error messages in analysis
+       [838e41fb256b]
+
+       * source/encoder/api.cpp:
+       api: cleanup
+       [3c01e8881946]
+
+       * source/encoder/api.cpp:
+       api: replace analysis data with pre defined constant
+       [b4effa4dd53b]
+
+       * source/x265.cpp:
+       x265: create and initialise recon object if analysis mode is enabled
+       [47b290236ca3]
+
+       * source/common/param.cpp:
+       param: add default value to analysis mode
+       [5c397e744cfd]
+
+2014-11-11  Gopu Govindaswamy  <gopu@multicorewareinc.com>
+
+       * source/encoder/analysis.cpp, source/encoder/api.cpp,
+       source/x265.cpp, source/x265.h:
+       x265: remove redundant variables from intra and inter analysis
+       structure
+       [ad5177c86756]
+
+       * source/encoder/analysis.cpp, source/encoder/search.cpp,
+       source/encoder/search.h, source/x265.h:
+       analysis: Dump best MV statistics and re-use this for analysis load
+       mode
+
+       This patch fixes a bug in inter slices in analysis=load|save mode.
+       Inter data for all partitions is now saved correctly.
+       [c8004323493e]
+
+2014-11-10  Satoshi Nakagawa  <nakagawa424@oki.com>
+
+       * source/common/cudata.cpp, source/common/cudata.h,
+       source/common/deblock.cpp, source/encoder/analysis.cpp,
+       source/encoder/entropy.cpp, source/encoder/frameencoder.cpp,
+       source/encoder/search.cpp:
+       cleanup SIZE_NONE. empty CU has MODE_NONE.
+       [32513a4c3bd4]
+
+2014-11-09  Steve Borho  <steve@borho.org>
+
+       * source/encoder/search.cpp:
+       search: fixup
+       [1e04e178a349]
+
+2014-11-08  Steve Borho  <steve@borho.org>
+
+       * source/encoder/reference.cpp, source/encoder/reference.h,
+       source/encoder/search.cpp:
+       reference: add methods for querying CU/PU pointers
+       [9687a9d1205a]
+
+       * source/encoder/analysis.cpp:
+       analysis: delay initialization of prediction cu in RD 5 and 6
+       [b9147e641ce6]
+
+2014-11-09  Satoshi Nakagawa  <nakagawa424@oki.com>
+
+       * source/encoder/analysis.cpp:
+       fix typo
+       [3dc9857c59d3]
+
+2014-11-08  Steve Borho  <steve@borho.org>
+
+       * source/encoder/analysis.cpp:
+       analysis: delay initialization of prediction cu until just before
+       use
+
+       This avoids initializing CUs that may never be used because of
+       various early-outs
+       [3f2d68368554]
+
+       * source/encoder/search.cpp, source/encoder/search.h:
+       search: keep AMVP candidates in mode structure
+
+       This fixes some work replication in --pme and will also make
+       handling BIDIR as a separate prediction easier.
+       [6124c837b3ab]
+
+       * source/encoder/motion.h, source/encoder/search.cpp,
+       source/encoder/slicetype.h:
+       motion: remove trivial set methods; make some members public
+       [53c146f7eb9f]
+
+2014-11-07  Steve Borho  <steve@borho.org>
+
+       * source/encoder/frameencoder.cpp:
+       nr: fix denoise offset memcopy size
+       [0912563c4ac1]
+
+       * source/encoder/entropy.h:
+       entropy: pass context model (state) to bitsCodeBin as uint32_t
+
+       Should be slightly more efficient
+       [a67b848d6c04]
+
+       * source/encoder/entropy.cpp:
+       entropy: nit
+       [b55799a2f5ad]
+
+       * source/encoder/entropy.cpp:
+       entropy: ensure X265_CHECK() has braces
+       [0fd8e0c5272a]
+
+       * source/encoder/entropy.cpp, source/encoder/entropy.h:
+       entropy: inline methods which mapped to encodeBin() calls
+       [640d2936e699]
+
+       * source/encoder/entropy.cpp, source/encoder/entropy.h:
+       entropy: inline bit counting functions
+       [ca7873cab172]
+
+       * source/encoder/entropy.cpp:
+       entropy: use bitsCodeBin in intra mode bit estimate functions
+       [84fc74874406]
+
+       * source/encoder/entropy.cpp, source/encoder/entropy.h:
+       entropy: rename encodeBinContext to bitsCodeBin, make const
+
+       The function is not modifying the context, so there is no need to
+       pass as a reference, and the function can be const. Also, group the
+       bit counting RDO functions together
+       [a1ee9422183b]
+
+       * source/encoder/entropy.cpp:
+       entropy: white-space nits
+       [429742055057]
+
+2014-11-07  Satoshi Nakagawa  <nakagawa424@oki.com>
+
+       * source/encoder/search.cpp:
+       fix bug in 522baf03fbbd
+       [f2130a4dc876]
+
+2014-11-07  Deepthi Nandakumar  <deepthi@multicorewareinc.com>
+
+       * source/encoder/search.cpp:
+       search: fix warnings
+       [7338b1f1f43d]
+
+2014-11-07  Satoshi Nakagawa  <nakagawa424@oki.com>
+
+       * source/encoder/analysis.cpp:
+       fix typo
+       [4f034e3adef8]
+
+2014-11-05  Ashok Kumar Mishra  <ashok@multicorewareinc.com>
+
+       * source/encoder/entropy.cpp, source/encoder/entropy.h,
+       source/encoder/search.cpp, source/encoder/search.h:
+       [REVIEW PATCH/OUTPUT CHANGED]search: removed multiple encode
+       Coefficients from estimateResidualQT()
+
+       Tried to remove multiple encode coefficients from
+       estimateResidualQT() function. Coefficients are encoded in three
+       stages: Once for calculation of distortion and twice for split and
+       unsplit block cost calculation. I have given comments where I have
+       changed the code.
+       [eb5a9eb03dd6]
+
+       * source/encoder/search.cpp, source/encoder/search.h:
+       search: made a function for null cost calculation in
+       xEstimateResidualQT()
+       [522baf03fbbd]
+
+       * source/encoder/search.cpp, source/encoder/search.h:
+       search: made separate functions for encoding cbfs in
+       xEstimateResidualQT()
+       [0b7c709335b2]
+
+2014-11-07  Satoshi Nakagawa  <nakagawa424@oki.com>
+
+       * source/common/cudata.cpp, source/common/cudata.h:
+       cudata: remove default argument
+       [bc4f3dab51db]
+
+2014-11-06  Steve Borho  <steve@borho.org>
+
+       * doc/reST/presets.rst:
+       doc: fix sub-title depth
+
+       Single dash was already used by a higher section
+       [0ebd0b00bf9b]
+
+       * doc/reST/cli.rst, doc/reST/presets.rst, source/common/param.cpp:
+       param: add --tune grain
+       [ec5588025568]
+
+       * source/encoder/search.cpp:
+       search: ugly bias hack for bidir with psy-rd
+       [e33e09549c0c]
+
+       * doc/reST/cli.rst:
+       docs: document RC params, at least minimally
+       [beac946dac85]
+
+       * source/x265.h:
+       api: cleanup comments
+       [8ceaab303bfa]
+
+       * source/x265.cpp:
+       cli: cleanup CLI help, add 'verbose' tier
+
+       Remove a lot of uncommon features from the initial help output,
+       require
+       --log-level debug --help to see it all
+       [f599a4df57ac]
+
+       * source/common/param.cpp, source/x265.cpp:
+       api: expose rate control params via x265_param_parse() and CLI
+
+       Adds range checks for qCompress, which has documented limits. The
+       others have very minimal explanations; so I'm not adding them to the
+       CLI help. Users should not touch them unless they know what they are
+       doing.
+
+       Note this commit doesn't bump X265_BUILD since no new params were
+       added.
+       [b37cda5d3092]
+
+2014-11-05  Steve Borho  <steve@borho.org>
+
+       * source/common/deblock.cpp:
+       deblock: fix type conversion warnings
+       [4a3997fd4fc1]
+
+2014-11-05  Satoshi Nakagawa  <nakagawa424@oki.com>
+
+       * source/common/deblock.cpp, source/common/deblock.h,
+       source/common/quant.cpp, source/common/slice.h,
+       source/encoder/encoder.cpp, source/encoder/entropy.cpp,
+       source/encoder/framefilter.cpp, source/encoder/rdcost.h,
+       source/encoder/sao.cpp:
+       refine deblocking filter
+       [65e14d5a5728]
+
+2014-11-04  Gopu Govindaswamy  <gopu@multicorewareinc.com>
+
+       * source/x265.cpp:
+       cli: bug fix for validatefanout param analysis-mode=save and load
+       [2a8f3d5820a6]
+
+2014-11-04  gopi jayaraman  <gopi@multicorewareinc.com>
+
+       * source/encoder/encoder.cpp:
+       encoder: use 6 frameNumThreads for cpucount 32 and above
+       [0dcc6a1d8f02]
+
+2014-11-04  Steve Borho  <steve@borho.org>
+
+       * source/x265.h:
+       api: add void to functions that take no parameters for -Wstrict-
+       prototypes
+       [0d44fcb269a6]
+
+       * source/common/deblock.cpp, source/common/frame.cpp,
+       source/common/frame.h, source/common/framedata.h,
+       source/common/predict.cpp, source/encoder/analysis.cpp,
+       source/encoder/dpb.cpp, source/encoder/encoder.cpp,
+       source/encoder/frameencoder.cpp, source/encoder/framefilter.cpp,
+       source/encoder/ratecontrol.cpp, source/encoder/sao.cpp,
+       source/encoder/search.cpp, source/encoder/slicetype.cpp,
+       source/encoder/weightPrediction.cpp:
+       frame: rename m_reconPicYuv -> m_reconPic, m_origPicYuv -> m_fencPic
+
+       the fooPicYuv names were potentially confusing, preferred names:
+       PicYuv* fooPic; Yuv* fooYuv;
+       [67bf055c13d5]
+
+       * source/encoder/motion.cpp, source/encoder/motion.h:
+       motion: remove unused sa8d pointer and bufSA8D method
+       [59a08101dfc6]
+
+2014-11-04  Gopu Govindaswamy  <gopu@multicorewareinc.com>
+
+       * source/common/cudata.h, source/encoder/analysis.cpp,
+       source/encoder/analysis.h, source/encoder/api.cpp,
+       source/encoder/encoder.cpp, source/encoder/search.cpp,
+       source/encoder/search.h, source/x265.cpp, source/x265.h:
+       search: dump and share the best motion statistics for inter(P&B)
+       slices
+       [d5f6133b99d4]
+
+2014-11-03  Steve Borho  <steve@borho.org>
+
+       * doc/reST/cli.rst:
+       docs: fix reST parsing issues
+       [a8ec469d7fb1]
+
+2014-11-03  Min Chen  <chenm003@163.com>
+
+       * source/common/primitives.h, source/common/x86/pixel-util.h,
+       source/common/x86/pixel-util8.asm:
+       cleanup: remove unused asm calcrecon
+       [5637b495e2e1]
+
+       * source/common/x86/ipfilter8.asm:
+       asm: fix typo error in interp_8tap_vert_pp_4x4_avx2
+       [ee88b63aced0]
+
+2014-11-03  Satoshi Nakagawa  <nakagawa424@oki.com>
+
+       * source/common/cudata.cpp, source/common/cudata.h,
+       source/common/quant.cpp, source/encoder/analysis.cpp,
+       source/encoder/entropy.cpp, source/encoder/frameencoder.cpp,
+       source/encoder/search.cpp:
+       cleanup CUData::m_skipFlag
+       [2e60f3b81981]
+
+2014-10-31  Steve Borho  <steve@borho.org>
+
+       * source/encoder/encoder.cpp:
+       encoder: make it clear that --fast-cbf is ineffective at lower rd
+       levels
+
+       This begs the question of whether the feature should exist, or
+       whether it should be added to the lower RD levels
+       [eebb372eec89]
+
+       * source/common/param.cpp:
+       param: show options using their CLI / param_parse names
+       [c32a733a819b]
+
+2014-10-30  Steve Borho  <steve@borho.org>
+
+       * .hgtags:
+       remove dead non-release tags
+
+       anyone interested in archeology can still find them; there's no
+       sense to keep them on the tip since we stopped tracking last known
+       good more than a year ago
+       [75cb2ab1ecec]
+
  2014-10-31  Steve Borho  <steve@borho.org>
  
+       * source/encoder/encoder.cpp:
+       Merge with stable
+       [ae8a661acdc4]
+
+       * .hgtags:
+       Added tag 1.4 for changeset 5e604833c5aa
+       [d2db9c1ab44b] <stable>
+
         * source/encoder/encoder.cpp:
         encoder: emit an Active Parameter Sets SEI in stream headers if
         interlaced