From: Doug McMahon <mc631man@gmail.com>
Date: Sun, 4 Jan 2015 16:38:26 +0000 (-0500)
Subject: Imported Debian version 1.0~trusty
X-Git-Tag: debian/1.0_trusty^0
X-Git-Url: https://git.piment-noir.org/?a=commitdiff_plain;h=HEAD;p=deb_vid.stab.git

Imported Debian version 1.0~trusty
---

80f575fcbc4aed1b4c20bf5be5e5fc910d4d8dea
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..fd50e4a
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,11 @@
+*.a
+*.o
+*.pc
+*.so
+*.so.*
+*~
+CMakeCache.txt
+CMakeFiles
+Makefile
+cmake_install.cmake
+install_manifest.txt
diff --git a/CMakeLists.txt b/CMakeLists.txt
new file mode 100644
index 0000000..e9a2af4
--- /dev/null
+++ b/CMakeLists.txt
@@ -0,0 +1,76 @@
+cmake_minimum_required (VERSION 2.6)
+project (vid.stab)
+
+# The build type is not forced here; pick one with -DCMAKE_BUILD_TYPE=<type>.
+# Modules are looked up relative to this file so add_subdirectory() also works.
+set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_CURRENT_SOURCE_DIR}/CMakeModules/")
+include (FindSSE)
+
+# VIDSTAB_VERSION expands to "1.00": minor and patch are joined without a dot.
+set(MAJOR_VERSION 1)
+set(MINOR_VERSION 0)
+set(PATCH_VERSION 0)
+set(VIDSTAB_VERSION ${MAJOR_VERSION}.${MINOR_VERSION}${PATCH_VERSION})
+
+option(BUILD_SHARED_LIBS "build shared libraries instead of static libraries"
+       ON)
+
+add_definitions(-Wall -O3 -g -Wno-pointer-sign -fPIC -std=gnu99)
+# For an unoptimized debug build swap in: -Wall -O0 -g -Wno-pointer-sign
+
+### ORC is not used in any active code at the moment  ###
+# Tried with ORC 0.4.14; 0.4.10 did not work (not all opcodes implemented).
+# The lookup below is disabled, so ORC_FOUND is only true if defined elsewhere.
+# find_package(Orc)
+if(NOT ORC_FOUND)
+  add_definitions(-DDISABLE_ORC)
+else()
+  add_definitions(-DUSE_ORC ${ORC_DEFINITIONS})
+  include_directories(${ORC_INCLUDE_DIRS})
+endif()
+
+# SSE2_FOUND is provided by the FindSSE module.
+# Our -DUSE_SSE2_ASM code does not work with -fPIC, so it is not enabled here.
+if(SSE2_FOUND)
+  add_definitions(-DUSE_SSE2 -msse2 -ffast-math)
+endif()
+
+set(SOURCES
+  src/frameinfo.c src/transformtype.c src/libvidstab.c src/transform.c
+  src/transformfixedpoint.c src/motiondetect.c src/motiondetect_opt.c
+  src/serialize.c src/localmotion2transform.c src/boxblur.c src/vsvector.c
+  src/orc/motiondetectorc.c)
+
+set(HEADERS
+  src/frameinfo.h src/transformtype.h src/libvidstab.h src/transform.h
+  src/motiondetect.h src/serialize.h src/localmotion2transform.h
+  src/boxblur.h src/vsvector.h)
+
+# The vidstab library; shared or static according to BUILD_SHARED_LIBS.
+add_library(vidstab ${SOURCES})
+set_target_properties(vidstab PROPERTIES
+  SOVERSION ${MAJOR_VERSION}.${MINOR_VERSION})
+
+# libm is always linked; ORC and libgomp only when their switch is set.
+target_link_libraries(vidstab m)
+if(ORC_FOUND)
+  target_link_libraries(vidstab ${ORC_LIBRARIES})
+endif()
+if(USE_OMP)
+  target_link_libraries(vidstab gomp)
+endif()
+
+
+# Install every header found in src/ (this replaces the HEADERS list set above).
+file(GLOB HEADERS "${CMAKE_CURRENT_SOURCE_DIR}/src/*.h")
+install(FILES ${HEADERS} DESTINATION include/vid.stab)
+
+# LIB_SUFFIX (e.g. 64) may be passed by the caller to select lib<suffix>.
+install(TARGETS vidstab
+        RUNTIME DESTINATION bin
+        LIBRARY DESTINATION lib${LIB_SUFFIX}
+        ARCHIVE DESTINATION lib${LIB_SUFFIX})
+
+# Generate and install vidstab.pc (macro from CMakeModules).
+include(create_pkgconfig_file)
+create_pkgconfig_file(vidstab "Vid.Stab, a library for stabilizing video clips")
diff --git a/CMakeModules/FindOrc.cmake b/CMakeModules/FindOrc.cmake
new file mode 100644
index 0000000..117fd34
--- /dev/null
+++ b/CMakeModules/FindOrc.cmake
@@ -0,0 +1,29 @@
+# find ORC
+# - Try to find LibOrc-0.4
+# Once done this will define
+#  ORC_FOUND - System has LibOrc
+#  ORC_INCLUDE_DIRS - The LibOrc include directories
+#  ORC_LIBRARIES - The libraries needed to use LibOrc
+#  ORC_DEFINITIONS - Compiler switches required for using LibOrc
+
+find_package(PkgConfig)
+pkg_check_modules(PC_ORC orc-0.4)
+set(ORC_DEFINITIONS ${PC_ORC_CFLAGS_OTHER})
+# orc-0.4 installs orc/orc.h below an "orc-0.4" include subdirectory.
+find_path(ORC_INCLUDE_DIR orc/orc.h
+          HINTS ${PC_ORC_INCLUDEDIR} ${PC_ORC_INCLUDE_DIRS}
+          PATH_SUFFIXES orc-0.4 orc)
+
+find_library(ORC_LIBRARY NAMES orc-0.4
+             HINTS ${PC_ORC_LIBDIR} ${PC_ORC_LIBRARY_DIRS} )
+
+set(ORC_LIBRARIES ${ORC_LIBRARY} )
+set(ORC_INCLUDE_DIRS ${ORC_INCLUDE_DIR} )
+include(FindPackageHandleStandardArgs)
+# Handle the QUIETLY and REQUIRED arguments and set ORC_FOUND to TRUE if all
+# listed variables are TRUE. The name must be "Orc" so that ORC_FOUND is set.
+find_package_handle_standard_args(Orc  DEFAULT_MSG
+                                  ORC_LIBRARY ORC_INCLUDE_DIR)
+
+mark_as_advanced(ORC_INCLUDE_DIR ORC_LIBRARY )
+# End find ORC
diff --git a/CMakeModules/FindSSE.cmake b/CMakeModules/FindSSE.cmake
new file mode 100644
index 0000000..6ece876
--- /dev/null
+++ b/CMakeModules/FindSSE.cmake
@@ -0,0 +1,104 @@
+# Check if SSE instructions are available on the machine where
+# the project is compiled.
+
+if(CMAKE_SYSTEM_NAME MATCHES "Linux")
+   execute_process(COMMAND cat /proc/cpuinfo OUTPUT_VARIABLE CPUINFO ERROR_QUIET)
+   # CPUINFO stays quoted below: empty output or a ';' must not split arguments.
+   string(REGEX REPLACE "^.*(sse2).*$" "\\1" SSE_THERE "${CPUINFO}")
+   string(COMPARE EQUAL "sse2" "${SSE_THERE}" SSE2_TRUE)
+   if(SSE2_TRUE)
+      set(SSE2_FOUND true CACHE BOOL "SSE2 available on host")
+   else()
+      set(SSE2_FOUND false CACHE BOOL "SSE2 available on host")
+   endif()
+
+   # /proc/cpuinfo apparently omits sse3, hence the CPU model fallback below.
+   string(REGEX REPLACE "^.*[^s](sse3).*$" "\\1" SSE_THERE "${CPUINFO}")
+   string(COMPARE EQUAL "sse3" "${SSE_THERE}" SSE3_TRUE)
+   if(NOT SSE3_TRUE)
+      string(REGEX REPLACE "^.*(T2300).*$" "\\1" SSE_THERE "${CPUINFO}")
+      string(COMPARE EQUAL "T2300" "${SSE_THERE}" SSE3_TRUE)
+   endif()
+
+   string(REGEX REPLACE "^.*(ssse3).*$" "\\1" SSE_THERE "${CPUINFO}")
+   string(COMPARE EQUAL "ssse3" "${SSE_THERE}" SSSE3_TRUE)
+   if(SSE3_TRUE OR SSSE3_TRUE)
+      set(SSE3_FOUND true CACHE BOOL "SSE3 available on host")
+   else()
+      set(SSE3_FOUND false CACHE BOOL "SSE3 available on host")
+   endif()
+   if(SSSE3_TRUE)
+      set(SSSE3_FOUND true CACHE BOOL "SSSE3 available on host")
+   else()
+      set(SSSE3_FOUND false CACHE BOOL "SSSE3 available on host")
+   endif()
+
+   string(REGEX REPLACE "^.*(sse4_1).*$" "\\1" SSE_THERE "${CPUINFO}")
+   string(COMPARE EQUAL "sse4_1" "${SSE_THERE}" SSE41_TRUE)
+   if(SSE41_TRUE)
+      set(SSE4_1_FOUND true CACHE BOOL "SSE4.1 available on host")
+   else()
+      set(SSE4_1_FOUND false CACHE BOOL "SSE4.1 available on host")
+   endif()
+elseif(CMAKE_SYSTEM_NAME MATCHES "Darwin")
+   execute_process(COMMAND /usr/sbin/sysctl -n machdep.cpu.features
+      OUTPUT_VARIABLE CPUINFO ERROR_QUIET)
+
+   string(REGEX REPLACE "^.*[^S](SSE2).*$" "\\1" SSE_THERE "${CPUINFO}")
+   string(COMPARE EQUAL "SSE2" "${SSE_THERE}" SSE2_TRUE)
+   if(SSE2_TRUE)
+      set(SSE2_FOUND true CACHE BOOL "SSE2 available on host")
+   else()
+      set(SSE2_FOUND false CACHE BOOL "SSE2 available on host")
+   endif()
+
+   string(REGEX REPLACE "^.*[^S](SSE3).*$" "\\1" SSE_THERE "${CPUINFO}")
+   string(COMPARE EQUAL "SSE3" "${SSE_THERE}" SSE3_TRUE)
+   if(SSE3_TRUE)
+      set(SSE3_FOUND true CACHE BOOL "SSE3 available on host")
+   else()
+      set(SSE3_FOUND false CACHE BOOL "SSE3 available on host")
+   endif()
+
+   string(REGEX REPLACE "^.*(SSSE3).*$" "\\1" SSE_THERE "${CPUINFO}")
+   string(COMPARE EQUAL "SSSE3" "${SSE_THERE}" SSSE3_TRUE)
+   if(SSSE3_TRUE)
+      set(SSSE3_FOUND true CACHE BOOL "SSSE3 available on host")
+   else()
+      set(SSSE3_FOUND false CACHE BOOL "SSSE3 available on host")
+   endif()
+
+   string(REGEX REPLACE "^.*(SSE4\\.1).*$" "\\1" SSE_THERE "${CPUINFO}")
+   string(COMPARE EQUAL "SSE4.1" "${SSE_THERE}" SSE41_TRUE)
+   if(SSE41_TRUE)
+      set(SSE4_1_FOUND true CACHE BOOL "SSE4.1 available on host")
+   else()
+      set(SSE4_1_FOUND false CACHE BOOL "SSE4.1 available on host")
+   endif()
+elseif(CMAKE_SYSTEM_NAME MATCHES "Windows")
+   # TODO: no detection on Windows yet; SSE2 is simply assumed.
+   set(SSE2_FOUND   true  CACHE BOOL "SSE2 available on host")
+   set(SSE3_FOUND   false CACHE BOOL "SSE3 available on host")
+   set(SSSE3_FOUND  false CACHE BOOL "SSSE3 available on host")
+   set(SSE4_1_FOUND false CACHE BOOL "SSE4.1 available on host")
+else()
+   set(SSE2_FOUND   true  CACHE BOOL "SSE2 available on host")
+   set(SSE3_FOUND   false CACHE BOOL "SSE3 available on host")
+   set(SSSE3_FOUND  false CACHE BOOL "SSSE3 available on host")
+   set(SSE4_1_FOUND false CACHE BOOL "SSE4.1 available on host")
+endif()
+
+if(NOT SSE2_FOUND)
+   message(STATUS "Could not find hardware support for SSE2 on this machine.")
+endif()
+if(NOT SSE3_FOUND)
+   message(STATUS "Could not find hardware support for SSE3 on this machine.")
+endif()
+if(NOT SSSE3_FOUND)
+   message(STATUS "Could not find hardware support for SSSE3 on this machine.")
+endif()
+if(NOT SSE4_1_FOUND)
+   message(STATUS "Could not find hardware support for SSE4.1 on this machine.")
+endif()
+# Hide the detection results from the default (non-advanced) cache view.
+mark_as_advanced(SSE2_FOUND SSE3_FOUND SSSE3_FOUND SSE4_1_FOUND)
diff --git a/CMakeModules/create_pkgconfig_file.cmake b/CMakeModules/create_pkgconfig_file.cmake
new file mode 100644
index 0000000..da31712
--- /dev/null
+++ b/CMakeModules/create_pkgconfig_file.cmake
@@ -0,0 +1,25 @@
+#
+# Write a pkg-config .pc file for the given "name" with "description" and
+# install it to lib${LIB_SUFFIX}/pkgconfig.
+# Arguments:
+#   name: a library name (without "lib" prefix and "so" suffixes)
+#   desc: a description string
+function (create_pkgconfig_file name desc)
+    set(_pkgfname "${CMAKE_CURRENT_BINARY_DIR}/${name}.pc")
+    message(STATUS "${name}: writing pkgconfig file ${_pkgfname}")
+    # "\${...}" is written literally so pkg-config expands it at query time.
+    file(WRITE "${_pkgfname}" "# file generated by vid.stab cmake build
+prefix=${CMAKE_INSTALL_PREFIX}
+libdir=\${prefix}/lib${LIB_SUFFIX}
+includedir=\${prefix}/include
+
+Name: ${name}
+Description: ${desc}
+Version: ${VIDSTAB_VERSION}
+Libs: -L\${libdir} -l${name}
+Cflags: -I\${includedir}
+
+")
+
+    install(FILES "${_pkgfname}" DESTINATION lib${LIB_SUFFIX}/pkgconfig)
+endfunction()
\ No newline at end of file
diff --git a/Changelog b/Changelog
new file mode 100644
index 0000000..19cfbea
--- /dev/null
+++ b/Changelog
@@ -0,0 +1,114 @@
+1.0 = 0.98 (the version number was bumped only because of API changes)
+0.98...
+	small measurement fields added that only scan around mean
+	diagnostics in show=1 and show=2 use lines now.
+	fix zoom detection/treatment
+	zoomspeed param for optzoom
+	gaussian filter for cam path optimization
+0.97
+	optzoom = 2: adaptive zooming added
+	border blurring for interpolation>=bilinear
+	spurious black pixels bug fixed
+0.96
+	libavfilter plugins work
+	flexible support of pixel formats, ala ffmpeg
+0.95
+	virtual tripod mode
+
+0.94
+	local motions are now stored in the file (file format changed,
+	 old one still supported)
+	BUGFIX for zero-transforms
+
+0.93
+	single pass version filter_deshake
+	made stabilize part print the transforms to the file right away
+
+0.92
+	boxblur added and unsharp filter removed for motiondetection
+
+0.91
+	spiral search for matches in compareSubImg
+
+0.90
+	complete restructuring into own library
+	Fixed point arithmetic used for transform code
+	motiondetection optimized with ORC: speedup by factor 4
+	ORC 0.4.14 is now required
+	added tests for speed and functional tests
+	removed biquadratic interpolation routine
+
+0.80
+	keep borders with stabilized last frame (as it should be).
+	Pointed out by Guido Torelli.
+
+0.79
+	speed optimizations using SSE by Alexey Osipov
+	search tree cut, spiral search and sse:
+	 together speedup factor ~8 of stabilize run
+	stepsize is increased stepwise
+	keep border at transform plugin improved
+
+
+0.77
+	interpolation routines improved a lot, thanks to hints on ffmpeg list
+	added bicubic interpolation that uses 4x4 pixel (useful for large zoom values)
+	bilinear interpolation is now the default
+	transform plugin uses last transform for the remaining frames
+	 -> this enables to use the transform plugin for constant transformations
+
+0.76
+	BUGFIX in calcFieldTransYUV caused SEGFAULT
+
+0.75
+	two meta parameters introduced: shakiness and accuracy, and
+	removal of fieldnum, fieldsize, maxshift, maxfieldnum
+	field placement changed: Now they fill the frame perfectly
+	added blurring such that stepsize can be much larger: much faster now!
+        linear interpolation is now the default
+	uncertain angles are set to 0 (more robustness)
+
+0.70    selects a maximal number of fields according to their contrast
+	nicely distributed over the frame
+	Todo: remove fieldnum, allowmax (now false), and compressed parameter
+	Todo: add accuracy parameter instead of maxfields
+        Bugfix of contrast routine (error in the calculation of maxi)
+
+0.62
+	different interpolation functions. No interpolation is default and
+	 seems fine in most applications.
+	field placement changed. Now they fill the frame better.
+	"compress" option moves them more to the center (vertically)
+	fields and their areas and transforms can be drawn on the frame.
+
+0.6
+	new field placement allows for any number of fields
+	 and respects aspect ratio
+	fields with low contrast are ignored
+	non-symmetric field setups are possible
+
+0.5
+	global and per-frame zooming supported
+	optimal zoom value is calculated to avoid moving border	effect
+	add filter unsharp to sharpen the frames
+	write parameters to transforms file
+
+0.44
+	bugfixing
+	finished RGB #support
+	transforms file format supports comments
+
+0.42
+	input filename and tc_list stuff used
+0.41
+	code style from transcode used
+0.4
+	moved to CVS version we use a cleaned median to select the right
+	transform from the fields
+0.3
+	Support RGB (untested)
+	maxshift default 50
+	allowmax added
+	scan only every second shift and then make a second small
+          scan around the best match
+	removed black rectangle around transformed frame
diff --git a/Install b/Install
new file mode 100644
index 0000000..f0dc674
--- /dev/null
+++ b/Install
@@ -0,0 +1,29 @@
+***  LIBRARY ***
+To compile the library do:
+
+cmake .
+make
+sudo make install
+
+To customize the installation prefix use
+cmake -DCMAKE_INSTALL_PREFIX:PATH=/usr/local
+
+
+*** TRANSCODE plugins ***
+
+You need to have the transcode sources installed. The best
+choice is to download the same version you have in your distribution.
+You don't need to compile transcode itself.
+Download from http://tcforge.berlios.de/ or
+ http://developer.berlios.de/project/showfiles.php?group_id=10094.
+Then you need to adapt the transcode/CMakeLists.txt and change
+ the variable
+> set(TRANSCODE_ROOT path/to/transcode)
+
+cd transcode/cmake
+make
+. install.sh
+
+*** FFMPEG ***
+
+You need to configure ffmpeg with --enable-libvidstab
\ No newline at end of file
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..a09e1dc
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,16 @@
+This project is open source in the sense of the GPL.
+
+ *   This program is free software; you can redistribute it and/or modify  *
+ *   it under the terms of the GNU General Public License as published by  *
+ *   the Free Software Foundation; either version 2 of the License, or     *
+ *   (at your option) any later version.                                   *
+ *                                                                         *
+ *   You should have received a copy of the GNU General Public License     *
+ *   along with this program; if not, write to the                         *
+ *   Free Software Foundation, Inc.,                                       *
+ *   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.             *
+ *                                                                         *
+ *   This program is distributed in the hope that it will be useful,       *
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
+ *   GNU General Public License for more details.                          *
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..07c0d95
--- /dev/null
+++ b/README.md
@@ -0,0 +1,216 @@
+# VidStab
+
+Vidstab is a video stabilization library which can be plugged-in with Ffmpeg and Transcode.
+
+**Why is it needed**
+
+A video acquired using a hand-held camera or a camera mounted on a vehicle, typically suffers from undesirable shakes and jitters. Activities such as surfing, skiing, riding and walking while shooting videos are especially prone to erratic camera shakes. Vidstab targets these video contents to help create smoother and stable videos.
+
+**Some of the features include:**
+
+ * Fast detection of subsequent transformations e.g. translation and rotations up to a given extent.
+ * Low pass filtered smoothing with adjustable horizon.
+ * Detection algorithms:
+  * Smart and fast multi measurement fields algorithm with contrast selection.
+  * Brute force algorithm only for translations.
+ * Clipping options: keep blank (black) or keep from previous frames.
+ * Optional drawing of measurement fields and detected transformations for visual analysis.
+ * Zooming possible to get rid of jiggling borders (automatic mode).
+ * Resulting images are interpolated (different algorithms).
+ * Sharpening of the stabilized movie to compensate for interpolation effects due to rotation/zooming (only with Transcode).
+ * Single pass filter for streaming applications (only with Transcode).
+ * Virtual-tripod-mode to get a tripod experience.
+
+**NOTE:** This readme focuses mainly on using vidstab with Ffmpeg. See 
+[here](http://public.hronopik.de/vid.stab) for information regarding installation, usage and examples for using vidstab with Transcode. Or contact me at georg dot martius @ web dot de
+  
+## System Requirements
+ * A Linux-based system
+ * ffmpeg source code
+ * Cmake
+  
+## Installation Instructions
+
+To use the vidstab library with ffmpeg, ffmpeg must be configured with the `--enable-libvidstab` option.
+
+### Default Build and Installation:
+##### Installing vidstab library:
+    
+```shell    
+cd path/to/vid.stab/dir/
+cmake .
+make
+sudo make install
+```
+
+##### Installing ffmpeg:
+   
+```shell    
+cd path/to/ffmpeg/dir/
+./configure --enable-gpl --enable-libvidstab <other configure options>
+make
+sudo make install
+```
+
+### Alternatively one can install vidstab into a custom directory this way:
+##### Installing vidstab library:
+
+```shell
+cd path/to/vid.stab/dir/
+cmake -DCMAKE_INSTALL_PREFIX:PATH=path/to/install_dir/
+make
+sudo make install
+```
+
+##### Installing ffmpeg:
+
+```shell
+cd path/to/ffmpeg/dir/
+PKG_CONFIG_PATH="path/to/install_dir/lib/pkgconfig" \
+./configure --enable-gpl --enable-libvidstab <other optionalconfigure options>
+make
+sudo make install
+```      
+      
+Before running ffmpeg for the first time, make sure to export `LD_LIBRARY_PATH` to point to vidstab library, e.g.,
+    
+```shell   
+export LD_LIBRARY_PATH=path/to/install_dir/lib:$LD_LIBRARY_PATH
+```    
+
+## Usage instructions
+
+**Currently with ffmpeg, vidstab library must run in two-pass mode.** The first pass employs the **vidstabdetect** filter and the second pass uses the **vidstabtransform** filter. 
+
+*Single pass filter with vidstab library is only available with Transcode. The 
+[deshake](http://www.ffmpeg.org/ffmpeg-filters.html#deshake) filter of ffmpeg can be used for a single-pass encoding, though using the vidstab two-pass filters will give superior results.*
+
+The vidstabdetect filter (in first pass) will generate a file with relative-translation and rotation-transform information about subsequent frames. This information will then be read by vidstabtransform filter (in second pass) to compensate for the jerky motions and produce a stable video output.
+
+Make sure that you use [unsharp](http://www.ffmpeg.org/ffmpeg-filters.html#unsharp-1) filter provided by ffmpeg for best results (only in second pass).
+
+*See [the list of ffmpeg filters](http://www.ffmpeg.org/ffmpeg-filters.html) to know more about vidstabdetect, vidstabtransform and all other filters available with ffmpeg.*
+
+### Available options with vidstab filters:
+
+##### First pass (vidstabdetect filter):
+
+<dl>
+  <dt><b>result</b></dt>
+  <dd>Set the path to the file used to write the transforms information. Default value is <b>transforms.trf</b>.</dd>
+  <dt><b>shakiness</b></dt>
+  <dd>Set the shakiness of input video or quickness of camera. It accepts an integer in the range 1-10, a value of 1 means little shakiness, a value of 10 means strong shakiness. Default value is 5.</dd>
+  <dt><b>accuracy</b></dt>
+  <dd>Set the accuracy of the detection process. It must be a value in the range 1-15. A value of 1 means low accuracy, a value of 15 means high accuracy. Default value is 15.</dd>
+  <dt><b>stepsize</b></dt>
+  <dd>Set stepsize of the search process. The region around minimum is scanned with 1 pixel resolution. Default value is 6.</dd>
+  <dt><b>mincontrast</b></dt>
+  <dd>Set minimum contrast. Any measurement field having contrast below this value is discarded. Must be a floating point value in the range 0-1. Default value is 0.3.</dd>
+  <dt><b>tripod</b></dt>
+  <dd>  Set reference frame number for tripod mode.  If enabled, the motion of the frames is compared to a reference frame in the filtered stream, identified by the specified number. The intention is to compensate all movements in a more-or-less static scene and keep the camera view absolutely still. If set to 0, it is disabled. The frames are counted starting from 1.
+  <br>NOTE: If this mode is used in first pass then it should also be used in second pass.</dd>
+  <dt><b>show</b></dt>
+  <dd>Show fields and transforms in the resulting frames for visual analysis. It accepts an integer in the range 0-2. Default value is 0, which disables any visualization.</dd>
+</dl>
+
+  
+
+##### Examples:
+  Use default values:
+```shell
+ffmpeg -i input.mp4 -vf vidstabdetect -f null -
+```  
+  
+  *` -f null - ` makes sure that no output is produced as this is just the first pass. This in-turn results in faster speed.*
+  
+  Analyzing strongly shaky video and putting the results in file `mytransforms.trf`:
+```shell
+ffmpeg -i input.mp4 -vf vidstabdetect=shakiness=10:accuracy=15:result="mytransforms.trf" -f null -
+```
+  
+  Visualizing the result of internal transformations in the resulting video:
+```shell
+ffmpeg -i input.mp4 -vf vidstabdetect=show=1 dummy_output.mp4
+```
+
+  Analyzing a video with medium shakiness:
+```shell
+ffmpeg -i input.mp4 -vf vidstabdetect=shakiness=5:show=1 dummy_output.mp4
+```  
+  
+##### Second pass (vidstabtransform filter):
+<dl>
+  <dt><b>input</b></dt>
+  <dd>Set path to the file used to read the transforms. Default value is <b>transforms.trf</b>.</dd>
+  <dt><b>smoothing</b></dt>
+  <dd>Set the number of frames (value*2 + 1), used for lowpass filtering the camera movements. Default value is 10.<br>For example, a number of 10 means that 21 frames are used (10 in the past and 10 in the future) to smoothen the motion in the video. A larger value leads to a smoother video, but limits the acceleration of the camera (pan/tilt movements). 0 is a special case where a static camera is simulated.</dd>
+  <dt><b>optalgo</b></dt>
+  <dd>Set the camera path optimization algorithm. Accepted values are:
+  <br><i><b>gauss:</b></i> Gaussian kernel low-pass filter on camera motion (default).
+  <br><i><b>avg:</b></i> Averaging on transformations.</dd>
+  <dt><b>maxshift</b></dt>
+  <dd>Set maximal number of pixels to translate frames. Default value is -1, meaning: no limit.</dd>
+  <dt><b>maxangle</b></dt>
+  <dd>Set maximal angle in radians (degree*PI/180) to rotate frames. Default value is -1, meaning: no limit.</dd>
+  <dt><b>crop</b></dt>
+  <dd>  Specify how to deal with empty frame borders that may be shrunk in due to movement compensation. Available values are:
+  <br><i><b>keep</b></i>: Keep image information from previous frame (default).
+  <br><i><b>black</b></i>: Fill the border-areas black.</dd>
+  <dt><b>invert</b></dt>
+  <dd>Invert transforms if set to 1. Default value is 0.</dd>
+  <dt><b>relative</b></dt>
+  <dd>Consider transforms as relative to previous frame if set to 1, absolute if set to 0. Default value is 0.</dd>
+  <dt><b>zoom</b></dt>
+  <dd>Set percentage to zoom. A positive value will result in a zoom-in effect, a negative value in a zoom-out effect. Default value is 0 (no zoom).</dd>
+  <dt><b>optzoom</b></dt>
+  <dd>Set optimal zooming to avoid blank-borders. Accepted values are:
+  <br><i><b>0</b></i>: Disabled.
+  <br><i><b>1</b></i>: Optimal static zoom value is determined (only very strong movements will lead to visible borders) (default).
+  <br><i><b>2</b></i>: Optimal adaptive zoom value is determined (no borders will be visible), see <b>zoomspeed</b>.
+  <br>Note that the value given at zoom is added to the one calculated here.</dd>
+  <dt><b>zoomspeed</b></dt>
+  <dd>Set percent to zoom maximally each frame (enabled when optzoom is set to 2). Range is from 0 to 5, default value is 0.25.</dd>
+  <dt><b>interpol</b></dt>
+  <dd>Specify type of interpolation. Available values are:
+  <br><i><b>no</b></i>: No interpolation.
+  <br><i><b>linear</b></i>: Linear only horizontal.
+  <br><i><b>bilinear</b></i>: Linear in both directions (default).
+  <br><i><b>bicubic</b></i>: Cubic in both directions (slow speed).</dd>
+  <dt><b>tripod</b></dt>
+  <dd>Enables virtual tripod mode if set to 1, which is equivalent to <b>relative=0:smoothing=0</b>. Default value is 0.
+  <br>NOTE: Use this mode in the second pass only if it was also used in the first pass.</dd>
+  <dt><b>debug</b></dt>
+  <dd>Increase log verbosity if set to 1. Also the detected global motions are written to the temporary file  <b>global_motions.trf</b> . Default value is 0. </dd>
+ 
+</dl>  
+  
+##### Examples:
+  Using default values:
+```shell  
+ffmpeg -i input.mp4 -vf vidstabtransform,unsharp=5:5:0.8:3:3:0.4 out_stabilized.mp4
+```
+Note the use of the ffmpeg's unsharp filter which is always recommended.
+
+
+Zooming-in a bit more and load transform data from a given file:
+```shell
+ffmpeg -i input.mp4 -vf vidstabtransform=zoom=5:input="mytransforms.trf" out_stabilized.mp4
+```
+
+Smoothening the video even more:
+```shell
+ffmpeg -i input.mp4 -vf vidstabtransform=smoothing=30:input="mytransforms.trf" out_stabilized.mp4
+```
+## Development/Contributing
+
+Vidstab is an open source library - pull requests are very welcome. Some things you might like to help us out with:
+
+ * Specific video clips where vidstab is not up-to the mark.
+ * Bugs/fixes.
+ * New features and improvements.
+ * Documentation.
+  
+  
+## License
+
+See [LICENSE](./LICENSE).
diff --git a/Todo b/Todo
new file mode 100644
index 0000000..c72c4f7
--- /dev/null
+++ b/Todo
@@ -0,0 +1,7 @@
+
+Some things that could be done
+
+- scene detection, bad chunk detection: No stabilization if a new scene begins or no good match can be found
+
+- make transform stage work with fifo file for transforms, such that streaming is also possible with 2-stage version - does not really work because of synchronization; needs another way of IPC
+
diff --git a/debian/changelog b/debian/changelog
new file mode 100644
index 0000000..f4beb12
--- /dev/null
+++ b/debian/changelog
@@ -0,0 +1,49 @@
+vid.stab (2:1.0~trusty) trusty; urgency=medium
+
+  * git 4ec5be1
+
+ -- Doug McMahon <mc631man@gmail.com>  Sun, 04 Jan 2015 11:38:26 -0500
+
+vid.stab (2:0.98b-dmo1) unstable; urgency=medium
+
+  * New bugfix release.
+  * Ugly, but upgrade library soname to 1.0
+
+ -- Christian Marillat <marillat@deb-multimedia.org>  Mon, 24 Mar 2014 08:12:52 +0100
+
+vid.stab (2:0.96~20131215-dmo2) unstable; urgency=medium
+
+  * Back to this release. ABI has been changed in 0.98 without library
+    soname change.
+
+ -- Christian Marillat <marillat@deb-multimedia.org>  Mon, 06 Jan 2014 00:39:43 +0100
+
+vid.stab (1:0.98a-dmo1) unstable; urgency=medium
+
+  * New upstream release.
+
+ -- Christian Marillat <marillat@deb-multimedia.org>  Sun, 05 Jan 2014 09:11:33 +0100
+
+vid.stab (1:0.98-dmo1) unstable; urgency=medium
+
+  * New upstream release.
+
+ -- Christian Marillat <marillat@deb-multimedia.org>  Sat, 04 Jan 2014 15:33:30 +0100
+
+vid.stab (1:0.96~20131215-dmo1) unstable; urgency=medium
+
+  * New git release.
+
+ -- Christian Marillat <marillat@deb-multimedia.org>  Sun, 15 Dec 2013 15:19:58 +0100
+
+vid.stab (0.96~36173857-dmo2) unstable; urgency=low
+
+  * libvidstab-dev should depends on libvidstab0.9
+
+ -- Christian Marillat <marillat@deb-multimedia.org>  Sun, 20 Oct 2013 09:37:38 +0200
+
+vid.stab (0.96~36173857-dmo1) unstable; urgency=low
+
+  * Initial release.
+
+ -- Christian Marillat <marillat@deb-multimedia.org>  Sun, 20 Oct 2013 08:54:33 +0200
diff --git a/debian/compat b/debian/compat
new file mode 100644
index 0000000..ec63514
--- /dev/null
+++ b/debian/compat
@@ -0,0 +1 @@
+9
diff --git a/debian/control b/debian/control
new file mode 100644
index 0000000..15503c5
--- /dev/null
+++ b/debian/control
@@ -0,0 +1,21 @@
+Source: vid.stab
+Section: libs
+Priority: extra
+Maintainer: Doug McMahon <mc631man@gmail.com>
+Homepage: http://public.hronopik.de/vid.stab/
+Vcs-Git: https://github.com/georgmartius/vid.stab.git
+Standards-Version: 3.9.5
+Build-Depends: debhelper (>= 9), cmake
+
+Package: libvidstab1.0
+Architecture: any
+Depends: ${shlibs:Depends}, ${misc:Depends}
+Description: Video stabilization library.
+ This package contains runtime files.
+
+Package: libvidstab-dev
+Architecture: any
+Section: libdevel
+Depends: libvidstab1.0 (= ${binary:Version}), ${misc:Depends}
+Description: Video stabilization library.
+ This package contains development files.
diff --git a/debian/copyright b/debian/copyright
new file mode 100644
index 0000000..035c591
--- /dev/null
+++ b/debian/copyright
@@ -0,0 +1,41 @@
+Format: http://www.debian.org/doc/packaging-manuals/copyright-format/1.0/
+Upstream-Name: vid.stab
+Source: https://github.com/georgmartius/vid.stab
+
+Files: *
+Copyright: 2010-2013 Georg Martius <georg.martius@web.de>
+License: GPL-2+
+ This package is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+ .
+ This package is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ GNU General Public License for more details.
+ .
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>
+ .
+ On Debian systems, the complete text of the GNU General
+ Public License version 2 can be found in "/usr/share/common-licenses/GPL-2".
+
+Files: debian/*
+Copyright: 2013 Christian Marillat <marillat@deb-multimedia.org>
+License: GPL-2+
+ This package is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+ .
+ This package is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ GNU General Public License for more details.
+ .
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>
+ .
+ On Debian systems, the complete text of the GNU General
+ Public License version 2 can be found in "/usr/share/common-licenses/GPL-2".
diff --git a/debian/docs b/debian/docs
new file mode 100644
index 0000000..8aa2eb0
--- /dev/null
+++ b/debian/docs
@@ -0,0 +1,2 @@
+README.md
+Todo
diff --git a/debian/install b/debian/install
new file mode 100644
index 0000000..672c74e
--- /dev/null
+++ b/debian/install
@@ -0,0 +1 @@
+usr/lib/libvidstab.so.*
diff --git a/debian/libvidstab-dev.install b/debian/libvidstab-dev.install
new file mode 100644
index 0000000..255ef8e
--- /dev/null
+++ b/debian/libvidstab-dev.install
@@ -0,0 +1,3 @@
+usr/include
+usr/lib/libvidstab.so
+usr/lib/pkgconfig
diff --git a/debian/rules b/debian/rules
new file mode 100755
index 0000000..a8e6c4e
--- /dev/null
+++ b/debian/rules
@@ -0,0 +1,10 @@
+#!/usr/bin/make -f
+
+%:
+	dh $@ 
+
+override_dh_install:
+	dh_install --fail-missing
+
+override_dh_builddeb:
+	dh_builddeb -- -Zxz
diff --git a/debian/source/format b/debian/source/format
new file mode 100644
index 0000000..d3827e7
--- /dev/null
+++ b/debian/source/format
@@ -0,0 +1 @@
+1.0
diff --git a/libavfilter/README b/libavfilter/README
new file mode 100644
index 0000000..29ba324
--- /dev/null
+++ b/libavfilter/README
@@ -0,0 +1,6 @@
+These files are now in the ffmpeg source tree (as of 25.04.2013). They will be removed here at some point.
+
+Make sure you configure ffmpeg with
+--enable-libvidstab
+
+
diff --git a/libavfilter/git-workflow.txt b/libavfilter/git-workflow.txt
new file mode 100644
index 0000000..d42a2a9
--- /dev/null
+++ b/libavfilter/git-workflow.txt
@@ -0,0 +1,20 @@
+
+#How to work in the ffmpeg git (or in any other one actually)
+#Create a new branch
+git branch "BRANCHNAME"
+git checkout "BRANCHNAME"
+#do your stuff
+git commit
+#rebase onto master so that your changes apply only against the master branch and can be squashed
+git rebase -i master
+#or if merged in between then
+git log
+# or to find original branch point:
+diff -u <(git rev-list --first-parent BRANCHNAME)              <(git rev-list --first-parent master) |      sed -ne 's/^ //p' | head -1
+
+git rebase -i 39423.....
+
+git format-patch -s 3969b4b861ce8152e3195e8f8c3437abd2..
+
+
+git branch -D bugfix
diff --git a/mktags.sh b/mktags.sh
new file mode 100755
index 0000000..757e886
--- /dev/null
+++ b/mktags.sh
@@ -0,0 +1,3 @@
+#!/bin/bash
+# Run etags over every header file (*.h) found below src/.
+etags $(find src/ -name "*.h")
\ No newline at end of file
diff --git a/src/boxblur.c b/src/boxblur.c
new file mode 100644
index 0000000..db376fe
--- /dev/null
+++ b/src/boxblur.c
@@ -0,0 +1,188 @@
+/*
+ *  boxblur.c
+ *
+ *  Copyright (C) Georg Martius - July 2010
+ *   georg dot martius at web dot de
+ *
+ *  This file is part of vid.stab video stabilization library
+ *
+ *  vid.stab is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License,
+ *  as published by the Free Software Foundation; either version 2, or
+ *  (at your option) any later version.
+ *
+ *  vid.stab is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with GNU Make; see the file COPYING.  If not, write to
+ *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ */
+
+#include "boxblur.h"
+#include "vidstabdefines.h"
+
+
+void boxblur_hori_C(unsigned char* dest, const unsigned char* src,
+                    int width, int height, int dest_strive, int src_strive, int size);
+void boxblur_vert_C(unsigned char* dest, const unsigned char* src,
+                    int width, int height, int dest_strive, int src_strive, int size);
+
+/*
+  The algorithm:
+  box filter: kernel has only 1's
+  a good blur is obtained for multiple runs of boxblur
+  - 2 runs: tent kernel,  infinity -> gaussian
+  but for our purposes is the tent kernel enough.
+
+  horizontal and vertical 1D boxfilters can be used
+
+  accumulator: acc = acc + new - old, pixel = acc/size
+*/
+
+void boxblurPlanar(VSFrame* dest, const VSFrame* src,
+    VSFrame* buffer, const VSFrameInfo* fi,
+    unsigned int size, BoxBlurColorMode colormode){
+  int localbuffer=0; // becomes 1 when the scratch frame is allocated (and later freed) here
+  int size2;         // kernel size used for the planes with index >= 1
+  if(size<2){        // nothing to blur: dest only becomes a copy of src
+    if(dest!=src)
+      vsFrameCopy(dest,src,fi);
+    return;
+  }
+  VSFrame buf;       // receives the horizontally blurred intermediate image
+  if(buffer==0){
+    vsFrameAllocate(&buf,fi);
+    localbuffer=1;
+  }else{
+    buf = *buffer;   // shallow copy: works on the plane pointers supplied by the caller
+  }
+  // odd and larger than 2 and maximally half of smaller image dimension
+  size  = VS_CLAMP((size/2)*2+1,3,VS_MIN(fi->height/2,fi->width/2));
+  //printf("%i\n",size);
+
+  // luminance: horizontal pass src -> buf, then vertical pass buf -> dest
+  boxblur_hori_C(buf.data[0],  src->data[0],
+                 fi->width, fi->height, buf.linesize[0],src->linesize[0], size);
+  boxblur_vert_C(dest->data[0], buf.data[0],
+                 fi->width, fi->height, dest->linesize[0], buf.linesize[0], size);
+
+  size2 = size/2+1;   // roughly half the luma kernel; NOTE: not always odd (size=3 gives 2, size=7 gives 4)
+  int plane;
+  switch (colormode){
+  case BoxBlurColor:
+    // color: same two passes for every further plane, using the plane's subsampled dimensions
+    if(size2>1){
+      for(plane=1; plane<fi->planes; plane++){
+        boxblur_hori_C(buf.data[plane], src->data[plane],
+                       fi->width  >> vsGetPlaneWidthSubS(fi,plane),
+                       fi->height >> vsGetPlaneHeightSubS(fi,plane),
+                       buf.linesize[plane], src->linesize[plane], size2);
+        boxblur_vert_C(dest->data[plane], buf.data[plane],
+                       fi->width  >> vsGetPlaneWidthSubS(fi,plane),
+                       fi->height >> vsGetPlaneHeightSubS(fi,plane),
+                       dest->linesize[plane], buf.linesize[plane], size2);
+      }
+    }
+    break;
+  case BoxBlurKeepColor:
+    // copy all planes after the first one unchanged from src to dest
+    for(plane=1; plane<fi->planes; plane++){
+      vsFrameCopyPlane(dest, src, fi, plane);
+    } // no break here: falls through to the cases below, which only break
+  case BoxBlurNoColor: // do nothing
+  default:
+    break;
+  }
+
+  if(localbuffer)    // only free the scratch frame if it was allocated in this call
+    vsFrameFree(&buf);
+}
+
+/* /\* */
+/*   The algorithm: */
+/*   see boxblurPlanar but here we for Packed */
+
+/*   we add the 3 bytes of one pixel as if they where one number */
+/* *\/ */
+/* void boxblurPacked(const unsigned char* src, unsigned char* dest,  */
+/*     unsigned char* buffer, const VSFrameInfo* fi,  */
+/*     unsigned int size){ */
+/*   int localbuffer=0; */
+/*   if(buffer==0){ */
+/*     buffer=(unsigned char*) vs_malloc(fi->framesize); */
+/*     localbuffer=1; */
+/*   } */
+/*   // odd and larger than 2 and maximal half of smaller image dimension  */
+/*   //  (and not larger than 256, because otherwise we can get an overflow) */
+/*   size  = VS_CLAMP((size/2)*2+1,3,VS_MIN(256,VS_MIN(fi->height/2,fi->width/2)));  */
+
+/*   // we need a different version of these functions for Packed */
+/*   boxblur_hori_C(src, buffer, fi->width, fi->height, fi->strive, size);   */
+/*   boxblur_vert_C(buffer, dest, fi->width, fi->height, fi->strive, size); */
+
+/*   if(localbuffer) */
+/*     vs_free(buffer); */
+/* } */
+
+
+void boxblur_hori_C(unsigned char* dest, const unsigned char* src,
+                    int width, int height, int dest_strive, int src_strive, int size){
+  // Horizontal box blur: each output pixel is the mean of the 2*(size/2)+1 pixels centred on it (row ends are replicated).
+  int i,j,k;
+  unsigned int acc;
+  const unsigned char *start, *end; // start and end of kernel
+  unsigned char *current;     // current destination pixel
+  int size2 = size/2; // size of one side of the kernel without center
+  const unsigned int ksize = 2*size2 + 1; // samples held in acc; equals size for odd sizes, avoids overshoot for even ones
+  // #pragma omp parallel for private(acc),schedule(guided,2) (no speedup)
+  for(j=0; j< height; j++){
+    start = end = src + j*src_strive;
+    current = dest + j*dest_strive;
+    // initialize accumulator
+    acc= (*start)*(size2+1); // left half of kernel with first pixel
+    for(k=0; k<size2; k++){  // right half of kernel
+      acc+=(*end);
+      end++;
+    }
+    // go through the image
+    for(i=0; i< width; i++){
+      acc = acc + (*end) - (*start); // slide the window: add the entering pixel, drop the leaving one
+      if(i > size2) start++;           // start stays on the first pixel until the window has left the border
+      if(i < width - size2 - 1) end++; // end stops on the last pixel of the row
+      (*current) = acc/ksize;          // divide by the real sample count so the mean cannot exceed 255
+      current++;
+    }
+  }
+}
+
+void boxblur_vert_C(unsigned char* dest, const unsigned char* src,
+        int width, int height, int dest_strive, int src_strive, int size){
+  // Vertical box blur: each output pixel is the mean of the 2*(size/2)+1 pixels centred on it (column ends are replicated).
+  int i,j,k,acc;
+  const unsigned char *start, *end; // start and end of kernel
+  unsigned char *current;     // current destination pixel
+  int size2 = size/2; // size of one side of the kernel without center
+  const int ksize = 2*size2 + 1; // samples held in acc; equals size for odd sizes, avoids overshoot for even ones
+  for(i=0; i< width; i++){
+    start = end = src + i;
+    current = dest + i;
+    // initialize accumulator
+    acc= (*start)*(size2+1); // left half of kernel with first pixel
+    for(k=0; k<size2; k++){  // right half of kernel
+      acc+=(*end);
+      end+=src_strive;
+    }
+    // go through the image
+    for(j=0; j< height; j++){
+      acc = acc - (*start) + (*end); // slide the window one row down
+      if(j > size2) start+=src_strive;           // start stays on the first row until the window has left the border
+      if(j < height - size2 - 1) end+=src_strive; // end stops on the last row
+      *current = acc/ksize;                      // divide by the real sample count so the mean cannot exceed 255
+      current+=dest_strive;
+    }
+  }
+}
diff --git a/src/boxblur.h b/src/boxblur.h
new file mode 100644
index 0000000..3f8fac0
--- /dev/null
+++ b/src/boxblur.h
@@ -0,0 +1,46 @@
+/*
+ *  boxblur.h
+ *
+ *  Copyright (C) Georg Martius - July 2011
+ *   georg dot martius at web dot de
+ *
+ *  This file is part of vid.stab video stabilization library
+ *
+ *  vid.stab is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License,
+ *  as published by the Free Software Foundation; either version 2, or
+ *  (at your option) any later version.
+ *
+ *  vid.stab is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with GNU Make; see the file COPYING.  If not, write to
+ *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ */
+#ifndef __BOXBLUR_H
+#define __BOXBLUR_H
+
+#include "frameinfo.h"
+
+/** BoxBlurColor     - blur also color channels,
+    BoxBlurKeepColor - copy original color channels
+    BoxBlurNoColor   - do not touch color channels in dest
+*/
+typedef enum _BoxBlurColorMode { BoxBlurColor, BoxBlurKeepColor, BoxBlurNoColor} BoxBlurColorMode ;
+
+/** performs a boxblur operation on src and stores results in dest.
+ * It uses an accumulator method and separate horizontal and vertical runs
+ * @param buffer may be given for intermediate results.
+ *            If 0 then it is locally malloced
+ * @param size of blurring kernel, (min 3 and it is made odd)
+ * @param colormode how the color planes are handled (see BoxBlurColorMode)
+ */
+void boxblurPlanar(VSFrame* dest, const VSFrame* src,
+    VSFrame* buffer, const VSFrameInfo* fi,
+    unsigned int size, BoxBlurColorMode colormode);
+
+#endif
diff --git a/src/frameinfo.c b/src/frameinfo.c
new file mode 100644
index 0000000..36a2c44
--- /dev/null
+++ b/src/frameinfo.c
@@ -0,0 +1,191 @@
+/*
+ *  frameinfo.c
+ *
+ *  Copyright (C) Georg Martius - Feb - 2013
+ *   georg dot martius at web dot de
+ *
+ *  This file is part of vid.stab video stabilization library
+ *
+ *  vid.stab is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License,
+ *  as published by the Free Software Foundation; either version 2, or
+ *  (at your option) any later version.
+ *
+ *  vid.stab is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with GNU Make; see the file COPYING.  If not, write to
+ *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ */
+
+#include "frameinfo.h"
+#include "vidstabdefines.h"
+#include <assert.h>
+#include <string.h>
+
+int vsFrameInfoInit(VSFrameInfo* fi, int width, int height, VSPixelFormat pFormat){ // returns 1 on success, 0 for an unhandled format
+  fi->pFormat=pFormat;
+  fi->width = width;
+  fi->height = height;
+  fi->planes=3;        // defaults: 3 planes, no chroma subsampling, 1 byte per pixel
+  fi->log2ChromaW = 0;
+  fi->log2ChromaH = 0;
+  fi->bytesPerPixel=1;
+  assert(width%2==0 && height%2==0); // only even dimensions are accepted
+  switch(pFormat){     // each case only overrides what differs from the defaults above
+   case PF_GRAY8:
+    fi->planes=1;
+    break;
+   case PF_YUV420P:
+    fi->log2ChromaW = 1;
+    fi->log2ChromaH = 1;
+    break;
+   case PF_YUV422P:
+    fi->log2ChromaW = 1;
+    fi->log2ChromaH = 0;
+    break;
+   case PF_YUV444P:
+    break;
+   case PF_YUV410P:
+    fi->log2ChromaW = 2;
+    fi->log2ChromaH = 2;
+    break;
+   case PF_YUV411P:
+    fi->log2ChromaW = 2;
+    fi->log2ChromaH = 0;
+    break;
+   case PF_YUV440P:
+    fi->log2ChromaW = 0;
+    fi->log2ChromaH = 1;
+    break;
+   case PF_YUVA420P:
+    fi->log2ChromaW = 1;
+    fi->log2ChromaH = 1;
+    fi->planes = 4;
+    break;
+   case PF_RGB24:
+   case PF_BGR24:
+    fi->bytesPerPixel=3;
+    fi->planes = 0; // NOTE(review): vsFrameAllocate() asserts planes==1 for packed formats - confirm which is intended
+    break;
+   case PF_RGBA:
+    fi->bytesPerPixel=4;
+    fi->planes = 0; // NOTE(review): same mismatch with the planes==1 assert in vsFrameAllocate()
+    break;
+   default:
+    fi->pFormat=0; // note: 0 is PF_GRAY8 in VSPixelFormat (PF_NONE is -1)
+    return 0;
+  }
+  return 1;
+}
+
+int vsGetPlaneWidthSubS(const VSFrameInfo* fi, int plane){
+  return (plane >= 1 && plane <= 2) ? fi->log2ChromaW : 0; // only planes 1 and 2 get the chroma width shift
+}
+
+int vsGetPlaneHeightSubS(const VSFrameInfo* fi, int plane){
+  return (plane >= 1 && plane <= 2) ? fi->log2ChromaH : 0; // only planes 1 and 2 get the chroma height shift
+}
+
+int vsFrameIsNull(const VSFrame* frame) {
+  return !frame || !frame->data[0]; // null pointer, or a frame whose first plane is unset
+}
+
+
+int vsFramesEqual(const VSFrame* frame1,const VSFrame* frame2){
+  return frame1 != 0 && frame2 != 0 && (frame2 == frame1 || frame2->data[0] == frame1->data[0]); // same object or same plane-0 buffer
+}
+
+void vsFrameNull(VSFrame* frame){
+  memset(frame->linesize,0,sizeof(frame->linesize)); // all four line sizes become 0
+  memset(frame->data,0,sizeof(frame->data));         // all four plane pointers become null
+}
+
+void vsFrameAllocate(VSFrame* frame, const VSFrameInfo* fi){ // allocates zero-filled plane buffers for frame as described by fi
+  vsFrameNull(frame); // start from an all-null frame
+  if(fi->pFormat<PF_PACKED){ // formats listed before PF_PACKED: one buffer per plane
+    int i;
+    assert(fi->planes > 0 && fi->planes <= 4);
+    for (i=0; i< fi->planes; i++){
+      int w = fi->width  >> vsGetPlaneWidthSubS(fi, i);  // plane width after subsampling
+      int h = fi->height >> vsGetPlaneHeightSubS(fi, i); // plane height after subsampling
+      frame->data[i] = vs_zalloc(w * h * sizeof(uint8_t));
+      frame->linesize[i] = w; // lines are stored without padding
+      if(frame->data[i]==0)   // failure is only logged; data[i] stays null and the loop continues
+        vs_log_error("vid.stab","out of memory: cannot allocated buffer");
+    }
+  }else{ // remaining formats: a single buffer holding bytesPerPixel bytes per pixel
+    assert(fi->planes==1);
+    int w = fi->width;
+    int h = fi->height;
+    frame->data[0] = vs_zalloc(w * h * sizeof(uint8_t)*fi->bytesPerPixel);
+    frame->linesize[0] = w * fi->bytesPerPixel;
+    if(frame->data[0]==0) // failure is only logged; the caller must check the frame (see vsFrameIsNull)
+      vs_log_error("vid.stab","out of memory: cannot allocated buffer");
+  }
+}
+
+void vsFrameCopyPlane(VSFrame* dest, const VSFrame* src,
+                    const VSFrameInfo* fi, int plane){ // copies one plane of src into dest
+  assert(src->data[plane]);
+  int h = fi->height >> vsGetPlaneHeightSubS(fi, plane); // number of lines in this plane
+  if(src->linesize[plane] == dest->linesize[plane]) // identical line sizes: copy the plane in one go
+    memcpy(dest->data[plane], src->data[plane], src->linesize[plane] *  h * sizeof(uint8_t));
+  else { // different line sizes: copy line by line
+    uint8_t* d = dest->data[plane];
+    const uint8_t* s = src->data[plane];
+    int w = fi->width  >> vsGetPlaneWidthSubS(fi, plane); // bytes copied per line
+    for (; h>0; h--) {
+      memcpy(d,s,sizeof(uint8_t) * w);
+      d += dest->linesize[plane]; // advance each side by its own line size
+      s += src ->linesize[plane];
+    }
+  }
+}
+
+void vsFrameCopy(VSFrame* dest, const VSFrame* src, const VSFrameInfo* fi){
+  int p = 0;
+  assert(fi->planes > 0 && fi->planes <= 4);
+  while (p < fi->planes){ // copy the frame plane by plane
+    vsFrameCopyPlane(dest,src,fi,p++);
+  }
+}
+
+void vsFrameFillFromBuffer(VSFrame* frame, uint8_t* img, const VSFrameInfo* fi){ // points frame's planes into img, nothing is copied
+  assert(fi->planes > 0 && fi->planes <= 4);
+  vsFrameNull(frame);
+  long int offset = 0; // byte offset of the current plane inside img
+  int i;
+  for (i=0; i< fi->planes; i++){
+    int w = fi->width  >> vsGetPlaneWidthSubS(fi, i);
+    int h = fi->height >> vsGetPlaneHeightSubS(fi, i);
+    frame->data[i] = img + offset;
+    frame->linesize[i] = w*fi->bytesPerPixel;
+    offset += h * w*fi->bytesPerPixel; // planes are taken to follow each other without gaps
+  }
+}
+
+void vsFrameFree(VSFrame* frame){
+  // release every plane buffer that is set and leave the frame in the all-null state
+  for (int p=0; p<4; ++p){
+    if(frame->data[p] != 0) vs_free(frame->data[p]);
+    frame->linesize[p]=0;
+    frame->data[p]=0;
+  }
+}
+
+
+/*
+ * Local variables:
+ *   c-file-style: "stroustrup"
+ *   c-file-offsets: ((case-label . *) (statement-case-intro . *))
+ *   indent-tabs-mode: nil
+ *   c-basic-offset: 2 t
+ * End:
+ *
+ * vim: expandtab shiftwidth=2:
+ */
diff --git a/src/frameinfo.h b/src/frameinfo.h
new file mode 100644
index 0000000..f48e92e
--- /dev/null
+++ b/src/frameinfo.h
@@ -0,0 +1,120 @@
+/*
+ *  frameinfo.h
+ *
+ *  Copyright (C) Georg Martius - June 2007 - 2011
+ *   georg dot martius at web dot de
+ *
+ *  This file is part of vid.stab video stabilization library
+ *
+ *  vid.stab is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License,
+ *  as published by the Free Software Foundation; either version 2, or
+ *  (at your option) any later version.
+ *
+ *  vid.stab is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with GNU Make; see the file COPYING.  If not, write to
+ *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ */
+#ifndef FRAMEINFO_H
+#define FRAMEINFO_H
+
+#include <stddef.h>
+#include <stdlib.h>
+#include <inttypes.h>
+
+/// pixel formats
+typedef enum {PF_NONE = -1,
+              PF_GRAY8,     ///<        Y        ,  8bpp
+              PF_YUV420P,   ///< planar YUV 4:2:0, 12bpp, (1 Cr & Cb sample per 2x2 Y samples)
+              PF_YUV422P,   ///< planar YUV 4:2:2, 16bpp, (1 Cr & Cb sample per 2x1 Y samples)
+              PF_YUV444P,   ///< planar YUV 4:4:4, 24bpp, (1 Cr & Cb sample per 1x1 Y samples)
+              PF_YUV410P,   ///< planar YUV 4:1:0,  9bpp, (1 Cr & Cb sample per 4x4 Y samples)
+              PF_YUV411P,   ///< planar YUV 4:1:1, 12bpp, (1 Cr & Cb sample per 4x1 Y samples)
+              PF_YUV440P,   ///< planar YUV 4:4:0 (1 Cr & Cb sample per 1x2 Y samples)
+              PF_YUVA420P,  ///< planar YUV 4:2:0, 20bpp, (1 Cr & Cb sample per 2x2 Y & A samples)
+              PF_PACKED,    ///< dummy: packed formats start here
+              PF_RGB24,     ///< packed RGB 8:8:8, 24bpp, RGBRGB...
+              PF_BGR24,     ///< packed RGB 8:8:8, 24bpp, BGRBGR...
+              PF_RGBA,      ///< packed RGBA 8:8:8:8, 32bpp, RGBARGBA...
+              PF_NUMBER     ///< number of pixel formats
+} VSPixelFormat;
+
+/** frame information for deshaking lib
+    This only works for planar image formats
+ */
+typedef struct vsframeinfo {
+  int width, height;
+  int planes;        // number of planes (1 luma, 2,3 chroma, 4 alpha)
+  int log2ChromaW; // subsampling of width in chroma planes
+  int log2ChromaH; // subsampling of height in chroma planes
+  VSPixelFormat pFormat;
+  int bytesPerPixel; // number of bytes per pixel (for packed formats)
+} VSFrameInfo;
+
+/** frame data according to frameinfo
+ */
+typedef struct vsframe {
+  uint8_t* data[4]; // data in planes. For packed data everything is in plane 0
+  int linesize[4]; // size of one line in each of the planes
+} VSFrame;
+
+// use it to calculate the CHROMA sizes (rounding is correct)
+#define CHROMA_SIZE(width,log2sub)  (-(-(width) >> (log2sub)))
+
+/// initializes the frameinfo for the given format
+int vsFrameInfoInit(VSFrameInfo* fi, int width, int height, VSPixelFormat pFormat);
+
+
+/// returns the subsampling shift amount, horizontally for the given plane
+int vsGetPlaneWidthSubS(const VSFrameInfo* fi, int plane);
+
+/// returns the subsampling shift amount, vertically for the given plane
+int vsGetPlaneHeightSubS(const VSFrameInfo* fi, int plane);
+
+/// zero initialization
+void vsFrameNull(VSFrame* frame);
+
+/// returns true if frame is null (data[0]==0)
+int vsFrameIsNull(const VSFrame* frame);
+
+/// compares two frames for identity (based on data[0])
+int vsFramesEqual(const VSFrame* frame1,const VSFrame* frame2);
+
+/// allocates memory for a frame
+void vsFrameAllocate(VSFrame* frame, const VSFrameInfo* fi);
+
+
+/// copies the given plane number from src to dest
+void vsFrameCopyPlane(VSFrame* dest, const VSFrame* src,
+                    const VSFrameInfo* fi, int plane);
+
+/// copies src to dest
+void vsFrameCopy(VSFrame* dest, const VSFrame* src, const VSFrameInfo* fi);
+
+/** fills the data pointer so that it corresponds to the img saved in the linear buffer.
+    No copying is performed.
+    Do not call vsFrameFree() on it.
+ */
+void vsFrameFillFromBuffer(VSFrame* frame, uint8_t* img, const VSFrameInfo* fi);
+
+/// frees memory
+void vsFrameFree(VSFrame* frame);
+
+#endif  /* FRAMEINFO_H */
+
+/*
+ * Local variables:
+ *   c-file-style: "stroustrup"
+ *   c-file-offsets: ((case-label . *) (statement-case-intro . *))
+ *   indent-tabs-mode: nil
+ *   c-basic-offset: 2 t
+ * End:
+ *
+ * vim: expandtab shiftwidth=2:
+ */
diff --git a/src/libvidstab.c b/src/libvidstab.c
new file mode 100644
index 0000000..6ade7ba
--- /dev/null
+++ b/src/libvidstab.c
@@ -0,0 +1,79 @@
+/*
+ * libvidstab.c
+ *
+ *  Created on: Feb 21, 2011
+ *  Copyright (C) Georg Martius - February 2011
+ *
+ *  This file is part of transcode, a video stream processing tool
+ *
+ *  transcode is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2, or (at your option)
+ *  any later version.
+ *
+ *  transcode is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with GNU Make; see the file COPYING.  If not, write to
+ *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ */
+
+#include "libvidstab.h"
+
+#include <string.h>
+#include <stdio.h>
+#include <stdarg.h>
+
+/**** default values for memory and logging ****/
+
+/// memory allocation with zero initialization; returns NULL when the allocation fails
+void* _zalloc(size_t size){
+    return calloc(1, size); // calloc zero-fills; memset(malloc(size),...) wrote through NULL on failure
+}
+
+/// default logging function: writes a type prefix, "(tag):", the formatted message and a newline to stderr; always returns 0
+int _vs_log(int type, const char* tag, const char* format, ...){
+    fprintf(stderr,"%s (%s):",
+            type == VS_ERROR_TYPE ? "Error: " :
+            type == VS_WARN_TYPE  ? "Warn:  " :
+            type == VS_INFO_TYPE  ? "Info:  " :
+            type == VS_MSG_TYPE   ? "Msg:   " : "Unknown", // any other type value
+            tag);
+    va_list ap;
+    va_start (ap, format);
+    vfprintf (stderr, format, ap); // printf-style expansion of the variable arguments
+    va_end (ap);
+    fprintf(stderr,"\n");
+    return 0;
+}
+
+
+vs_malloc_t vs_malloc   = malloc;
+vs_realloc_t vs_realloc = realloc;
+vs_free_t vs_free       = free;
+vs_zalloc_t vs_zalloc   = _zalloc;
+
+vs_strdup_t vs_strdup   = strdup;
+
+vs_log_t vs_log         = _vs_log;
+int VS_ERROR_TYPE = 0;
+int VS_WARN_TYPE  = 1;
+int VS_INFO_TYPE  = 2;
+int VS_MSG_TYPE   = 3;
+
+int VS_ERROR     = -1;
+int VS_OK        = 0;
+
+/*
+ * Local variables:
+ *   c-file-style: "stroustrup"
+ *   c-file-offsets: ((case-label . *) (statement-case-intro . *))
+ *   indent-tabs-mode: nil
+ * End:
+ *
+ * vim: expandtab shiftwidth=4:
+ */
diff --git a/src/libvidstab.h b/src/libvidstab.h
new file mode 100644
index 0000000..b496120
--- /dev/null
+++ b/src/libvidstab.h
@@ -0,0 +1,48 @@
+/*
+ *  libvidstab.h
+ *
+ *  Created on: Feb 21, 2011
+ *  Copyright (C) Georg Martius - June 2007
+ *
+ *  This file is part of transcode, a video stream processing tool
+ *
+ *  transcode is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2, or (at your option)
+ *  any later version.
+ *
+ *  transcode is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with GNU Make; see the file COPYING.  If not, write to
+ *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ */
+
+#ifndef LIBVIDSTAB_H
+#define LIBVIDSTAB_H
+
+#define LIBVIDSTAB_VERSION "v1.0 (2014-01-04)"
+
+#include "frameinfo.h"
+#include "motiondetect.h"
+#include "transform.h"
+#include "vsvector.h"
+#include "serialize.h"
+#include "localmotion2transform.h"
+
+#endif  /* LIBVIDSTAB_H */
+
+/*
+ * Local variables:
+ *   c-file-style: "stroustrup"
+ *   c-file-offsets: ((case-label . *) (statement-case-intro . *))
+ *   indent-tabs-mode: nil
+ *   c-basic-offset: 2 t
+ * End:
+ *
+ * vim: expandtab shiftwidth=2:
+ */
diff --git a/src/localmotion2transform.c b/src/localmotion2transform.c
new file mode 100644
index 0000000..a78b86b
--- /dev/null
+++ b/src/localmotion2transform.c
@@ -0,0 +1,342 @@
+/*
+ * localmotion2transform.c
+ *
+ *  Copyright (C) Georg Martius - January 2013
+ *   georg dot martius at web dot de
+ *
+ *  This file is part of vid.stab video stabilization library
+ *
+ *  vid.stab is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License,
+ *  as published by the Free Software Foundation; either version 2, or
+ *  (at your option) any later version.
+ *
+ *  vid.stab is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with GNU Make; see the file COPYING.  If not, write to
+ *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ */
+
+#include "localmotion2transform.h"
+#include "transformtype_operations.h"
+#include <assert.h>
+#include <string.h>
+
+/* #include <sys/time.h> */
+/* long timeOfDayinMS() { */
+/*   struct timeval t; */
+/*   gettimeofday(&t, 0); */
+/*   return t.tv_sec*1000 + t.tv_usec/1000; */
+/* } */
+
+int vsLocalmotions2Transforms(VSTransformData* td,
+                              const VSManyLocalMotions* motions,
+                              VSTransformations* trans ){
+  int len = vs_vector_size(motions); // one transform is produced per entry in motions
+  assert(trans->len==0 && trans->ts == 0);
+  trans->ts = vs_malloc(sizeof(VSTransform)*len );
+  if(trans->ts == 0 && len > 0) return VS_ERROR; // allocation failed: the loops below would write through a null pointer
+  /* long start= timeOfDayinMS(); */
+  FILE *f=0;
+  if(td->conf.storeTransforms){
+    f = fopen("global_motions.trf","w"); // stays 0 if it cannot be opened; every use below checks f
+  }
+  if(td->conf.simpleMotionCalculation==0){ // gradient-descent based calculation
+    for(int i=0; i< len; i++) {
+      trans->ts[i]=vsMotionsToTransform(td,VSMLMGet(motions,i), f);
+    }
+  }else{ // simple calculation
+    for(int i=0; i< len; i++) {
+      trans->ts[i]=vsSimpleMotionsToTransform(td->fiSrc, td->conf.modName,VSMLMGet(motions,i));
+    }
+  }
+  trans->len=len;
+
+  /* long end = timeOfDayinMS(); */
+  /* vs_log_info(td->conf.modName, "Localmotions2Transform (%i) with %i frames took %i ms\n", */
+  /*             td->conf.simpleMotionCalculation, len, end-start); */
+  if(f) fclose(f);
+  return VS_OK;
+}
+
+VSArray vsTransformToArray(const VSTransform* t){ // packs x, y, alpha and zoom of t into a new 4-element array
+  VSArray a = vs_array_new(4);
+  a.dat[0] = t->x;
+  a.dat[1] = t->y;
+  a.dat[2] = t->alpha;
+  a.dat[3] = t->zoom;
+  return a; // released by the caller with vs_array_free(), as vsMotionsToTransform() does
+}
+
+VSTransform vsArrayToTransform(VSArray a){ // inverse of vsTransformToArray: reads a.dat[0..3] in the same order
+  return new_transform(a.dat[0],a.dat[1],a.dat[2],a.dat[3],0,0,0); // presumably x, y, alpha, zoom; the last three arguments are passed as 0
+}
+
+struct VSGradientDat { // context handed to calcTransformQuality() through the void* argument
+  VSTransformData* td;         // its fiSrc member is used to prepare the transform
+  const LocalMotions* motions; // the local motions the candidate transform is compared against
+  VSArray missmatches; // if negative then local motion is ignored
+};
+
+double calcTransformQuality(VSArray params, void* dat){ // cost function for vsGradientDescent(): smaller is better
+  struct VSGradientDat* gd= (struct VSGradientDat*) dat;
+  const LocalMotions* motions = gd->motions;
+  int num_motions=vs_vector_size(motions);
+  VSTransform t = vsArrayToTransform(params); // candidate transform under evaluation
+  double error=0;
+
+  PreparedTransform pt= prepare_transform(&t, &gd->td->fiSrc);
+  int num = 1; // we start with 1 to avoid div by zero
+  for (int i = 0; i < num_motions; i++) {
+    if(gd->missmatches.dat[i]>=0){ // negative entries mark disabled fields
+      LocalMotion* m = LMGet(motions,i);
+      double vx,vy;
+      transform_vec_double(&vx, &vy, &pt, (Vec*)&m->f);
+      vx -= m->f.x; vy -= m->f.y; // displacement of the field position under the candidate transform
+      double e   = sqr(vx - m->v.x) +  sqr(vy - m->v.y); // squared distance to the measured motion
+      gd->missmatches.dat[i]=e; // stored so the caller can later disable badly fitting fields
+      error += e;
+      num++;
+    }
+  }
+  // 1 pixel translation missmatch is roughly (with size 500):
+  // alpha=0.11 (degree), zoom=0.2; The zoom is however often much larger, so less penalty.
+  return error/num + fabs(t.alpha)/5.0 + fabs(t.zoom)/500.0; // averaged error plus penalties on rotation and zoom
+}
+
+double intMean(const int* ds, int len) {
+  double total = 0.0; // running sum of the first len entries of ds
+  int idx = 0;
+  while (idx < len) total += ds[idx++];
+  return total / len;
+}
+
+// only calculates the mean x/y of the local motions, used to initialise the gradient descent
+VSTransform meanMotions(VSTransformData* td, const LocalMotions* motions){
+  VSTransform t = null_transform();
+  int len = motions ? vs_vector_size(motions) : 0; // check for null before motions is used at all
+  if(len==0) {
+    t.extra = 1; // prob. blank frame or too low contrast, ignore later
+    return t;    // xs/ys are not fetched yet, so this early exit has nothing to free
+  }
+  int* xs = localmotions_getx(motions);
+  int* ys = localmotions_gety(motions);
+  t.x = intMean(xs,len);
+  t.y = intMean(ys,len);
+  vs_free(xs);
+  vs_free(ys);
+  return t;
+}
+
+/* Disables those fields (mask = -1) whose (miss)quality is high.
+   @param mask: field masks (<0 means disabled)
+   @param missqualities: measure for each field (larger is worse)
+   @param stddevs: number of standard deviations above the mean at which a field is excluded
+   Both arrays have to be of the same length.
+   @return number of fields disabled by this call
+*/
+int disableFields(VSArray mask, VSArray missqualities, double stddevs){
+  assert(mask.len == missqualities.len);
+  // cut-off threshold: mean plus stddevs standard deviations of the missqualities
+  double mu   = mean(missqualities.dat, missqualities.len);
+  double sigma = stddev(missqualities.dat, missqualities.len, mu);
+  double thresh = mu + stddevs * sigma;
+  int cnt=0;
+  for(int i=0; i< mask.len; i++){
+    if(missqualities.dat[i]>thresh){
+      mask.dat[i]=-1.0; // disable field
+      cnt++;
+    }
+  }
+  return cnt;
+}
+
+VSTransform vsMotionsToTransform(VSTransformData* td,
+                                 const LocalMotions* motions,
+                                 FILE* f){ // f may be 0; otherwise one result line per call is written to it
+  VSTransform t = meanMotions(td, motions); // start value for the optimization
+  if(motions==0 || vs_vector_size(motions)==0){
+    if (f) fprintf(f,"0 0 0 0 0 %i\n# no fields\n", t.extra);
+    return t;
+  }
+  VSArray missmatches = vs_array_new(vs_vector_size(motions)); // one entry per field, negative = disabled
+  VSArray params = vsTransformToArray(&t);
+  double residual;
+  struct VSGradientDat dat;
+  dat.motions = motions;
+  dat.td      = td;
+  dat.missmatches = missmatches;
+
+  // first we throw away those fields that match badly (during motion detection)
+  VSArray matchQualities = localmotionsGetMatch(motions);
+  int dis1=disableFields(missmatches, matchQualities, 1.5);
+  vs_array_free(matchQualities);
+
+  VSArray result;
+  double ss[] = {0.2, 0.2, 0.00005, 0.1}; // initial step sizes, one per entry of params
+  int k;
+  int dis2=0;
+  for(k=0; k<3; k++){
+    // optimize params to minimize transform quality (N=16, i.e. up to 16 steps per dimension)
+    result = vsGradientDescent(calcTransformQuality, params, &dat,
+                                        16, vs_array(ss,4), 0.01, &residual);
+    vs_array_free(params); // result is a separate array, the start values are no longer needed
+    // now we need to ignore the fields that don't fit well (e.g. moving objects)
+    // cut off everything above 1 std. dev. for skewed distributions
+    // this will cut off the tail
+    // do this only two times (3 gradient optimizations in total)
+    if((k==0 && residual>0.1) || (k==1 && residual>20)){
+      dis2 += disableFields(missmatches, missmatches, 1.0);
+      params = result; // the next round starts from the current optimum
+    } else break;
+  }
+
+  if(td->conf.verbose  & VS_DEBUG)
+    vs_log_info(td->conf.modName, "disabled (%i+%i)/%i,\tresidual: %f (%i)\n",
+                dis1, dis2, vs_vector_size(motions), residual, k+1);
+  t = vsArrayToTransform(result);
+  vs_array_free(result);
+  vs_array_free(missmatches);
+  // check if sufficiently good match was achieved:
+  if(residual>100){ // test threshold.
+    t.extra=1;
+  }
+  if(f){
+    fprintf(f,"0 %f %f %f %f %i\n#\t\t\t\t\t %f %i\n", t.x, t.y, t.alpha, t.zoom, t.extra,
+            residual, k + 1);
+  }
+  if(!td->conf.smoothZoom) // the zoom is written to f above but only returned when smoothZoom is set
+    t.zoom=0;
+  return t;
+}
+
+
+
+/* general purpose gradient descent, one dimension of params at a time */
+VSArray vsGradientDescent(double (*eval)(VSArray, void*),
+                         VSArray params, void* dat,
+                         int N, VSArray stepsizes, double threshold, double* residual){
+  const int dim = params.len;
+  double best = eval(params, dat); // value at the accepted position
+  VSArray cur = vs_array_copy(params);
+  VSArray grad = vs_array_new(dim);
+  assert(stepsizes.len == params.len);
+  // cycle through the dimensions (N steps each) until below threshold
+  for(int it=0; it< N*dim && best > threshold; it++){
+    const int d = it%dim;
+    VSArray cand = vs_array_copy(cur);
+    // slope along dimension d from a tiny step of random sign
+    const double h = (rand()%2) ? 1e-6 : -1e-6;
+    cand.dat[d] += h;
+    double probe = eval(cand, dat);
+    vs_array_zero(&grad);
+    grad.dat[d] = (best - probe)/h;
+    // candidate: presumably cur + stepsize * grad (scale, then plus)
+    vs_array_plus(&cand, cur, *vs_array_scale(&cand, grad, stepsizes.dat[d]));
+    probe = eval(cand, dat);
+    if(!(probe < best)){ // overshoot: reduce stepsize and don't do the step
+      stepsizes.dat[d] /= 2.0;
+      vs_array_free(cand);
+      continue;
+    }
+    vs_array_free(cur); // improvement: the candidate becomes the position
+    cur = cand;
+    best = probe;
+    stepsizes.dat[d] *= 1.2; // increase stepsize (4 successful steps will double it)
+  }
+  vs_array_free(grad);
+  vs_array_free(stepsizes); // the stepsizes array is consumed here
+  if(residual != NULL) *residual = best;
+  return cur;
+}
+
+
+/* *** old  calculation ***/
+
+/* calculates the rotation angle of the given local motion with respect
+ * to the given center-point; the result is wrapped into [-pi, pi]
+ */
+double vsCalcAngle(const LocalMotion* lm, int center_x, int center_y){
+  const int dx = lm->f.x - center_x;
+  const int dy = lm->f.y - center_y;
+  // we better ignore fields that are too close to the rotation center
+  if (abs(dx) + abs(dy) < lm->f.size*2)
+    return 0;
+  // direction of the field before and after applying the motion vector
+  double before = atan2(dy, dx);
+  double after  = atan2(dy + lm->v.y, dx + lm->v.x);
+  double diff = after - before;
+  if (diff > M_PI)  return diff - 2 * M_PI;
+  if (diff < -M_PI) return diff + 2 * M_PI;
+  return diff;
+}
+
+
+/* cleaned-means estimate of translation and rotation (see header for details) */
+VSTransform vsSimpleMotionsToTransform(VSFrameInfo fi, const char* modName,
+                                       const LocalMotions* motions){
+  int center_x = 0, center_y = 0;
+  VSTransform t = null_transform();
+  if(motions==0) return t;
+  int num_motions=vs_vector_size(motions);
+  // return before anything is allocated, otherwise the angle buffer leaks
+  if(num_motions < 1)
+    return t;
+
+  // calc center point of all remaining fields
+  for (int i = 0; i < num_motions; i++) {
+    center_x += LMGet(motions,i)->f.x;
+    center_y += LMGet(motions,i)->f.y;
+  }
+  center_x /= num_motions;
+  center_y /= num_motions;
+  // cleaned mean of the motions: the translation estimate
+  LocalMotion meanmotion = cleanmean_localmotions(motions);
+
+  // figure out angle (the calculation is inaccurate for 5 and less fields)
+  double *angles = 0;
+  if (num_motions >= 6) {
+    angles = (double*) vs_malloc(sizeof(double) * num_motions);
+    if (!angles)
+      vs_log_info(modName, "malloc failed, rotation is ignored\n");
+  }
+  if (!angles) {
+    t.alpha = 0;
+  } else {
+    for (int i = 0; i < num_motions; i++) {
+      // subtract avg and calc angle
+      LocalMotion m = sub_localmotion(LMGet(motions,i),&meanmotion);
+      angles[i] = vsCalcAngle(&m, center_x, center_y);
+    }
+    double min, max;
+    t.alpha = -cleanmean(angles, num_motions, &min, &max);
+    if (max - min > 1.0) { // angles disagree too much: assume no rotation
+      t.alpha = 0;
+      vs_log_info(modName, "too large variation in angle(%f)\n", max-min);
+    }
+    vs_free(angles);
+  }
+  // compensate for off-center rotation
+  double p_x = (center_x - fi.width / 2);
+  double p_y = (center_y - fi.height / 2);
+  t.x = meanmotion.v.x + (cos(t.alpha) - 1) * p_x - sin(t.alpha) * p_y;
+  t.y = meanmotion.v.y + sin(t.alpha) * p_x + (cos(t.alpha) - 1) * p_y;
+  return t;
+}
+
+
+
+/*
+ * Local variables:
+ *   c-file-style: "stroustrup"
+ *   c-file-offsets: ((case-label . *) (statement-case-intro . *))
+ *   indent-tabs-mode: nil
+ *   c-basic-offset: 2 t
+ * End:
+ *
+ * vim: expandtab shiftwidth=2:
+ */
diff --git a/src/localmotion2transform.h b/src/localmotion2transform.h
new file mode 100644
index 0000000..18940e5
--- /dev/null
+++ b/src/localmotion2transform.h
@@ -0,0 +1,86 @@
+/*
+ * localmotion2transform.h
+ *
+ *  Copyright (C) Georg Martius - January 2013
+ *   georg dot martius at web dot de
+ *
+ *  This file is part of vid.stab video stabilization library
+ *
+ *  vid.stab is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License,
+ *  as published by the Free Software Foundation; either version 2, or
+ *  (at your option) any later version.
+ *
+ *  vid.stab is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with GNU Make; see the file COPYING.  If not, write to
+ *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ */
+
+#ifndef __LOCALMOTION2TRANSFORM_H
+#define __LOCALMOTION2TRANSFORM_H
+
+
+#include "transform.h"
+#include "transformtype.h"
+#include "serialize.h"
+
+
+/** converts for each frame the localmotions into a transform
+ */
+int vsLocalmotions2Transforms(VSTransformData* td,
+                              const VSManyLocalMotions* motions,
+                              VSTransformations* trans );
+
+/** calculates rotation angle for the given transform and
+ * field with respect to the given center-point
+ */
+double vsCalcAngle(const LocalMotion* lm, int center_x, int center_y);
+
+/** calculates the transformation that caused the observed motions.
+    Using a simple cleaned-means approach to eliminate outliers.
+    translation and rotation is calculated.
+    calculate shift as cleaned mean of all local motions
+    calculate rotation angle of each field in respect to center of fields
+    after shift removal
+    calculate rotation angle as cleaned mean of all angles
+    compensate for possibly off-center rotation
+*/
+VSTransform vsSimpleMotionsToTransform(VSFrameInfo fi, const char* modname,
+                                       const LocalMotions* motions);
+
+
+/** calculates the transformation that caused the observed motions.
+    Using a gradient descent algorithm.
+    Outliers are removed by repeated gaussianizing error distribution.
+    If f is not NULL, the resulting transform is also written to it.
+*/
+VSTransform vsMotionsToTransform(VSTransformData* td,
+                                 const LocalMotions* motions,
+                                 FILE* f);
+
+
+
+/** general purpose gradient descent algorithm
+
+ * Parameters:
+ *       eval: evaluation function (value/energy to be minimized)
+ *     params: initial starting parameters
+ *        dat: custom data for eval function
+ *          N: number of iterations per dimension of params (e.g. 100)
+ *  stepsizes: stepsizes for each dimension of the gradient {0.1,0.1...} (will be deleted)
+ *  threshold: value below which the value/energy is considered to be minimized (0)
+ *   residual: residual value (call by reference) (can be NULL)
+ * Return Value:
+ *     Optimized parameters
+ */
+VSArray vsGradientDescent(double (*eval)(VSArray, void*),
+                         VSArray params, void* dat,
+                         int N, VSArray stepsizes, double threshold, double* residual);
+
+#endif
diff --git a/src/motiondetect.c b/src/motiondetect.c
new file mode 100644
index 0000000..46d52bf
--- /dev/null
+++ b/src/motiondetect.c
@@ -0,0 +1,907 @@
+/*
+ * motiondetect.c
+ *
+ *  Copyright (C) Georg Martius - February 1007-2011
+ *   georg dot martius at web dot de
+ *  Copyright (C) Alexey Osipov - Jule 2011
+ *   simba at lerlan dot ru
+ *   speed optimizations (threshold, spiral, SSE, asm)
+ *
+ *  This file is part of vid.stab video stabilization library
+ *
+ *  vid.stab is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License,
+ *  as published by the Free Software Foundation; either version 2, or
+ *  (at your option) any later version.
+ *
+ *  vid.stab is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with GNU Make; see the file COPYING.  If not, write to
+ *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ */
+#include "motiondetect.h"
+#include "motiondetect_internal.h"
+#include "motiondetect_opt.h"
+#include <math.h>
+#include <limits.h>
+#include <stdint.h>
+#include <string.h>
+#include <assert.h>
+
+#ifdef USE_OMP
+#include <omp.h>
+#endif
+
+#include "boxblur.h"
+#include "vidstabdefines.h"
+#include "localmotion2transform.h"
+#include "transformtype_operations.h"
+#include "transformtype_operations.h"
+
+#define USE_SPIRAL_FIELD_CALC
+
+
+/* internal data structures */
+
+// structure that contains the contrast and the index of a field
+typedef struct _contrast_idx {
+  double contrast; // contrast of the field (selectfields zeroes it below the threshold)
+  int index;       // position of the field in the fields array it was computed from
+} contrast_idx;
+
+
+VSMotionDetectConfig vsMotionDetectGetDefaultConfig(const char* modName){
+  VSMotionDetectConfig defaults;
+  defaults.modName           = modName; // passed to the vs_log_* calls
+  defaults.shakiness         = 5;
+  defaults.accuracy          = 15;      // upper limit accepted by vsMotionDetectInit
+  defaults.stepSize          = 6;
+  defaults.contrastThreshold = 0.25;
+  defaults.virtualTripod     = 0;       // no fixed reference frame
+  defaults.show              = 0;       // do not draw fields into the frames
+  return defaults;
+}
+
+// copies the configuration of md into *conf (no-op if either is NULL)
+void vsMotionDetectGetConfig(VSMotionDetectConfig* conf, const VSMotionDetect* md){
+  if(!md || !conf) return;
+  *conf = md->conf;
+}
+
+const VSFrameInfo* vsMotionDetectGetFrameInfo(const VSMotionDetect* md){
+  return &md->fi; // points into md (no copy is made); md is not checked for NULL
+}
+
+
+/* initializes md for frames described by fi; returns VS_OK or VS_ERROR */
+int vsMotionDetectInit(VSMotionDetect* md, const VSMotionDetectConfig* conf, const VSFrameInfo* fi){
+  assert(md && conf && fi); // all three are dereferenced below
+  md->conf = *conf;
+  md->fi = *fi;
+
+  if(fi->pFormat<=PF_NONE ||  fi->pFormat==PF_PACKED || fi->pFormat>=PF_NUMBER) {
+    vs_log_warn(md->conf.modName, "unsupported Pixel Format (%i)\n",
+                md->fi.pFormat);
+    return VS_ERROR;
+  }
+
+  vsFrameAllocate(&md->prev, &md->fi);
+  if (vsFrameIsNull(&md->prev)) {
+    vs_log_error(md->conf.modName, "malloc failed");
+    return VS_ERROR;
+  }
+
+  vsFrameNull(&md->curr);
+  vsFrameNull(&md->currorig);
+  vsFrameNull(&md->currtmp);
+  md->hasSeenOneFrame = 0;
+  md->frameNum = 0;
+
+  // TODO: get rid of shakiness parameter in the long run
+  md->conf.shakiness = VS_MIN(10,VS_MAX(1,md->conf.shakiness));
+  md->conf.accuracy = VS_MIN(15,VS_MAX(1,md->conf.accuracy));
+  if (md->conf.accuracy < md->conf.shakiness / 2) {
+    vs_log_info(md->conf.modName, "Accuracy should not be lower than shakiness/2 -- fixed");
+    md->conf.accuracy = md->conf.shakiness / 2;
+  }
+  if (md->conf.accuracy > 9 && md->conf.stepSize > 6) {
+    vs_log_info(md->conf.modName, "For high accuracy use lower stepsize  -- set to 6 now");
+    md->conf.stepSize = 6; // maybe 4
+  }
+
+  // field size and maximal shift are fixed fractions of the smaller dimension
+  int minDimension = VS_MIN(md->fi.width, md->fi.height);
+  int maxShift      = VS_MAX(16, minDimension/7);
+  int fieldSize     = VS_MAX(16, minDimension/10);
+  int fieldSizeFine = VS_MAX(6, minDimension/60);
+#if defined(USE_SSE2) || defined(USE_SSE2_ASM)
+  fieldSize     = (fieldSize / 16 + 1) * 16;
+  fieldSizeFine = (fieldSizeFine / 16 + 1) * 16;
+#endif
+  if (!initFields(md, &md->fieldscoarse, fieldSize, maxShift, md->conf.stepSize,
+                  1, 0, md->conf.contrastThreshold)) {
+    return VS_ERROR;
+  }
+  // for the fine check we use a smaller size and smaller maximal shift (=size)
+  if (!initFields(md, &md->fieldsfine, fieldSizeFine, fieldSizeFine,
+                  2, 1, fieldSizeFine, md->conf.contrastThreshold/2)) {
+    return VS_ERROR;
+  }
+
+  // buffers for the current frame; vsMotionDetection relies on both
+  vsFrameAllocate(&md->curr,&md->fi);
+  vsFrameAllocate(&md->currtmp, &md->fi);
+  if (vsFrameIsNull(&md->curr) || vsFrameIsNull(&md->currtmp)) {
+    vs_log_error(md->conf.modName, "malloc failed");
+    return VS_ERROR;
+  }
+  md->initialized = 2;
+  return VS_OK;
+}
+
+void vsMotionDetectionCleanup(VSMotionDetect* md) {
+  // release both field lists and reset their pointers
+  VSMotionDetectFields* lists[2] = { &md->fieldscoarse, &md->fieldsfine };
+  for(int n = 0; n < 2; n++) {
+    if(!lists[n]->fields)
+      continue;
+    vs_free(lists[n]->fields);
+    lists[n]->fields=0;
+  }
+  // release the frame buffers allocated by vsMotionDetectInit
+  vsFrameFree(&md->prev);
+  vsFrameFree(&md->curr);
+  vsFrameFree(&md->currtmp);
+  md->initialized = 0;
+}
+
+// predicate for vs_vector_filter: 1 if the match value of the local motion
+// does not exceed the threshold (lower values are better matches), else 0
+short lm_match_better(void* thresh, void* lm){
+  const double limit = *((double*)thresh);
+  const LocalMotion* motion = (LocalMotion*)lm;
+  return motion->match <= limit ? 1 : 0;
+}
+
+/* detects the local motions between frame and the previously seen frame;
+ * the first call only stores the frame and yields a dummy vector */
+int vsMotionDetection(VSMotionDetect* md, LocalMotions* motions, VSFrame *frame) {
+  assert(md->initialized==2);
+
+  md->currorig = *frame;
+  // smoothen image to do better motion detection
+  //  (larger stepsize or eventually gradient descent (need higher resolution))
+  if (md->fi.pFormat > PF_PACKED) {
+    // we could calculate a grayscale version and use the PLANAR stuff afterwards
+    // so far smoothing is only implemented for PLANAR
+    vsFrameCopy(&md->curr, frame, &md->fi);
+  } else {
+    // box-kernel smoothing (plain average of pixels), which is fine for us
+    boxblurPlanar(&md->curr, frame, &md->currtmp, &md->fi, md->conf.stepSize*1/*1.4*/,
+               BoxBlurNoColor);
+    // two times yields tent-kernel smoothing, which may be better, but I don't
+    //  think we need it
+    //boxblurPlanar(md->curr, md->curr, md->currtmp, &md->fi, md->stepSize*1,
+    // BoxBlurNoColor);
+  }
+
+  if (md->hasSeenOneFrame) {
+    LocalMotions motionscoarse;
+    LocalMotions motionsfine;
+    vs_vector_init(&motionsfine,0);
+    if (md->fi.pFormat > PF_PACKED) {
+      motionscoarse = calcTransFields(md, &md->fieldscoarse,
+                                      calcFieldTransPacked, contrastSubImgPacked);
+    } else { // PLANAR
+      motionscoarse = calcTransFields(md, &md->fieldscoarse,
+                                      calcFieldTransPlanar, contrastSubImgPlanar);
+    }
+    int num_motions = vs_vector_size(&motionscoarse);
+    if (num_motions < 1) {
+      vs_log_warn(md->conf.modName, "too low contrast. \
+(no translations are detected in frame %i)\n", md->frameNum);
+    }else{
+      // calc transformation and perform another scan with small fields
+      VSTransform t = vsSimpleMotionsToTransform(md->fi, md->conf.modName, &motionscoarse);
+      md->fieldsfine.offset    = t;
+      md->fieldsfine.useOffset = 1;
+      LocalMotions motions2;
+      if (md->fi.pFormat > PF_PACKED) {
+        motions2 = calcTransFields(md, &md->fieldsfine,
+                                   calcFieldTransPacked, contrastSubImgPacked);
+      } else { // PLANAR
+        motions2 = calcTransFields(md, &md->fieldsfine,
+                                   calcFieldTransPlanar, contrastSubImgPlanar);
+      }
+      // through out those with bad match (worse than mean of coarse scan)
+      VSArray matchQualities1 = localmotionsGetMatch(&motionscoarse);
+      double meanMatch = cleanmean(matchQualities1.dat, matchQualities1.len, NULL, NULL);
+      motionsfine      = vs_vector_filter(&motions2, lm_match_better, &meanMatch);
+      if(0){
+        printf("\nMatches: mean:  %f | ", meanMatch);
+        vs_array_print(matchQualities1, stdout);
+        printf("\n         fine: ");
+        VSArray matchQualities2 = localmotionsGetMatch(&motions2);
+        vs_array_print(matchQualities2, stdout);
+        printf("\n");
+      }
+      vs_array_free(matchQualities1); // only needed for the mean, was leaked per frame
+    }
+    if (md->conf.show) { // draw fields and transforms into frame.
+      int num_motions_fine = vs_vector_size(&motionsfine);
+      // this has to be done one after another to handle possible overlap
+      if (md->conf.show > 1) {
+        for (int i = 0; i < num_motions; i++)
+          drawFieldScanArea(md, LMGet(&motionscoarse,i), md->fieldscoarse.maxShift);
+      }
+      for (int i = 0; i < num_motions; i++)
+        drawField(md, LMGet(&motionscoarse,i), 1);
+      for (int i = 0; i < num_motions_fine; i++)
+        drawField(md, LMGet(&motionsfine,i), 0);
+      for (int i = 0; i < num_motions; i++)
+        drawFieldTrans(md, LMGet(&motionscoarse,i),180);
+      for (int i = 0; i < num_motions_fine; i++)
+        drawFieldTrans(md, LMGet(&motionsfine,i), 64);
+    }
+    *motions = vs_vector_concat(&motionscoarse,&motionsfine);
+  } else {
+    vs_vector_init(motions,1); // dummy vector
+    md->hasSeenOneFrame = 1;
+  }
+
+  // for tripod we keep a certain reference frame
+  if(md->conf.virtualTripod < 1 || md->frameNum < md->conf.virtualTripod)
+    // copy current frame (smoothed) to prev for next frame comparison
+    vsFrameCopy(&md->prev, &md->curr, &md->fi);
+  md->frameNum++;
+  return VS_OK;
+}
+
+
+/** initialise measurement fields on the frame.
+    size, spacing and maxShift determine how many fields fit; their centers
+    are spread over a regular grid of at least 3x3 fields.
+    returns 1 on success and 0 if the field list cannot be allocated
+*/
+
+int initFields(VSMotionDetect* md, VSMotionDetectFields* fs,
+               int size, int maxShift, int stepSize,
+               short keepBorder, int spacing, double contrastThreshold) {
+  fs->fieldSize = size;
+  fs->maxShift  = maxShift;
+  fs->stepSize  = stepSize;
+  fs->useOffset = 0;
+  fs->contrastThreshold = contrastThreshold;
+
+  // grid dimensions, never less than 3 in either direction
+  const int pitch = size + spacing;
+  int rows = VS_MAX(3,(md->fi.height - fs->maxShift*2)/pitch-1);
+  int cols = VS_MAX(3,(md->fi.width - fs->maxShift*2)/pitch-1);
+  fs->fieldNum  = rows * cols;
+  fs->fieldRows = rows;
+
+  fs->fields = (Field*) vs_malloc(sizeof(Field) * fs->fieldNum);
+  if (!fs->fields) {
+    vs_log_error(md->conf.modName, "malloc failed!\n");
+    return 0;
+  }
+  // the border is the amount by which the field centers
+  // have to be away from the image boundary
+  // (stepsize is added in case shift is increased through stepsize)
+  int border = keepBorder ? size / 2 + fs->maxShift + fs->stepSize
+                          : fs->stepSize;
+  int step_x = (md->fi.width  - 2 * border) / VS_MAX(cols-1,1);
+  int step_y = (md->fi.height - 2 * border) / VS_MAX(rows-1,1);
+  // lay the field centers out row by row
+  for (int idx = 0; idx < fs->fieldNum; idx++) {
+    Field* fld = &fs->fields[idx];
+    fld->x = border + (idx % cols) * step_x;
+    fld->y = border + (idx / cols) * step_y;
+    fld->size = size;
+  }
+
+  // the configured accuracy (in fifteenths) limits how many fields are used
+  fs->maxFields = (md->conf.accuracy) * fs->fieldNum / 15;
+  vs_log_info(md->conf.modName, "Fieldsize: %i, Maximal translation: %i pixel\n",
+              fs->fieldSize, fs->maxShift);
+  vs_log_info(md->conf.modName, "Number of used measurement fields: %i out of %i\n",
+              fs->maxFields, fs->fieldNum);
+
+  return 1;
+}
+
+/** contrast of a field in the first plane (data[0]) of the current frame \see contrastSubImg */
+double contrastSubImgPlanar(VSMotionDetect* md, const Field* field) {
+#ifdef USE_SSE2
+  return contrastSubImg1_SSE(md->curr.data[0], field, md->curr.linesize[0],md->fi.height);
+#else
+  return contrastSubImg(md->curr.data[0],field,md->curr.linesize[0],md->fi.height,1);
+#endif
+
+}
+
+/** mean contrast of the three interleaved channels,
+    each computed by contrastSubImg with bytesPerPixel=3
+*/
+double contrastSubImgPacked(VSMotionDetect* md, const Field* field) {
+  unsigned char* const base = md->curr.data[0];
+  const int pixelsPerLine = md->curr.linesize[0]/3; // linesize in pixels
+  double sum = 0;
+  for (int ch = 0; ch < 3; ch++)
+    sum += contrastSubImg(base + ch, field, pixelsPerLine, md->fi.height, 3);
+  return sum / 3;
+}
+
+/**
+   calculates Michelson-contrast, (max-min)/(max+min), of one channel in the
+   given small part of the given image
+
+   \param I pointer to framebuffer
+   \param field Field specifies position(center) and size of subimage
+   \param width width of frame (linesize in pixels)
+   \param height height of frame (not used by this implementation)
+   \param bytesPerPixel distance in bytes between two pixels of the channel
+   \return the contrast, a value in [0, 1)
+*/
+double contrastSubImg(unsigned char* const I, const Field* field, int width,
+                      int height, int bytesPerPixel) {
+  const int half = field->size / 2;
+  unsigned char lo = 255;
+  unsigned char hi = 0;
+  // top-left pixel of the subimage (field->x/y is its center)
+  unsigned char* row = I + ((field->x - half) + (field->y - half) * width) * bytesPerPixel;
+  // scan the size x size window for the darkest and the brightest value
+  for (int y = 0; y < field->size; y++, row += width * bytesPerPixel) {
+    const unsigned char* px = row;
+    for (int x = 0; x < field->size; x++, px += bytesPerPixel) {
+      if (*px < lo) lo = *px;
+      if (*px > hi) hi = *px;
+    }
+  }
+
+  return (hi - lo) / (hi + lo + 0.1); // +0.1 to avoid division by 0
+}
+
+/* calculates the optimal transformation for one field in Planar frames
+ * (only luminance): coarse search with fs->stepSize, then refinement with
+ * halved step sizes; a negative match marks a field to be discarded */
+LocalMotion calcFieldTransPlanar(VSMotionDetect* md, VSMotionDetectFields* fs,
+                                 const Field* field, int fieldnum) {
+  int tx = 0;
+  int ty = 0;
+  uint8_t *Y_c = md->curr.data[0], *Y_p = md->prev.data[0];
+  int linesize_c = md->curr.linesize[0], linesize_p = md->prev.linesize[0];
+  // we only use the luminance part of the image
+  int i, j;
+  int stepSize = fs->stepSize;
+  int maxShift = fs->maxShift;
+  Vec offset = { 0, 0};
+  LocalMotion lm = null_localmotion();
+  if(fs->useOffset){
+    // Todo: we could put the preparedtransform into fs
+    PreparedTransform pt = prepare_transform(&fs->offset, &md->fi);
+    Vec fieldpos = {field->x, field->y};
+    offset = sub_vec(transform_vec(&pt, &fieldpos), fieldpos);
+    // is the field still in the frame
+    int s2 = field->size/2;
+    if(unlikely(fieldpos.x+offset.x-s2-maxShift-stepSize < 0 ||
+                fieldpos.x+offset.x+s2+maxShift+stepSize >= md->fi.width ||
+                fieldpos.y+offset.y-s2-maxShift-stepSize < 0 ||
+                fieldpos.y+offset.y+s2+maxShift+stepSize >= md->fi.height)){
+      lm.match=-1;
+      return lm;
+    }
+  }
+
+#ifdef STABVERBOSE
+  // printf("%i %i %f\n", md->frameNum, fieldnum, contr);
+  FILE *f = NULL;
+  char buffer[32];
+  vs_snprintf(buffer, sizeof(buffer), "f%04i_%02i.dat", md->frameNum, fieldnum);
+  f = fopen(buffer, "w"); // may fail: every use below is guarded
+  if (f) fprintf(f, "# splot \"%s\"\n", buffer);
+#endif
+
+#ifdef USE_SPIRAL_FIELD_CALC
+  unsigned int minerror = UINT_MAX;
+
+  // check all positions by outgoing spiral
+  i = 0; j = 0;
+  int limit = 1;
+  int step = 0;
+  int dir = 0;
+  while (j >= -maxShift && j <= maxShift && i >= -maxShift && i <= maxShift) {
+    unsigned int error = compareSubImg(Y_c, Y_p, field, linesize_c, linesize_p,
+                                       md->fi.height, 1, i + offset.x, j + offset.y,
+                                       minerror);
+
+    if (error < minerror) {
+      minerror = error;
+      tx = i;
+      ty = j;
+    }
+
+    //spiral indexing...
+    step++;
+    switch (dir) {
+     case 0:
+      i += stepSize;
+      if (step == limit) {
+        dir = 1;
+        step = 0;
+      }
+      break;
+     case 1:
+      j += stepSize;
+      if (step == limit) {
+        dir = 2;
+        step = 0;
+        limit++;
+      }
+      break;
+     case 2:
+      i -= stepSize;
+      if (step == limit) {
+        dir = 3;
+        step = 0;
+      }
+      break;
+     case 3:
+      j -= stepSize;
+      if (step == limit) {
+        dir = 0;
+        step = 0;
+        limit++;
+      }
+      break;
+    }
+  }
+#else
+  /* Here we improve speed by checking first the most probable position
+     then the search paths are most effectively cut. (0,0) is a simple start
+  */
+  unsigned int minerror = compareSubImg(Y_c, Y_p, field, linesize_c, linesize_p,
+                                        md->fi.height, 1, 0, 0, UINT_MAX);
+  // check all positions...
+  for (i = -maxShift; i <= maxShift; i += stepSize) {
+    for (j = -maxShift; j <= maxShift; j += stepSize) {
+      if( i==0 && j==0 )
+        continue; //no need to check this since already done
+      unsigned int error = compareSubImg(Y_c, Y_p, field, linesize_c, linesize_p,
+                                         md->fi.height, 1, i+offset.x, j+offset.y, minerror);
+      if (error < minerror) {
+        minerror = error;
+        tx = i;
+        ty = j;
+      }
+#ifdef STABVERBOSE
+      if (f) fprintf(f, "%i %i %u\n", i, j, error);
+#endif
+    }
+  }
+
+#endif
+
+  while(stepSize > 1) {// make fine grain check around the best match
+    int txc = tx; // save the shifts
+    int tyc = ty;
+    int newStepSize = stepSize/2;
+    int r = stepSize - newStepSize;
+    for (i = txc - r; i <= txc + r; i += newStepSize) {
+      for (j = tyc - r; j <= tyc + r; j += newStepSize) {
+        if (i == txc && j == tyc)
+          continue; //no need to check this since already done
+        unsigned int error = compareSubImg(Y_c, Y_p, field, linesize_c, linesize_p,
+                                           md->fi.height, 1, i+offset.x, j+offset.y, minerror);
+#ifdef STABVERBOSE
+        if (f) fprintf(f, "%i %i %u\n", i, j, error);
+#endif
+        if (error < minerror) {
+          minerror = error;
+          tx = i;
+          ty = j;
+        }
+      }
+    }
+    stepSize /= 2;
+  }
+#ifdef STABVERBOSE
+  if (f) fclose(f);
+  vs_log_msg(md->conf.modName, "Minerror: %u\n", minerror);
+#endif
+
+  if (unlikely(fabs(tx) >= maxShift + stepSize - 1  ||
+               fabs(ty) >= maxShift + stepSize)) { // NOTE(review): no "- 1" for ty, unlike tx - confirm
+#ifdef STABVERBOSE
+    vs_log_msg(md->conf.modName, "maximal shift ");
+#endif
+    lm.match =-1.0; // to be kicked out
+    return lm;
+  }
+  lm.f = *field;
+  lm.v.x = tx + offset.x;
+  lm.v.y = ty + offset.y;
+  lm.match = ((double) minerror)/(field->size*field->size);
+  return lm;
+}
+
+/* calculates the optimal transformation for one field in Packed
+ *   slower than the Planar version because it uses all three color channels
+ *   (full grid search with fs->stepSize, then a pixel-wise refinement) */
+LocalMotion calcFieldTransPacked(VSMotionDetect* md, VSMotionDetectFields* fs,
+                                 const Field* field, int fieldnum) {
+  int tx = 0;
+  int ty = 0;
+  uint8_t *I_c = md->curr.data[0], *I_p = md->prev.data[0];
+  int width1 = md->curr.linesize[0]/3; // linesize in pixels
+  int width2 = md->prev.linesize[0]/3; // linesize in pixels
+  int i, j;
+  int stepSize = fs->stepSize;
+  int maxShift = fs->maxShift;
+  Vec offset = { 0, 0};
+  LocalMotion lm = null_localmotion();
+  if(fs->useOffset){
+    PreparedTransform pt = prepare_transform(&fs->offset, &md->fi);
+    Vec fieldpos = {field->x, field->y};
+    // like in the Planar version, offset is the shift of the field, not its position
+    offset = sub_vec(transform_vec(&pt, &fieldpos), fieldpos);
+    int s2 = field->size/2; // is the field still in the frame
+    if(unlikely(fieldpos.x+offset.x-s2-maxShift-stepSize < 0 ||
+                fieldpos.x+offset.x+s2+maxShift+stepSize >= md->fi.width ||
+                fieldpos.y+offset.y-s2-maxShift-stepSize < 0 ||
+                fieldpos.y+offset.y+s2+maxShift+stepSize >= md->fi.height)){
+      lm.match=-1;
+      return lm;
+    }
+  }
+
+  // start with the most probable position, so that the search is cut early
+  unsigned int minerror = compareSubImg(I_c, I_p, field, width1, width2, md->fi.height,
+                                        3, offset.x, offset.y, UINT_MAX);
+  // check all positions...
+  for (i = -maxShift; i <= maxShift; i += stepSize) {
+    for (j = -maxShift; j <= maxShift; j += stepSize) {
+      if( i==0 && j==0 )
+        continue; //no need to check this since already done
+      unsigned int error = compareSubImg(I_c, I_p, field, width1, width2,
+                                         md->fi.height, 3, i + offset.x, j + offset.y, minerror);
+      if (error < minerror) {
+        minerror = error;
+        tx = i;
+        ty = j;
+      }
+    }
+  }
+  if (stepSize > 1) { // make fine grain check around the best match
+    int txc = tx; // save the shifts
+    int tyc = ty;
+    int r = stepSize - 1;
+    for (i = txc - r; i <= txc + r; i += 1) {
+      for (j = tyc - r; j <= tyc + r; j += 1) {
+        if (i == txc && j == tyc)
+          continue; //no need to check this since already done
+        unsigned int error = compareSubImg(I_c, I_p, field, width1, width2,
+                                           md->fi.height, 3, i + offset.x, j + offset.y, minerror);
+        if (error < minerror) {
+          minerror = error;
+          tx = i;
+          ty = j;
+        }
+      }
+    }
+  }
+  if (fabs(tx) >= maxShift + stepSize - 1 || fabs(ty) >= maxShift + stepSize - 1) {
+#ifdef STABVERBOSE
+    vs_log_msg(md->conf.modName, "maximal shift ");
+#endif
+    lm.match = -1;
+    return lm;
+  }
+  lm.f = *field;
+  lm.v.x = tx + offset.x;
+  lm.v.y = ty + offset.y;
+  lm.match = ((double)minerror)/(field->size*field->size);
+  return lm;
+}
+
+/* qsort comparator for contrast_idx structures:
+   orders by descending contrast */
+int cmp_contrast_idx(const void *ci1, const void* ci2) {
+  const double first  = ((contrast_idx*) ci1)->contrast;
+  const double second = ((contrast_idx*) ci2)->contrast;
+  if (first < second) return 1;
+  return (first > second) ? -1 : 0;
+}
+
+/* select only the best 'maxfields' fields:
+   first calc contrasts (fields below the contrast threshold are excluded),
+   then take the best fields of each segment of the frame and fill up with
+   the best of the rest. Returns a vector of contrast_idx entries, which is
+   empty if the working buffers cannot be allocated. */
+VSVector selectfields(VSMotionDetect* md, VSMotionDetectFields* fs,
+                      contrastSubImgFunc contrastfunc) {
+  int i, j;
+  VSVector goodflds;
+  vs_vector_init(&goodflds, fs->fieldNum);
+  // ci keeps the contrasts in field order, ci_segms is the copy that gets sorted
+  contrast_idx *ci =
+    (contrast_idx*) vs_malloc(sizeof(contrast_idx) * fs->fieldNum);
+  contrast_idx *ci_segms =
+    (contrast_idx*) vs_malloc(sizeof(contrast_idx) * fs->fieldNum);
+  if (!ci || !ci_segms) { // without the buffers no field can be selected
+    vs_log_error(md->conf.modName, "malloc failed!\n");
+    if (ci) vs_free(ci);
+    if (ci_segms) vs_free(ci_segms);
+    return goodflds;
+  }
+
+  // we split all fields into row+1 segments and take from each segment
+  // the best fields
+  int numsegms = (fs->fieldRows + 1);
+  int segmlen = fs->fieldNum / (fs->fieldRows + 1) + 1;
+  int remaining = 0;
+  // calculate contrast for each field
+  // #pragma omp parallel for shared(ci,md) no speedup because to short
+  for (i = 0; i < fs->fieldNum; i++) {
+    ci[i].contrast = contrastfunc(md, &fs->fields[i]);
+    ci[i].index = i;
+    if (ci[i].contrast < fs->contrastThreshold)
+      ci[i].contrast = 0; // 0 marks the field as not selectable
+  }
+
+  memcpy(ci_segms, ci, sizeof(contrast_idx) * fs->fieldNum);
+  // get best fields from each segment
+  for (i = 0; i < numsegms; i++) {
+    int startindex = segmlen * i;
+    int endindex = segmlen * (i + 1);
+    endindex = endindex > fs->fieldNum ? fs->fieldNum : endindex;
+
+    // sort within segment
+    qsort(ci_segms + startindex, endindex - startindex,
+          sizeof(contrast_idx), cmp_contrast_idx);
+    // take maxfields/numsegms
+    for (j = 0; j < fs->maxFields / numsegms; j++) {
+      if (startindex + j >= endindex)
+        continue;
+      if (ci_segms[startindex + j].contrast > 0) {
+        vs_vector_append_dup(&goodflds, &ci[ci_segms[startindex+j].index],
+                             sizeof(contrast_idx));
+        // don't consider them in the later selection process
+        ci_segms[startindex + j].contrast = 0;
+      }
+    }
+  }
+  // check whether enough fields are selected
+  remaining = fs->maxFields - vs_vector_size(&goodflds);
+  if (remaining > 0) {
+    // take the remaining from the leftovers
+    qsort(ci_segms, fs->fieldNum, sizeof(contrast_idx), cmp_contrast_idx);
+    for (j = 0; j < remaining; j++) {
+      if (ci_segms[j].contrast > 0) {
+        vs_vector_append_dup(&goodflds, &ci_segms[j], sizeof(contrast_idx));
+      }
+    }
+  }
+  vs_free(ci);
+  vs_free(ci_segms);
+  return goodflds;
+}
+
+/* Tries to register the current frame onto the previous frame.
+ *   Algorithm:
+ *   - discard fields with low contrast and select up to maxFields fields
+ *     according to their contrast (see selectfields())
+ *   - run fieldfunc on every selected field and keep results with match >= 0
+ *   The returned vector of LocalMotion entries is not freed here.
+ */
+LocalMotions calcTransFields(VSMotionDetect* md,
+                             VSMotionDetectFields* fields,
+                             calcFieldTransFunc fieldfunc,
+                             contrastSubImgFunc contrastfunc) {
+  LocalMotions localmotions;
+  vs_vector_init(&localmotions,fields->maxFields);
+
+#ifdef STABVERBOSE
+  char buffer[32];
+  vs_snprintf(buffer, sizeof(buffer), "k%04i.dat", md->frameNum);
+  FILE *file = fopen(buffer, "w"); // may fail: every use below checks for NULL
+  if(file) fprintf(file, "# plot \"%s\" w l, \"\" every 2:1:0\n", buffer);
+#endif
+
+  VSVector goodflds = selectfields(md, fields, contrastfunc);
+  // use all "good" fields and calculate optimal match to previous frame
+#ifdef USE_OMP
+#pragma omp parallel for shared(goodflds, md, localmotions, fields) // does not bring speedup
+#endif
+  for(int index=0; index < vs_vector_size(&goodflds); index++){
+    const contrast_idx* ci = (contrast_idx*)vs_vector_get(&goodflds,index);
+    // e.g. calcFieldTransPlanar
+    LocalMotion m = fieldfunc(md, fields, &fields->fields[ci->index], ci->index);
+    if(m.match >= 0){
+      m.contrast = ci->contrast;
+#ifdef USE_OMP
+#pragma omp critical // localmotions and the debug file are shared between threads
+#endif
+      {
+#ifdef STABVERBOSE
+        // the end point is cast because %f requires a double argument
+        if(file) fprintf(file, "%i %i\n%f %f %f %f\n \n\n", m.f.x, m.f.y,
+                         (double)(m.f.x + m.v.x), (double)(m.f.y + m.v.y), m.match, m.contrast);
+#endif
+        vs_vector_append_dup(&localmotions, &m, sizeof(LocalMotion));
+      }
+    }
+  }
+  vs_vector_del(&goodflds);
+#ifdef STABVERBOSE
+  if(file) fclose(file);
+#endif
+  return localmotions;
+}
+
+/** outlines the area scanned around the field (field size plus maxShift on every side) */
+void drawFieldScanArea(VSMotionDetect* md, const LocalMotion* lm, int maxShift) {
+  if (md->fi.pFormat > PF_PACKED) return; // nothing is drawn for these formats
+  const int scanSize = lm->f.size + 2 * maxShift; // edge length of the scanned square
+  drawRectangle(md->currorig.data[0], md->currorig.linesize[0], md->fi.height, 1,
+                lm->f.x, lm->f.y, scanSize, scanSize, 80);
+}
+
+/** marks the field in the frame: filled box if box is set, outline otherwise */
+void drawField(VSMotionDetect* md, const LocalMotion* lm, short box) {
+  if (md->fi.pFormat > PF_PACKED) return; // nothing is drawn for these formats
+  unsigned char* plane = md->currorig.data[0];
+  const int stride = md->currorig.linesize[0];
+  if(box) {
+    drawBox(plane, stride, md->fi.height, 1, lm->f.x, lm->f.y, lm->f.size, lm->f.size, 40);
+  } else {
+    drawRectangle(plane, stride, md->fi.height, 1, lm->f.x, lm->f.y, lm->f.size, lm->f.size, 40);
+  }
+}
+
+/** draws the transform data of this field: center box, shifted box and connecting line */
+void drawFieldTrans(VSMotionDetect* md, const LocalMotion* lm, int color) {
+  if (md->fi.pFormat > PF_PACKED) return;
+  // drawLine() may overwrite its start point: pass a copy, not the const field of lm
+  Vec start = field_to_vec(lm->f);
+  Vec end = add_vec(start,lm->v);
+  drawBox(md->currorig.data[0], md->currorig.linesize[0], md->fi.height, 1,
+          lm->f.x, lm->f.y, 5, 5, 0); // draw center
+  drawBox(md->currorig.data[0], md->currorig.linesize[0], md->fi.height, 1,
+          lm->f.x + lm->v.x, lm->f.y + lm->v.y, 5, 5, 250); // draw translation
+  drawLine(md->currorig.data[0], md->currorig.linesize[0],  md->fi.height, 1,
+           &start, &end, 3, color);
+}
+
+/**
+ * fills a sizex x sizey box centered at x,y with the given color
+ * (every byte of every pixel is set, i.e. the same value for all channels);
+ * height is unused: no clipping is done, the box has to lie inside the image
+ */
+void drawBox(unsigned char* I, int width, int height, int bytesPerPixel, int x,
+       int y, int sizex, int sizey, unsigned char color) {
+  const int rowBytes = sizex * bytesPerPixel;  // bytes written per box row
+  const int stride   = width * bytesPerPixel;  // bytes per image row
+  // top-left corner of the box
+  unsigned char* row = I + ((x - sizex / 2) + (y - sizey / 2) * width) * bytesPerPixel;
+
+  for (int line = 0; line < sizey; line++, row += stride) {
+    for (int b = 0; b < rowBytes; b++) {
+      row[b] = color;
+    }
+  }
+}
+
+/**
+ * draws the outline of a sizex x sizey rectangle centered at x,y in the given color
+ * at the first channel (height is unused: no clipping is done)
+ */
+void drawRectangle(unsigned char* I, int width, int height, int bytesPerPixel, int x,
+                   int y, int sizex, int sizey, unsigned char color) {
+  const int left = x - sizex / 2, right  = x + sizex / 2;
+  const int top  = y - sizey / 2, bottom = y + sizey / 2;
+  const int stride = width * bytesPerPixel; // bytes per image row
+  for (int i = 0; i < sizex; i++) {  // upper and lower line
+    I[((left + i) + top * width) * bytesPerPixel]    = color;
+    I[((left + i) + bottom * width) * bytesPerPixel] = color;
+  }
+  for (int i = 0; i < sizey; i++) {  // left and right line
+    I[(left  + top * width) * bytesPerPixel + i * stride] = color;
+    I[(right + top * width) * bytesPerPixel + i * stride] = color;
+  }
+}
+
+/**
+ * draws a line from a to b with the given thickness in the given color at the first
+ * channel; no clipping is done (height is unused) and *a and *b are left unmodified
+ */
+void drawLine(unsigned char* I, int width, int height, int bytesPerPixel,
+              Vec* a, Vec* b, int thickness, unsigned char color) {
+  unsigned char* p;
+  Vec start = *a, div = sub_vec(*b,*a); // start is a local copy: *a is never written
+  if(div.y==0){ // horizontal line
+    if(div.x<0) {start=*b; div.x*=-1;} // always draw from left to right
+    for(int r=-thickness/2; r<=thickness/2; r++){
+      p = I + ((start.x) + (start.y+r) * width) * bytesPerPixel;
+      for (int k = 0; k <= div.x; k++) { *p = color; p+= bytesPerPixel; }
+    }
+  }else{
+    if(div.x==0){ // vertical line
+      if(div.y<0) {start=*b; div.y*=-1;} // always draw from top to bottom
+      for(int r=-thickness/2; r<=thickness/2; r++){
+        p = I + ((start.x+r) + (start.y) * width) * bytesPerPixel;
+        for (int k = 0; k <= div.y; k++) { *p = color; p+= width * bytesPerPixel; }
+      }
+    }else{ // sloped line: one horizontal run of horlen+1 pixels per image row
+      double m = (double)div.x/(double)div.y;
+      int horlen = thickness + fabs(m);
+      for( int c=0; c<= abs(div.y); c++){
+        int dy = div.y<0 ? -c : c;
+        int x = start.x + m*dy - horlen/2;
+        p = I + (x + (start.y+dy) * width) * bytesPerPixel;
+        for( int k=0; k<= horlen; k++){ *p = color; p+= bytesPerPixel; }
+      }
+    }
+  }
+}
+
+
+// void addTrans(VSMotionDetect* md, VSTransform sl) {
+//   if (!md->transs) {
+//     md->transs = vs_list_new(0);
+//   }
+//   vs_list_append_dup(md->transs, &sl, sizeof(sl));
+// }
+
+// VSTransform getLastVSTransform(VSMotionDetect* md){
+//   if (!md->transs || !md->transs->head) {
+//     return null_transform();
+//   }
+//   return *((VSTransform*)md->transs->tail);
+// }
+
+
+//#ifdef TESTING
+/// plain C implementation of compareSubImg (without ORC):
+///  sum of absolute byte differences between the field in I1 and the field
+///  shifted by (d_x,d_y) in I2; stops after the first row where sum > threshold
+unsigned int compareSubImg_thr(unsigned char* const I1, unsigned char* const I2,
+                               const Field* field, int width1, int width2, int height,
+                               int bytesPerPixel, int d_x, int d_y,
+                               unsigned int threshold) {
+  const int half     = field->size / 2;
+  const int rowBytes = field->size * bytesPerPixel; // bytes compared per row
+  // first byte of the top-left field corner in both images
+  const unsigned char* row1 =
+    I1 + ((field->x - half) + (field->y - half) * width1) * bytesPerPixel;
+  const unsigned char* row2 =
+    I2 + ((field->x - half + d_x) + (field->y - half + d_y) * width2) * bytesPerPixel;
+  unsigned int sum = 0;
+  for (int line = 0; line < field->size; line++) {
+    for (int b = 0; b < rowBytes; b++) {
+      sum += abs((int) row1[b] - (int) row2[b]);
+    }
+    // no need to calculate any longer: worse than the best match
+    if (sum > threshold)
+      break;
+    row1 += width1 * bytesPerPixel;
+    row2 += width2 * bytesPerPixel;
+  }
+  return sum;
+}
+
+
+/*
+ * Local variables:
+ *   c-file-style: "stroustrup"
+ *   c-file-offsets: ((case-label . *) (statement-case-intro . *))
+ *   indent-tabs-mode: nil
+ *   tab-width:  2
+ *   c-basic-offset: 2 t
+ * End:
+ *
+ */
diff --git a/src/motiondetect.h b/src/motiondetect.h
new file mode 100644
index 0000000..f72c0a5
--- /dev/null
+++ b/src/motiondetect.h
@@ -0,0 +1,150 @@
+/*
+ *  motiondetect.h
+ *
+ *  Copyright (C) Georg Martius - February 2011
+ *   georg dot martius at web dot de
+ *  Copyright (C) Alexey Osipov - Jule 2011
+ *   simba at lerlan dot ru
+ *   speed optimizations (threshold, spiral, SSE, asm)
+ *
+ *  This file is part of vid.stab video stabilization library
+ *
+ *  vid.stab is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License,
+ *  as published by the Free Software Foundation; either version 2, or
+ *  (at your option) any later version.
+ *
+ *  vid.stab is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with GNU Make; see the file COPYING.  If not, write to
+ *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ */
+
+#ifndef MOTIONDETECT_H
+#define MOTIONDETECT_H
+
+#include <stddef.h>
+#include <stdlib.h>
+
+#include "transformtype.h"
+#include "vidstabdefines.h"
+#include "vsvector.h"
+#include "frameinfo.h"
+
+typedef struct _vsmotiondetectconfig { // configuration of the motion detection
+  /* meta parameter for maxshift and fieldsize; range given as 1..15 here but as 1..10 in vs_motiondetect_help - TODO confirm */
+  int         shakiness;
+  int         accuracy;         // meta parameter for number of fields; 1..10 here but 1..15 in vs_motiondetect_help - TODO confirm
+  int         stepSize;         // stepsize of field transformation detection
+  int         algo;             // deprecated
+  int         virtualTripod;    // presumably the 'tripod' option of vs_motiondetect_help - TODO confirm
+  /* if 1 or 2 then the fields and transforms are shown in the frames */
+  int         show;
+  /* measurement fields with lower contrast are discarded */
+  double      contrastThreshold;
+  const char* modName;          // module name (used for logging)
+} VSMotionDetectConfig;
+
+/** structure for motion detection fields (one set of measurement fields and its scan parameters) */
+typedef struct _vsmotiondetectfields {
+  /* maximum number of pixels by which subsequent frames are expected to be shifted */
+  int maxShift;
+  int stepSize;                 // stepsize for detection
+  int fieldNum;                 // number of measurement fields (entries in fields)
+  int maxFields;                // maximum number of fields used (selected by contrast)
+  double contrastThreshold;     // fields with lower contrast are discarded (selectfields() sets their contrast to 0)
+  int fieldSize;                // size = min(md->width, md->height)/10;
+  int fieldRows;                // number of rows (selectfields() splits the fields into fieldRows+1 segments)
+  Field* fields;                // measurement fields
+  short useOffset;              // if true then the offset is used
+  VSTransform offset;           // offset for detection (e.g. known from coarse scan)
+} VSMotionDetectFields;
+
+/** data structure for motion detection part of deshaking */
+typedef struct _vsmotiondetect {
+  VSFrameInfo fi;               // frame info (height and pFormat are read by the draw functions)
+
+  VSMotionDetectConfig conf;    // configuration
+
+  VSMotionDetectFields fieldscoarse; // presumably the fields of the coarse scan - TODO confirm
+  VSMotionDetectFields fieldsfine;   // presumably the fields of the fine scan - TODO confirm
+
+  VSFrame curr;                 // blurred version of current frame buffer
+  VSFrame currorig;             // current frame buffer (original) (only pointer)
+  VSFrame currtmp;              // temporary buffer for blurring
+  VSFrame prev;                 // frame buffer for last frame (copied)
+  short hasSeenOneFrame;        // true if we have a valid previous frame
+  int initialized;              // 1 if initialized and 2 if configured
+
+  int frameNum;                 // frame counter (used e.g. for the name of the STABVERBOSE debug file)
+} VSMotionDetect;
+
+static const char vs_motiondetect_help[] = ""
+    "Overview:\n"
+    "    Generates a file with relative transform information\n"
+    "     (translation, rotation) about subsequent frames."
+    " See also transform.\n"
+    "Options\n"
+    "    'result'      path to the file used to write the transforms\n"
+    "                  (def:inputfile.stab)\n"
+    "    'shakiness'   how shaky is the video and how quick is the camera?\n"
+    "                  1: little (fast) 10: very strong/quick (slow) (def: 5)\n"
+    "    'accuracy'    accuracy of detection process (>=shakiness)\n"
+    "                  1: low (fast) 15: high (slow) (def: 9)\n"
+    "    'stepsize'    stepsize of search process, region around minimum \n"
+    "                  is scanned with 1 pixel resolution (def: 6)\n"
+    "    'mincontrast' below this contrast a field is discarded (0-1) (def: 0.3)\n"
+    "    'tripod'      virtual tripod mode (if >0): motion is compared to a \n"
+    "                  reference frame (frame # is the value) (def: 0)\n"
+    "    'show'        0: draw nothing (def); 1,2: show fields and transforms\n"
+    "                  in the resulting frames. Consider the 'preview' filter\n"
+    "    'help'        print this help message\n";
+
+
+/** returns the default config
+ */
+VSMotionDetectConfig vsMotionDetectGetDefaultConfig(const char* modName);
+
+/** initializes the VSMotionDetect structure and allocates memory
+ *  for the frames and stuff
+ *  @return VS_OK on success otherwise VS_ERROR
+ */
+int vsMotionDetectInit(VSMotionDetect* md, const VSMotionDetectConfig* conf,
+                       const VSFrameInfo* fi);
+
+/**
+ *  Performs a motion detection step
+ *  Only the new current frame is given. The last frame
+ *  is stored internally
+ *  @param motions: calculated local motions. (must be deleted manually)
+ * */
+int vsMotionDetection(VSMotionDetect* md, LocalMotions* motions, VSFrame *frame);
+
+/** Deletes internal data structures.
+ * In order to use the VSMotionDetect again, you have to call vsMotionDetectInit
+ */
+void vsMotionDetectionCleanup(VSMotionDetect* md);
+
+/// returns the current config
+void vsMotionDetectGetConfig(VSMotionDetectConfig* conf, const VSMotionDetect* md);
+
+/// returns the frame info
+const VSFrameInfo* vsMotionDetectGetFrameInfo(const VSMotionDetect* md);
+
+#endif  /* MOTIONDETECT_H */
+
+/*
+ * Local variables:
+ *   c-file-style: "stroustrup"
+ *   c-file-offsets: ((case-label . *) (statement-case-intro . *))
+ *   indent-tabs-mode: nil
+ *   c-basic-offset: 2 t
+ * End:
+ *
+ * vim: expandtab shiftwidth=2:
+ */
diff --git a/src/motiondetect_internal.h b/src/motiondetect_internal.h
new file mode 100644
index 0000000..6e09c14
--- /dev/null
+++ b/src/motiondetect_internal.h
@@ -0,0 +1,93 @@
+/*
+ *  motiondetect_internal.h
+ *
+ *  Copyright (C) Georg Martius - February 2011
+ *   georg dot martius at web dot de
+ *  Copyright (C) Alexey Osipov - Jule 2011
+ *   simba at lerlan dot ru
+ *   speed optimizations (threshold, spiral, SSE, asm)
+ *
+ *  This file is part of vid.stab video stabilization library
+ *
+ *  vid.stab is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License,
+ *  as published by the Free Software Foundation; either version 2, or
+ *  (at your option) any later version.
+ *
+ *  vid.stab is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with GNU Make; see the file COPYING.  If not, write to
+ *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ */
+
+#ifndef MOTIONDETECT_INTERNAL_H
+#define MOTIONDETECT_INTERNAL_H
+
+#include "motiondetect.h"
+
+/* type for a function that calculates the transformation of a certain field;
+ * arguments as in calcFieldTransPlanar: (md, fields, field, fieldnum) */
+typedef LocalMotion (*calcFieldTransFunc)(VSMotionDetect*, VSMotionDetectFields*,
+                                          const Field*, int);
+
+/* type for a function that calculates the contrast of a certain field;
+ * arguments as in contrastSubImgPlanar: (md, field) */
+typedef double (*contrastSubImgFunc)(VSMotionDetect*, const Field*);
+
+
+int initFields(VSMotionDetect* md, VSMotionDetectFields* fs,
+               int fieldSize, int maxShift, int stepSize, short border,
+               int spacing, double contrastThreshold );
+
+double contrastSubImgPlanar(VSMotionDetect* md, const Field* field);
+double contrastSubImgPacked(VSMotionDetect* md, const Field* field);
+double contrastSubImg(unsigned char* const I, const Field* field,
+                      int width, int height, int bytesPerPixel);
+
+
+int cmp_contrast_idx(const void *ci1, const void* ci2);
+VSVector selectfields(VSMotionDetect* md, VSMotionDetectFields* fields,
+                      contrastSubImgFunc contrastfunc);
+
+LocalMotion calcFieldTransPlanar(VSMotionDetect* md, VSMotionDetectFields* fields,
+                                 const Field* field, int fieldnum);
+LocalMotion calcFieldTransPacked(VSMotionDetect* md, VSMotionDetectFields* fields,
+                                 const Field* field, int fieldnum);
+LocalMotions calcTransFields(VSMotionDetect* md, VSMotionDetectFields* fields,
+                             calcFieldTransFunc fieldfunc,
+                             contrastSubImgFunc contrastfunc);
+
+
+void drawFieldScanArea(VSMotionDetect* md, const LocalMotion* motion, int maxShift);
+void drawField(VSMotionDetect* md, const LocalMotion* motion, short box);
+void drawFieldTrans(VSMotionDetect* md, const LocalMotion* motion, int color);
+void drawBox(unsigned char* I, int width, int height, int bytesPerPixel,
+             int x, int y, int sizex, int sizey, unsigned char color);
+void drawRectangle(unsigned char* I, int width, int height, int bytesPerPixel,
+                   int x, int y, int sizex, int sizey, unsigned char color);
+
+void drawLine(unsigned char* I, int width, int height, int bytesPerPixel,
+              Vec* a, Vec* b, int thickness, unsigned char color);
+
+unsigned int compareSubImg_thr(unsigned char* const I1, unsigned char* const I2,
+                               const Field* field, int width1, int width2, int height,
+                               int bytesPerPixel,
+                               int d_x, int d_y, unsigned int threshold);
+
+#endif  /* MOTIONDETECT_INTERNAL_H */
+
+/*
+ * Local variables:
+ *   c-file-style: "stroustrup"
+ *   c-file-offsets: ((case-label . *) (statement-case-intro . *))
+ *   indent-tabs-mode: nil
+ *   c-basic-offset: 2 t
+ * End:
+ *
+ * vim: expandtab shiftwidth=2:
+ */
diff --git a/src/motiondetect_opt.c b/src/motiondetect_opt.c
new file mode 100644
index 0000000..de63e3b
--- /dev/null
+++ b/src/motiondetect_opt.c
@@ -0,0 +1,422 @@
+/*
+ * motiondetect_opt.c
+ *
+ *  Copyright (C) Georg Martius - February 1007-2012
+ *   georg dot martius at web dot de
+ *  Copyright (C) Alexey Osipov - Jule 2011
+ *   simba at lerlan dot ru
+ *   speed optimizations (threshold, spiral, SSE, asm)
+ *
+ *  This file is part of vid.stab video stabilization library
+ *
+ *  vid.stab is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License,
+ *  as published by the Free Software Foundation; either version 2, or
+ *  (at your option) any later version.
+ *
+ *  vid.stab is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with GNU Make; see the file COPYING.  If not, write to
+ *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ */
+#include "motiondetect_opt.h"
+
+#ifdef USE_ORC
+#include "orc/motiondetectorc.h"
+#endif
+
+#ifdef USE_SSE2
+#include <emmintrin.h>
+
+#define USE_SSE2_CMP_HOR
+#define SSE2_CMP_SUM_ROWS 8
+#endif
+
+#ifdef USE_SSE2
+/**
+   \see contrastSubImg using SSE2 optimization, Planar (1 byte per channel) only.
+   Returns (max-min)/(max+min+0.1) of the bytes read; height is unused. */
+double contrastSubImg1_SSE(unsigned char* const I, const Field* field,
+                           int width, int height)
+{
+  int k, j;
+  unsigned char* p = NULL;
+  int s2 = field->size / 2;
+
+  static unsigned char full[16] = {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF};
+  // start s2 pixels left of and above the position stored in the field
+  p = I + ((field->x - s2) + (field->y - s2)*width);
+
+  __m128i mmin, mmax;
+  // per-lane running minimum (starts at 0xFF) and maximum (starts at 0)
+  mmin = _mm_loadu_si128((__m128i const*)full);
+  mmax = _mm_setzero_si128();
+  // 16 bytes per step: the row advance below is only exact if field->size is a multiple of 16 (NOTE(review): confirm callers ensure this)
+  for (j = 0; j < field->size; j++){
+    for (k = 0; k < field->size; k += 16) {
+      __m128i xmm0;
+      xmm0 = _mm_loadu_si128((__m128i const*)p);
+      mmin = _mm_min_epu8(mmin, xmm0);
+      mmax = _mm_max_epu8(mmax, xmm0);
+      p += 16;
+    }
+    p += (width - field->size);
+  }
+  // fold the 16 byte lanes into lane 0: shift by 8, 4, 2, 1 bytes and take the minimum each time
+  __m128i xmm1;
+  xmm1 = _mm_srli_si128(mmin, 8);
+  mmin = _mm_min_epu8(mmin, xmm1);
+  xmm1 = _mm_srli_si128(mmin, 4);
+  mmin = _mm_min_epu8(mmin, xmm1);
+  xmm1 = _mm_srli_si128(mmin, 2);
+  mmin = _mm_min_epu8(mmin, xmm1);
+  xmm1 = _mm_srli_si128(mmin, 1);
+  mmin = _mm_min_epu8(mmin, xmm1);
+  unsigned char mini = (unsigned char)_mm_extract_epi16(mmin, 0);
+  // same reduction for the maximum
+  xmm1 = _mm_srli_si128(mmax, 8);
+  mmax = _mm_max_epu8(mmax, xmm1);
+  xmm1 = _mm_srli_si128(mmax, 4);
+  mmax = _mm_max_epu8(mmax, xmm1);
+  xmm1 = _mm_srli_si128(mmax, 2);
+  mmax = _mm_max_epu8(mmax, xmm1);
+  xmm1 = _mm_srli_si128(mmax, 1);
+  mmax = _mm_max_epu8(mmax, xmm1);
+  unsigned char maxi = (unsigned char)_mm_extract_epi16(mmax, 0);
+
+  return (maxi-mini)/(maxi+mini+0.1); // +0.1 to avoid division by 0
+}
+#endif
+
+#ifdef USE_ORC
+/**
+   calculates the contrast in the given small part of the given image
+   using the absolute difference from mean luminance (like Root-Mean-Square,
+   but with abs() (Manhattan-Norm)); sum and deviation are delegated to
+   image_sum_optimized and image_variance_optimized.
+   For multichannel images use contrastSubImg_Michelson()
+   \param I pointer to framebuffer
+   \param field Field specifies position(center) and size of subimage
+   \param width width of frame
+   \param height height of frame (unused)
+*/
+double contrastSubImg_variance_orc(unsigned char* const I, const Field* field,
+                                   int width, int height) {
+  const int size = field->size;
+  const int half = size / 2;
+  const int pixels = size*size;
+  // top-left corner of the field
+  unsigned char* const origin = I + ((field->x - half) + (field->y - half) * width);
+
+  unsigned int total=0;
+  image_sum_optimized((signed int*)&total, origin, width, size, size);
+  unsigned char avg = total / pixels;
+  int deviation=0;
+  image_variance_optimized(&deviation, origin, width, avg, size, size);
+  return (double)deviation/pixels/255.0;
+}
+
+/// plain C implementation of variance based contrastSubImg (without ORC):
+///  mean absolute deviation from the mean value of the field, divided by 255
+double contrastSubImg_variance_C(unsigned char* const I,
+                                 const Field* field, int width, int height) {
+  const int size = field->size;
+  const int half = size / 2;
+  const int pixels = size*size;
+  // top-left corner of the field
+  const unsigned char* const origin = I + ((field->x - half) + (field->y - half) * width);
+  const unsigned char* row;
+  unsigned int total=0;
+  int avg;
+  int deviation=0;
+  // first pass: mean value
+  row = origin;
+  for (int y = 0; y < size; y++, row += width) {
+    for (int x = 0; x < size; x++) {
+      total+=row[x];
+    }
+  }
+  avg=total/pixels;
+  // second pass: accumulated absolute deviation from the mean
+  row = origin;
+  for (int y = 0; y < size; y++, row += width) {
+    for (int x = 0; x < size; x++) {
+      deviation+=abs(row[x]-avg);
+    }
+  }
+  return (double)deviation/pixels/255.0;
+}
+#endif
+
+
+
+
+
+
+#ifdef USE_ORC
+/**
+   compares a small part of two given images and returns the accumulated
+   per-line differences reported by image_line_difference_optimized
+   (the result is a sum, it is not divided by the number of pixels).
+   Field center, size and shift have to be chosen,
+   so that no clipping is required.
+   Stops after the first line where the sum exceeds threshold.
+
+   \param field Field specifies position(center) and size of subimage
+   \param d_x shift in x direction
+   \param d_y shift in y direction
+*/
+unsigned int compareSubImg_thr_orc(unsigned char* const I1, unsigned char* const I2,
+                                   const Field* field, int width1, int width2, int height,
+                                   int bytesPerPixel, int d_x, int d_y,
+                                   unsigned int threshold) {
+  const int half     = field->size / 2;
+  const int rowBytes = field->size* bytesPerPixel;
+  const int stride1  = width1 * bytesPerPixel;
+  const int stride2  = width2 * bytesPerPixel;
+  unsigned char* row1 = I1 + ((field->x - half) + (field->y - half) * width1) * bytesPerPixel;
+  unsigned char* row2 = I2 + ((field->x - half + d_x) + (field->y - half + d_y) * width2) * bytesPerPixel;
+  unsigned int sum = 0;
+
+  for (int line = 0; line < field->size; line++) {
+    unsigned int lineSum = 0;
+    image_line_difference_optimized(&lineSum, row1, row2, rowBytes);
+    sum += lineSum;
+    // no need to calculate any longer: worse than the best match
+    if (sum > threshold)
+      break;
+    row1 += stride1;
+    row2 += stride2;
+  }
+  return sum;
+}
+
+// implementation with 1 orc function for the whole field;
+//  the threshold argument is ignored (no early exit)
+unsigned int compareSubImg_orc(unsigned char* const I1, unsigned char* const I2,
+                               const Field* field, int width1, int width2, int height,
+                               int bytesPerPixel, int d_x, int d_y,
+                               unsigned int threshold) {
+  const int half = field->size / 2;
+  unsigned char* const first1 =
+    I1 + ((field->x - half) + (field->y - half) * width1) * bytesPerPixel;
+  unsigned char* const first2 =
+    I2 + ((field->x - half + d_x) + (field->y - half + d_y) * width2) * bytesPerPixel;
+  unsigned int sum=0;
+
+  image_difference_optimized(&sum, first1, width1 * bytesPerPixel, first2, width2 * bytesPerPixel,
+                             field->size* bytesPerPixel , field->size);
+  return sum;
+}
+#endif
+/// SSE2 implementation of compareSubImg: sum of absolute byte differences with early exit (see compareSubImg_thr)
+#ifdef USE_SSE2
+unsigned int compareSubImg_thr_sse2(unsigned char* const I1, unsigned char* const I2,
+                                    const Field* field,
+                                    int width1, int width2, int height,
+                                    int bytesPerPixel, int d_x, int d_y,
+                                    unsigned int treshold) {
+  int k, j;
+  unsigned char* p1 = NULL;
+  unsigned char* p2 = NULL;
+  int s2 = field->size / 2;
+  unsigned int sum = 0;
+  // mask keeps the low byte of every 16-bit lane; "row" counts 16-byte chunks since the last flush
+  static unsigned char mask[16] = {0xFF, 0x00, 0xFF, 0x00, 0xFF, 0x00, 0xFF, 0x00, 0xFF, 0x00, 0xFF, 0x00, 0xFF, 0x00, 0xFF, 0x00};
+  unsigned char row = 0;
+#ifndef USE_SSE2_CMP_HOR
+  unsigned char summes[16];
+  int i;
+#endif
+  __m128i xmmsum, xmmmask;
+  xmmsum = _mm_setzero_si128();
+  xmmmask = _mm_loadu_si128((__m128i const*)mask);
+  // top-left corner of the field in I1 and of the field shifted by (d_x,d_y) in I2
+  p1=I1 + ((field->x - s2) + (field->y - s2)*width1)*bytesPerPixel;
+  p2=I2 + ((field->x - s2 + d_x) + (field->y - s2 + d_y)*width2)*bytesPerPixel;
+  for (j = 0; j < field->size; j++){
+    for (k = 0; k < field->size * bytesPerPixel; k+=16){
+      {
+        __m128i xmm0, xmm1, xmm2;
+        xmm0 = _mm_loadu_si128((__m128i const *)p1);
+        xmm1 = _mm_loadu_si128((__m128i const *)p2);
+        // |p1-p2| per byte: one of the two saturating differences is always 0
+        xmm2 = _mm_subs_epu8(xmm0, xmm1);
+        xmm0 = _mm_subs_epu8(xmm1, xmm0);
+        xmm0 = _mm_adds_epu8(xmm0, xmm2);
+        // widen to 16 bit: even bytes go to xmm1, odd bytes to xmm0
+        xmm1 = _mm_and_si128(xmm0, xmmmask);
+        xmm0 = _mm_srli_si128(xmm0, 1);
+        xmm0 = _mm_and_si128(xmm0, xmmmask);
+        // accumulate into the 8 word lanes of xmmsum
+        xmmsum = _mm_adds_epu16(xmmsum, xmm0);
+        xmmsum = _mm_adds_epu16(xmmsum, xmm1);
+      }
+
+      p1+=16;
+      p2+=16;
+
+      row++;
+      if (row == SSE2_CMP_SUM_ROWS) { // flush: add the word lanes horizontally into sum and clear them
+        row = 0;
+#ifdef USE_SSE2_CMP_HOR
+        {
+          __m128i xmm1;
+
+          xmm1 = _mm_srli_si128(xmmsum, 8);
+          xmmsum = _mm_adds_epu16(xmmsum, xmm1);
+
+          xmm1 = _mm_srli_si128(xmmsum, 4);
+          xmmsum = _mm_adds_epu16(xmmsum, xmm1);
+
+          xmm1 = _mm_srli_si128(xmmsum, 2);
+          xmmsum = _mm_adds_epu16(xmmsum, xmm1);
+
+          sum += _mm_extract_epi16(xmmsum, 0);
+        }
+#else
+        _mm_storeu_si128((__m128i*)summes, xmmsum);
+        for(i = 0; i < 16; i+=2)
+          sum += summes[i] + summes[i+1]*256;
+#endif
+        xmmsum = _mm_setzero_si128();
+      }
+    }
+    if (sum > treshold) // checked once per row; sum only contains chunks that were already flushed
+      break;
+    p1 += (width1 - field->size) * bytesPerPixel; // exact only if the inner loop advanced field->size * bytesPerPixel bytes (multiple of 16)
+    p2 += (width2 - field->size) * bytesPerPixel;
+  }
+
+#if (SSE2_CMP_SUM_ROWS != 1) && (SSE2_CMP_SUM_ROWS != 2) && (SSE2_CMP_SUM_ROWS != 4) \
+  && (SSE2_CMP_SUM_ROWS != 8) && (SSE2_CMP_SUM_ROWS != 16)
+  //process all data left unprocessed
+  //this part can be safely ignored if
+  //SSE_SUM_ROWS = {1, 2, 4, 8, 16}
+#ifdef USE_SSE2_CMP_HOR
+  {
+    __m128i xmm1;
+
+    xmm1 = _mm_srli_si128(xmmsum, 8);
+    xmmsum = _mm_adds_epu16(xmmsum, xmm1);
+
+    xmm1 = _mm_srli_si128(xmmsum, 4);
+    xmmsum = _mm_adds_epu16(xmmsum, xmm1);
+
+    xmm1 = _mm_srli_si128(xmmsum, 2);
+    xmmsum = _mm_adds_epu16(xmmsum, xmm1);
+
+    sum += _mm_extract_epi16(xmmsum, 0);
+  }
+#else
+  _mm_storeu_si128((__m128i*)summes, xmmsum);
+  for(i = 0; i < 16; i+=2)
+    sum += summes[i] + summes[i+1]*256;
+#endif
+#endif
+
+  return sum;
+}
+#endif // USE_SSE2
+/// inline assembly variant of compareSubImg_thr_sse2 (SSE2 instructions, GCC extended asm syntax)
+#ifdef USE_SSE2_ASM
+unsigned int compareSubImg_thr_sse2_asm(unsigned char* const I1, unsigned char* const I2,
+                                        const Field* field,
+                                        int width1, int width2, int height,
+                                        int bytesPerPixel, int d_x, int d_y,
+                                        unsigned int treshold) {
+  unsigned char* p1 = NULL;
+  unsigned char* p2 = NULL;
+  int s2 = field->size / 2;
+  unsigned int sum = 0;
+  // mask keeps the low byte of every 16-bit lane
+  static unsigned char mask[16] = {0xFF, 0x00, 0xFF, 0x00, 0xFF, 0x00, 0xFF, 0x00, 0xFF, 0x00, 0xFF, 0x00, 0xFF, 0x00, 0xFF, 0x00};
+  p1=I1 + ((field->x - s2) + (field->y - s2)*width1)*bytesPerPixel;
+  p2=I2 + ((field->x - s2 + d_x) + (field->y - s2 + d_y)*width2)*bytesPerPixel;
+  asm (
+    "xor %0,%0\n"
+    "pxor %%xmm4,%%xmm4\n"         //8 x 16bit partial sums
+    "movdqu (%3),%%xmm3\n"         //mask
+    // NOTE(review): %1, %2 and %6 are modified below although they are declared as input operands - confirm this is safe
+    //main loop
+    "movl %4,%%edx\n"              //edx = field->size * bytesPerPixel / 16
+    "mov $8,%%ecx\n"               //cx = 8
+    "1:\n"
+
+    //calc intermediate sum of abs differences for 16 bytes
+    "movdqu (%1),%%xmm0\n"       //p1
+    "movdqu (%2),%%xmm1\n"       //p2
+    "movdqu %%xmm0,%%xmm2\n"     //xmm2 = xmm0
+    "psubusb %%xmm1,%%xmm0\n"    //xmm0 = xmm0 - xmm1 (by bytes)
+    "psubusb %%xmm2,%%xmm1\n"    //xmm1 = xmm1 - xmm2 (by bytes)
+    "paddusb %%xmm1,%%xmm0\n"    //xmm0 = xmm0 + xmm1 (absolute difference)
+    "movdqu %%xmm0,%%xmm2\n"     //xmm2 = xmm0
+    "pand %%xmm3,%%xmm2\n"       //xmm2 = xmm2 & xmm3 (apply mask)
+    "psrldq $1,%%xmm0\n"         //xmm0 = xmm0 >> 8 (shift by 1 byte)
+    "pand %%xmm3,%%xmm0\n"       //xmm0 = xmm0 & xmm3 (apply mask)
+    "paddusw %%xmm0,%%xmm4\n"    //xmm4 = xmm4 + xmm0 (by words)
+    "paddusw %%xmm2,%%xmm4\n"    //xmm4 = xmm4 + xmm2 (by words)
+
+    "add $16,%1\n"               //move to next 16 bytes (p1)
+    "add $16,%2\n"               //move to next 16 bytes (p2)
+
+    //check if we need flush sum (i.e. xmm4 is about to saturate)
+    "dec %%ecx\n"
+    "jnz 2f\n"                   //skip flushing if not
+    //flushing...
+    "movdqu %%xmm4,%%xmm0\n"
+    "psrldq $8,%%xmm0\n"
+    "paddusw %%xmm0,%%xmm4\n"
+    "movdqu %%xmm4,%%xmm0\n"
+    "psrldq $4,%%xmm0\n"
+    "paddusw %%xmm0,%%xmm4\n"
+    "movdqu %%xmm4,%%xmm0\n"
+    "psrldq $2,%%xmm0\n"
+    "paddusw %%xmm0,%%xmm4\n"
+    "movd %%xmm4,%%ecx\n"
+    "and $0xFFFF,%%ecx\n"
+    "addl %%ecx,%0\n"
+    "pxor %%xmm4,%%xmm4\n"       //clearing xmm4
+    "mov $8,%%ecx\n"             //cx = 8
+
+    //check if we need to go to another line
+    "2:\n"
+    "dec %%edx\n"
+    "jnz 1b\n"                   //skip if not
+
+    //move p1 and p2 to the next line
+    "add %5,%1\n"
+    "add %5,%2\n"
+    "cmp %7,%0\n"                //if (sum > treshold)
+    "ja 3f\n"                    //    break;
+    "movl %4,%%edx\n"
+
+    //check if all lines done
+    "decl %6\n"
+    "jnz 1b\n"                   //if not, continue looping
+    "3:\n"
+    :"=r"(sum)
+    :"r"(p1),"r"(p2),"r"(mask),"g"(field->size * bytesPerPixel / 16),"g"((unsigned char*)((width1 - field->size) * bytesPerPixel)),"g"(field->size), "g"(treshold), "0"(sum)
+    :"%xmm0","%xmm1","%xmm2","%xmm3","%xmm4","%ecx","%edx"
+    );
+  // TODO width2 is not properly used here: p1 and p2 are both advanced by the width1 based line offset (%5)
+  return sum;
+}
+#endif // USE_SSE2_ASM
+
+/*
+ * Local variables:
+ *   c-file-style: "stroustrup"
+ *   c-file-offsets: ((case-label . *) (statement-case-intro . *))
+ *   indent-tabs-mode: nil
+ *   tab-width:  2
+ *   c-basic-offset: 2 t
+ * End:
+ *
+ * vim: expandtab shiftwidth=2:
+ */
diff --git a/src/motiondetect_opt.h b/src/motiondetect_opt.h
new file mode 100644
index 0000000..75fa6c9
--- /dev/null
+++ b/src/motiondetect_opt.h
@@ -0,0 +1,96 @@
+/*
+ *  motiondetect_opt.h
+ *
+ *  Copyright (C) Georg Martius - February 2011
+ *   georg dot martius at web dot de
+ *  Copyright (C) Alexey Osipov - July 2011
+ *   simba at lerlan dot ru
+ *   speed optimizations (threshold, spiral, SSE, asm)
+ *
+ *  This file is part of vid.stab video stabilization library
+ *
+ *  vid.stab is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License,
+ *  as published by the Free Software Foundation; either version 2, or
+ *  (at your option) any later version.
+ *
+ *  vid.stab is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with GNU Make; see the file COPYING.  If not, write to
+ *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ */
+
+#ifndef MOTIONDETECT_OPT_H
+#define MOTIONDETECT_OPT_H
+
+#include "motiondetect.h"
+// compareSubImg names the first variant enabled below: SSE2 inline asm, SSE2, ORC, otherwise the plain compareSubImg_thr
+#ifdef USE_SSE2_ASM //enable SSE2 inline asm code
+#define compareSubImg compareSubImg_thr_sse2_asm
+#elif defined(USE_SSE2)      //enable SSE2 code
+#define compareSubImg compareSubImg_thr_sse2
+#elif defined(USE_ORC)
+#define compareSubImg compareSubImg_thr_orc
+#else
+#define compareSubImg compareSubImg_thr
+#endif
+
+#ifdef USE_SSE2
+double contrastSubImg1_SSE(unsigned char* const I, const Field* field,
+                           int width, int height);
+#endif
+
+#ifdef USE_ORC
+double contrastSubImg_variance_orc(unsigned char* const I, const Field* field,
+                          int width, int height);
+double contrastSubImg_variance_C(unsigned char* const I, const Field* field,
+                        int width, int height);
+
+#endif
+
+#ifdef USE_ORC
+unsigned int compareSubImg_orc(unsigned char* const I1, unsigned char* const I2,
+                               const Field* field, int width1, int width2, int height,
+                               int bytesPerPixel, int d_x, int d_y,
+                               unsigned int threshold);
+
+
+unsigned int compareSubImg_thr_orc(unsigned char* const I1, unsigned char* const I2,
+                                   const Field* field, int width1, int width2, int height,
+                                   int bytesPerPixel, int d_x, int d_y,
+                                   unsigned int threshold);
+#endif
+
+#ifdef USE_SSE2
+unsigned int compareSubImg_thr_sse2(unsigned char* const I1, unsigned char* const I2,
+                                    const Field* field, int width1, int width2, int height,
+                                    int bytesPerPixel, int d_x, int d_y,
+                                    unsigned int threshold);
+#endif
+
+#ifdef USE_SSE2_ASM
+unsigned int compareSubImg_thr_sse2_asm(unsigned char* const I1, unsigned char* const I2,
+                                        const Field* field, int width1, int width2,
+                                        int height, int bytesPerPixel,
+                                        int d_x, int d_y, unsigned int threshold);
+#endif
+
+#endif  /* MOTIONDETECT_OPT_H */
+
+/*
+ * Local variables:
+ *   c-file-style: "stroustrup"
+ *   c-file-offsets: ((case-label . *) (statement-case-intro . *))
+ *   indent-tabs-mode: nil
+ *   tab-width:  2
+ *   c-basic-offset: 2 t
+
+ * End:
+ *
+ * vim: expandtab shiftwidth=2:
+ */
diff --git a/src/orc/Makefile b/src/orc/Makefile
new file mode 100644
index 0000000..fc63d87
--- /dev/null
+++ b/src/orc/Makefile
@@ -0,0 +1,14 @@
+# Regenerates the C sources and headers from the .orc files with orcc (--implementation -> .c, --header -> .h).
+all: motiondetectorc.c motiondetectorc.h transformorc.c transformorc.h
+.PHONY: all
+motiondetectorc.c : motiondetectorc.orc
+	orcc --implementation -o motiondetectorc.c motiondetectorc.orc
+
+motiondetectorc.h : motiondetectorc.orc
+	orcc --header -o motiondetectorc.h motiondetectorc.orc
+
+transformorc.c : transformorc.orc
+	orcc --implementation -o transformorc.c transformorc.orc
+
+transformorc.h : transformorc.orc
+	orcc --header -o transformorc.h transformorc.orc
\ No newline at end of file
diff --git a/src/orc/motiondetectorc.c b/src/orc/motiondetectorc.c
new file mode 100644
index 0000000..e7329d7
--- /dev/null
+++ b/src/orc/motiondetectorc.c
@@ -0,0 +1,539 @@
+
+/* autogenerated from motiondetectorc.orc */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#ifndef _ORC_INTEGER_TYPEDEFS_
+#define _ORC_INTEGER_TYPEDEFS_
+#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
+#include <stdint.h>
+typedef int8_t orc_int8;
+typedef int16_t orc_int16;
+typedef int32_t orc_int32;
+typedef int64_t orc_int64;
+typedef uint8_t orc_uint8;
+typedef uint16_t orc_uint16;
+typedef uint32_t orc_uint32;
+typedef uint64_t orc_uint64;
+#define ORC_UINT64_C(x) UINT64_C(x)
+#elif defined(_MSC_VER)
+typedef signed __int8 orc_int8;
+typedef signed __int16 orc_int16;
+typedef signed __int32 orc_int32;
+typedef signed __int64 orc_int64;
+typedef unsigned __int8 orc_uint8;
+typedef unsigned __int16 orc_uint16;
+typedef unsigned __int32 orc_uint32;
+typedef unsigned __int64 orc_uint64;
+#define ORC_UINT64_C(x) (x##Ui64)
+#define inline __inline
+#else
+#include <limits.h>
+typedef signed char orc_int8;
+typedef short orc_int16;
+typedef int orc_int32;
+typedef unsigned char orc_uint8;
+typedef unsigned short orc_uint16;
+typedef unsigned int orc_uint32;
+#if INT_MAX == LONG_MAX
+typedef long long orc_int64;
+typedef unsigned long long orc_uint64;
+#define ORC_UINT64_C(x) (x##ULL)
+#else
+typedef long orc_int64;
+typedef unsigned long orc_uint64;
+#define ORC_UINT64_C(x) (x##UL)
+#endif
+#endif
+typedef union { orc_int16 i; orc_int8 x2[2]; } orc_union16;
+typedef union { orc_int32 i; float f; orc_int16 x2[2]; orc_int8 x4[4]; } orc_union32;
+typedef union { orc_int64 i; double f; orc_int32 x2[2]; float x2f[2]; orc_int16 x4[4]; } orc_union64;
+#endif
+#ifndef ORC_RESTRICT
+#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
+#define ORC_RESTRICT restrict
+#elif defined(__GNUC__) && __GNUC__ >= 4
+#define ORC_RESTRICT __restrict__
+#else
+#define ORC_RESTRICT
+#endif
+#endif
+
+#ifndef DISABLE_ORC
+#include <orc/orc.h>
+#endif
+void image_difference_optimized (orc_uint32 * ORC_RESTRICT a1, const orc_uint8 * ORC_RESTRICT s1, int s1_stride, const orc_uint8 * ORC_RESTRICT s2, int s2_stride, int n, int m);
+void image_line_difference_optimized (orc_uint32 * ORC_RESTRICT a1, const orc_uint8 * ORC_RESTRICT s1, const orc_uint8 * ORC_RESTRICT s2, int n);
+void image_sum_optimized (int * ORC_RESTRICT a1, const orc_uint8 * ORC_RESTRICT s1, int s1_stride, int n, int m);
+void image_variance_optimized (int * ORC_RESTRICT a1, const orc_uint8 * ORC_RESTRICT s1, int s1_stride, int p2, int n, int m);
+
+
+/* begin Orc C target preamble */
+#define ORC_CLAMP(x,a,b) ((x)<(a) ? (a) : ((x)>(b) ? (b) : (x)))
+#define ORC_ABS(a) ((a)<0 ? -(a) : (a))
+#define ORC_MIN(a,b) ((a)<(b) ? (a) : (b))
+#define ORC_MAX(a,b) ((a)>(b) ? (a) : (b))
+#define ORC_SB_MAX 127
+#define ORC_SB_MIN (-1-ORC_SB_MAX)
+#define ORC_UB_MAX 255
+#define ORC_UB_MIN 0
+#define ORC_SW_MAX 32767
+#define ORC_SW_MIN (-1-ORC_SW_MAX)
+#define ORC_UW_MAX 65535
+#define ORC_UW_MIN 0
+#define ORC_SL_MAX 2147483647
+#define ORC_SL_MIN (-1-ORC_SL_MAX)
+#define ORC_UL_MAX 4294967295U
+#define ORC_UL_MIN 0
+#define ORC_CLAMP_SB(x) ORC_CLAMP(x,ORC_SB_MIN,ORC_SB_MAX)
+#define ORC_CLAMP_UB(x) ORC_CLAMP(x,ORC_UB_MIN,ORC_UB_MAX)
+#define ORC_CLAMP_SW(x) ORC_CLAMP(x,ORC_SW_MIN,ORC_SW_MAX)
+#define ORC_CLAMP_UW(x) ORC_CLAMP(x,ORC_UW_MIN,ORC_UW_MAX)
+#define ORC_CLAMP_SL(x) ORC_CLAMP(x,ORC_SL_MIN,ORC_SL_MAX)
+#define ORC_CLAMP_UL(x) ORC_CLAMP(x,ORC_UL_MIN,ORC_UL_MAX)
+#define ORC_SWAP_W(x) ((((x)&0xff)<<8) | (((x)&0xff00)>>8))
+#define ORC_SWAP_L(x) ((((x)&0xff)<<24) | (((x)&0xff00)<<8) | (((x)&0xff0000)>>8) | (((x)&0xff000000)>>24))
+#define ORC_SWAP_Q(x) ((((x)&ORC_UINT64_C(0xff))<<56) | (((x)&ORC_UINT64_C(0xff00))<<40) | (((x)&ORC_UINT64_C(0xff0000))<<24) | (((x)&ORC_UINT64_C(0xff000000))<<8) | (((x)&ORC_UINT64_C(0xff00000000))>>8) | (((x)&ORC_UINT64_C(0xff0000000000))>>24) | (((x)&ORC_UINT64_C(0xff000000000000))>>40) | (((x)&ORC_UINT64_C(0xff00000000000000))>>56))
+#define ORC_PTR_OFFSET(ptr,offset) ((void *)(((unsigned char *)(ptr)) + (offset)))
+#define ORC_DENORMAL(x) ((x) & ((((x)&0x7f800000) == 0) ? 0xff800000 : 0xffffffff))
+#define ORC_ISNAN(x) ((((x)&0x7f800000) == 0x7f800000) && (((x)&0x007fffff) != 0))
+#define ORC_DENORMAL_DOUBLE(x) ((x) & ((((x)&ORC_UINT64_C(0x7ff0000000000000)) == 0) ? ORC_UINT64_C(0xfff0000000000000) : ORC_UINT64_C(0xffffffffffffffff)))
+#define ORC_ISNAN_DOUBLE(x) ((((x)&ORC_UINT64_C(0x7ff0000000000000)) == ORC_UINT64_C(0x7ff0000000000000)) && (((x)&ORC_UINT64_C(0x000fffffffffffff)) != 0))
+#ifndef ORC_RESTRICT
+#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
+#define ORC_RESTRICT restrict
+#elif defined(__GNUC__) && __GNUC__ >= 4
+#define ORC_RESTRICT __restrict__
+#else
+#define ORC_RESTRICT
+#endif
+#endif
+/* end Orc C target preamble */
+
+
+
+/* image_difference_optimized */
+#ifdef DISABLE_ORC
+void
+image_difference_optimized (orc_uint32 * ORC_RESTRICT a1, const orc_uint8 * ORC_RESTRICT s1, int s1_stride, const orc_uint8 * ORC_RESTRICT s2, int s2_stride, int n, int m){
+  int i;
+  int j;
+  const orc_int8 * ORC_RESTRICT ptr4;
+  const orc_int8 * ORC_RESTRICT ptr5;
+  orc_union32 var12 =  { 0 };
+  orc_int8 var32;
+  orc_int8 var33;
+  /* Plain C version (DISABLE_ORC): stores in *a1 the sum of |s1 - s2| over m rows of n bytes each. */
+  for (j = 0; j < m; j++) {
+    ptr4 = ORC_PTR_OFFSET(s1, s1_stride * j);
+    ptr5 = ORC_PTR_OFFSET(s2, s2_stride * j);
+    /* row j starts s1_stride * j (resp. s2_stride * j) bytes after the base pointer */
+
+    for (i = 0; i < n; i++) {
+      /* 0: loadb */
+      var32 = ptr4[i];
+      /* 1: loadb */
+      var33 = ptr5[i];
+      /* 2: accsadubl - both bytes are cast back to unsigned before the absolute difference is accumulated */
+      var12.i = var12.i + ORC_ABS((orc_int32)(orc_uint8)var32 - (orc_int32)(orc_uint8)var33);
+    }
+  }
+  *a1 = var12.i;
+
+}
+
+#else
+static void
+_backup_image_difference_optimized (OrcExecutor * ORC_RESTRICT ex)
+{
+  int i;
+  int j;
+  int n = ex->n;
+  int m = ex->params[ORC_VAR_A1]; /* row count; presumably the slot ORC_EXECUTOR_M(ex) writes - confirm */
+  const orc_int8 * ORC_RESTRICT ptr4;
+  const orc_int8 * ORC_RESTRICT ptr5;
+  orc_union32 var12 =  { 0 };
+  orc_int8 var32;
+  orc_int8 var33;
+  /* C implementation registered as the program's backup function; all inputs come from the executor. */
+  for (j = 0; j < m; j++) {
+    ptr4 = ORC_PTR_OFFSET(ex->arrays[4], ex->params[4] * j);
+    ptr5 = ORC_PTR_OFFSET(ex->arrays[5], ex->params[5] * j);
+    /* NOTE(review): slots 4 and 5 are presumably ORC_VAR_S1/ORC_VAR_S2 (pointer in arrays[], stride in params[]) - confirm */
+
+    for (i = 0; i < n; i++) {
+      /* 0: loadb */
+      var32 = ptr4[i];
+      /* 1: loadb */
+      var33 = ptr5[i];
+      /* 2: accsadubl - both bytes are cast back to unsigned before the absolute difference is accumulated */
+      var12.i = var12.i + ORC_ABS((orc_int32)(orc_uint8)var32 - (orc_int32)(orc_uint8)var33);
+    }
+  }
+  ex->accumulators[0] = var12.i;
+
+}
+
+void
+image_difference_optimized (orc_uint32 * ORC_RESTRICT a1, const orc_uint8 * ORC_RESTRICT s1, int s1_stride, const orc_uint8 * ORC_RESTRICT s2, int s2_stride, int n, int m)
+{
+  OrcExecutor _ex, *ex = &_ex;
+  static int p_inited = 0;
+  static OrcCode *c = 0;
+  void (*func) (OrcExecutor *);
+  /* Orc version: the program is built and compiled while p_inited is 0; later calls reuse the static code object c. */
+  if (!p_inited) {
+    orc_once_mutex_lock ();
+    if (!p_inited) {
+      OrcProgram *p;
+      /* p_inited is re-checked under the once-mutex before the program is set up */
+      p = orc_program_new ();
+      orc_program_set_2d (p);
+      orc_program_set_name (p, "image_difference_optimized");
+      orc_program_set_backup_function (p, _backup_image_difference_optimized);
+      orc_program_add_source (p, 1, "s1");
+      orc_program_add_source (p, 1, "s2");
+      orc_program_add_accumulator (p, 4, "a1");
+      /* the whole program is the single opcode accsadubl on a1, s1, s2 */
+      orc_program_append_2 (p, "accsadubl", 0, ORC_VAR_A1, ORC_VAR_S1, ORC_VAR_S2, ORC_VAR_D1);
+
+      orc_program_compile (p);
+      c = orc_program_take_code (p);
+      orc_program_free (p);
+    }
+    p_inited = TRUE;
+    orc_once_mutex_unlock ();
+  }
+  ex->arrays[ORC_VAR_A2] = c;
+  ex->program = 0;
+  /* executor setup: n, m, the two source pointers and their strides */
+  ex->n = n;
+  ORC_EXECUTOR_M(ex) = m;
+  ex->arrays[ORC_VAR_S1] = (void *)s1;
+  ex->params[ORC_VAR_S1] = s1_stride;
+  ex->arrays[ORC_VAR_S2] = (void *)s2;
+  ex->params[ORC_VAR_S2] = s2_stride;
+  /* run the code object and read the result back from accumulator a1 */
+  func = c->exec;
+  func (ex);
+  *a1 = orc_executor_get_accumulator (ex, ORC_VAR_A1);
+}
+#endif
+
+
+/* image_line_difference_optimized */
+#ifdef DISABLE_ORC
+void
+image_line_difference_optimized (orc_uint32 * ORC_RESTRICT a1, const orc_uint8 * ORC_RESTRICT s1, const orc_uint8 * ORC_RESTRICT s2, int n){
+  int i;
+  const orc_int8 * ORC_RESTRICT ptr4;
+  const orc_int8 * ORC_RESTRICT ptr5;
+  orc_union32 var12 =  { 0 };
+  orc_int8 var32;
+  orc_int8 var33;
+  /* Plain C version (DISABLE_ORC): stores in *a1 the sum of |s1[i] - s2[i]| for i in [0, n). */
+  ptr4 = (orc_int8 *)s1;
+  ptr5 = (orc_int8 *)s2;
+
+
+  for (i = 0; i < n; i++) {
+    /* 0: loadb */
+    var32 = ptr4[i];
+    /* 1: loadb */
+    var33 = ptr5[i];
+    /* 2: accsadubl - both bytes are cast back to unsigned before the absolute difference is accumulated */
+    var12.i = var12.i + ORC_ABS((orc_int32)(orc_uint8)var32 - (orc_int32)(orc_uint8)var33);
+  }
+  *a1 = var12.i;
+
+}
+
+#else
+static void
+_backup_image_line_difference_optimized (OrcExecutor * ORC_RESTRICT ex)
+{
+  int i;
+  int n = ex->n;
+  const orc_int8 * ORC_RESTRICT ptr4;
+  const orc_int8 * ORC_RESTRICT ptr5;
+  orc_union32 var12 =  { 0 };
+  orc_int8 var32;
+  orc_int8 var33;
+  /* C implementation registered as the program's backup function; NOTE(review): slots 4/5 are presumably ORC_VAR_S1/S2 - confirm */
+  ptr4 = (orc_int8 *)ex->arrays[4];
+  ptr5 = (orc_int8 *)ex->arrays[5];
+
+
+  for (i = 0; i < n; i++) {
+    /* 0: loadb */
+    var32 = ptr4[i];
+    /* 1: loadb */
+    var33 = ptr5[i];
+    /* 2: accsadubl - both bytes are cast back to unsigned before the absolute difference is accumulated */
+    var12.i = var12.i + ORC_ABS((orc_int32)(orc_uint8)var32 - (orc_int32)(orc_uint8)var33);
+  }
+  ex->accumulators[0] = var12.i;
+
+}
+
+void
+image_line_difference_optimized (orc_uint32 * ORC_RESTRICT a1, const orc_uint8 * ORC_RESTRICT s1, const orc_uint8 * ORC_RESTRICT s2, int n)
+{
+  OrcExecutor _ex, *ex = &_ex;
+  static int p_inited = 0;
+  static OrcCode *c = 0;
+  void (*func) (OrcExecutor *);
+  /* Orc version: the program is built and compiled while p_inited is 0; later calls reuse the static code object c. */
+  if (!p_inited) {
+    orc_once_mutex_lock ();
+    if (!p_inited) {
+      OrcProgram *p;
+      /* p_inited is re-checked under the once-mutex before the program is set up (no orc_program_set_2d here) */
+      p = orc_program_new ();
+      orc_program_set_name (p, "image_line_difference_optimized");
+      orc_program_set_backup_function (p, _backup_image_line_difference_optimized);
+      orc_program_add_source (p, 1, "s1");
+      orc_program_add_source (p, 1, "s2");
+      orc_program_add_accumulator (p, 4, "a1");
+      /* the whole program is the single opcode accsadubl on a1, s1, s2 */
+      orc_program_append_2 (p, "accsadubl", 0, ORC_VAR_A1, ORC_VAR_S1, ORC_VAR_S2, ORC_VAR_D1);
+
+      orc_program_compile (p);
+      c = orc_program_take_code (p);
+      orc_program_free (p);
+    }
+    p_inited = TRUE;
+    orc_once_mutex_unlock ();
+  }
+  ex->arrays[ORC_VAR_A2] = c;
+  ex->program = 0;
+  /* executor setup: n and the two source pointers */
+  ex->n = n;
+  ex->arrays[ORC_VAR_S1] = (void *)s1;
+  ex->arrays[ORC_VAR_S2] = (void *)s2;
+  /* run the code object and read the result back from accumulator a1 */
+  func = c->exec;
+  func (ex);
+  *a1 = orc_executor_get_accumulator (ex, ORC_VAR_A1);
+}
+#endif
+
+
+/* image_sum_optimized */
+#ifdef DISABLE_ORC
+void
+image_sum_optimized (int * ORC_RESTRICT a1, const orc_uint8 * ORC_RESTRICT s1, int s1_stride, int n, int m){
+  int i;
+  int j;
+  const orc_int8 * ORC_RESTRICT ptr4;
+  orc_union32 var12 =  { 0 };
+  orc_int8 var34;
+  orc_union16 var35;
+  orc_union32 var36;
+  /* Plain C version (DISABLE_ORC): stores in *a1 the sum of all bytes (taken as unsigned) of m rows of n bytes each. */
+  for (j = 0; j < m; j++) {
+    ptr4 = ORC_PTR_OFFSET(s1, s1_stride * j);
+    /* row j starts s1_stride * j bytes after s1 */
+
+    for (i = 0; i < n; i++) {
+      /* 0: loadb */
+      var34 = ptr4[i];
+      /* 1: convubw */
+      var35.i = (orc_uint8)var34;
+      /* 2: convuwl */
+      var36.i = (orc_uint16)var35.i;
+      /* 3: accl */
+      var12.i = var12.i + var36.i;
+    }
+  }
+  *a1 = var12.i;
+
+}
+
+#else
+static void
+_backup_image_sum_optimized (OrcExecutor * ORC_RESTRICT ex)
+{
+  int i;
+  int j;
+  int n = ex->n;
+  int m = ex->params[ORC_VAR_A1]; /* row count; presumably the slot ORC_EXECUTOR_M(ex) writes - confirm */
+  const orc_int8 * ORC_RESTRICT ptr4;
+  orc_union32 var12 =  { 0 };
+  orc_int8 var34;
+  orc_union16 var35;
+  orc_union32 var36;
+  /* C implementation registered as the program's backup function; all inputs come from the executor. */
+  for (j = 0; j < m; j++) {
+    ptr4 = ORC_PTR_OFFSET(ex->arrays[4], ex->params[4] * j);
+    /* NOTE(review): slot 4 is presumably ORC_VAR_S1 (pointer in arrays[], stride in params[]) - confirm */
+
+    for (i = 0; i < n; i++) {
+      /* 0: loadb */
+      var34 = ptr4[i];
+      /* 1: convubw */
+      var35.i = (orc_uint8)var34;
+      /* 2: convuwl */
+      var36.i = (orc_uint16)var35.i;
+      /* 3: accl */
+      var12.i = var12.i + var36.i;
+    }
+  }
+  ex->accumulators[0] = var12.i;
+
+}
+
+void
+image_sum_optimized (int * ORC_RESTRICT a1, const orc_uint8 * ORC_RESTRICT s1, int s1_stride, int n, int m)
+{
+  OrcExecutor _ex, *ex = &_ex;
+  static int p_inited = 0;
+  static OrcCode *c = 0;
+  void (*func) (OrcExecutor *);
+  /* Orc version: the program is built and compiled while p_inited is 0; later calls reuse the static code object c. */
+  if (!p_inited) {
+    orc_once_mutex_lock ();
+    if (!p_inited) {
+      OrcProgram *p;
+      /* p_inited is re-checked under the once-mutex before the program is set up */
+      p = orc_program_new ();
+      orc_program_set_2d (p);
+      orc_program_set_name (p, "image_sum_optimized");
+      orc_program_set_backup_function (p, _backup_image_sum_optimized);
+      orc_program_add_source (p, 1, "s1");
+      orc_program_add_accumulator (p, 4, "a1");
+      orc_program_add_temporary (p, 2, "t1");
+      orc_program_add_temporary (p, 4, "t2");
+      /* three opcodes: s1 -> t1 (convubw), t1 -> t2 (convuwl), t2 accumulated into a1 (accl) */
+      orc_program_append_2 (p, "convubw", 0, ORC_VAR_T1, ORC_VAR_S1, ORC_VAR_D1, ORC_VAR_D1);
+      orc_program_append_2 (p, "convuwl", 0, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1);
+      orc_program_append_2 (p, "accl", 0, ORC_VAR_A1, ORC_VAR_T2, ORC_VAR_D1, ORC_VAR_D1);
+
+      orc_program_compile (p);
+      c = orc_program_take_code (p);
+      orc_program_free (p);
+    }
+    p_inited = TRUE;
+    orc_once_mutex_unlock ();
+  }
+  ex->arrays[ORC_VAR_A2] = c;
+  ex->program = 0;
+  /* executor setup: n, m, the source pointer and its stride */
+  ex->n = n;
+  ORC_EXECUTOR_M(ex) = m;
+  ex->arrays[ORC_VAR_S1] = (void *)s1;
+  ex->params[ORC_VAR_S1] = s1_stride;
+  /* run the code object and read the result back from accumulator a1 */
+  func = c->exec;
+  func (ex);
+  *a1 = orc_executor_get_accumulator (ex, ORC_VAR_A1);
+}
+#endif
+
+
+/* image_variance_optimized */
+#ifdef DISABLE_ORC
+void
+image_variance_optimized (int * ORC_RESTRICT a1, const orc_uint8 * ORC_RESTRICT s1, int s1_stride, int p2, int n, int m){
+  int i;
+  int j;
+  const orc_int8 * ORC_RESTRICT ptr4;
+  orc_union32 var12 =  { 0 };
+  orc_int8 var32;
+  orc_int8 var33;
+  /* Plain C version (DISABLE_ORC): stores in *a1 the sum of |s1 - p2| over m rows of n bytes each. */
+  for (j = 0; j < m; j++) {
+    ptr4 = ORC_PTR_OFFSET(s1, s1_stride * j);
+    /* p2 is narrowed to a single byte by the assignment below */
+      /* 1: loadpb */
+        var33 = p2;
+
+    for (i = 0; i < n; i++) {
+      /* 0: loadb */
+      var32 = ptr4[i];
+      /* 2: accsadubl - both bytes are cast back to unsigned before the absolute difference is accumulated */
+      var12.i = var12.i + ORC_ABS((orc_int32)(orc_uint8)var32 - (orc_int32)(orc_uint8)var33);
+    }
+  }
+  *a1 = var12.i;
+
+}
+
+#else
+static void
+_backup_image_variance_optimized (OrcExecutor * ORC_RESTRICT ex)
+{
+  int i;
+  int j;
+  int n = ex->n;
+  int m = ex->params[ORC_VAR_A1]; /* row count; presumably the slot ORC_EXECUTOR_M(ex) writes - confirm */
+  const orc_int8 * ORC_RESTRICT ptr4;
+  orc_union32 var12 =  { 0 };
+  orc_int8 var32;
+  orc_int8 var33;
+  /* C implementation registered as the program's backup function; all inputs come from the executor. */
+  for (j = 0; j < m; j++) {
+    ptr4 = ORC_PTR_OFFSET(ex->arrays[4], ex->params[4] * j);
+    /* NOTE(review): slot 25 is presumably ORC_VAR_P2, which the wrapper fills with p2 - confirm */
+      /* 1: loadpb */
+        var33 = ex->params[25];
+
+    for (i = 0; i < n; i++) {
+      /* 0: loadb */
+      var32 = ptr4[i];
+      /* 2: accsadubl - both bytes are cast back to unsigned before the absolute difference is accumulated */
+      var12.i = var12.i + ORC_ABS((orc_int32)(orc_uint8)var32 - (orc_int32)(orc_uint8)var33);
+    }
+  }
+  ex->accumulators[0] = var12.i;
+
+}
+
+void
+image_variance_optimized (int * ORC_RESTRICT a1, const orc_uint8 * ORC_RESTRICT s1, int s1_stride, int p2, int n, int m)
+{
+  OrcExecutor _ex, *ex = &_ex;
+  static int p_inited = 0;
+  static OrcCode *c = 0;
+  void (*func) (OrcExecutor *);
+  /* Orc version: the program is built and compiled while p_inited is 0; later calls reuse the static code object c. */
+  if (!p_inited) {
+    orc_once_mutex_lock ();
+    if (!p_inited) {
+      OrcProgram *p;
+      /* p_inited is re-checked under the once-mutex before the program is set up */
+      p = orc_program_new ();
+      orc_program_set_2d (p);
+      orc_program_set_name (p, "image_variance_optimized");
+      orc_program_set_backup_function (p, _backup_image_variance_optimized);
+      orc_program_add_source (p, 1, "s1");
+      orc_program_add_accumulator (p, 4, "a1");
+      orc_program_add_parameter (p, 1, "p2");
+      /* NOTE(review): only one parameter is added, yet it is addressed as ORC_VAR_P2 below - check against Orc's parameter numbering */
+      orc_program_append_2 (p, "accsadubl", 0, ORC_VAR_A1, ORC_VAR_S1, ORC_VAR_P2, ORC_VAR_D1);
+
+      orc_program_compile (p);
+      c = orc_program_take_code (p);
+      orc_program_free (p);
+    }
+    p_inited = TRUE;
+    orc_once_mutex_unlock ();
+  }
+  ex->arrays[ORC_VAR_A2] = c;
+  ex->program = 0;
+  /* executor setup: n, m, the source pointer with its stride, and the parameter p2 */
+  ex->n = n;
+  ORC_EXECUTOR_M(ex) = m;
+  ex->arrays[ORC_VAR_S1] = (void *)s1;
+  ex->params[ORC_VAR_S1] = s1_stride;
+  ex->params[ORC_VAR_P2] = p2;
+  /* run the code object and read the result back from accumulator a1 */
+  func = c->exec;
+  func (ex);
+  *a1 = orc_executor_get_accumulator (ex, ORC_VAR_A1);
+}
+#endif
+
+
diff --git a/src/orc/motiondetectorc.h b/src/orc/motiondetectorc.h
new file mode 100644
index 0000000..1c2c7ae
--- /dev/null
+++ b/src/orc/motiondetectorc.h
@@ -0,0 +1,79 @@
+
+/* autogenerated from motiondetectorc.orc */
+
+#ifndef _MOTIONDETECTORC_H_
+#define _MOTIONDETECTORC_H_
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+
+#ifndef _ORC_INTEGER_TYPEDEFS_
+#define _ORC_INTEGER_TYPEDEFS_
+#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
+#include <stdint.h>
+typedef int8_t orc_int8;
+typedef int16_t orc_int16;
+typedef int32_t orc_int32;
+typedef int64_t orc_int64;
+typedef uint8_t orc_uint8;
+typedef uint16_t orc_uint16;
+typedef uint32_t orc_uint32;
+typedef uint64_t orc_uint64;
+#define ORC_UINT64_C(x) UINT64_C(x)
+#elif defined(_MSC_VER)
+typedef signed __int8 orc_int8;
+typedef signed __int16 orc_int16;
+typedef signed __int32 orc_int32;
+typedef signed __int64 orc_int64;
+typedef unsigned __int8 orc_uint8;
+typedef unsigned __int16 orc_uint16;
+typedef unsigned __int32 orc_uint32;
+typedef unsigned __int64 orc_uint64;
+#define ORC_UINT64_C(x) (x##Ui64)
+#define inline __inline
+#else
+#include <limits.h>
+typedef signed char orc_int8;
+typedef short orc_int16;
+typedef int orc_int32;
+typedef unsigned char orc_uint8;
+typedef unsigned short orc_uint16;
+typedef unsigned int orc_uint32;
+#if INT_MAX == LONG_MAX
+typedef long long orc_int64;
+typedef unsigned long long orc_uint64;
+#define ORC_UINT64_C(x) (x##ULL)
+#else
+typedef long orc_int64;
+typedef unsigned long orc_uint64;
+#define ORC_UINT64_C(x) (x##UL)
+#endif
+#endif
+typedef union { orc_int16 i; orc_int8 x2[2]; } orc_union16;
+typedef union { orc_int32 i; float f; orc_int16 x2[2]; orc_int8 x4[4]; } orc_union32;
+typedef union { orc_int64 i; double f; orc_int32 x2[2]; float x2f[2]; orc_int16 x4[4]; } orc_union64;
+#endif
+#ifndef ORC_RESTRICT
+#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
+#define ORC_RESTRICT restrict
+#elif defined(__GNUC__) && __GNUC__ >= 4
+#define ORC_RESTRICT __restrict__
+#else
+#define ORC_RESTRICT
+#endif
+#endif
+void image_difference_optimized (orc_uint32 * ORC_RESTRICT a1, const orc_uint8 * ORC_RESTRICT s1, int s1_stride, const orc_uint8 * ORC_RESTRICT s2, int s2_stride, int n, int m);
+void image_line_difference_optimized (orc_uint32 * ORC_RESTRICT a1, const orc_uint8 * ORC_RESTRICT s1, const orc_uint8 * ORC_RESTRICT s2, int n);
+void image_sum_optimized (int * ORC_RESTRICT a1, const orc_uint8 * ORC_RESTRICT s1, int s1_stride, int n, int m);
+void image_variance_optimized (int * ORC_RESTRICT a1, const orc_uint8 * ORC_RESTRICT s1, int s1_stride, int p2, int n, int m);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
+
diff --git a/src/orc/motiondetectorc.orc b/src/orc/motiondetectorc.orc
new file mode 100644
index 0000000..fd825dd
--- /dev/null
+++ b/src/orc/motiondetectorc.orc
@@ -0,0 +1,84 @@
+################################################################################
+# Optimized functions 
+################################################################################
+
+# Image Difference
+#
+#  for (j = 0; j < field->size; j++) {
+#    for (k = 0; k < field->size * bytesPerPixel; k++) {
+#      sum += abs((int) *p1 - (int) *p2);
+#      p1++;
+#      p2++;
+#    }
+#    p1 += (width - field->size) * bytesPerPixel;
+#    p2 += (width - field->size) * bytesPerPixel;
+#  }
+
+.function image_difference_optimized
+.flags 2d
+.source 1 s1 uint8_t
+.source 1 s2 uint8_t
+.accumulator 4 sum uint32_t
+accsadubl sum, s1, s2
+
+# Same opcode as image_difference_optimized, but declared without the 2d flag
+.function image_line_difference_optimized
+.source 1 s1 uint8_t
+.source 1 s2 uint8_t
+.accumulator 4 sum uint32_t
+accsadubl sum, s1, s2
+
+
+
+# Image Contrast
+#  SUM
+# p = pstart;
+# for (j = 0; j < field->size; j++) {      
+#   for (k = 0; k < field->size; k++, p++) {
+#     sum+=*p;
+#   }
+#   p += (width - field->size);
+# }
+# mean=sum/numpixel;
+# p = pstart;  
+#  VARIANCE
+# for (j = 0; j < field->size; j++) {      
+#   for (k = 0; k < field->size; k++, p++) {
+#     var+=abs(*p-mean);
+#   }
+#   p += (width - field->size);
+# }
+
+
+# Image Contrast functions
+# Sum of all pixels (used to calculate mean)
+.function image_sum_optimized
+.flags 2d
+.accumulator 4 sum int
+.source 1 s uint8_t
+.temp 2 t1 
+.temp 4 t2
+convubw t1 s
+convuwl t2 t1
+accl sum, t2
+
+# this implementation appears to be slower
+# .function image_sum_optimized
+# .flags 2d
+# .accumulator 4 sum int
+# .source 1 s uint8_t
+# .const 1 c1 0
+# accsadubl sum, s, c1
+
+# Variance of the image in Manhattan-Norm (absolute value)
+.function image_variance_optimized
+.flags 2d
+.accumulator 4 var int
+.source 1 s uint8_t
+.param 1 mean uint8_t
+accsadubl var, s, mean
+
+
+
+
+
diff --git a/src/orc/transformorc.orc b/src/orc/transformorc.orc
new file mode 100644
index 0000000..3a5ba68
--- /dev/null
+++ b/src/orc/transformorc.orc
@@ -0,0 +1,65 @@
+################################################################################
+# Optimized functions 
+################################################################################
+
+# Hint: use only one space between opcode and operands (and also between them)
+
+# Rotation and Translation of one line
+
+# for (x = 0; x < td->fiDest.width; x++) {
+# 	int32_t x_d1 = (xs[x] - c_d_x);
+# 	x_ss[x]  =  zcos_a * x_d1 + zsin_a * y_d1 + c_tx;
+# 	y_ss[x]  = -zsin_a * x_d1 + zcos_a * y_d1 + c_ty;
+# }
+
+
+.function transform_one_line_optimized
+.dest 4 x_ss int32_t    # fp16
+.dest 4 y_ss int32_t    # fp16
+.source 4 xs int32_t
+.param 4 y_d1 int32_t
+.param 4 c_d_x int32_t
+.param 4 c_tx int32_t   # fp16
+.param 4 c_ty int32_t   # fp16
+.param 4 zcos_a int32_t # fp16
+.param 4 zsin_a int32_t # fp16
+.temp 4 x_d1
+.temp 4 tmp1
+.temp 4 tmp2
+
+subl x_d1, xs, c_d_x
+mulll tmp1 zcos_a x_d1
+mulll tmp2 zsin_a y_d1
+addl tmp1 tmp1 tmp2
+addl x_ss tmp1 c_tx
+mulll tmp1 zcos_a y_d1
+mulll tmp2 zsin_a x_d1
+subl tmp1 tmp1 tmp2
+addl y_ss tmp1 c_ty
+
+# NOTE(review): looks unfinished - only x_ss is a destination, tmp2 is never stored and the y_ss line is commented out
+.function transform_one_line_optimized1
+.dest 4 x_ss int32_t    # fp16
+.source 4 xs int32_t
+.param 4 y_d1 int32_t
+.param 4 c_d_x int32_t
+.param 4 c_tx int32_t   # fp16
+.param 4 c_ty int32_t   # fp16
+.param 4 zcos_a int32_t # fp16
+.param 4 zsin_a int32_t # fp16
+.param 4 sin_y int32_t # fp16
+.param 4 cos_y int32_t # fp16
+.temp 4 x_d1
+.temp 4 tmp1
+.temp 4 tmp2
+subl x_d1, xs, c_d_x
+mulll tmp1, x_d1, zcos_a
+addl tmp1, tmp1, sin_y
+addl x_ss, tmp1, c_tx
+mulll tmp1, x_d1, zsin_a
+mulll tmp1, tmp1, -1
+addl tmp2, tmp1, cos_y
+# addl y_ss, tmp1, c_ty
+
+
+
diff --git a/src/serialize.c b/src/serialize.c
new file mode 100644
index 0000000..e3ee68d
--- /dev/null
+++ b/src/serialize.c
@@ -0,0 +1,265 @@
+/*
+ * serialize.c
+ *
+ *  Copyright (C) Georg Martius - January 2013
+ *   georg dot martius at web dot de
+ *
+ *  This file is part of vid.stab video stabilization library
+ *
+ *  vid.stab is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License,
+ *  as published by the Free Software Foundation; either version 2, or
+ *  (at your option) any later version.
+ *
+ *  vid.stab is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with GNU Make; see the file COPYING.  If not, write to
+ *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ */
+
+#include <assert.h>
+#include <string.h>
+
+#include "serialize.h"
+#include "transformtype.h"
+#include "transformtype_operations.h"
+#include "motiondetect.h"
+
+const char* modname = "vid.stab - serialization";
+
+/// writes one local motion as "(LM vx vy fx fy fsize contrast match)"; returns the result of fprintf
+int storeLocalmotion(FILE* f, const LocalMotion* lm){
+  return fprintf(f,"(LM %i %i %i %i %i %lf %lf)",
+                 lm->v.x, lm->v.y, lm->f.x, lm->f.y, lm->f.size, lm->contrast, lm->match);
+}
+
+/// restores one local motion written by storeLocalmotion; returns null_localmotion() if it cannot be parsed
+LocalMotion restoreLocalmotion(FILE* f){
+  LocalMotion lm;
+  int c; // int, not char: fgetc() returns an int so that EOF stays distinguishable from every byte
+  if(fscanf(f,"(LM %i %i %i %i %i %lf %lf", &lm.v.x,&lm.v.y,&lm.f.x,&lm.f.y,&lm.f.size,
+            &lm.contrast, &lm.match) != 7) {
+    vs_log_error(modname, "Cannot parse localmotion!\n");
+    return null_localmotion();
+  }
+  while((c=fgetc(f)) && c!=')' && c!=EOF); // skip up to the closing ')' (also stops at a NUL byte)
+  if(c==EOF){
+    vs_log_error(modname, "Cannot parse localmotion missing ')'!\n");
+    return null_localmotion();
+  }
+  return lm;
+}
+/// writes "List <len> [<lm>,<lm>,...]"; returns 1, or 0 as soon as one entry cannot be written
+int vsStoreLocalmotions(FILE* f, const LocalMotions* lms){
+  const int count = vs_vector_size(lms);
+  fprintf(f,"List %i [",count);
+  for (int k=0; k<count; k++){
+    if(k!=0) fprintf(f,",");          // separator in front of every entry but the first
+    if(storeLocalmotion(f,LMGet(lms,k)) <= 0)
+      return 0;                       // the closing "]" is not written in this case
+  }
+  fprintf(f,"]");
+  return 1;
+}
+
+/// restores a "List <len> [...]" block written by vsStoreLocalmotions; on errors the vector read so far is returned
+LocalMotions vsRestoreLocalmotions(FILE* f){
+  LocalMotions lms;
+  int i;
+  int c; // int, not char: fgetc() returns an int so that EOF stays distinguishable from every byte
+  int len;
+  vs_vector_init(&lms,0);
+  if(fscanf(f,"List %i [", &len) != 1) {
+    vs_log_error(modname, "Cannot parse localmotions list expect 'List len ['!\n");
+    return lms;
+  }
+  if (len>0){
+    vs_vector_init(&lms,len);
+    for (i=0; i<len; i++){
+      if(i>0) while((c=fgetc(f)) && c!=',' && c!=EOF); // skip to the ',' between two entries
+      LocalMotion lm = restoreLocalmotion(f); // yields null_localmotion() on failure, which is appended as well
+      vs_vector_append_dup(&lms,&lm,sizeof(LocalMotion));
+    }
+  }
+  if(len != vs_vector_size(&lms)){
+    vs_log_error(modname, "Cannot parse the given number of localmotions!\n");
+    return lms;
+  }
+  while((c=fgetc(f)) && c!=']' && c!=EOF); // skip up to the closing ']'
+  if(c==EOF){
+    vs_log_error(modname, "Cannot parse localmotions list missing ']'!\n");
+    return lms;
+  }
+  return lms;
+}
+/// writes the file header: the "VID.STAB 1" line followed by four settings of md->conf as '#' comment lines
+int vsPrepareFile(const VSMotionDetect* md, FILE* f){
+  if(!f) return VS_ERROR;
+  fprintf(f, "VID.STAB 1\n");
+  fprintf(f, "#      accuracy = %d\n", md->conf.accuracy);
+  fprintf(f, "#     shakiness = %d\n", md->conf.shakiness);
+  fprintf(f, "#      stepsize = %d\n", md->conf.stepSize);
+  fprintf(f, "#   mincontrast = %f\n", md->conf.contrastThreshold);
+  return VS_OK;
+}
+/// appends "Frame <md->frameNum> (<local motions>)\n" to f; returns VS_OK, or VS_ERROR if an argument is NULL or a write fails
+int vsWriteToFile(const VSMotionDetect* md, FILE* f, const LocalMotions* lms){
+  if(!f || !lms) return VS_ERROR;
+
+  if(fprintf(f, "Frame %i (", md->frameNum)>0
+     && vsStoreLocalmotions(f,lms)>0 && fprintf(f, ")\n")>0) // fprintf is negative, i.e. "true", on failure
+    return VS_OK;
+  else
+    return VS_ERROR;
+}
+
+/// reads the "VID.STAB <version>" header line; returns the version number, or VS_ERROR if f is NULL or the line does not match
+int vsReadFileVersion(FILE* f){
+  int version;
+  if(!f) return VS_ERROR;
+  if(fscanf(f, "VID.STAB %i\n", &version)==1)
+    return version;
+  return VS_ERROR;
+}
+/// reads the next "Frame <num> (...)" entry into lms, skipping '#' lines, newlines and spaces; returns <num> or VS_ERROR
+int vsReadFromFile(FILE* f, LocalMotions* lms){
+  int c = fgetc(f); // int, not char: otherwise EOF cannot be told apart from a data byte
+  if(c=='F') {
+    int num;
+    if(fscanf(f,"rame %i (", &num)!=1) {
+      vs_log_error(modname,"cannot read file, expect 'Frame num (...'");
+      return VS_ERROR;
+    }
+    *lms = vsRestoreLocalmotions(f);
+    if(fscanf(f,")\n")<0) {
+      vs_log_error(modname,"cannot read file, expect '...)'");
+      return VS_ERROR;
+    }
+    return num;
+  } else if(c=='#') {
+    char l[1024];
+    if(fgets(l, sizeof(l), f)==0) return VS_ERROR;
+    return vsReadFromFile(f,lms);
+  } else if(c=='\n' || c==' ') {
+    return vsReadFromFile(f,lms);
+  } else if(c==EOF) {
+    return VS_ERROR;
+  } else {
+    vs_log_error(modname,"cannot read frame local motions from file, got %c (%i)",
+                 c, (int) c);
+    return VS_ERROR;
+  }
+}
+/// reads a version-1 local-motions file into mlms (frame k is stored at index k-1); VS_ERROR only if the header is not version 1
+int vsReadLocalMotionsFile(FILE* f, VSManyLocalMotions* mlms){
+  const int version = vsReadFileVersion(f);
+  if(version<1) // old format or unknown
+    return VS_ERROR;
+  if(version>1){
+    vs_log_error(modname,"Version of VID.STAB file too large: got %i, expect <= 1",
+                 version);
+    return VS_ERROR;
+  }
+  assert(mlms);
+  // initial number of frames, but it will automatically be increased
+  vs_vector_init(mlms,1024);
+  LocalMotions lms;
+  int prev = 0;
+  for(;;){
+    const int frame = vsReadFromFile(f,&lms);
+    if(frame == VS_ERROR) break; // end of file or unreadable entry
+    if(frame > prev+1){
+      vs_log_info(modname,"VID.STAB file: index of frames is not continuous %i -< %i", prev, frame);
+    }
+    if(frame<1){
+      vs_log_info(modname,"VID.STAB file: Frame number < 1 (%i)", frame);
+    } else {
+      vs_vector_set_dup(mlms,frame-1,&lms, sizeof(LocalMotions));
+    }
+    prev=frame;
+  }
+  return VS_OK;
+}
+
+
+/**
+ * vsReadOldTransforms: read transforms file (Deprecated format)
+ *  The format is as follows:
+ *   Lines with # at the beginning and lines containing only whitespace are ignored
+ *   Data lines have 6 or 5 columns separated by space or tab containing
+ *   time, x-translation, y-translation, alpha-rotation, [zoom,] extra
+ *   where time (ignored) and extra (unused at the moment) are integers; a missing zoom is set to 0
+ *
+ * Parameters:
+ *         td: only td->conf.modName is used (for log messages)
+ *         f:  file description
+ *         trans: place to store the transforms (trans->ts is grown as needed)
+ * Return value:
+ *         number of transforms read, 0 if a line cannot be parsed or memory runs out
+ * Preconditions: f is opened
+ */
+int vsReadOldTransforms(const VSTransformData* td, FILE* f , VSTransformations* trans)
+{
+  char l[1024];
+  int s = 0; // number of transforms this function has allocated room for
+  int i = 0; // number of transforms stored so far
+  int ti; // time (ignored)
+  VSTransform t;
+
+  while (fgets(l, sizeof(l), f)) {
+    t = null_transform();
+    if (l[0] == '#')
+      continue;    //  ignore comments
+    if (l[strspn(l, " \t\r\n")] == '\0')
+      continue; //  ignore empty lines (fgets keeps the '\n', so strlen() is never 0 for them)
+    // try the 6-column format first, then fall back to the 5-column one
+    if (sscanf(l, "%i %lf %lf %lf %lf %i", &ti, &t.x, &t.y, &t.alpha,
+               &t.zoom, &t.extra) != 6) {
+      if (sscanf(l, "%i %lf %lf %lf %i", &ti, &t.x, &t.y, &t.alpha,
+                 &t.extra) != 5) {
+        vs_log_error(td->conf.modName, "Cannot parse line: %s", l);
+        return 0;
+      }
+      t.zoom=0;
+    }
+
+    if (i>=s) { // resize transform array
+      if (s == 0)
+        s = 256;
+      else
+        s*=2;
+      VSTransform* grown = vs_realloc(trans->ts, sizeof(VSTransform)* s);
+      if (!grown) { // trans->ts still points to the old array, so it is not leaked
+        vs_log_error(td->conf.modName, "Cannot allocate memory"
+                     " for transformations: %i\n", s);
+        return 0;
+      }
+      trans->ts = grown;
+    }
+    trans->ts[i] = t;
+    i++;
+  }
+  trans->len = i;
+
+  return i;
+}
+
+
+//     t = vsSimpleMotionsToTransform(md, &localmotions);
+
+
+/*
+ * Local variables:
+ *   c-file-style: "stroustrup"
+ *   c-file-offsets: ((case-label . *) (statement-case-intro . *))
+ *   indent-tabs-mode: nil
+ *   c-basic-offset: 2 t
+ * End:
+ *
+ * vim: expandtab shiftwidth=2:
+ */
diff --git a/src/serialize.h b/src/serialize.h
new file mode 100644
index 0000000..2142a7d
--- /dev/null
+++ b/src/serialize.h
@@ -0,0 +1,77 @@
+/*
+ * serialize.h
+ *
+ *  Copyright (C) Georg Martius - January 2013
+ *   georg dot martius at web dot de
+ *
+ *  This file is part of vid.stab video stabilization library
+ *
+ *  vid.stab is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License,
+ *  as published by the Free Software Foundation; either version 2, or
+ *  (at your option) any later version.
+ *
+ *  vid.stab is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with GNU Make; see the file COPYING.  If not, write to
+ *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ */
+
+#ifndef __SERIALIZE_H
+#define __SERIALIZE_H
+
+#include "transformtype.h"
+#include "motiondetect.h"
+#include "transform.h"
+
+
+/// Vector of LocalMotions
+typedef VSVector VSManyLocalMotions;
+/// helper macro: fetches element `index` of a VSManyLocalMotions vector and casts it to LocalMotions*
+#define VSMLMGet(manylocalmotions,index) \
+    ((LocalMotions*)vs_vector_get(manylocalmotions,index))
+
+
+/// stores local motions to file
+int vsStoreLocalmotions(FILE* f, const LocalMotions* lms);
+
+/// restores local motions from file
+LocalMotions vsRestoreLocalmotions(FILE* f);
+
+
+/// writes the header to the file that is to be holding the local motions
+int vsPrepareFile(const VSMotionDetect* td, FILE* f);
+
+/// appends the given localmotions to the file
+int vsWriteToFile(const VSMotionDetect* td, FILE* f, const LocalMotions* lms);
+
+/// reads the header of the file and return the version number (used by readLocalmotionsFile)
+int vsReadFileVersion(FILE* f);
+
+/*
+ * reads the next set of localmotions from the file, return VS_ERROR on error or
+ * if nothing is read (used by readLocalmotionsFile)
+ */
+int vsReadFromFile(FILE* f, LocalMotions* lms);
+
+/*
+ * reads the entire file of localmotions, return VS_ERROR on error or if nothing is read
+ *
+ *  The format is as follows:
+ *   The file must begin with 'VID.STAB version\n'
+ *   Lines with # at the beginning are comments and will be ignored
+ *   Data lines have the structure: Frame NUM (<LocalMotions>)
+ *   where LocalMotions ::= List [(LM v.x v.y f.x f.y f.size contrast match),...]
+ */
+int vsReadLocalMotionsFile(FILE* f, VSManyLocalMotions* lms);
+
+// read the transformations from the given file (deprecated format); returns the number read, 0 on failure
+int vsReadOldTransforms(const VSTransformData* td, FILE* f , VSTransformations* trans);
+
+
+#endif
diff --git a/src/transform.c b/src/transform.c
new file mode 100644
index 0000000..a236b11
--- /dev/null
+++ b/src/transform.c
@@ -0,0 +1,572 @@
+/*
+ *  transform.c
+ *
+ *  Copyright (C) Georg Martius - June 2007 - 2011
+ *   georg dot martius at web dot de
+ *
+ *  This file is part of vid.stab video stabilization library
+ *
+ *  vid.stab is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License,
+ *  as published by the Free Software Foundation; either version 2, or
+ *  (at your option) any later version.
+ *
+ *  vid.stab is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with GNU Make; see the file COPYING.  If not, write to
+ *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ */
+
+#include "transform.h"
+#include "transform_internal.h"
+#include "transformtype_operations.h"
+
+#include "transformfixedpoint.h"
+#ifdef TESTING
+#include "transformfloat.h"
+#endif
+
+#include <math.h>
+#include <libgen.h>
+#include <string.h>
+
+const char* interpol_type_names[5] = {"No (0)", "Linear (1)", "Bi-Linear (2)",
+                                      "Bi-Cubic (3)"}; // indexed by VSInterpolType; 4 of the 5 slots are named, the last is NULL
+
+const char* getInterpolationTypeName(VSInterpolType type){
+  // values outside the enum range get a fixed placeholder instead of a table lookup
+  if (type < VS_Zero || type >= VS_NBInterPolTypes)
+    return "unknown";
+  return interpol_type_names[(int) type];
+}
+
+// default initialization: attention the ffmpeg filter cannot call it
+VSTransformConfig vsTransformGetDefaultConfig(const char* modName){
+  VSTransformConfig conf;
+  /* every field of the config is assigned below */
+  conf.modName            = modName;
+  conf.verbose            = 0;
+  conf.storeTransforms    = 0;
+  conf.relative           = 1;
+  conf.invert             = 0;
+  conf.simpleMotionCalculation = 0;
+  conf.smoothing          = 15;
+  conf.camPathAlgo        = VSOptimalL1;
+  conf.maxShift           = -1;
+  conf.maxAngle           = -1;
+  conf.zoom               = 0;
+  conf.optZoom            = 1;
+  conf.zoomSpeed          = 0.25;
+  conf.smoothZoom         = 0;
+  conf.crop               = VSKeepBorder;
+  conf.interpolType       = VS_BiLinear;
+  return conf;
+}
+
+void vsTransformGetConfig(VSTransformConfig* conf, const VSTransformData* td){
+  if(!td || !conf) return; // nothing to copy from or into
+  *conf = td->conf;
+}
+
+const VSFrameInfo* vsTransformGetSrcFrameInfo(const VSTransformData* td){
+  return &(td->fiSrc); // points into td itself, no copy is made
+}
+
+const VSFrameInfo* vsTransformGetDestFrameInfo(const VSTransformData* td){
+  return &(td->fiDest); // points into td itself, no copy is made
+}
+
+
+int vsTransformDataInit(VSTransformData* td, const VSTransformConfig* conf,
+                        const VSFrameInfo* fi_src, const VSFrameInfo* fi_dest){
+  // take private copies of the configuration and of both frame descriptions
+  td->conf   = *conf;
+  td->fiSrc  = *fi_src;
+  td->fiDest = *fi_dest;
+
+  // no frame buffers are held yet; vsTransformPrepare sets them up
+  td->srcMalloced = 0;
+  vsFrameNull(&td->src);
+  vsFrameNull(&td->dest);
+  vsFrameNull(&td->destbuf);
+
+  // a shift larger than half of either destination dimension is cut back
+  const int halfWidth  = td->fiDest.width/2;
+  const int halfHeight = td->fiDest.height/2;
+  if (td->conf.maxShift > halfWidth)  td->conf.maxShift = halfWidth;
+  if (td->conf.maxShift > halfHeight) td->conf.maxShift = halfHeight;
+
+  // force the interpolation type into the supported range
+  td->conf.interpolType = VS_MAX(VS_MIN(td->conf.interpolType,VS_BiCubic),VS_Zero);
+  // the L1-optimal camera path is not yet implemented: use the gaussian one
+  if(td->conf.camPathAlgo==VSOptimalL1) td->conf.camPathAlgo=VSGaussian;
+
+  switch(td->conf.interpolType){
+   case VS_Zero:     td->interpolate = &interpolateZero; break;
+   case VS_Linear:   td->interpolate = &interpolateLin; break;
+   case VS_BiLinear: td->interpolate = &interpolateBiLin; break;
+   case VS_BiCubic:  td->interpolate = &interpolateBiCub; break;
+   default: td->interpolate = &interpolateBiLin;
+  }
+#ifdef TESTING
+  switch(td->conf.interpolType){
+   case VS_Zero:     td->_FLT(interpolate) = &_FLT(interpolateZero); break;
+   case VS_Linear:   td->_FLT(interpolate) = &_FLT(interpolateLin); break;
+   case VS_BiLinear: td->_FLT(interpolate) = &_FLT(interpolateBiLin); break;
+   case VS_BiCubic:  td->_FLT(interpolate) = &_FLT(interpolateBiCub); break;
+   default: td->_FLT(interpolate)          = &_FLT(interpolateBiLin);
+  }
+#endif
+  return VS_OK;
+}
+
+void vsTransformDataCleanup(VSTransformData* td){
+  // src is released only when vsTransformPrepare allocated it itself
+  const int ownsSrc = td->srcMalloced && !vsFrameIsNull(&td->src);
+  if (ownsSrc) vsFrameFree(&td->src);
+  // destbuf is a separately allocated buffer only in keep-border mode
+  const int ownsDestbuf = td->conf.crop == VSKeepBorder && !vsFrameIsNull(&td->destbuf);
+  if (ownsDestbuf) vsFrameFree(&td->destbuf);
+}
+
+int vsTransformPrepare(VSTransformData* td, const VSFrame* src, VSFrame* dest){
+  // the frame is first made available as td->src; the destination is then
+  // overwritten with the transformed version
+  td->dest = *dest;
+  const int needPrivateSrc = (src==dest) || td->srcMalloced;
+  if(!needPrivateSrc){
+    td->src=*src; // distinct buffers: no copy needed
+  }else{ // in place operation (or a private copy already exists): copy the src first
+    if(vsFrameIsNull(&td->src)) {
+      vsFrameAllocate(&td->src,&td->fiSrc);
+      td->srcMalloced = 1;
+    }
+    if (vsFrameIsNull(&td->src)) {
+      vs_log_error(td->conf.modName, "vs_malloc failed\n");
+      return VS_ERROR;
+    }
+    vsFrameCopy(&td->src, src, &td->fiSrc);
+  }
+  if (td->conf.crop != VSKeepBorder) {
+    td->destbuf = *dest; // crop mode: operate directly on the destination
+    return VS_OK;
+  }
+  if (!vsFrameIsNull(&td->destbuf))
+    return VS_OK; // background buffer was already set up by an earlier call
+  // keeping the borders needs a second buffer holding the previous
+  // stabilized frame; it starts out as a copy of the first source frame
+  vsFrameAllocate(&td->destbuf,&td->fiDest);
+  if (vsFrameIsNull(&td->destbuf)) {
+    vs_log_error(td->conf.modName, "vs_malloc failed\n");
+    return VS_ERROR;
+  }
+  vsFrameCopy(&td->destbuf, src, &td->fiSrc); // here we have to take care
+  return VS_OK;
+}
+
+int vsDoTransform(VSTransformData* td, VSTransform t){
+  // pixel formats ordered below PF_PACKED take the planar code path
+  const int usePlanar = td->fiSrc.pFormat < PF_PACKED;
+  return usePlanar ? transformPlanar(td, t)
+                   : transformPacked(td, t);
+}
+
+
+int vsTransformFinish(VSTransformData* td){
+  if(td->conf.crop != VSKeepBorder)
+    return VS_OK; // destbuf was set from the caller's frame, nothing to copy
+  // keep-border mode: destbuf is our own buffer and stays behind as the
+  // default for the next frame, so the result is copied to the video buffer
+  vsFrameCopy(&td->dest, &td->destbuf, &td->fiSrc);
+  return VS_OK;
+}
+
+
+VSTransform vsGetNextTransform(const VSTransformData* td, VSTransformations* trans){
+  if(trans->len <= 0) return null_transform();
+  if (trans->current < trans->len) {
+    trans->current++; // advance to the next unread transform
+  }else{ // list exhausted: stay on the last transform and warn once
+    trans->current = trans->len;
+    if(!trans->warned_end)
+      vs_log_warn(td->conf.modName, "not enough transforms found, use last transformation!\n");
+    trans->warned_end = 1;
+  }
+  return trans->ts[trans->current-1];
+}
+
+void vsTransformationsInit(VSTransformations* trans){
+  // empty list: no storage, nothing read, end-of-list warning not yet issued
+  trans->ts = NULL;
+  trans->len = trans->current = 0;
+  trans->warned_end = 0;
+}
+
+void vsTransformationsCleanup(VSTransformations* trans){
+  // release the array (if any) and leave an empty list behind
+  if (trans->ts != NULL)
+    vs_free(trans->ts);
+  trans->ts = NULL;
+  trans->len = 0;
+}
+
+/*
+ *  This is actually the core algorithm for canceling the jiggle in the
+ *  movie. We have different implementations which are patched here.
+ */
+int cameraPathOptimization(VSTransformData* td, VSTransformations* trans){
+  const VSCamPathAlgo algo = td->conf.camPathAlgo;
+  if (algo == VSAvg)
+    return cameraPathAvg(td,trans);
+  // VSOptimalL1 is not yet implemented and shares the gaussian filter
+  if (algo == VSOptimalL1 || algo == VSGaussian)
+    return cameraPathGaussian(td,trans);
+  return VS_ERROR; // unknown algorithm id
+}
+
+/*
+ *  Low-pass filter on the camera path using a gaussian kernel over
+ *  2*smoothing+1 frames; each transform is replaced by its deviation from
+ *  the smoothed path. Returns VS_ERROR for an empty list or failed allocation.
+ */
+int cameraPathGaussian(VSTransformData* td, VSTransformations* trans){
+  VSTransform* ts = trans->ts;
+  if (trans->len < 1)
+    return VS_ERROR;
+  if (td->conf.verbose & VS_DEBUG) {
+    vs_log_msg(td->conf.modName, "Preprocess transforms:");
+  }
+  /* relative to absolute (integrate transformations) */
+  if (td->conf.relative) {
+    VSTransform t = ts[0];
+    for (int i = 1; i < trans->len; i++) {
+      ts[i] = add_transforms(&ts[i], &t);
+      t = ts[i];
+    }
+  }
+  if (td->conf.smoothing>0) {
+    VSTransform* ts2 = vs_malloc(sizeof(VSTransform) * trans->len);
+    if (!ts2) {
+      vs_log_error(td->conf.modName, "vs_malloc failed\n");
+      return VS_ERROR;
+    }
+    memcpy(ts2, ts, sizeof(VSTransform) * trans->len); // unfiltered copy read by the convolution
+    int s = td->conf.smoothing * 2 + 1;
+    VSArray kernel = vs_array_new(s);
+    // initialize gaussian kernel
+    int mu        = td->conf.smoothing;
+    double sigma2 = sqr(mu/2.0);
+    for(int i=0; i<=mu; i++){
+      kernel.dat[i] = kernel.dat[s-i-1] = exp(-sqr(i-mu)/sigma2);
+    }
+    for (int i = 0; i < trans->len; i++) {
+      // make a convolution:
+      double weightsum=0;
+      VSTransform avg = null_transform();
+      for(int k=0; k<s; k++){
+        int idx = i+k-mu;
+        if(idx>=0 && idx<trans->len){
+          if(unlikely(0 && ts2[idx].extra==1)){ // deal with scene cuts or bad frames (disabled by the constant 0)
+            if(k<mu) { // in the past of our frame: ignore everything before
+              avg=null_transform();
+              weightsum=0;
+              continue;
+            }else{           //current frame or in future: stop here
+              if(k==mu)      //for current frame: ignore completely
+                weightsum=0;
+              break;
+            }
+          }
+          weightsum+=kernel.dat[k];
+          avg=add_transforms_(avg, mult_transform(&ts2[idx], kernel.dat[k]));
+        }
+      }
+      if(weightsum>0){
+        avg = mult_transform(&avg, 1.0/weightsum);
+        // high frequency must be transformed away
+        ts[i] = sub_transforms(&ts[i], &avg);
+      }
+      if (td->conf.verbose & VS_DEBUG) {
+        vs_log_msg(td->conf.modName,
+                   " avg: %5lf, %5lf, %5lf extra: %i weightsum %5lf",
+                   avg.x, avg.y, avg.alpha, ts[i].extra, weightsum);
+      }
+    }
+    vs_free(ts2); // scratch copy and kernel are local to this call
+    vs_array_free(kernel);
+  }
+  return VS_OK;
+}
+
+/*
+ *  We perform a low-pass filter in terms of transformations.
+ *  This supports slow camera movement (low frequency), but in a smooth fashion.
+ *  Here a simple average based filter. Returns VS_OK, or VS_ERROR (empty list / no memory).
+ */
+int cameraPathAvg(VSTransformData* td, VSTransformations* trans){
+  VSTransform* ts = trans->ts;
+  if (trans->len < 1)
+    return VS_ERROR;
+  if (td->conf.verbose & VS_DEBUG) {
+   vs_log_msg(td->conf.modName, "Preprocess transforms:");
+  }
+  if (td->conf.smoothing>0) {
+    VSTransform* ts2 = vs_malloc(sizeof(VSTransform) * trans->len);
+    if (!ts2) { // the memcpy below would otherwise write through NULL
+      vs_log_error(td->conf.modName, "vs_malloc failed\n");
+      return VS_ERROR;
+    }
+    memcpy(ts2, ts, sizeof(VSTransform) * trans->len);
+    /*  we will do a sliding average with minimal update
+     *   \hat x_{n/2} = x_1+x_2 + .. + x_n
+     *   \hat x_{n/2+1} = x_2+x_3 + .. + x_{n+1} = \hat x_{n/2} - x_1 + x_{n+1}
+     *   avg = \hat x / n
+     */
+    int s = td->conf.smoothing * 2 + 1;
+    VSTransform null = null_transform();
+    /* avg is the average over [-smoothing, smoothing] transforms
+       around the current point */
+    VSTransform avg;
+    /* avg2 is a sliding average over the filtered signal! (only to past)
+     *  with smoothing * 2 horizon to kill offsets */
+    VSTransform avg2 = null_transform();
+    double tau = 1.0/(2 * s);
+    /* initialise sliding sum with hypothetic sum centered around
+     * -1st element. We have two choices:
+     * a) assume the camera is not moving at the beginning
+     * b) assume that the camera moves and we use the first transforms
+     */
+    VSTransform s_sum = null;
+    for (int i = 0; i < td->conf.smoothing; i++){
+      s_sum = add_transforms(&s_sum, i < trans->len ? &ts2[i]:&null);
+    }
+    // choice a is in effect: the former "mult_transform(&s_sum, 2);" for choice b
+    // discarded its result and never changed s_sum, so the no-op call is dropped
+    for (int i = 0; i < trans->len; i++) {
+      VSTransform* old = ((i - td->conf.smoothing - 1) < 0)
+        ? &null : &ts2[(i - td->conf.smoothing - 1)];
+      VSTransform* new = ((i + td->conf.smoothing) >= trans->len)
+        ? &null : &ts2[(i + td->conf.smoothing)];
+      s_sum = sub_transforms(&s_sum, old);
+      s_sum = add_transforms(&s_sum, new);
+      avg = mult_transform(&s_sum, 1.0/s);
+
+      /* lowpass filter:
+       * meaning high frequency must be transformed away
+       */
+      ts[i] = sub_transforms(&ts2[i], &avg);
+      /* kill accumulating offset in the filtered signal*/
+      avg2 = add_transforms_(mult_transform(&avg2, 1 - tau),
+                             mult_transform(&ts[i], tau));
+      ts[i] = sub_transforms(&ts[i], &avg2);
+
+      if (td->conf.verbose & VS_DEBUG) {
+        vs_log_msg(td->conf.modName,
+                   "s_sum: %5lf %5lf %5lf, ts: %5lf, %5lf, %5lf\n",
+                   s_sum.x, s_sum.y, s_sum.alpha,
+                   ts[i].x, ts[i].y, ts[i].alpha);
+        vs_log_msg(td->conf.modName,
+                   "  avg: %5lf, %5lf, %5lf avg2: %5lf, %5lf, %5lf",
+                   avg.x, avg.y, avg.alpha,
+                   avg2.x, avg2.y, avg2.alpha);
+      }
+    }
+    vs_free(ts2);
+  }
+  /* relative to absolute */
+  if (td->conf.relative) {
+    VSTransform t = ts[0];
+    for (int i = 1; i < trans->len; i++) {
+      ts[i] = add_transforms(&ts[i], &t);
+      t = ts[i];
+    }
+  }
+  return VS_OK;
+}
+
+
+/**
+ * vsPreprocessTransforms: camera path optimization, relative to absolute conversion,
+ *  and cropping of too large transforms.
+ *
+ * Parameters:
+ *            td: transform private data structure
+ *         trans: list of transformations (changed)
+ * Return value:
+ *     VS_OK on success, VS_ERROR if the camera path optimization fails
+ *     or the scratch buffer for the adaptive zoom cannot be allocated
+ * Side effects:
+ *     trans->ts is rewritten in place; with optZoom == 1 the computed
+ *     static zoom is added to td->conf.zoom
+ */
+int vsPreprocessTransforms(VSTransformData* td, VSTransformations* trans)
+{
+  // works inplace on trans
+  if(cameraPathOptimization(td, trans)!=VS_OK) return VS_ERROR;
+  VSTransform* ts = trans->ts;
+  /*  invert? */
+  if (td->conf.invert) {
+    for (int i = 0; i < trans->len; i++) {
+      ts[i] = mult_transform(&ts[i], -1);
+    }
+  }
+  /* crop at maximal shift */
+  if (td->conf.maxShift != -1)
+    for (int i = 0; i < trans->len; i++) {
+      ts[i].x     = VS_CLAMP(ts[i].x, -td->conf.maxShift, td->conf.maxShift);
+      ts[i].y     = VS_CLAMP(ts[i].y, -td->conf.maxShift, td->conf.maxShift);
+    }
+  if (td->conf.maxAngle != - 1.0)
+    for (int i = 0; i < trans->len; i++)
+      ts[i].alpha = VS_CLAMP(ts[i].alpha, -td->conf.maxAngle, td->conf.maxAngle);
+  /* Calc optimal zoom (1)
+   *  cheap algo is to only consider translations
+   *  uses cleaned max and min to eliminate 99% of transforms
+   */
+  if (td->conf.optZoom == 1 && trans->len > 1){
+    VSTransform min_t, max_t;
+    cleanmaxmin_xy_transform(ts, trans->len, 1, &min_t, &max_t);  // 99% of all transformations
+    // the zoom value only for x
+    double zx = 2*VS_MAX(max_t.x,fabs(min_t.x))/td->fiSrc.width;
+    // the zoom value only for y
+    double zy = 2*VS_MAX(max_t.y,fabs(min_t.y))/td->fiSrc.height;
+    td->conf.zoom += 100 * VS_MAX(zx,zy); // use maximum
+    td->conf.zoom = VS_CLAMP(td->conf.zoom,-60,60);
+    vs_log_info(td->conf.modName, "Final zoom: %lf\n", td->conf.zoom);
+  }
+  /* Calc optimal zoom (2)
+   *  sliding average to zoom only as much as needed also using rotation angles
+   *  the baseline zoom is the mean required zoom + global zoom
+   *  in order to avoid too much zooming in and out
+   */
+  if (td->conf.optZoom == 2 && trans->len > 1){
+    double* zooms=(double*)vs_zalloc(sizeof(double)*trans->len);
+    if (!zooms) { // the loops below index zooms[] unconditionally
+      vs_log_error(td->conf.modName, "vs_malloc failed\n");
+      return VS_ERROR;
+    }
+    int w = td->fiSrc.width;
+    int h = td->fiSrc.height;
+    double req, meanzoom;
+    for (int i = 0; i < trans->len; i++) {
+      zooms[i] = transform_get_required_zoom(&ts[i], w, h);
+    }
+    meanzoom = mean(zooms, trans->len) + td->conf.zoom; // add global zoom
+    // forward - propagation (to make the zooming smooth)
+    req = meanzoom;
+    for (int i = 0; i < trans->len; i++) {
+      req = VS_MAX(req, zooms[i]);
+      ts[i].zoom=VS_MAX(ts[i].zoom,req);
+      req= VS_MAX(meanzoom, req - td->conf.zoomSpeed); // zoom-out each frame
+    }
+    // backward - propagation
+    req = meanzoom;
+    for (int i = trans->len-1; i >= 0; i--) {
+      req = VS_MAX(req, zooms[i]);
+      ts[i].zoom=VS_MAX(ts[i].zoom,req);
+      req= VS_MAX(meanzoom, req - td->conf.zoomSpeed);
+    }
+    vs_free(zooms);
+  }else if (td->conf.zoom != 0){ /* apply global zoom */
+    for (int i = 0; i < trans->len; i++)
+      ts[i].zoom += td->conf.zoom;
+  }
+  return VS_OK;
+}
+
+
+/**
+ * vsLowPassTransforms: single step smoothing of transforms, using only the past.
+ *  see also vsPreprocessTransforms. Here only relative transformations are
+ *  considered (produced by motiondetection). Also cropping of too large transforms.
+ *
+ * Parameters:
+ *            td: transform private data structure
+ *           mem: memory for sliding average transformation
+ *         trans: current transform (from previous to current frame)
+ * Return value:
+ *         new transformation for current frame
+ * Preconditions:
+ *     None
+ */
+VSTransform vsLowPassTransforms(VSTransformData* td, VSSlidingAvgTrans* mem,
+                                const VSTransform* trans)
+{
+  // first call: use the first transformation as the average camera
+  // movement and hand back the null transform
+  if (!mem->initialized){
+    mem->initialized=1;
+    mem->avg=*trans;
+    mem->zoomavg=0.0;
+    mem->accum = null_transform();
+    return mem->accum;
+  }
+
+  const int    smoothing = td->conf.smoothing;
+  const double s   = 1.0/(smoothing + 1);          // weight of the newest sample
+  const double tau = 1.0/(3.0 * (smoothing + 1));  // decay applied to the accumulator
+
+  // update the sliding average (or just track the input without smoothing)
+  if(smoothing>0){
+    mem->avg = add_transforms_(mult_transform(&mem->avg, 1 - s),
+                               mult_transform(trans, s));
+  }else{
+    mem->avg = *trans;
+  }
+
+  /* lowpass filter:
+   * meaning high frequency must be transformed away
+   */
+  VSTransform newtrans = sub_transforms(trans, &mem->avg);
+
+  /* relative to absolute */
+  if (td->conf.relative) {
+    newtrans = add_transforms(&newtrans, &mem->accum);
+    mem->accum = newtrans;
+    if(smoothing>0){
+      // kill accumulating effects
+      mem->accum = mult_transform(&mem->accum, 1.0 - tau);
+    }
+  }
+
+  /* crop at maximal shift */
+  if (td->conf.maxShift != -1){
+    newtrans.x     = VS_CLAMP(newtrans.x, -td->conf.maxShift, td->conf.maxShift);
+    newtrans.y     = VS_CLAMP(newtrans.y, -td->conf.maxShift, td->conf.maxShift);
+  }
+  if (td->conf.maxAngle != - 1.0)
+    newtrans.alpha = VS_CLAMP(newtrans.alpha, -td->conf.maxAngle, td->conf.maxAngle);
+
+  /* Calc sliding optimal zoom
+   *  cheap algo is to only consider translations and to sliding avg
+   */
+  if (td->conf.optZoom != 0 && smoothing > 0){
+    const double zx = 2*newtrans.x/td->fiSrc.width;   // the zoom value only for x
+    const double zy = 2*newtrans.y/td->fiSrc.height;  // the zoom value only for y
+    const double reqzoom = 100* VS_MAX(fabs(zx),fabs(zy)); // maximum is required zoom
+    mem->zoomavg = (mem->zoomavg*(1-s) + reqzoom*s);
+    // since we only use past it is good to anticipate
+    //  and zoom a little in any case (so set td->zoom to 2 or so)
+    newtrans.zoom = mem->zoomavg;
+  }
+  if (td->conf.zoom != 0){
+    newtrans.zoom += td->conf.zoom;
+  }
+  return newtrans;
+}
+
+/*
+ * Local variables:
+ *   c-file-style: "stroustrup"
+ *   c-file-offsets: ((case-label . *) (statement-case-intro . *))
+ *   indent-tabs-mode: nil
+ *   c-basic-offset: 2 t
+ * End:
+ *
+ * vim: expandtab shiftwidth=2:
+ */
diff --git a/src/transform.h b/src/transform.h
new file mode 100644
index 0000000..0308ad3
--- /dev/null
+++ b/src/transform.h
@@ -0,0 +1,226 @@
+/*
+ *  transform.h
+ *
+ *  Copyright (C) Georg Martius - June 2007 - 2011
+ *   georg dot martius at web dot de
+ *
+ *  This file is part of vid.stab video stabilization library
+ *
+ *  vid.stab is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License,
+ *  as published by the Free Software Foundation; either version 2, or
+ *  (at your option) any later version.
+ *
+ *  vid.stab is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with GNU Make; see the file COPYING.  If not, write to
+ *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ */
+#ifndef __TRANSFORM_H
+#define __TRANSFORM_H
+
+#include <math.h>
+#include <libgen.h>
+#include "transformtype.h"
+#include "frameinfo.h"
+#include "vidstabdefines.h"
+#ifdef TESTING
+#include "transformfloat.h"
+#endif
+
+
+typedef struct _vstransformations {
+    VSTransform* ts; // array of transformations
+    int current;   // number of transforms already handed out by vsGetNextTransform (index of the next one)
+    int len;       // length of trans array
+    short warned_end; // whether we warned that there is no transform left
+} VSTransformations;
+
+typedef struct _vsslidingavgtrans {
+    VSTransform avg; // sliding average of the transforms fed to vsLowPassTransforms
+    VSTransform accum; // accumulator for relative to absolute conversion
+    double zoomavg;     // average zoom value
+    short initialized; // 0 until the first vsLowPassTransforms call has seeded avg
+} VSSlidingAvgTrans;
+
+
+/// interpolation types (VS_NBInterPolTypes is the number of types, not a type itself)
+typedef enum { VS_Zero, VS_Linear, VS_BiLinear, VS_BiCubic, VS_NBInterPolTypes} VSInterpolType;
+
+/// returns a name for the interpolation type
+const char* getInterpolationTypeName(VSInterpolType type);
+
+typedef enum { VSKeepBorder = 0, VSCropBorder } VSBorderType; // keep border from last frame(s) / black background
+typedef enum { VSOptimalL1 = 0, VSGaussian, VSAvg } VSCamPathAlgo; // VSOptimalL1 is not implemented yet: vsTransformDataInit replaces it with VSGaussian
+
+/**
+ * interpolate: general interpolation function pointer for one channel image data
+ *              for fixed point numbers/calculations
+ * Parameters:
+ *             rv: destination pixel (call by reference)
+ *            x,y: the source coordinates in the image img. Note these
+ *                 are real-value coordinates (in fixed point format 24.8),
+ *                 that's why we interpolate
+ *            img: source image; linesize: its row stride (presumably in bytes - TODO confirm)
+ *   width,height: dimension of image
+ *            def: default value if coordinates are out of range
+ * Return value:  None
+ */
+typedef void (*vsInterpolateFun)(uint8_t *rv, int32_t x, int32_t y,
+                                 const uint8_t *img, int linesize,
+                                 int width, int height, uint8_t def);
+
+typedef struct _VSTransformConfig {
+
+    /* whether to consider transforms as relative (to previous frame)
+     * or absolute transforms
+     */
+    int            relative;
+    /* number of frames (forward and backward)
+     * to use for smoothing transforms */
+    int            smoothing;
+    VSBorderType   crop;        // 1: black bg, 0: keep border from last frame(s)
+    int            invert;      // 1: invert transforms, 0: nothing
+    double         zoom;        // percentage to zoom: 0->no zooming 10:zoom in 10%
+    int            optZoom;     // 2: optimal adaptive zoom 1: optimal static zoom, 0: nothing
+    double         zoomSpeed;   // for adaptive zoom: zoom per frame in percent
+    VSInterpolType interpolType; // type of interpolation: 0->Zero,1->Lin,2->BiLin,3->BiCubic
+    int            maxShift;    // maximum number of pixels we will shift (-1: no limit)
+    double         maxAngle;    // maximum angle in rad (-1: no limit)
+    const char*    modName;     // module name (used for logging)
+    int            verbose;     // level of logging
+    // if 1 then the simple but fast method to determine the global motion is used
+    int            simpleMotionCalculation;
+    int            storeTransforms; // stores calculated transforms to file
+    int            smoothZoom;   // if 1 the zooming is also smoothed. Typically not recommended.
+    VSCamPathAlgo  camPathAlgo;  // algorithm to use for camera path optimization
+} VSTransformConfig;
+
+typedef struct _VSTransformData {
+    VSFrameInfo fiSrc;   // description of the source frames (copied in vsTransformDataInit)
+    VSFrameInfo fiDest;  // description of the destination frames (copied in vsTransformDataInit)
+
+    VSFrame src;         // copy of the current frame buffer, or the caller's frame when no copy is needed
+    VSFrame destbuf;     // pointer to an additional buffer or
+                         // to the destination buffer (depending on crop)
+    VSFrame dest;        // pointer to the destination buffer
+
+    short srcMalloced;   // 1 if the source buffer was internally malloced
+
+    vsInterpolateFun interpolate; // pointer to interpolation function
+#ifdef TESTING
+    _FLT(vsInterpolateFun) _FLT(interpolate);
+#endif
+
+    /* Options */
+    VSTransformConfig conf;
+
+    int initialized; // 1 if initialized and 2 if configured; NOTE(review): vsTransformDataInit does not write it - confirm who does
+} VSTransformData;
+
+
+static const char vs_transform_help[] = "" // NOTE(review): lists 'smoothing' def 10 while vsTransformGetDefaultConfig sets 15; 'sharpen'/'tripod' have no VSTransformConfig field (presumably frontend options) - confirm
+    "Overview\n"
+    "    Reads a file with transform information for each frame\n"
+    "     and applies them. See also filter stabilize.\n"
+    "Options\n"
+    "    'input'     path to the file used to read the transforms\n"
+    "                (def: inputfile.trf)\n"
+    "    'smoothing' number of frames*2 + 1 used for lowpass filtering \n"
+    "                used for stabilizing (def: 10)\n"
+    "    'maxshift'  maximal number of pixels to translate image\n"
+    "                (def: -1 no limit)\n"
+    "    'maxangle'  maximal angle in rad to rotate image (def: -1 no limit)\n"
+    "    'crop'      0: keep border (def), 1: black background\n"
+    "    'invert'    1: invert transforms(def: 0)\n"
+    "    'relative'  consider transforms as 0: absolute, 1: relative (def)\n"
+    "    'zoom'      percentage to zoom >0: zoom in, <0 zoom out (def: 0)\n"
+    "    'optzoom'   0: nothing, 1: determine optimal static zoom (def)\n"
+    "                i.e. no (or only little) border should be visible.\n"
+    "                2: determine optimal adaptive zoom\n"
+    "                Note that the value given at 'zoom' is added to the \n"
+    "                here calculated one\n"
+    "    'zoomspeed' for adaptive zoom: zoom per frame in percent \n"
+    "    'interpol'  type of interpolation: 0: no interpolation, \n"
+    "                1: linear (horizontal), 2: bi-linear (def), \n"
+    "                3: bi-cubic\n"
+    "    'sharpen'   amount of sharpening: 0: no sharpening (def: 0.8)\n"
+    "                uses filter unsharp with 5x5 matrix\n"
+    "    'tripod'    virtual tripod mode (=relative=0:smoothing=0)\n"
+    "    'help'      print this help message\n";
+
+/** returns the default config
+ */
+VSTransformConfig vsTransformGetDefaultConfig(const char* modName);
+
+/** initialized the VSTransformData structure using the config and allocates memory
+ *  for the frames and stuff
+ *  @return VS_OK on success otherwise VS_ERROR
+ */
+int vsTransformDataInit(VSTransformData* td, const VSTransformConfig* conf,
+                        const VSFrameInfo* fi_src, const VSFrameInfo* fi_dest);
+
+
+/** Deletes internal data structures.
+ * In order to use the VSTransformData again, you have to call vsTransformDataInit
+ */
+void vsTransformDataCleanup(VSTransformData* td);
+
+/// returns the current config
+void vsTransformGetConfig(VSTransformConfig* conf, const VSTransformData* td);
+
+/// returns the frame info for the src
+const VSFrameInfo* vsTransformGetSrcFrameInfo(const VSTransformData* td);
+/// returns the frame info for the dest
+const VSFrameInfo* vsTransformGetDestFrameInfo(const VSTransformData* td);
+
+
+/// initializes VSTransformations structure
+void vsTransformationsInit(VSTransformations* trans);
+/// deletes VSTransformations internal memory
+void vsTransformationsCleanup(VSTransformations* trans);
+
+/// return next Transform and increases internal counter
+VSTransform vsGetNextTransform(const VSTransformData* td, VSTransformations* trans);
+
+/** preprocesses the list of transforms all at once. Here the deshaking is calculated!
+ */
+int vsPreprocessTransforms(VSTransformData* td, VSTransformations* trans);
+
+/**
+ * vsLowPassTransforms: single step smoothing of transforms, using only the past.
+ *  see also vsPreprocessTransforms. */
+VSTransform vsLowPassTransforms(VSTransformData* td, VSSlidingAvgTrans* mem,
+                            const VSTransform* trans);
+
+/** call this function to prepare for a next transformation (transformPacked/transformPlanar)
+    and supply the src frame buffer and the frame to write to. These can be the same pointer
+    for an inplace operation (working on framebuffer directly)
+ */
+int vsTransformPrepare(VSTransformData* td, const VSFrame* src, VSFrame* dest);
+
+/// does the actual transformation
+int vsDoTransform(VSTransformData* td, VSTransform t);
+
+
+/** call this function to finish the transformation of a frame (transformPacked/transformPlanar)
+ */
+int vsTransformFinish(VSTransformData* td);
+
+
+#endif
+
+/*
+ * Local variables:
+ *   c-file-style: "stroustrup"
+ *   c-file-offsets: ((case-label . *) (statement-case-intro . *))
+ *   indent-tabs-mode: nil
+ * End:
+ *
+ * vim: expandtab shiftwidth=4:
+ */
diff --git a/src/transform_internal.h b/src/transform_internal.h
new file mode 100644
index 0000000..e4d3135
--- /dev/null
+++ b/src/transform_internal.h
@@ -0,0 +1,57 @@
+/*
+ *  transform_internal.h
+ *
+ *  Copyright (C) Georg Martius - June 2007 - 2011
+ *   georg dot martius at web dot de
+ *
+ *  This file is part of vid.stab video stabilization library
+ *
+ *  vid.stab is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License,
+ *  as published by the Free Software Foundation; either version 2, or
+ *  (at your option) any later version.
+ *
+ *  vid.stab is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with GNU Make; see the file COPYING.  If not, write to
+ *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ */
+#ifndef __TRANSFORM_INTERNAL_H
+#define __TRANSFORM_INTERNAL_H
+
+#include "transform.h"
+
+#include "transformfixedpoint.h"
+#ifdef TESTING
+#include "transformfloat.h"
+#endif
+
+/// returns the name of the interpolation type `type` as a C string
+const char* getInterpolationTypeName(VSInterpolType type);
+
+/** performs the smoothing of the camera path and modifies the transforms
+    to compensate for the jiggle
+    */
+int cameraPathOptimization(VSTransformData* td, VSTransformations* trans);
+// NOTE(review): presumably the individual smoothing variants (average, Gaussian, L1-optimal) -- confirm in transform.c
+int cameraPathAvg(VSTransformData* td, VSTransformations* trans);
+int cameraPathGaussian(VSTransformData* td, VSTransformations* trans);
+int cameraPathOptimalL1(VSTransformData* td, VSTransformations* trans);
+
+#endif
+
+/*
+ * Local variables:
+ *   c-file-style: "stroustrup"
+ *   c-file-offsets: ((case-label . *) (statement-case-intro . *))
+ *   indent-tabs-mode: nil
+ *   c-basic-offset: 2 t
+ * End:
+ *
+ * vim: expandtab shiftwidth=2:
+ */
diff --git a/src/transformfixedpoint.c b/src/transformfixedpoint.c
new file mode 100644
index 0000000..621aa23
--- /dev/null
+++ b/src/transformfixedpoint.c
@@ -0,0 +1,518 @@
+/*
+ *  transformfixedpoint.c
+ *
+ *  Fixed point implementation of image transformations (see also transformfloat.c/h)
+ *
+ *  Copyright (C) Georg Martius - June 2011
+ *   georg dot martius at web dot de
+ *
+ *  This file is part of vid.stab video stabilization library
+ *
+ *  vid.stab is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License,
+ *  as published by the Free Software Foundation; either version 2, or
+ *  (at your option) any later version.
+ *
+ *  vid.stab is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with GNU Make; see the file COPYING.  If not, write to
+ *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ *
+ */
+#include "transformfixedpoint.h"
+#include "transform.h"
+#include "transformtype_operations.h"
+
+// the orc code does not work at the moment (BUG in ORC?)
+// #include "orc/transformorc.h"
+
+//#include <math.h>
+//#include <libgen.h>
+
+#define iToFp8(v)  ((v)<<8)                            // integer -> fp8 (8 fractional bits)
+#define fToFp8(v)  ((int32_t)((v)*((float)0xFF)))      // NOTE(review): scales by 255, not 1<<8; left unchanged
+#define iToFp16(v) ((v)<<16)                           // integer -> fp16 (16 fractional bits)
+#define fToFp16(v) ((int32_t)((v)*((double)0xFFFF)))   // NOTE(review): scales by 65535, not 1<<16; left unchanged
+#define fp16To8(v) ((v)>>8)                            // drops 8 fractional bits
+//#define fp16To8(v) ( (v) && 0x80 == 1 ? ((v)>>8 + 1) : ((v)>>8) )
+#define fp24To8(v) ((v)>>16)                           // drops 16 fractional bits
+
+#define fp8ToI(v)  ((v)>>8)                            // fp8  -> integer, fractional bits dropped
+#define fp16ToI(v) ((v)>>16)                           // fp16 -> integer, fractional bits dropped
+#define fp8ToF(v)  ((v)/((double)(1<<8)))
+#define fp16ToF(v) ((v)/((double)(1<<16)))
+
+// rounding conversions: add one half (in the fixed-point format), then drop all fractional bits
+#define fp8_0_5 (1<<7)
+#define fp8ToIRound(v) (((v) + fp8_0_5) >> 8)
+//#define fp16ToIRound(v) ( (((v)>>15) & 0x1) == 0 ? ((v)>>16) : ((v)>>16)+1 )
+#define fp16_0_5 (1<<15)
+#define fp16ToIRound(v) (((v) + fp16_0_5) >> 16)
+
+/** interpolateBiLinBorder: bi-linear interpolation function that also works at the border.
+    This is used by many other interpolation methods at and outside the border, see interpolate */
+inline void interpolateBiLinBorder(uint8_t *rv, fp16 x, fp16 y,
+                                   const uint8_t *img, int img_linesize,
+                                   int32_t width, int32_t height, uint8_t def)
+{
+  int32_t ix_f = fp16ToI(x); // integer part of x (lower neighbour)
+  int32_t iy_f = fp16ToI(y); // integer part of y (lower neighbour)
+  int32_t ix_c = ix_f + 1;   // upper neighbour in x
+  int32_t iy_c = iy_f + 1;   // upper neighbour in y
+  if (ix_f < 0 || ix_c >= width || iy_f < 0 || iy_c >= height) { // at least one neighbour is outside the image
+    int32_t w  = 10; // number of pixels to blur out the border pixel outwards
+    int32_t xl = - w - ix_f;        // positive once ix_f < -w
+    int32_t yl = - w - iy_f;        // positive once iy_f < -w
+    int32_t xh = ix_c - w - width;  // positive once ix_c > width + w
+    int32_t yh = iy_c - w - height; // positive once iy_c > height + w
+    int32_t c = VS_MAX(VS_MIN(VS_MAX(xl, VS_MAX(yl, VS_MAX(xh, yh))),w),0); // weight of def, clamped to 0..w
+    // pixel at border of source image (coordinates clamped into 0..width-1 / 0..height-1)
+    short val_border = PIX(img, img_linesize, VS_MAX(VS_MIN(ix_f, width-1),0),
+                           VS_MAX(VS_MIN(iy_f, height-1),0));
+    *rv = (def * c + val_border * (w - c)) / w; // linear blend: c == 0 -> border pixel, c == w -> def
+  }else{
+    short v1 = PIXEL(img, img_linesize, ix_c, iy_c, width, height, def);
+    short v2 = PIXEL(img, img_linesize, ix_c, iy_f, width, height, def);
+    short v3 = PIXEL(img, img_linesize, ix_f, iy_c, width, height, def);
+    short v4 = PIXEL(img, img_linesize, ix_f, iy_f, width, height, def);
+    fp16 x_f = iToFp16(ix_f);
+    fp16 x_c = iToFp16(ix_c);
+    fp16 y_f = iToFp16(iy_f);
+    fp16 y_c = iToFp16(iy_c);
+    fp16 s   = fp16To8(v1*(x - x_f)+v3*(x_c - x))*fp16To8(y - y_f) + // blend along x, then weight by y
+      fp16To8(v2*(x - x_f) + v4*(x_c - x))*fp16To8(y_c - y) + 1;     // NOTE(review): +1 presumably offsets truncation
+    *rv = fp16ToIRound(s);
+  }
+}
+
+/** taken from http://en.wikipedia.org/wiki/Bicubic_interpolation for alpha=-0.5
+    in matrix notation:
+    a0-a3 are the neighboring points where the target point is between a1 and a2
+    t is the point of interpolation (position between a1 and a2) value between 0 and 1
+                          | 0, 2, 0, 0 |  |a0|
+                          |-1, 0, 1, 0 |  |a1|
+    1/2 * (1,t,t^2,t^3) * | 2,-5, 4,-1 |  |a2|
+                          |-1, 3,-3, 1 |  |a3|
+*/
+/* inline static short bicub_kernel(fp16 t, short a0, short a1, short a2, short a3){ */
+/*   // (2*a1 + t*((-a0+a2) + t*((2*a0-5*a1+4*a2-a3) + t*(-a0+3*a1-3*a2+a3) )) ) / 2; */
+/*   return ((iToFp16(2*a1) + t*(-a0+a2 */
+/*             + fp16ToI(t*((2*a0-5*a1+4*a2-a3) */
+/*              + fp16ToI(t*(-a0+3*a1-3*a2+a3)) )) ) */
+/*      ) ) >> 17; */
+/* } */
+
+inline static short bicub_kernel(fp16 t, short a0, short a1, short a2, short a3){
+  // Horner form of (2*a1 + t*((-a0+a2) + t*((2*a0-5*a1+4*a2-a3) + t*(-a0+3*a1-3*a2+a3) )) ) / 2, t given as fp16;
+  // fp16ToIRound adds 1/2 before each shift because of truncation errors; the result is not clamped here
+  return fp16ToIRound((iToFp16(2*a1) + t*(-a0+a2
+                                          + fp16ToIRound(t*((2*a0-5*a1+4*a2-a3)
+                                                            + fp16ToIRound(t*(-a0+3*a1-3*a2+a3)) )) )
+                       ) >> 1);
+}
+
+/** interpolateBiCub: bi-cubic interpolation using 4x4 pixels, result clamped to 0..255, see interpolate */
+inline void interpolateBiCub(uint8_t *rv, fp16 x, fp16 y,
+                             const uint8_t *img, int img_linesize,
+                             int width, int height, uint8_t def)
+{
+  // do a simple linear interpolation at the border (the 4x4 window ix_f-1..ix_f+2 must fit into the image)
+  int32_t ix_f = fp16ToI(x);
+  int32_t iy_f = fp16ToI(y);
+  if (unlikely(ix_f < 1 || ix_f > width - 3 || iy_f < 1 || iy_f > height - 3)) {
+    interpolateBiLinBorder(rv, x, y, img, img_linesize, width, height, def);
+  } else {
+    fp16 x_f = iToFp16(ix_f);
+    fp16 y_f = iToFp16(iy_f);
+    fp16 tx  = x-x_f; // fractional part of x, shared by the four row interpolations
+    short v1 = bicub_kernel(tx,
+                            PIX(img, img_linesize, ix_f-1, iy_f-1),
+                            PIX(img, img_linesize, ix_f,   iy_f-1),
+                            PIX(img, img_linesize, ix_f+1, iy_f-1),
+                            PIX(img, img_linesize, ix_f+2, iy_f-1));
+    short v2 = bicub_kernel(tx,
+                            PIX(img, img_linesize, ix_f-1, iy_f),
+                            PIX(img, img_linesize, ix_f,   iy_f),
+                            PIX(img, img_linesize, ix_f+1, iy_f),
+                            PIX(img, img_linesize, ix_f+2, iy_f));
+    short v3 = bicub_kernel(tx,
+                            PIX(img, img_linesize, ix_f-1, iy_f+1),
+                            PIX(img, img_linesize, ix_f,   iy_f+1),
+                            PIX(img, img_linesize, ix_f+1, iy_f+1),
+                            PIX(img, img_linesize, ix_f+2, iy_f+1));
+    short v4 = bicub_kernel(tx,
+                            PIX(img, img_linesize, ix_f-1, iy_f+2),
+                            PIX(img, img_linesize, ix_f,   iy_f+2),
+                            PIX(img, img_linesize, ix_f+1, iy_f+2),
+                            PIX(img, img_linesize, ix_f+2, iy_f+2));
+    short res = bicub_kernel(y-y_f, v1, v2, v3, v4); // combine the four rows along y
+    *rv = res < 0 ? 0 : (res < 255 ? res : 255);     // the cubic kernel can under- and overshoot: clamp both ends
+  }
+}
+
+
+/** interpolateBiLin: bi-linear interpolation function, see interpolate */
+inline void interpolateBiLin(uint8_t *rv, fp16 x, fp16 y,
+                             const uint8_t *img, int img_linesize,
+                             int32_t width, int32_t height, uint8_t def)
+{
+  int32_t ix_f = fp16ToI(x);
+  int32_t iy_f = fp16ToI(y);
+  if (unlikely(ix_f < 0 || ix_f > width - 2 || iy_f < 0 || iy_f > height - 2)) { // 2x2 window does not fit
+    interpolateBiLinBorder(rv, x, y, img, img_linesize, width, height, def);
+  } else {
+    int32_t ix_c = ix_f + 1;
+    int32_t iy_c = iy_f + 1;
+    short v1 = PIX(img, img_linesize, ix_c, iy_c);
+    short v2 = PIX(img, img_linesize, ix_c, iy_f);
+    short v3 = PIX(img, img_linesize, ix_f, iy_c);
+    short v4 = PIX(img, img_linesize, ix_f, iy_f);
+    fp16 x_f = iToFp16(ix_f);
+    fp16 x_c = iToFp16(ix_c);
+    fp16 y_f = iToFp16(iy_f);
+    fp16 y_c = iToFp16(iy_c);
+    fp16 s  = fp16To8(v1*(x - x_f) + v3*(x_c - x))*fp16To8(y - y_f) + // blend along x, then weight by y
+      fp16To8(v2*(x - x_f) + v4*(x_c - x))*fp16To8(y_c - y);
+    // it is underestimated due to truncation, so we add one
+    short res = fp16ToI(s);
+    *rv = res < 255 ? res+1 : 255; // every result below 255 is incremented; the output is capped at 255
+  }
+}
+
+/** interpolateLin: linear (only x) interpolation function, see interpolate */
+inline void interpolateLin(uint8_t *rv, fp16 x, fp16 y,
+                           const uint8_t *img, int img_linesize,
+                           int width, int height, uint8_t def)
+{
+  int32_t ix_f = fp16ToI(x);
+  int32_t ix_c = ix_f + 1;
+  fp16    x_c  = iToFp16(ix_c);
+  fp16    x_f  = iToFp16(ix_f);
+  int     y_n  = fp16ToIRound(y); // y is not interpolated: the nearest row is used
+
+  short v1 = PIXEL(img, img_linesize, ix_c, y_n, width, height, def); // NOTE(review): PIXEL presumably yields def outside the image
+  short v2 = PIXEL(img, img_linesize, ix_f, y_n, width, height, def);
+  fp16 s   = v1*(x - x_f) + v2*(x_c - x); // weights are the fp16 distances to the opposite neighbour
+  short res = fp16ToI(s);
+  *rv =   res < 255 ? res : 255;
+}
+
+/** interpolateZero: nearest-neighbour lookup without any blending between pixels, see interpolate */
+inline void interpolateZero(uint8_t *rv, fp16 x, fp16 y,
+                            const uint8_t *img, int img_linesize,
+                            int width, int height, uint8_t def)
+{
+  const int32_t col = fp16ToIRound(x); // fp16 x rounded to the nearest integer column
+  const int32_t row = fp16ToIRound(y); // fp16 y rounded to the nearest integer row
+  *rv = (uint8_t) PIXEL(img, img_linesize, col, row, width, height, def);
+}
+
+
+/**
+ * interpolateN: Bi-linear interpolation function for N channel image.
+ * Parameters:
+ *             rv: destination pixel (call by reference)
+ *            x,y: the source coordinates in the image img (fp16, may lie between pixels)
+ *            img: source image
+ *   width,height: dimension of image
+ *              N: number of channels
+ *        channel: channel number (0..N-1)
+ *            def: default value if coordinates are out of range
+ * Return value:  None
+ */
+inline void interpolateN(uint8_t *rv, fp16 x, fp16 y,
+                         const uint8_t *img, int img_linesize,
+                         int width, int height,
+                         uint8_t N, uint8_t channel,
+                         uint8_t def)
+{
+  int32_t ix_f = fp16ToI(x);
+  int32_t iy_f = fp16ToI(y);
+  if (ix_f < 0 || ix_f > width-1 || iy_f < 0 || iy_f > height - 1) {
+    *rv = def;
+  } else {
+    int32_t ix_c = ix_f + 1; // upper neighbours, still used for the interpolation weights
+    int32_t iy_c = iy_f + 1;
+    int32_t ix_r = ix_c < width  ? ix_c : ix_f; // read index: PIXN is unchecked, so stay inside on the last column
+    int32_t iy_r = iy_c < height ? iy_c : iy_f; // read index: same for the last row
+    short v1 = PIXN(img, img_linesize, ix_r, iy_r, N, channel);
+    short v2 = PIXN(img, img_linesize, ix_r, iy_f, N, channel);
+    short v3 = PIXN(img, img_linesize, ix_f, iy_r, N, channel);
+    short v4 = PIXN(img, img_linesize, ix_f, iy_f, N, channel);
+    fp16 x_f = iToFp16(ix_f);
+    fp16 x_c = iToFp16(ix_c);
+    fp16 y_f = iToFp16(iy_f);
+    fp16 y_c = iToFp16(iy_c);
+    fp16 s  = fp16To8(v1*(x - x_f)+v3*(x_c - x))*fp16To8(y - y_f) +
+      fp16To8(v2*(x - x_f) + v4*(x_c - x))*fp16To8(y_c - y);
+    *rv = fp16ToIRound(s);
+  }
+}
+
+
+/**
+ * transformPacked: applies current transformation to frame
+ * Parameters:
+ *         td: private data structure of this filter
+ *          t: transformation to apply (x, y, alpha and zoom are used)
+ * Return value: VS_OK (this function has no failure path)
+ * Preconditions:
+ *  The frame must be in Packed format
+ */
+int transformPacked(VSTransformData* td, VSTransform t)
+{
+  int x = 0, y = 0, k = 0;
+  uint8_t *D_1, *D_2;
+
+  D_1  = td->src.data[0];
+  D_2  = td->destbuf.data[0];
+  fp16 c_s_x = iToFp16(td->fiSrc.width/2);
+  fp16 c_s_y = iToFp16(td->fiSrc.height/2);
+  int32_t c_d_x = td->fiDest.width/2;
+  int32_t c_d_y = td->fiDest.height/2;
+
+  /* for each pixel in the destination image we calc the source
+   * coordinate and make an interpolation:
+   *      p_d = c_d + M(p_s - c_s) + t
+   * where p are the points, c the center coordinate,
+   *  _s source and _d destination,
+   *  t the translation, and M the rotation matrix
+   *      p_s = M^{-1}(p_d - c_d - t) + c_s
+   */
+  float z     = 1.0-t.zoom/100.0;         // t.zoom is given in percent
+  fp16 zcos_a = fToFp16(z*cos(-t.alpha)); // scaled cos
+  fp16 zsin_a = fToFp16(z*sin(-t.alpha)); // scaled sin
+  fp16  c_tx    = c_s_x - fToFp16(t.x);
+  fp16  c_ty    = c_s_y - fToFp16(t.y);
+  int channels = td->fiSrc.bytesPerPixel;
+  /* All channels; packed layout: the pixel in column x starts at byte x*channels of its row */
+  for (y = 0; y < td->fiDest.height; y++) {
+    int32_t y_d1 = (y - c_d_y);
+    for (x = 0; x < td->fiDest.width; x++) {
+      int32_t x_d1 = (x - c_d_x);
+      fp16 x_s  =  zcos_a * x_d1 + zsin_a * y_d1 + c_tx;
+      fp16 y_s  = -zsin_a * x_d1 + zcos_a * y_d1 + c_ty;
+
+      for (k = 0; k < channels; k++) { // iterate over colors
+        uint8_t *dest = &D_2[x * channels + y * td->destbuf.linesize[0] + k]; // NOTE(review): assumes linesize in bytes
+        interpolateN(dest, x_s, y_s, D_1, td->src.linesize[0],
+                     td->fiSrc.width, td->fiSrc.height,
+                     channels, k, td->conf.crop ? 16 : *dest); // out of range: 16 when cropping, else keep dest value
+      }
+    }
+  }
+  return VS_OK;
+}
+
+/**
+ * transformPlanar: applies current transformation to frame
+ *
+ * Parameters:
+ *         td: private data structure of this filter
+ *          t: transformation to apply (x, y, alpha and zoom are used)
+ * Return value:
+ *         VS_OK (this function has no failure path)
+ * Preconditions:
+ *  The frame must be in Planar format
+ *
+ * Fixed-point format 32 bit integer:
+ *  source coordinates and the scaled sin/cos are fp16 (val<<16)
+ *
+ */
+int transformPlanar(VSTransformData* td, VSTransform t)
+{
+  int32_t x = 0, y = 0;
+  uint8_t *dat_1, *dat_2;
+
+  if (t.alpha==0 && t.x==0 && t.y==0 && t.zoom == 0){ // identity transform: no resampling needed
+    if(vsFramesEqual(&td->src,&td->destbuf))
+      return VS_OK; // noop
+    else {
+      vsFrameCopy(&td->destbuf, &td->src, &td->fiSrc);
+      return VS_OK;
+    }
+  }
+
+  int plane;
+  for(plane=0; plane< td->fiSrc.planes; plane++){
+    dat_1  = td->src.data[plane];
+    dat_2  = td->destbuf.data[plane];
+    int wsub = vsGetPlaneWidthSubS(&td->fiSrc,plane);  // NOTE(review): presumably log2 of the horizontal subsampling
+    int hsub = vsGetPlaneHeightSubS(&td->fiSrc,plane); // NOTE(review): presumably log2 of the vertical subsampling
+    int dw = CHROMA_SIZE(td->fiDest.width , wsub);
+    int dh = CHROMA_SIZE(td->fiDest.height, hsub);
+    int sw = CHROMA_SIZE(td->fiSrc.width  , wsub);
+    int sh = CHROMA_SIZE(td->fiSrc.height , hsub);
+    uint8_t black = plane==0 ? 0 : 0x80; // fill value when cropping: 0 for plane 0, 0x80 for the other planes
+
+    fp16 c_s_x = iToFp16(sw / 2);
+    fp16 c_s_y = iToFp16(sh / 2);
+    int32_t c_d_x = dw / 2;
+    int32_t c_d_y = dh / 2;
+
+    float z     = 1.0-t.zoom/100.0;         // t.zoom is given in percent
+    fp16 zcos_a = fToFp16(z*cos(-t.alpha)); // scaled cos
+    fp16 zsin_a = fToFp16(z*sin(-t.alpha)); // scaled sin
+    fp16  c_tx    = c_s_x - (fToFp16(t.x) >> wsub); // translation is shifted down by the plane's wsub
+    fp16  c_ty    = c_s_y - (fToFp16(t.y) >> hsub); // translation is shifted down by the plane's hsub
+
+    /* for each pixel in the destination image we calc the source
+     * coordinate and make an interpolation:
+     *      p_d = c_d + M(p_s - c_s) + t
+     * where p are the points, c the center coordinate,
+     *  _s source and _d destination,
+     *  t the translation, and M the rotation and scaling matrix
+     *      p_s = M^{-1}(p_d - c_d - t) + c_s
+     */
+    for (y = 0; y < dh; y++) {
+      // swapping of the loops brought 15% performance gain
+      int32_t y_d1 = (y - c_d_y);
+      for (x = 0; x < dw; x++) {
+        int32_t x_d1 = (x - c_d_x);
+        fp16 x_s  =  zcos_a * x_d1 + zsin_a * y_d1 + c_tx;
+        fp16 y_s  = -zsin_a * x_d1 + zcos_a * y_d1 + c_ty;
+        uint8_t *dest = &dat_2[x + y * td->destbuf.linesize[plane]];
+        // inlining the interpolation function would bring 10%
+        //  (but then we cannot use the function pointer anymore...)
+        td->interpolate(dest, x_s, y_s, dat_1,
+                        td->src.linesize[plane], sw, sh,
+                        td->conf.crop ? black : *dest); // out of range: black when cropping, else keep dest value
+      }
+    }
+  }
+
+  return VS_OK;
+}
+
+
+
+/* /\** TESTING */
+/*  * transformPlanar_orc: applies current transformation to frame */
+/*  * */
+/*  * Parameters: */
+/*  *         td: private data structure of this filter */
+/*  * Return value:  */
+/*  *         0 for failture, 1 for success */
+/*  * Preconditions: */
+/*  *  The frame must be in Planar format */
+/*  * */
+/*  * Fixed-point format 32 bit integer: */
+/*  *  for image coords we use val<<8 */
+/*  *  for angle and zoom we use val<<16 */
+/*  * */
+/*  *\/ */
+/* int transformPlanar_orc(VSTransformData* td, VSTransform t) */
+/* { */
+/*     int32_t x = 0, y = 0; */
+/*     uint8_t *Y_1, *Y_2, *Cb_1, *Cb_2, *Cr_1, *Cr_2; */
+
+/*     if (t.alpha==0 && t.x==0 && t.y==0 && t.zoom == 0) return VS_OK; // noop */
+
+/*     Y_1  = td->src;   */
+/*     Y_2  = td->destbuf;   */
+/*     Cb_1 = td->src + td->fiSrc.width * td->fiSrc.height; */
+/*     Cb_2 = td->destbuf + td->fiDest.width * td->fiDest.height; */
+/*     Cr_1 = td->src + 5*td->fiSrc.width * td->fiSrc.height/4; */
+/*     Cr_2 = td->destbuf + 5*td->fiDest.width * td->fiDest.height/4; */
+/*     fp16 c_s_x = iToFp16(td->fiSrc.width / 2); */
+/*     fp16 c_s_y = iToFp16(td->fiSrc.height / 2); */
+/*     int32_t c_d_x = td->fiDest.width / 2; */
+/*     int32_t c_d_y = td->fiDest.height / 2;     */
+
+/*     float z     = 1.0-t.zoom/100.0; */
+/*     fp16 zcos_a = fToFp16(z*cos(-t.alpha)); // scaled cos */
+/*     fp16 zsin_a = fToFp16(z*sin(-t.alpha)); // scaled sin */
+/*     fp16  c_tx    = c_s_x - fToFp16(t.x); */
+/*     fp16  c_ty    = c_s_y - fToFp16(t.y); */
+
+/*     /\* for each pixel in the destination image we calc the source */
+/*      * coordinate and make an interpolation:  */
+/*      *      p_d = c_d + M(p_s - c_s) + t  */
+/*      * where p are the points, c the center coordinate,  */
+/*      *  _s source and _d destination,  */
+/*      *  t the translation, and M the rotation and scaling matrix */
+/*      *      p_s = M^{-1}(p_d - c_d - t) + c_s */
+/*      *\/ */
+/*     /\* Luminance channel *\/ */
+/*     fp16* x_ss = (fp16*)malloc(sizeof(fp16)*td->fiDest.width); */
+/*     fp16* y_ss = (fp16*)malloc(sizeof(fp16)*td->fiDest.width);     */
+/*     int32_t* xs = (int32_t*)malloc(sizeof(int32_t)*td->fiDest.width);         */
+/*     for (x = 0; x < td->fiDest.width; x++) { // this can go to td */
+/*       xs[x]=x; */
+/*     } */
+
+/*     for (y = 0; y < td->fiDest.height; y++) { */
+/*       int32_t y_d1 = (y - c_d_y);   */
+/*       fp16 sin_y   = zsin_a * y_d1; */
+/*       fp16 cos_y   = zcos_a * y_d1; */
+/*       for (x = 0; x < td->fiDest.width; x++) { */
+/*         int32_t x_d1 = (xs[x] - c_d_x); */
+/*         //x_ss[x]  =  zcos_a * x_d1 + zsin_a * y_d1 + c_tx; */
+/*   y_ss[x]  = -zsin_a * x_d1 + zcos_a * y_d1 + c_ty; */
+/*       } */
+/*       transform_one_line_optimized1 (x_ss, y_ss, xs, y_d1, c_d_x,  */
+/*              c_tx, c_ty, zcos_a, zsin_a, sin_y, cos_y,  */
+/*              td->fiDest.width); */
+/*       // transform_one_line_optimized (x_ss, y_ss, xs, y_d1, c_d_x,  */
+/*       //             c_tx, c_ty, zcos_a, zsin_a, td->fiDest.width); */
+
+/*       for (x = 0; x < td->fiDest.width; x++) { */
+/*   uint8_t *dest = &Y_2[x + y * td->fiDest.width]; */
+/*   td->interpolate(dest, x_ss[x], y_ss[x], Y_1,  */
+/*         td->fiSrc.width, td->fiSrc.height,  */
+/*         td->crop ? 16 : *dest); */
+/*       } */
+/*     } */
+
+/*     /\* Color channels *\/ */
+/*     int32_t ws2 = td->fiSrc.width/2; */
+/*     int32_t wd2 = td->fiDest.width/2; */
+/*     int32_t hs2 = td->fiSrc.height/2; */
+/*     int32_t hd2 = td->fiDest.height/2; */
+/*     fp16 c_tx2   = c_tx/2; */
+/*     fp16 c_ty2   = c_ty/2; */
+
+/*     for (y = 0; y < hd2; y++) { */
+/*       int32_t y_d1 = y - (c_d_y)/2; */
+/*       for (x = 0; x < wd2; x++) { */
+/*   int32_t x_d1 = x - (c_d_x)/2; */
+/*   fp16 x_s  =  zcos_a * x_d1 + zsin_a * y_d1 + c_tx2; */
+/*   fp16 y_s  = -zsin_a * x_d1 + zcos_a * y_d1 + c_ty2;  */
+/*   uint8_t *dest = &Cr_2[x + y * wd2]; */
+/*   td->interpolate(dest, x_s, y_s, Cr_1, ws2, hs2,  */
+/*         td->crop ? 128 : *dest); */
+/*   dest = &Cb_2[x + y * wd2]; */
+/*   td->interpolate(dest, x_s, y_s, Cb_1, ws2, hs2,  */
+/*         td->crop ? 128 : *dest); */
+/*       } */
+/*     } */
+
+/*     return VS_OK; */
+/* } */
+
+/*
+  some debugging stuff
+  FILE* f1 = fopen("transFP.pos","w");
+  fprintf(f1,"%i,%i:\t %f,%f\n", x, y, x_s / (float)(1<<16), y_s / (float)(1<<16));
+  fclose(f1);
+
+*/
+
+
+
+/*
+ * Local variables:
+ *   c-file-style: "stroustrup"
+ *   c-file-offsets: ((case-label . *) (statement-case-intro . *))
+ *   indent-tabs-mode: nil
+ *   c-basic-offset: 2 t
+ *
+ * End:
+ *
+ * vim: expandtab shiftwidth=2:
+ */
diff --git a/src/transformfixedpoint.h b/src/transformfixedpoint.h
new file mode 100644
index 0000000..519d613
--- /dev/null
+++ b/src/transformfixedpoint.h
@@ -0,0 +1,84 @@
+/*
+ *  transformfixedpoint.h
+ *
+ *  Copyright (C) Georg Martius - June 2011
+ *   georg dot martius at web dot de
+ *
+ *  This file is part of vid.stab video stabilization library
+ *
+ *  vid.stab is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License,
+ *  as published by the Free Software Foundation; either version 2, or
+ *  (at your option) any later version.
+ *
+ *  vid.stab is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with GNU Make; see the file COPYING.  If not, write to
+ *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ *  This work is licensed under the Creative Commons
+ *  Attribution-NonCommercial-ShareAlike 2.5 License. To view a copy of
+ *  this license, visit http://creativecommons.org/licenses/by-nc-sa/2.5/
+ *  or send a letter to Creative Commons, 543 Howard Street, 5th Floor,
+ *  San Francisco, California, 94105, USA.
+ *  This EXCLUDES COMMERCIAL USAGE
+ *
+ */
+#ifndef __TRANSFORMFIXEDPOINT_H
+#define __TRANSFORMFIXEDPOINT_H
+
+#include "transformtype.h"
+#include <stdint.h>
+
+typedef int32_t fp8;  // 32 bit fixed-point value; the name suggests 8 fractional bits
+typedef int32_t fp16; // 32 bit fixed-point value; see also the definition of interpolFun in transform.h
+
+struct _VSTransformData;
+
+/// does the actual transformation in Packed space
+int transformPacked(struct _VSTransformData* td, VSTransform t);
+
+/// does the actual transformation in Planar space
+int transformPlanar(struct _VSTransformData* td, VSTransform t);
+
+// testing (NOTE(review): the implementation in transformfixedpoint.c is currently commented out)
+/// does the actual transformation in Planar space
+int transformPlanar_orc(struct _VSTransformData* td, VSTransform t);
+
+
+/* forward declarations, please see the .c file for documentation */
+void interpolateBiLinBorder(uint8_t *rv, fp16 x, fp16 y,
+                            const uint8_t *img, int img_linesize,
+                            int w, int h, uint8_t def);
+void interpolateBiCub(uint8_t *rv, fp16 x, fp16 y,
+                      const uint8_t *img, int img_linesize,
+                      int width, int height, uint8_t def);
+void interpolateBiLin(uint8_t *rv, fp16 x, fp16 y,
+                      const uint8_t *img, int img_linesize,
+                      int w, int h, uint8_t def);
+void interpolateLin(uint8_t *rv, fp16 x, fp16 y,
+                    const uint8_t *img, int img_linesize,
+                    int w, int h, uint8_t def);
+void interpolateZero(uint8_t *rv, fp16 x, fp16 y,
+                     const uint8_t *img, int img_linesize,
+                     int w, int h, uint8_t def);
+void interpolateN(uint8_t *rv, fp16 x, fp16 y,
+                  const uint8_t *img, int img_linesize,
+                  int width, int height,
+                  uint8_t N, uint8_t channel, uint8_t def);
+
+#endif
+
+/*
+ * Local variables:
+ *   c-file-style: "stroustrup"
+ *   c-file-offsets: ((case-label . *) (statement-case-intro . *))
+ *   indent-tabs-mode: nil
+ * End:
+ *
+ * vim: expandtab shiftwidth=4:
+ */
diff --git a/src/transformfloat.c b/src/transformfloat.c
new file mode 100644
index 0000000..ee66829
--- /dev/null
+++ b/src/transformfloat.c
@@ -0,0 +1,342 @@
+/*
+ *  transformfloat.c
+ *
+ *  Floating point image transformations
+ *
+ *  Copyright (C) Georg Martius - June 2011
+ *   georg dot martius at web dot de
+ *
+ *  This file is part of vid.stab video stabilization library
+ *
+ *  vid.stab is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License,
+ *  as published by the Free Software Foundation; either version 2, or
+ *  (at your option) any later version.
+ *
+ *  vid.stab is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with GNU Make; see the file COPYING.  If not, write to
+ *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ */
+#include "transformfloat.h"
+#include "transform.h"
+#include "transformtype_operations.h"
+
+
+/** interpolateBiLinBorder: bi-linear interpolation that may be called at and outside the border
+    (interpolateBiCub and interpolateBiLin fall back to it); all pixel reads go through PIXEL with def */
+void _FLT(interpolateBiLinBorder)(uint8_t *rv, float x, float y,
+                                  const uint8_t *img, int img_linesize,
+                                  int width, int height, uint8_t def)
+{
+  const int left   = myfloor(x);
+  const int top    = myfloor(y);
+  const int right  = left+1;
+  const int bottom = top+1;
+  const short p_rb = PIXEL(img, img_linesize, right, bottom, width, height, def);
+  const short p_rt = PIXEL(img, img_linesize, right, top,    width, height, def);
+  const short p_lb = PIXEL(img, img_linesize, left,  bottom, width, height, def);
+  const short p_lt = PIXEL(img, img_linesize, left,  top,    width, height, def);
+  const float blend = (p_rb*(x - left)+p_lb*(right - x))*(y - top) +
+    (p_rt*(x - left) + p_lt*(right - x))*(bottom - y);
+  *rv = (uint8_t)blend;
+}
+
+/** cubic interpolation kernel for alpha=-0.5, taken from
+    http://en.wikipedia.org/wiki/Bicubic_interpolation, in matrix notation:
+    a0-a3 are the neighboring points where the target point is between a1 and a2
+    t is the point of interpolation (position between a1 and a2) value between 0 and 1
+                          | 0, 2, 0, 0 |  |a0|
+                          |-1, 0, 1, 0 |  |a1|
+    1/2 * (1,t,t^2,t^3) * | 2,-5, 4,-1 |  |a2|
+                          |-1, 3,-3, 1 |  |a3|
+*/
+static short _FLT(bicub_kernel)(float t, short a0, short a1, short a2, short a3){
+  return (2*a1 + t*((-a0+a2) + t*((2*a0-5*a1+4*a2-a3) + t*(-a0+3*a1-3*a2+a3) )) ) / 2;
+}
+
+/** interpolateBiCub: bi-cubic interpolation using 4x4 pixel, result clamped to 0..255, see interpolate */
+void _FLT(interpolateBiCub)(uint8_t *rv, float x, float y,
+                            const uint8_t *img, int img_linesize,
+                            int width, int height, uint8_t def)
+{
+  // bi-linear fallback wherever the 4x4 window (x_f-1..x_f+2, y_f-1..y_f+2) would leave the image
+  if (x < 1 || x >= width - 2 || y < 1 || y >= height - 2) {
+    _FLT(interpolateBiLinBorder)(rv, x, y, img, img_linesize, width, height, def);
+  } else {
+    int x_f = myfloor(x), y_f = myfloor(y);
+    float tx = x-x_f;
+    short v1 = _FLT(bicub_kernel)(tx,
+                                  PIX(img, img_linesize, x_f-1, y_f-1),
+                                  PIX(img, img_linesize, x_f,   y_f-1),
+                                  PIX(img, img_linesize, x_f+1, y_f-1),
+                                  PIX(img, img_linesize, x_f+2, y_f-1));
+    short v2 = _FLT(bicub_kernel)(tx,
+                                  PIX(img, img_linesize, x_f-1, y_f),
+                                  PIX(img, img_linesize, x_f,   y_f),
+                                  PIX(img, img_linesize, x_f+1, y_f),
+                                  PIX(img, img_linesize, x_f+2, y_f));
+    short v3 = _FLT(bicub_kernel)(tx,
+                                  PIX(img, img_linesize, x_f-1, y_f+1),
+                                  PIX(img, img_linesize, x_f,   y_f+1),
+                                  PIX(img, img_linesize, x_f+1, y_f+1),
+                                  PIX(img, img_linesize, x_f+2, y_f+1));
+    short v4 = _FLT(bicub_kernel)(tx,
+                                  PIX(img, img_linesize, x_f-1, y_f+2),
+                                  PIX(img, img_linesize, x_f,   y_f+2),
+                                  PIX(img, img_linesize, x_f+1, y_f+2),
+                                  PIX(img, img_linesize, x_f+2, y_f+2));
+    short res = _FLT(bicub_kernel)(y-y_f, v1, v2, v3, v4);
+    *rv = (uint8_t)(res < 0 ? 0 : (res > 255 ? 255 : res)); // kernel overshoot must not wrap around
+  }
+}
+
+
+/** interpolateBiLin: bi-linear interpolation; border positions go to interpolateBiLinBorder */
+void _FLT(interpolateBiLin)(uint8_t *rv, float x, float y,
+                            const uint8_t *img, int img_linesize,
+                            int width, int height, uint8_t def)
+{
+  if (x < 0 || x >= width - 1 || y < 0 || y >= height - 1) { // x_c or y_c would leave the image
+    _FLT(interpolateBiLinBorder)(rv, x, y, img, img_linesize, width, height, def);
+  } else {
+    int x_f = myfloor(x);
+    int x_c = x_f+1;
+    int y_f = myfloor(y);
+    int y_c = y_f+1;
+    short v1 = PIX(img, img_linesize, x_c, y_c);
+    short v2 = PIX(img, img_linesize, x_c, y_f);
+    short v3 = PIX(img, img_linesize, x_f, y_c);
+    short v4 = PIX(img, img_linesize, x_f, y_f);
+    float s  = (v1*(x - x_f)+v3*(x_c - x))*(y - y_f) +
+      (v2*(x - x_f) + v4*(x_c - x))*(y_c - y);
+    *rv = (uint8_t)s;
+  }
+}
+
+
+/** interpolateLin: linear interpolation along x only (y is passed through myround), see interpolate */
+void _FLT(interpolateLin)(uint8_t *rv, float x, float y,
+                          const uint8_t *img, int img_linesize,
+                          int width, int height, uint8_t def)
+{
+  const int row   = myround(y);
+  const int left  = myfloor(x);
+  const int right = left+1;
+  const float p_right = PIXEL(img, img_linesize, right, row, width, height, def);
+  const float p_left  = PIXEL(img, img_linesize, left,  row, width, height, def);
+  const float blend   = p_right*(x - left) + p_left*(right - x);
+  *rv = (uint8_t)blend;
+}
+
+/** interpolateZero: nearest neighbor lookup without any blending, see interpolate */
+void _FLT(interpolateZero)(uint8_t *rv, float x, float y,
+                           const uint8_t *img, int img_linesize,
+                           int width, int height, uint8_t def)
+{
+  const int col = myround(x);
+  const int row = myround(y);
+  *rv = (uint8_t) PIXEL(img, img_linesize, col, row, width, height, def);
+}
+
+
+/**
+ * interpolateN: Bi-linear interpolation function for N channel image.
+ *
+ * Parameters:
+ *             rv: destination pixel (call by reference)
+ *            x,y: real-valued source coordinates in the image img
+ *            img: source image
+ *   img_linesize: line size of img, handed on to PIXELN
+ *   width,height: dimension of image
+ *              N: number of channels
+ *        channel: channel number (0..N-1)
+ *            def: default value if coordinates are out of range
+ * Return value:  None
+ */
+void _FLT(interpolateN)(uint8_t *rv, float x, float y,
+                        const uint8_t *img, int img_linesize,
+                        int width, int height,
+                        uint8_t N, uint8_t channel,
+                        uint8_t def)
+{
+  if (x < - 1 || x > width || y < -1 || y > height) { // clearly outside the image
+    *rv = def;
+    return;
+  }
+  const int left   = myfloor(x);
+  const int top    = myfloor(y);
+  const int right  = left+1;
+  const int bottom = top+1;
+  const short p_rb = PIXELN(img, img_linesize, right, bottom, width, height, N, channel, def);
+  const short p_rt = PIXELN(img, img_linesize, right, top,    width, height, N, channel, def);
+  const short p_lb = PIXELN(img, img_linesize, left,  bottom, width, height, N, channel, def);
+  const short p_lt = PIXELN(img, img_linesize, left,  top,    width, height, N, channel, def);
+  const float blend = (p_rb*(x - left)+p_lb*(right - x))*(y - top) +
+    (p_rt*(x - left) + p_lt*(right - x))*(bottom - y);
+  *rv = (uint8_t)blend;
+}
+
+
+/**
+ * transformPacked: applies the transformation t to a frame in packed pixel layout
+ * Parameters:
+ *         td: private data structure of this filter;  t: transformation to apply
+ * Return value:
+ *         0 for failure, 1 for success (the code below always returns 1)
+ * Preconditions:
+ *  The frame must be in Packed format
+ * TODO Add zoom! (t.zoom is not read in this function)
+ * TODO Add bytes per pixel usage
+ */
+int _FLT(transformPacked)(VSTransformData* td, VSTransform t)
+{
+  int x = 0, y = 0, z = 0;
+  uint8_t *D_1, *D_2;
+  char crop = td->conf.crop;
+
+  D_1  = td->src.data[0];     // source pixels
+  D_2  = td->destbuf.data[0]; // destination pixels
+  float c_s_x = td->fiSrc.width/2.0;   // center of the source frame
+  float c_s_y = td->fiSrc.height/2.0;
+  float c_d_x = td->fiDest.width/2.0;  // center of the destination frame
+  float c_d_y = td->fiDest.height/2.0;
+
+  /* for each pixel in the destination image we calc the source
+   * coordinate and make an interpolation:
+   *      p_d = c_d + M(p_s - c_s) + t
+   * where p are the points, c the center coordinate,
+   *  _s source and _d destination,
+   *  t the translation, and M the rotation matrix
+   *      p_s = M^{-1}(p_d - c_d - t) + c_s
+   */
+  int channels = td->fiSrc.bytesPerPixel;
+  /* All channels */
+  if (fabs(t.alpha) > 0.1*M_PI/180.0) { // rotate only if |alpha| exceeds 0.1 deg
+    for (x = 0; x < td->fiDest.width; x++) {
+      for (y = 0; y < td->fiDest.height; y++) {
+        float x_d1 = (x - c_d_x); // destination position relative to its center
+        float y_d1 = (y - c_d_y);
+        float x_s  =  cos(-t.alpha) * x_d1
+          + sin(-t.alpha) * y_d1 + c_s_x -t.x;
+        float y_s  = -sin(-t.alpha) * x_d1
+          + cos(-t.alpha) * y_d1 + c_s_y -t.y;
+        for (z = 0; z < channels; z++) { // iterate over colors
+          uint8_t *dest = &D_2[x + y * td->destbuf.linesize[0]+z]; // NOTE(review): x is not scaled by channels, unlike the branch below - confirm
+          _FLT(interpolateN)(dest, x_s, y_s, D_1, td->src.linesize[0],
+                             td->fiSrc.width, td->fiSrc.height,
+                             channels, z, crop ? 16 : *dest); // default: 16 when cropping, else the current dest value
+        }
+      }
+    }
+  }else {
+    /* no rotation, just translation by the rounded t.x / t.y
+     *(also no interpolation, since no size change (so far)
+     */
+    int round_tx = myround(t.x);
+    int round_ty = myround(t.y);
+    for (x = 0; x < td->fiDest.width; x++) {
+      for (y = 0; y < td->fiDest.height; y++) {
+        for (z = 0; z < channels; z++) { // iterate over colors
+          short p = PIXELN(D_1, td->src.linesize[0], x - round_tx, y - round_ty,
+                           td->fiSrc.width, td->fiSrc.height, channels, z, -1); // -1 is the out-of-range marker tested below
+          if (p == -1) {
+            if (crop == 1) // otherwise the destination byte is left as it is
+              D_2[(x + y * td->destbuf.linesize[0])*channels+z] = 16;
+          } else {
+            D_2[(x + y * td->destbuf.linesize[0])*channels+z] = (uint8_t)p;
+          }
+        }
+      }
+    }
+  }
+  return 1;
+}
+
+/**
+ * transformPlanar: applies the transformation t to every plane of a planar frame
+ *
+ * Parameters:
+ *         td: private data structure of this filter;  t: transformation to apply
+ * Return value:
+ *         VS_OK on every path. NOTE(review): this text used to say "0 failure, 1 success" - confirm
+ * Preconditions:
+ *  The frame must be in Planar format
+ */
+int _FLT(transformPlanar)(VSTransformData* td, VSTransform t)
+{
+  int x = 0, y = 0;
+  uint8_t *dat_1, *dat_2;
+  char crop = td->conf.crop;
+
+  if (t.alpha==0 && t.x==0 && t.y==0 && t.zoom == 0){ // nothing to transform
+    if(vsFramesEqual(&td->src,&td->destbuf))
+      return VS_OK; // noop
+    else {
+      vsFrameCopy(&td->destbuf, &td->src, &td->fiSrc); // plain copy of src into destbuf
+      return VS_OK;
+    }
+  }
+  int plane;
+  for(plane=0; plane< td->fiSrc.planes; plane++){
+    dat_1  = td->src.data[plane];     // source plane
+    dat_2  = td->destbuf.data[plane]; // destination plane
+
+    int wsub = vsGetPlaneWidthSubS(&td->fiSrc,plane);  // used below as a shift count for widths
+    int hsub = vsGetPlaneHeightSubS(&td->fiSrc,plane); // used below as a shift count for heights
+    float c_s_x = (td->fiSrc.width  >> wsub)/2.0;
+    float c_s_y = (td->fiSrc.height >> hsub)/2.0;
+    float c_d_x = (td->fiDest.width >> wsub)/2.0;
+    float c_d_y = (td->fiDest.height>> hsub)/2.0;
+    uint8_t black = plane==0 ? 0 : 0x80; // crop fill value: 0 for plane 0, 0x80 for the others
+
+    float z = 1.0-t.zoom/100; // t.zoom is divided by 100 here
+    float zcos_a = z*cos(-t.alpha); // scaled cos
+    float zsin_a = z*sin(-t.alpha); // scaled sin
+    float tx = t.x / (float)(1 << wsub); // translation scaled down to this plane
+    float ty = t.y / (float)(1 << hsub);
+
+    /* for each pixel in the destination image we calc the source
+     * coordinate and make an interpolation:
+     *      p_d = c_d + M(p_s - c_s) + t
+     * where p are the points, c the center coordinate,
+     *  _s source and _d destination,
+     *  t the translation, and M the rotation and scaling matrix
+     *      p_s = M^{-1}(p_d - c_d - t) + c_s
+     */
+    int w = CHROMA_SIZE(td->fiDest.width,wsub);
+    int h = CHROMA_SIZE(td->fiDest.height,hsub);
+    int sw = CHROMA_SIZE(td->fiSrc.width,wsub);
+    int sh = CHROMA_SIZE(td->fiSrc.height,hsub);
+    for (x = 0; x < w; x++) {
+      for (y = 0; y < h; y++) {
+        float x_d1 = (x - c_d_x); // destination position relative to its center
+        float y_d1 = (y - c_d_y);
+        float x_s  =  zcos_a * x_d1
+          + zsin_a * y_d1 + c_s_x -tx;
+        float y_s  = -zsin_a * x_d1
+          + zcos_a * y_d1 + c_s_y -ty;
+        uint8_t *dest = &dat_2[x + y * td->destbuf.linesize[plane]];
+        td->_FLT(interpolate)(dest, x_s, y_s, dat_1, td->src.linesize[plane],
+                              sw, sh, crop ? black : *dest); // default: black when cropping, else the current dest value
+      }
+    }
+  }
+  return VS_OK;
+}
+
+/*
+ * Local variables:
+ *   c-file-style: "stroustrup"
+ *   c-file-offsets: ((case-label . *) (statement-case-intro . *))
+ *   indent-tabs-mode: nil
+ *   c-basic-offset: 2 t
+ * End:
+ *
+ * vim: expandtab shiftwidth=2:
+ */
diff --git a/src/transformfloat.h b/src/transformfloat.h
new file mode 100644
index 0000000..9799417
--- /dev/null
+++ b/src/transformfloat.h
@@ -0,0 +1,98 @@
+/*
+ *  transformfloat.h
+ *
+ *  Copyright (C) Georg Martius - June 2011
+ *   georg dot martius at web dot de
+ *
+ *  This file is part of vid.stab video stabilization library
+ *
+ *  vid.stab is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License,
+ *  as published by the Free Software Foundation; either version 2, or
+ *  (at your option) any later version.
+ *
+ *  vid.stab is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with GNU Make; see the file COPYING.  If not, write to
+ *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ *  This work is licensed under the Creative Commons
+ *  Attribution-NonCommercial-ShareAlike 2.5 License. To view a copy of
+ *  this license, visit http://creativecommons.org/licenses/by-nc-sa/2.5/
+ *  or send a letter to Creative Commons, 543 Howard Street, 5th Floor,
+ *  San Francisco, California, 94105, USA.
+ *  This EXCLUDES COMMERCIAL USAGE
+ *
+ */
+#ifndef __TRANSFORMFLOAT_H
+#define __TRANSFORMFLOAT_H
+
+#include "transformtype.h"
+#include <stdint.h>
+/* _FLT(n): appends _float to n when TESTING is defined, otherwise leaves n unchanged */
+#ifdef TESTING
+#define _FLT(n) n ## _float
+#else
+#define _FLT(n) n
+#endif
+
+struct _VSTransformData;
+
+/// does the actual transformation in Packed space
+int _FLT(transformPacked)(struct _VSTransformData* td, VSTransform t);
+/// does the actual transformation in Planar space
+int _FLT(transformPlanar)(struct _VSTransformData* td, VSTransform t);
+
+/**
+ * interpolate: general interpolation function pointer for one channel image data
+ *
+ * Parameters:
+ *             rv: destination pixel (call by reference)
+ *            x,y: the source coordinates in the image img. Note this
+ *                 are real-value coordinates, that's why we interpolate
+ *            img: source image
+ *   img_linesize: length of one line in bytes (>= width)
+ *   width,height: dimension of image
+ *            def: default value if coordinates are out of range
+ * Return value:  None
+ */
+typedef void (*_FLT(vsInterpolateFun))(uint8_t *rv, float x, float y,
+                                       const uint8_t *img, int img_linesize,
+                                       int width, int height, uint8_t def);
+
+/* forward declarations, please look in the .c file for documentation */
+void _FLT(interpolateBiLinBorder)(uint8_t *rv, float x, float y,
+                                  const uint8_t *img, int img_linesize,
+                                  int w, int h, uint8_t def);
+void _FLT(interpolateBiCub)(uint8_t *rv, float x, float y,
+                            const uint8_t *img, int img_linesize,
+                            int width, int height, uint8_t def);
+void _FLT(interpolateBiLin)(uint8_t *rv, float x, float y,
+                            const uint8_t *img, int img_linesize,
+                            int w, int h, uint8_t def);
+void _FLT(interpolateLin)(uint8_t *rv, float x, float y,
+                          const uint8_t *img, int img_linesize,
+                          int w, int h, uint8_t def);
+void _FLT(interpolateZero)(uint8_t *rv, float x, float y,
+                           const uint8_t *img, int img_linesize,
+                           int w, int h, uint8_t def);
+void _FLT(interpolateN)(uint8_t *rv, float x, float y,
+                        const uint8_t *img, int img_linesize,
+                        int width, int height,
+                        uint8_t N, uint8_t channel, uint8_t def);
+
+#endif
+
+/*
+ * Local variables:
+ *   c-file-style: "stroustrup"
+ *   c-file-offsets: ((case-label . *) (statement-case-intro . *))
+ *   indent-tabs-mode: nil
+ * End:
+ *
+ * vim: expandtab shiftwidth=4:
+ */
diff --git a/src/transformtype.c b/src/transformtype.c
new file mode 100644
index 0000000..f88fd81
--- /dev/null
+++ b/src/transformtype.c
@@ -0,0 +1,483 @@
+/*
+ *  transformtype.c
+ *
+ *  Copyright (C) Georg Martius - June 2007
+ *
+ *  This file is part of transcode, a video stream processing tool
+ *
+ *  transcode is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2, or (at your option)
+ *  any later version.
+ *
+ *  transcode is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with GNU Make; see the file COPYING.  If not, write to
+ *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ */
+#include <stdlib.h>
+#include <math.h>
+#include <string.h>
+#include "transformtype.h"
+#include "transformtype_operations.h"
+#include "vidstabdefines.h"
+
+/***********************************************************************
+ * helper functions to create and operate with transforms.
+ * all functions are non-destructive
+ */
+
+/* build a transform from explicit values for each of its seven fields */
+VSTransform new_transform(double x, double y, double alpha,
+                          double zoom, double barrel, double rshutter, int extra)
+{
+  const VSTransform created = {
+    .x        = x,
+    .y        = y,
+    .alpha    = alpha,
+    .zoom     = zoom,
+    .barrel   = barrel,
+    .rshutter = rshutter,
+    .extra    = extra };
+  return created;
+}
+
+/* create a transform whose fields are all zero */
+VSTransform null_transform(void) {
+  VSTransform zero = new_transform(0, 0, 0, 0, 0, 0, 0);
+  return zero;
+}
+
+/* field-wise sum of two transforms; extra is the logical OR of both extra values */
+VSTransform add_transforms(const VSTransform* t1, const VSTransform* t2)
+{
+  const VSTransform sum = {
+    .x        = t1->x + t2->x,
+    .y        = t1->y + t2->y,
+    .alpha    = t1->alpha + t2->alpha,
+    .zoom     = t1->zoom + t2->zoom,
+    .barrel   = t1->barrel + t2->barrel,
+    .rshutter = t1->rshutter + t2->rshutter,
+    .extra    = t1->extra || t2->extra };
+  return sum;
+}
+
+/* by-value convenience wrapper around add_transforms */
+VSTransform add_transforms_(const VSTransform t1, const VSTransform t2) {
+  VSTransform sum = add_transforms(&t1, &t2);
+  return sum;
+}
+
+/* field-wise difference t1 - t2; extra is the logical OR of both extra values */
+VSTransform sub_transforms(const VSTransform* t1, const VSTransform* t2)
+{
+  const VSTransform diff = {
+    .x        = t1->x - t2->x,
+    .y        = t1->y - t2->y,
+    .alpha    = t1->alpha - t2->alpha,
+    .zoom     = t1->zoom - t2->zoom,
+    .barrel   = t1->barrel - t2->barrel,
+    .rshutter = t1->rshutter - t2->rshutter,
+    .extra    = t1->extra || t2->extra };
+  return diff;
+}
+
+/* scales every double field of a transform by f; extra is copied unchanged */
+VSTransform mult_transform(const VSTransform* t1, double f)
+{
+  const VSTransform scaled = {
+    .x        = t1->x        * f,
+    .y        = t1->y        * f,
+    .alpha    = t1->alpha    * f,
+    .zoom     = t1->zoom     * f,
+    .barrel   = t1->barrel   * f,
+    .rshutter = t1->rshutter * f,
+    .extra    = t1->extra };
+  return scaled;
+}
+
+/* by-value convenience wrapper around mult_transform */
+VSTransform mult_transform_(const VSTransform t1, double f) {
+  VSTransform scaled = mult_transform(&t1, f);
+  return scaled;
+}
+
+void storeVSTransform(FILE* f, const VSTransform* t){ // writes one "Trans" text line to f
+  fprintf(f,"Trans %lf %lf %lf %lf %i\n", t->x, t->y, t->alpha, t->zoom, t->extra); // barrel and rshutter are not written
+}
+
+
+PreparedTransform prepare_transform(const VSTransform* t, const VSFrameInfo* fi){
+  const double scale = 1.0+t->zoom/100.0;   // zoom is divided by 100 to get the scale
+  PreparedTransform prepared;
+  prepared.t      = t;                      // the pointer is stored, t is not copied
+  prepared.c_x    = fi->width / 2;
+  prepared.c_y    = fi->height / 2;
+  prepared.zcos_a = scale*cos(t->alpha);    // scaled cos
+  prepared.zsin_a = scale*sin(t->alpha);    // scaled sin
+  return prepared;
+}
+
+Vec transform_vec(const PreparedTransform* pt, const Vec* v){
+  double tx, ty;
+  Vec out;
+  transform_vec_double(&tx, &ty, pt, v);
+  out.x = tx; out.y = ty; // double is converted to the int fields of Vec
+  return out;
+}
+
+void transform_vec_double(double* x, double* y, const PreparedTransform* pt, const Vec* v){
+  const double dx = v->x - pt->c_x; // position relative to the center (c_x, c_y)
+  const double dy = v->y - pt->c_y;
+  *x =  pt->zcos_a * dx + pt->zsin_a * dy + pt->t->x + pt->c_x;
+  *y = -pt->zsin_a * dx + pt->zcos_a * dy + pt->t->y + pt->c_y;
+}
+
+Vec sub_vec(Vec v1, Vec v2){
+  v1.x -= v2.x; v1.y -= v2.y; // v1 is a by-value copy, the caller's Vec is untouched
+  return v1;
+}
+Vec add_vec(Vec v1, Vec v2){
+  v1.x += v2.x; v1.y += v2.y; // v1 is a by-value copy, the caller's Vec is untouched
+  return v1;
+}
+Vec field_to_vec(Field f){
+  Vec pos; pos.x = f.x; pos.y = f.y; // size of the field is dropped
+  return pos;
+}
+
+/* qsort comparator: orders transforms by ascending x, result is -1, 0 or 1 */
+int cmp_trans_x(const void *t1, const void* t2)
+{
+  const double lhs = ((const VSTransform*)t1)->x;
+  const double rhs = ((const VSTransform*)t2)->x;
+  return (lhs > rhs) - (lhs < rhs);
+}
+
+/* qsort comparator: orders transforms by ascending y, result is -1, 0 or 1 */
+int cmp_trans_y(const void *t1, const void* t2)
+{
+  const double lhs = ((const VSTransform*)t1)->y;
+  const double rhs = ((const VSTransform*)t2)->y;
+  return (lhs > rhs) - (lhs < rhs);
+}
+
+/* static int cmp_trans_alpha(const void *t1, const void* t2){ */
+/*   double a = ((VSTransform*)t1)->alpha; */
+/*   double b = ((VSTransform*)t2)->alpha; */
+/*   return a < b ? -1 : ( a > b ? 1 : 0 ); */
+/* } */
+
+
+/* qsort comparator for double values: ascending order, result is -1, 0 or 1 */
+int cmp_double(const void *t1, const void* t2)
+{
+  const double lhs = *(const double*)t1;
+  const double rhs = *(const double*)t2;
+  return (lhs > rhs) - (lhs < rhs);
+}
+
+/* qsort comparator for int values: ascending order, result is -1, 0 or 1 */
+int cmp_int(const void *t1, const void* t2)
+{
+  const int lhs = *(const int*)t1;
+  const int rhs = *(const int*)t2;
+  return (lhs > rhs) - (lhs < rhs);
+}
+
+/**
+ * median_xy_transform: calculates the median of an array
+ * of transforms, considering only x and y
+ *
+ * Parameters:
+ *    transforms: array of transforms.
+ *           len: length  of array
+ * Return value:
+ *     A new transform with x and y being the median of all transforms (for
+ *     even len the mean of the two middle values). alpha and other fields are 0.
+ * Preconditions:
+ *     len>0
+ * Side effects:
+ *     None
+ */
+VSTransform median_xy_transform(const VSTransform* transforms, int len)
+{
+  VSTransform* ts = vs_malloc(sizeof(VSTransform) * len);
+  VSTransform t   = null_transform();
+  memcpy(ts,transforms, sizeof(VSTransform)*len );
+  int half = len/2; // odd len: the middle index; even len: the upper of the two middle indices
+  qsort(ts, len, sizeof(VSTransform), cmp_trans_x);
+  t.x = len % 2 == 1 ? ts[half].x : (ts[half-1].x + ts[half].x)/2; // never reads ts[len]
+  qsort(ts, len, sizeof(VSTransform), cmp_trans_y);
+  t.y = len % 2 == 1 ? ts[half].y : (ts[half-1].y + ts[half].y)/2;
+  vs_free(ts);
+  return t;
+}
+
+/**
+ * cleanmean_xy_transform: calculates the cleaned mean of an array
+ * of transforms, considering only x and y
+ *
+ * Parameters:
+ *    transforms: array of transforms.
+ *           len: length  of array
+ * Return value:
+ *     A new transform with x and y being the cleaned mean
+ *     (the len/5 smallest and the len/5 largest values are left out) of
+ *     all transforms. alpha and other fields are 0.
+ * Preconditions:
+ *     len>0
+ * Side effects:
+ *     None
+ */
+VSTransform cleanmean_xy_transform(const VSTransform* transforms, int len)
+{
+  const int cut  = len / 5;    // entries dropped at each end
+  const int stop = len - cut;  // one past the last entry that is summed
+  VSTransform* sorted = vs_malloc(sizeof(VSTransform) * len);
+  VSTransform acc = null_transform();
+  int k;
+  memcpy(sorted, transforms, sizeof(VSTransform) * len);
+  qsort(sorted, len, sizeof(VSTransform), cmp_trans_x);
+  for (k = cut; k < stop; k++)
+    acc.x += sorted[k].x;
+  qsort(sorted, len, sizeof(VSTransform), cmp_trans_y);
+  for (k = cut; k < stop; k++)
+    acc.y += sorted[k].y;
+  vs_free(sorted);
+  return mult_transform(&acc, 1.0 / (len - (2.0 * cut)));
+}
+
+
+/**
+ * calculates the cleaned maximum and minimum of an array of transforms,
+ * considering only x and y
+ * It cuts off the upper and lower x-th percentile
+ *
+ * Parameters:
+ *    transforms: array of transforms.
+ *           len: length  of array
+ *     percentil: the x-th percentile to cut off
+ *           min: pointer to min (return value, only x and y are written)
+ *           max: pointer to max (return value, only x and y are written)
+ * Return value:
+ *     call by reference in min and max
+ * Preconditions:
+ *     len>0, 0<=percentil<50
+ * Side effects:
+ *     only on min and max
+ */
+void cleanmaxmin_xy_transform(const VSTransform* transforms, int len,
+                              int percentil, VSTransform* min, VSTransform* max){
+  const int lo = len * percentil / 100; // index of the smallest value that is kept
+  const int hi = len - lo - 1;          // index of the largest value that is kept
+  VSTransform* sorted = vs_malloc(sizeof(VSTransform) * len);
+  memcpy(sorted, transforms, sizeof(VSTransform) * len);
+  qsort(sorted, len, sizeof(VSTransform), cmp_trans_x);
+  min->x = sorted[lo].x;
+  max->x = sorted[hi].x;
+  qsort(sorted, len, sizeof(VSTransform), cmp_trans_y);
+  min->y = sorted[lo].y;
+  max->y = sorted[hi].y;
+  vs_free(sorted);
+}
+
+/* calculates the required zoom value to have no borders visible:
+   100 * (twice the larger relative shift + |sin(alpha)|) */
+double transform_get_required_zoom(const VSTransform* transform, int width, int height){
+  const double shift_x  = fabs(transform->x)/width;      // translation part, x
+  const double shift_y  = fabs(transform->y)/height;     // translation part, y
+  const double rotation = fabs(sin(transform->alpha));   // rotation part
+  return 100.0*(2.0*VS_MAX(shift_x,shift_y) + rotation); }
+
+
+/**
+ * median: median of a double array
+ *
+ * Parameters:
+ *            ds: array of values
+ *           len: length  of array
+ * Return value:
+ *     middle element for odd len, mean of the two middle elements for even len
+ * Preconditions: len>0
+ * Side effects:  ds will be sorted!
+ */
+double median(double* ds, int len)
+{
+  int half=len/2; // odd len: the middle index; even len: the upper of the two middle indices
+  qsort(ds,len, sizeof(double), cmp_double);
+  return len % 2 == 1 ? ds[half] : (ds[half-1] + ds[half])/2; // never reads ds[len]
+}
+
+
+/** returns x multiplied by itself */
+double sqr(double x){ const double squared = x * x; return squared; }
+
+/**
+ * mean: arithmetic mean of a double array
+ *
+ * Parameters:
+ *            ds: array of values
+ *           len: length  of array
+ * Return value: sum of all values divided by len
+ * Preconditions: len>0
+ * Side effects:  None
+ */
+double mean(const double* ds, int len)
+{
+  double total = 0;
+  int left = len;          // elements still to be added
+  while (left-- > 0)
+    total += *ds++;
+  return total / len;
+}
+
+/**
+ * stddev: standard deviation of a double array
+ *
+ * Parameters:
+ *            ds: array of values
+ *           len: length  of array
+ *          mean: mean of the array (@see mean())
+ * Return value: square root of (sum of squared deviations from mean / len)
+ * Preconditions: len>0
+ * Side effects:  None
+ */
+double stddev(const double* ds, int len, double mean)
+{
+  double sq_total = 0;
+  const double* cur = ds;
+  int left;
+  for (left = len; left > 0; left--, cur++) sq_total += sqr(*cur - mean);
+  return sqrt(sq_total / len);
+}
+
+/**
+ * cleanmean: mean without the len/5 smallest and len/5 largest values
+ *
+ * Parameters:
+ *            ds: array of values
+ *           len: length  of array
+ *       minimum: minimal value (after cleaning) if not NULL
+ *       maximum: maximal value (after cleaning) if not NULL
+ * Return value:
+ *     the mean value of the array without the upper
+ *     and lower fifth (20% each)
+ *     and minimum and maximum of the values that were kept
+ * Preconditions: len>0
+ * Side effects:  ds will be sorted!
+ */
+double cleanmean(double* ds, int len, double* minimum, double* maximum)
+{
+  const int cut   = len / 5;        // values dropped at each end
+  const int first = cut;            // first index that is kept
+  const int last  = len - cut - 1;  // last index that is kept
+  double total    = 0;
+  int k;
+  qsort(ds, len, sizeof(double), cmp_double);
+  for (k = first; k <= last; k++)
+    total += ds[k];
+  if (minimum != NULL)
+    *minimum = ds[first];
+  if (maximum != NULL) *maximum = ds[last];
+  return total / (len - (2.0 * cut));
+}
+
+/************************************************/
+/***************LOCALMOTION**********************/
+
+LocalMotion null_localmotion(){
+  LocalMotion zeroed;                 // every byte is cleared below
+  memset(&zeroed, 0, sizeof zeroed);
+  return zeroed;
+}
+
+int* localmotions_getx(const LocalMotions* localmotions){
+  // copies v.x of every local motion, in vector order, into a vs_malloc'ed array
+  const int count = vs_vector_size(localmotions);
+  int* const out  = vs_malloc(sizeof(int) * count);
+  int k;
+  for (k = 0; k < count; k++)
+    out[k] = LMGet(localmotions,k)->v.x;
+  return out;
+}
+
+int* localmotions_gety(const LocalMotions* localmotions){
+  // copies v.y of every local motion, in vector order, into a vs_malloc'ed array
+  const int count = vs_vector_size(localmotions);
+  int* const out  = vs_malloc(sizeof(int) * count);
+  int k;
+  for (k = 0; k < count; k++)
+    out[k] = LMGet(localmotions,k)->v.y;
+  return out;
+}
+
+LocalMotion sub_localmotion(const LocalMotion* lm1, const LocalMotion* lm2){
+  LocalMotion diff = *lm1;          // all fields other than v are taken over from lm1
+  diff.v.x = lm1->v.x - lm2->v.x;
+  diff.v.y = lm1->v.y - lm2->v.y;
+  return diff;
+}
+
+
+/**
+ * cleanmean_localmotions: calculates the cleaned mean of a vector
+ * of local motions, considering only v.x and v.y
+ *
+ * Parameters:
+ *    localmotions : vs_vector of local motions
+ * Return value:
+ *     A localmotion with vec with x and y being the cleaned mean
+ *     (the len/5 smallest and the len/5 largest values are left out) of
+ *     all local motions. all other fields are 0.
+ * Preconditions:
+ *     size of vector >0
+ * Side effects:
+ *     None
+ */
+LocalMotion cleanmean_localmotions(const LocalMotions* localmotions)
+{
+  const int count   = vs_vector_size(localmotions);
+  const int cut     = count / 5;             // entries dropped at each end
+  const double kept = count - (2.0 * cut);   // number of entries that are summed
+  int* xs = localmotions_getx(localmotions);
+  int* ys = localmotions_gety(localmotions);
+  LocalMotion result = null_localmotion();
+  int k;
+  result.v.x = 0; result.v.y = 0;
+  qsort(xs, count, sizeof(int), cmp_int);
+  qsort(ys, count, sizeof(int), cmp_int);
+  for (k = cut; k < count - cut; k++){ // x and y are sorted and summed independently
+    result.v.x += xs[k];
+    result.v.y += ys[k];
+  }
+  vs_free(xs);
+  vs_free(ys);
+  result.v.x /= kept;  // int divided by double, stored back into the int field
+  result.v.y /= kept;
+  return result;
+}
+
+VSArray localmotionsGetMatch(const LocalMotions* localmotions){
+  const int count = vs_vector_size(localmotions);
+  VSArray matches = vs_array_new(count);  // one slot per local motion
+  int k = 0;
+  for (; k < matches.len; k++) matches.dat[k] = LMGet(localmotions,k)->match;
+  return matches;
+}
+
+
+/*
+ * Local variables:
+ *   c-file-style: "stroustrup"
+ *   c-file-offsets: ((case-label . *) (statement-case-intro . *))
+ *   indent-tabs-mode: nil
+ *   c-basic-offset: 2 t
+ * End:
+ *
+ * vim: expandtab shiftwidth=2:
+ */
diff --git a/src/transformtype.h b/src/transformtype.h
new file mode 100644
index 0000000..2439bff
--- /dev/null
+++ b/src/transformtype.h
@@ -0,0 +1,80 @@
+/*
+ *  transformtype.h
+ *
+ *  Copyright (C) Georg Martius - June 2007 - 2013
+ *
+ *  This file is part of transcode, a video stream processing tool
+ *
+ *  transcode is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2, or (at your option)
+ *  any later version.
+ *
+ *  transcode is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with GNU Make; see the file COPYING.  If not, write to
+ *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ */
+#ifndef __TRANSFORMTYPE_H
+#define __TRANSFORMTYPE_H
+
+#include <stdio.h>
+#include "vsvector.h"
+
+/* structure to hold information about frame transformations
+   x,y are translations, alpha is a rotation around the center in RAD,
+   zoom is a percentage to zoom in and
+   extra is for additional information like scene cut (unused)
+ */
+typedef struct _transform {
+    double x;        /* translation in x */
+    double y;        /* translation in y */
+    double alpha;    /* rotation around the center in RAD */
+    double zoom;     /* percentage to zoom in */
+    double barrel;   /* NOTE(review): presumably barrel distortion - confirm */
+    double rshutter; /* NOTE(review): presumably rolling shutter - confirm */
+    int extra;    /* -1: ignore transform (only internal use);
+                     0 for normal trans; 1 for inter scene cut (unused) */
+} VSTransform;
+
+/** stores x y and size of a measurement field */
+typedef struct _field {
+  int x;     // x coordinate of the middle of the field
+  int y;     // y coordinate of the middle of the field
+  int size;  // size of field
+} Field;
+
+/** stores x y coordinates (integer) */
+typedef struct _vec {
+  int x;     // x coordinate
+  int y;     // y coordinate
+} Vec;
+
+/* structure to hold information about local motion.
+ */
+typedef struct _localmotion {
+    Vec v;           // NOTE(review): presumably the motion measured for field f - confirm
+    Field f;         // measurement field (position and size)
+    double contrast; // local contrast of the measurement field
+    double match;    // quality of match
+} LocalMotion;
+
+typedef VSVector LocalMotions;
+
+#endif
+
+/*
+ * Local variables:
+ *   c-file-style: "stroustrup"
+ *   c-file-offsets: ((case-label . *) (statement-case-intro . *))
+ *   indent-tabs-mode: nil
+ *   c-basic-offset: 2 t
+ * End:
+ *
+ * vim: expandtab shiftwidth=2:
+ */
diff --git a/src/transformtype_operations.h b/src/transformtype_operations.h
new file mode 100644
index 0000000..d41adbe
--- /dev/null
+++ b/src/transformtype_operations.h
@@ -0,0 +1,169 @@
+/*
+ *  transformtype_operations.h
+ *
+ *  Copyright (C) Georg Martius - June 2007 - 2013
+ *
+ *  This file is part of transcode, a video stream processing tool
+ *
+ *  transcode is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2, or (at your option)
+ *  any later version.
+ *
+ *  transcode is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with GNU Make; see the file COPYING.  If not, write to
+ *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ */
+#ifndef __TRANSFORMTYPE_OPERATIONS_H
+#define __TRANSFORMTYPE_OPERATIONS_H
+
+#include "transformtype.h"
+#include "vidstabdefines.h"
+#include "vsvector.h"
+#include "frameinfo.h"
+
+/// helper macro: element at index of a LocalMotions vector as LocalMotion* (0 if out of range)
+#define LMGet(localmotions,index) \
+    ((LocalMotion*)vs_vector_get(localmotions,index))
+
+/* helper functions to create and operate with transforms.
+ * all functions are non-destructive
+ * the "_" version uses non-pointer Transforms. This is slower
+ * but useful when cascading calculations like
+ * add_transforms_(mult_transform(&t1, 5.0), &t2)
+ */
+VSTransform null_transform(void);
+VSTransform new_transform(double x, double y, double alpha,
+                          double zoom, double barrel, double rshutter, int extra);
+VSTransform add_transforms(const VSTransform* t1, const VSTransform* t2);
+VSTransform add_transforms_(const VSTransform t1, const VSTransform t2);
+VSTransform sub_transforms(const VSTransform* t1, const VSTransform* t2);
+VSTransform mult_transform(const VSTransform* t1, double f);
+VSTransform mult_transform_(const VSTransform t1, double f);
+
+void storeVSTransform(FILE* f, const VSTransform* t);
+
+
+typedef struct _preparedtransform {
+  const VSTransform* t;
+  double zcos_a;
+  double zsin_a;
+  double c_x;
+  double c_y;
+} PreparedTransform;
+
+// prepares a transform (for the given frame info) for use with transform_vec*
+PreparedTransform prepare_transform(const VSTransform* t, const VSFrameInfo* fi);
+// transforms vector (attention, only integer)
+Vec transform_vec(const PreparedTransform* t, const Vec* v);
+void transform_vec_double(double *x, double* y, const PreparedTransform* t, const Vec* v);
+
+// subtract two vectors
+Vec sub_vec(Vec v1, Vec v2);
+// adds two vectors
+Vec add_vec(Vec v1, Vec v2);
+Vec field_to_vec(Field f);
+
+/* compares a transform with respect to x (for sort function) */
+int cmp_trans_x(const void *t1, const void* t2);
+/* compares a transform with respect to y (for sort function) */
+int cmp_trans_y(const void *t1, const void* t2);
+/* static int cmp_trans_alpha(const void *t1, const void* t2); */
+
+/* compares two double values (for sort function)*/
+int cmp_double(const void *t1, const void* t2);
+/* compares two int values (for sort function)*/
+int cmp_int(const void *t1, const void* t2);
+
+
+/** square of a number */
+double sqr(double x);
+
+/* calculates the median of an array of transforms,
+ * considering only x and y
+ */
+VSTransform median_xy_transform(const VSTransform* transforms, int len);
+/* median of a double array */
+double median(double* ds, int len);
+/* mean of a double array */
+double mean(const double* ds, int len);
+/* standard deviation of a double array */
+double stddev(const double* ds, int len, double mean);
+/* mean with the upper and lower pentile (fifth) cut off
+ * (min and max are optionally returned)
+ */
+double cleanmean(double* ds, int len, double* minimum, double* maximum);
+/* calculates the cleaned mean of an array of transforms,
+ * considering only x and y
+ */
+VSTransform cleanmean_xy_transform(const VSTransform* transforms, int len);
+
+/* calculates the cleaned (cutting off the x-th percentile)
+ * maximum and minimum of an array of transforms,
+ * considering only x and y
+ */
+void cleanmaxmin_xy_transform(const VSTransform* transforms, int len,
+                              int percentil,
+                              VSTransform* min, VSTransform* max);
+
+/* calculates the required zoom value to have no borders visible
+ */
+double transform_get_required_zoom(const VSTransform* transform, int width, int height);
+
+/* helper function to work with local motions */
+
+LocalMotion null_localmotion(void);
+/// a new array of the v.x values is returned (vs_free has to be called)
+int* localmotions_getx(const LocalMotions* localmotions);
+/// a new array of the v.y values is returned (vs_free has to be called)
+int* localmotions_gety(const LocalMotions* localmotions);
+/// lm1 - lm2 only for the Vec (the remaining values are taken from lm1)
+LocalMotion sub_localmotion(const LocalMotion* lm1, const LocalMotion* lm2);
+
+/* calculates the cleaned mean of the vector of localmotions
+ * considering only v.x and v.y
+ */
+LocalMotion cleanmean_localmotions(const LocalMotions* localmotions);
+
+VSArray localmotionsGetMatch(const LocalMotions* localmotions);
+
+/* helper functions */
+
+/* optimized round function:
+   shifts x by one half away from zero and lets the conversion
+   to int cut off the fractional part */
+inline static int myround(float x) {
+    double half = (x > 0) ? 0.5 : -0.5;
+    return x + half;
+}
+
+
+/* optimized floor function
+   Cuts off the fractional part and goes one down if that has rounded up,
+   so that negative integer values like -1.0 are correct as well (-1).
+   x has to be within the range of int.
+*/
+inline static int myfloor(float x) {
+    int i = (int)x;  /* conversion rounds towards zero */
+    if(x < i) i--;   /* negative non-integer: was rounded up, so go one down */
+    return i;
+}
+
+#endif
+
+/*
+ * Local variables:
+ *   c-file-style: "stroustrup"
+ *   c-file-offsets: ((case-label . *) (statement-case-intro . *))
+ *   indent-tabs-mode: nil
+ *   c-basic-offset: 2 t
+ * End:
+ *
+ * vim: expandtab shiftwidth=2:
+ */
diff --git a/src/vid.stab.prj b/src/vid.stab.prj
new file mode 100644
index 0000000..a1689e7
--- /dev/null
+++ b/src/vid.stab.prj
@@ -0,0 +1,13 @@
+# The vid.stab project
+workspace vidstab {
+	project stabilize {
+		ipath "../../../transcode"
+		ipath "../../../transcode/src"
+		file filter_stabilize.c transformtype.c motiondetect.c libdeshake.c dslist.c
+	}
+	project transform {
+		ipath "../../../transcode"
+		ipath "../../../transcode/src"
+		file filter_transform.c transformtype.c transform.c libdeshake.c dslist.c
+	}
+}
diff --git a/src/vidstabdefines.h b/src/vidstabdefines.h
new file mode 100644
index 0000000..a563ce7
--- /dev/null
+++ b/src/vidstabdefines.h
@@ -0,0 +1,95 @@
+/*
+ * vidstabdefines.h
+ *
+ *  Created on: Feb 23, 2011
+ *      Author: georg
+ *
+ *  This file is part of vid.stab video stabilization library
+ *
+ *  vid.stab is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License,
+ *  as published by the Free Software Foundation; either version 2, or
+ *  (at your option) any later version.
+ *
+ *  vid.stab is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with GNU Make; see the file COPYING.  If not, write to
+ *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ *
+*/
+
+#ifndef VIDSTABDEFINES_H_
+#define VIDSTABDEFINES_H_
+
+#include <stddef.h>
+#include <stdlib.h>
+
+#ifdef __GNUC__
+#define likely(x)       __builtin_expect(!!(x), 1)
+#define unlikely(x)     __builtin_expect(!!(x), 0)
+#else
+#define likely(x)        (x)
+#define unlikely(x)      (x)
+#endif
+
+#define VS_MAX(a, b)    (((a) > (b)) ?(a) :(b))
+#define VS_MIN(a, b)    (((a) < (b)) ?(a) :(b))
+/* clamp x between a and b */
+#define VS_CLAMP(x, a, b)  VS_MIN(VS_MAX((a), (x)), (b))
+
+#define VS_DEBUG 2
+
+/// pixel in single layer image (def outside of w x h); arguments may be evaluated more than once
+#define PIXEL(img, linesize, x, y, w, h, def) \
+  (((x) < 0 || (y) < 0 || (x) >= (w) || (y) >= (h)) ? (def) : (img)[(x) + (y) * (linesize)])
+/// pixel in single layer image without rangecheck
+#define PIX(img, linesize, x, y) ((img)[(x) + (y) * (linesize)])
+/// pixel in N-channel image (def outside of w x h). channel in {0..N-1}
+#define PIXELN(img, linesize, x, y, w, h, N, channel, def) \
+  (((x) < 0 || (y) < 0 || (x) >= (w) || (y) >= (h)) ? (def) : (img)[((x) + (y) * (linesize))*(N) + (channel)])
+/// pixel in N-channel image without rangecheck. channel in {0..N-1}
+#define PIXN(img, linesize, x, y, N, channel) ((img)[((x) + (y) * (linesize))*(N) + (channel)])
+
+/**** Configurable memory and logging functions. Defined in libvidstab.c ****/
+
+typedef void* (*vs_malloc_t) (size_t size);
+typedef void* (*vs_realloc_t) (void* ptr, size_t size);
+typedef void (*vs_free_t) (void* ptr);
+typedef void* (*vs_zalloc_t) (size_t size);
+
+typedef int (*vs_log_t) (int type, const char* tag, const char* format, ...);
+
+typedef char* (*vs_strdup_t) (const char* s);
+
+extern vs_log_t vs_log;
+
+extern vs_malloc_t vs_malloc;
+extern vs_realloc_t vs_realloc;
+extern vs_free_t vs_free;
+extern vs_zalloc_t vs_zalloc;
+
+extern vs_strdup_t vs_strdup;
+
+extern int VS_ERROR_TYPE;
+extern int VS_WARN_TYPE;
+extern int VS_INFO_TYPE;
+extern int VS_MSG_TYPE;
+
+extern int VS_ERROR;
+extern int VS_OK;
+
+#define vs_log_error(tag, format, args...) \
+    vs_log(VS_ERROR_TYPE, tag, format , ## args)
+#define vs_log_warn(tag, format, args...) \
+    vs_log(VS_WARN_TYPE, tag, format , ## args)
+#define vs_log_info(tag, format, args...) \
+    vs_log(VS_INFO_TYPE, tag, format , ## args)
+#define vs_log_msg(tag, format, args...) \
+    vs_log(VS_MSG_TYPE, tag, format , ## args)
+
+#endif /* VIDSTABDEFINES_H_ */
diff --git a/src/vsvector.c b/src/vsvector.c
new file mode 100644
index 0000000..8438d4d
--- /dev/null
+++ b/src/vsvector.c
@@ -0,0 +1,244 @@
+/*
+ * vsvector.c -- a dynamic array
+ * (C) 2011 - Georg Martius
+ *   georg dot martius at web dot de
+ *
+ *  This file is part of vid.stab video stabilization library
+ *
+ *  vid.stab is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License,
+ *  as published by the Free Software Foundation; either version 2, or
+ *  (at your option) any later version.
+ *
+ *  vid.stab is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with GNU Make; see the file COPYING.  If not, write to
+ *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include "vsvector.h"
+#include "vidstabdefines.h"
+#include <assert.h>
+#include <string.h>
+
+
+/*************************************************************************/
+int vs_vector_resize(VSVector *V, int newsize);
+
+/*************************************************************************/
+
+int vs_vector_init(VSVector *V, int buffersize){
+  assert(V);
+  // start out as a consistent empty vector, so V is well defined even on failure
+  V->data = 0;
+  V->buffersize=0;
+  V->nelems=0;
+  if(buffersize>0){
+    V->data=(void**)vs_zalloc(sizeof(void*)*buffersize);
+    if(!V->data) return VS_ERROR;
+    V->buffersize=buffersize;
+  }
+  return VS_OK;
+}
+
+int vs_vector_fini(VSVector *V){
+  assert(V);
+  // only the buffer is released; the elements themselves are not touched
+  if(V->data != 0){ vs_free(V->data); V->data = 0; }
+  V->nelems=0;
+  V->buffersize=0;
+  return VS_OK;
+}
+
+int vs_vector_del(VSVector *V){
+  vs_vector_zero(V);          // frees the data pointed to by the elements
+  return vs_vector_fini(V);   // frees the pointer buffer itself
+}
+
+int vs_vector_zero(VSVector *V){
+  assert(V);
+  assert(V->nelems < 1 || V->data);
+  // free every element that is set; the pointer buffer itself is kept
+  int remaining = V->nelems;
+  void** elem = V->data;
+  for(; remaining > 0; remaining--, elem++){
+    if(*elem) vs_free(*elem);
+  }
+  V->nelems=0;
+  return VS_OK;
+}
+
+int vs_vector_size(const VSVector *V){
+  assert(V); // a null vector is caught here; no error value is returned
+  return V->nelems;
+}
+
+
+int vs_vector_append(VSVector *V, void *data){
+  assert(V && data);
+  // create the buffer on first use; without it there is nowhere to store data
+  if((!V->data || V->buffersize < 1) && vs_vector_init(V,4)!=VS_OK) return VS_ERROR;
+  if(V->nelems >= V->buffersize){
+    if(vs_vector_resize(V, V->buffersize*2)!=VS_OK) return VS_ERROR;
+  }
+  V->data[V->nelems++]=data; // only the pointer is stored (no deep copy)
+  return VS_OK;
+}
+
+int vs_vector_append_dup(VSVector *V, void *data, int data_size){
+  assert(V && data);
+  void* d = vs_malloc(data_size); // copy of data, owned by the vector on success
+  if(!d) return VS_ERROR;
+  memcpy(d, data, data_size);
+  if(vs_vector_append(V, d)!=VS_OK){ vs_free(d); return VS_ERROR; } // do not leak the copy
+  return VS_OK;
+}
+
+
+void *vs_vector_get(const VSVector *V, int pos){
+  assert(V && V->data);
+  // only positions within [0, nelems) hold an element;
+  //  everything else is answered with a null pointer
+  int inside = (pos >= 0) && (pos < V->nelems);
+  return inside ? V->data[pos] : 0;
+}
+
+void* vs_vector_set(VSVector *V, int pos, void *data){
+  assert(V && data && pos>=0);
+  // create the buffer on first use; 0 is returned if that is not possible
+  if((!V->data || V->buffersize < 1) && vs_vector_init(V,4)!=VS_OK) return 0;
+  if(V->buffersize <= pos) {
+    int nsize = V->buffersize;
+    while(nsize <= pos) nsize *=2;
+    if(vs_vector_resize(V, nsize)!=VS_OK) return 0; // insufficient error handling here! VS_ERROR
+  }
+  if(pos >= V->nelems){ // insert after end of vector
+    int i;
+    for(i=V->nelems; i<= pos; i++) V->data[i]=0; // the gap is filled with empty elements
+    V->nelems=pos+1;
+  }
+  // the previous element is handed back (0 if there was none); it is not freed here
+  void* old = V->data[pos];
+  V->data[pos] = data;
+  return old;
+}
+
+void* vs_vector_set_dup(VSVector *V, int pos, void *data, int data_size){
+  void* d = vs_malloc(data_size); // copy of data that gets stored in the vector
+  if(!d) return 0; // insufficient error handling here! VS_ERROR
+  memcpy(d, data, data_size);
+  return vs_vector_set(V, pos, d); // old element at pos (0 if there was none or on error)
+}
+
+
+int vs_vector_resize(VSVector *V, int newsize){
+  assert(V && V->data);
+  if(newsize<1) newsize=1;
+  // assign only on success: after a failed realloc the old buffer is still the valid one
+  void** newdata = (void**)vs_realloc(V->data, newsize * sizeof(void*));
+  if (!newdata){
+    vs_log_error("VS_Vector","out of memory!");
+    return VS_ERROR;
+  }
+  V->data = newdata;
+  V->buffersize=newsize;
+  if(V->nelems>V->buffersize) V->nelems=V->buffersize; // shrinking drops the last elements
+  return VS_OK;
+}
+
+VSVector vs_vector_filter(const VSVector *V, short (*pred)(void*, void*), void* param){
+  VSVector result = {0, 0, 0}; // well defined (empty) even if the allocation fails
+  assert(V);
+  if(vs_vector_init(&result, V->nelems)!=VS_OK) return result;
+  for(int i=0; i< V->nelems; i++){
+    if(pred(param, V->data[i]))
+      vs_vector_append(&result, V->data[i]); // pointer is shared with V, no copy
+  }
+  return result;
+}
+
+VSVector vs_vector_concat(const VSVector *V1, const VSVector *V2){
+  VSVector result = {0, 0, 0}; // stays empty if there is nothing to copy or no memory
+  assert(V1 && V2);
+  if(vs_vector_init(&result, V1->nelems + V2->nelems)!=VS_OK || !result.data) return result;
+  if(V1->nelems>0) memcpy(result.data, V1->data, sizeof(void*)* V1->nelems);
+  if(V2->nelems>0) memcpy(result.data+V1->nelems, V2->data, sizeof(void*)* V2->nelems);
+  result.nelems=V1->nelems+V2->nelems; // the element pointers are shared with V1 and V2
+  return result;
+}
+
+
+/* ARRAY */
+
+VSArray vs_array_new(int len){
+  VSArray a;
+  a.dat = (double*)vs_zalloc(sizeof(double)*len);
+  a.len = a.dat ? len : 0; // without memory the array is empty, so that no loop runs over it
+  return a;
+}
+
+VSArray vs_array(double vals[],int len){
+  VSArray a = vs_array_new(len);
+  if(a.dat && len > 0) memcpy(a.dat,vals, sizeof(double)*len); // nothing to copy into if the allocation failed
+  return a;
+}
+
+VSArray* vs_array_plus(VSArray* c, VSArray a, VSArray b){
+  assert(a.len == b.len);
+  if(c->len == 0 ) *c = vs_array_new(a.len); // an empty target is allocated here
+  // element-wise sum, written into the target
+  double* sum = c->dat;
+  for(int k=0; k< a.len; k++) sum[k] = a.dat[k] + b.dat[k];
+  return c;
+}
+
+VSArray* vs_array_scale(VSArray* c, VSArray a, double f){
+  if(c->len == 0 ) *c = vs_array_new(a.len); // an empty target is allocated here
+  const double* src = a.dat;
+  for(int k=0; k< a.len; k++) c->dat[k] = src[k]*f;
+  return c;
+}
+
+VSArray vs_array_copy(VSArray a){
+  VSArray c = vs_array_new(a.len);
+  if(c.dat && a.len > 0) memcpy(c.dat, a.dat, a.len*sizeof(double)); // nothing to copy into if the allocation failed
+  return c;
+}
+
+void vs_array_zero(VSArray* a){
+  memset(a->dat,0,sizeof(double)*a->len); // clears all len elements bytewise; len is not changed
+}
+
+void vs_array_swap(VSArray* a, VSArray* b){
+  // exchanges the two descriptors (data pointer and length);
+  //  no element is copied
+  VSArray first = *a;
+  *a = *b; *b = first;
+}
+
+void vs_array_free(VSArray a){
+  vs_free(a.dat);
+  a.dat=0; // a is passed by value: these two resets do not reach the caller,
+  a.len=0; //  whose VSArray still holds the freed pointer afterwards
+}
+
+void vs_array_print(VSArray a, FILE* f){
+  // every value is written with %g and followed by a blank; no newline at the end
+  int k = 0;
+  while(k < a.len) fprintf(f, "%g ", a.dat[k++]);
+}
+
+
+/*
+ * Local variables:
+ *   c-file-style: "stroustrup"
+ *   c-file-offsets: ((case-label . *) (statement-case-intro . *))
+ *   indent-tabs-mode: nil
+ *   c-basic-offset: 2 t
+ * End:
+ *
+ * vim: expandtab shiftwidth=4:
+ */
+
diff --git a/src/vsvector.h b/src/vsvector.h
new file mode 100644
index 0000000..9c4062b
--- /dev/null
+++ b/src/vsvector.h
@@ -0,0 +1,208 @@
+/*
+ * vsvector.h -- a dynamic array
+ * (C) 2011 - Georg Martius
+ *   georg dot martius at web dot de
+ *
+ *  This file is part of vid.stab video stabilization library
+ *
+ *  vid.stab is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License,
+ *  as published by the Free Software Foundation; either version 2, or
+ *  (at your option) any later version.
+ *
+ *  vid.stab is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with GNU Make; see the file COPYING.  If not, write to
+ *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+#ifndef VSVECTOR_H
+#define VSVECTOR_H
+
+#include <stddef.h>
+#include <stdio.h>
+
+/**
+   A vector for arbitrary elements that resizes
+*/
+typedef struct vsvector_ VSVector;
+struct vsvector_ {
+  void**  data;
+  int    buffersize;
+  int    nelems;
+};
+
+/**
+ * vs_vector_init:
+ *     initializes a vector data structure.
+ *     A vector will grow but not shrink if elements are added.
+ *
+ * Parameters:
+ *              V: pointer to list to be initialized.
+ *     buffersize: size of buffer (if known, then # of resizes are reduced)
+ * Return Value:
+ *     VS_OK on success,
+ *     VS_ERROR on error.
+ */
+int vs_vector_init(VSVector *V, int buffersize);
+
+/**
+ * vs_vector_fini:
+ *     finalizes a vector data structure. Frees all resources acquired,
+ *     but *NOT* the data pointed by vector elements.
+ *
+ * Parameters:
+ *     V: pointer to list to be finalized
+ * Return Value:
+ *     VS_OK on success,
+ *     VS_ERROR on error.
+ */
+int vs_vector_fini(VSVector *V);
+
+/**
+ * vs_vector_del:
+ *     like vs_vector_fini, but also deletes the data pointed by vector elements.
+ *
+ * Parameters:
+ *     V: pointer to list to be finalized
+ * Return Value:
+ *     VS_OK on success,
+ *     VS_ERROR on error.
+ */
+int vs_vector_del(VSVector *V);
+
+/**
+ * vs_vector_zero:
+ *    deletes all data pointed to by the vector elements.
+ *    sets the number of elements to 0 but does not delete buffer
+*/
+int vs_vector_zero(VSVector *V);
+
+/**
+ * vs_vector_size:
+ *     gives the number of elements present in the vector
+ *     (not the internal buffer size).
+ *
+ * Parameters:
+ *     V: vector to be used.
+ * Return Value:
+ *    the number of elements
+ *    (a null vector is caught by an assertion; -1 is never returned)
+ */
+int vs_vector_size(const VSVector *V);
+
+
+/**
+ * vs_vector_append:
+ *     append an element to the vector.
+ *     The element is added to the end of the vector.
+ *
+ * Parameters:
+ *        V: pointer to vector to be used
+ *     data: pointer to data to be appended (must not be null).
+ *           *PLEASE NOTE* that JUST THE POINTER is copied on the newly-added
+ *           element. NO deep copy is performed.
+ *           The caller has to allocate memory by itself if it want to
+ *           add a copy of the data.
+ * Return Value:
+ *     VS_OK on success,
+ *     VS_ERROR on error.
+ */
+int vs_vector_append(VSVector *V, void *data);
+
+/**
+ * vs_vector_append_dup:
+ *  like vs_vector_append but copies data
+ */
+int vs_vector_append_dup(VSVector *V, void *data, int data_size);
+
+
+/* vs_vector_set:
+ *      the newly inserted element BECOMES the position `pos' in the vector
+ *      (which grows if needed) and the old item is returned (0 if none or on error)
+ */
+void* vs_vector_set(VSVector *V, int pos, void *data);
+
+/* vs_vector_set_dup:
+ *      the newly inserted element is copied and BECOMES the position `pos' in the vector
+ *      and the old item is returned
+ */
+void* vs_vector_set_dup(VSVector *V, int pos, void *data, int data_size);
+
+/*
+ * vs_vector_get:
+ *     gives access to the data pointed by the element in the given position.
+ *
+ * Parameters:
+ *       V: vector to be accessed.
+ *     pos: position of the element on which the data will be returned.
+ * Return Value:
+ *     NULL on error (requested element doesn't exist)
+ *     a pointer to the data belonging to the requested vector item.
+ */
+void *vs_vector_get(const VSVector *V, int pos);
+
+/*
+ * vs_vector_filter:
+ *      returns a new vector with elements that fulfill predicate
+ *      pred(param, elem)
+ */
+VSVector vs_vector_filter(const VSVector *V, short (*pred)(void*, void*), void* param);
+
+/*
+ * vs_vector_concat:
+ *      returns a new vector with elements of vector V1 and V2 after another
+ */
+VSVector vs_vector_concat(const VSVector *V1, const VSVector *V2);
+
+
+/**
+   A simple fixed-size double vector
+*/
+typedef struct vsarray_ VSArray;
+struct vsarray_ {
+  double* dat;
+  int len;
+};
+
+/** creates an VSArray from a double array */
+VSArray vs_array(double vals[], int len);
+
+/** allocates a new (zero initialized) double array */
+VSArray vs_array_new(int len);
+
+/** adds two vectors and stores results into c (if zero length then allocated) */
+VSArray* vs_array_plus(VSArray* c, VSArray a, VSArray b);
+
+/** scales a vector by a factor and stores results into c (if zero length then allocated) */
+VSArray* vs_array_scale(VSArray* c, VSArray a, double f);
+
+/** create a new deep copy of the vector */
+VSArray vs_array_copy(VSArray a);
+
+/** sets all elements of the vector to 0.0 */
+void vs_array_zero(VSArray* a);
+
+/** swaps the content of the two arrays */
+void vs_array_swap(VSArray* a, VSArray* b);
+
+/** free data */
+void vs_array_free(VSArray a);
+
+/** print array to file */
+void vs_array_print(VSArray a, FILE* f);
+
+#endif /* VSVECTOR_H */
+
+/*
+ * Local variables:
+ *   c-file-style: "stroustrup"
+ *   c-file-offsets: ((case-label . *) (statement-case-intro . *))
+ *   indent-tabs-mode: nil
+ * End:
+ *
+ * vim: expandtab shiftwidth=4:
+ */
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
new file mode 100644
index 0000000..e2a1242
--- /dev/null
+++ b/tests/CMakeLists.txt
@@ -0,0 +1,43 @@
+# to use intel compiler: cd cmake; CC=icc CXX=icpc cmake ../
+#  very fast: faster than orc code at imgcompare without any options.
+#  library needs libimf.so (link statically?)
+
+cmake_minimum_required (VERSION 2.6)
+project (vid.stab)
+
+SET(CMAKE_BUILTTYPE None) # NOTE(review): probably meant CMAKE_BUILD_TYPE; as spelled the line has no effect
+set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/../CMakeModules/") # find modules of the main project
+
+#add_definitions( -Wall -O3 -Wno-pointer-sign -DTESTING  -std=gnu99)
+add_definitions(  -Wall -O0 -g -Wno-pointer-sign -DTESTING -std=gnu99)
+find_package(Orc) # optional: without it the code is built with -DDISABLE_ORC
+if(ORC_FOUND)
+add_definitions( -DUSE_ORC ${ORC_DEFINITIONS})
+include_directories( ${ORC_INCLUDE_DIRS} )
+else()
+add_definitions( -DDISABLE_ORC)
+endif()
+
+add_definitions( -DUSE_SSE2 -msse2 -ffast-math -fno-show-column ) # -DUSE_SSE2_ASM ; SSE2 is switched on without a check here
+
+if(USE_OMP) # OpenMP only on request (USE_OMP is not set anywhere in this file)
+add_definitions(-fopenmp -DUSE_OMP)
+endif()
+
+# Make sure the compiler can find the include files of the library (../src)
+include_directories (../src)
+
+add_executable (tests tests.c testutils.c testframework.c ../src/vsvector.c
+  ../src/transform.c ../src/transformfloat.c ../src/transformfixedpoint.c
+  ../src/libvidstab.c ../src/transformtype.c ../src/frameinfo.c
+  ../src/serialize.c ../src/localmotion2transform.c
+  ../src/motiondetect.c ../src/motiondetect_opt.c ../src/orc/motiondetectorc.c ../src/boxblur.c)
+
+target_link_libraries(tests m) # math library
+if(ORC_FOUND)
+target_link_libraries(tests ${ORC_LIBRARIES})
+endif()
+if(USE_OMP)
+target_link_libraries(tests gomp)
+endif()
+
diff --git a/tests/cmake/.gitignore b/tests/cmake/.gitignore
new file mode 100644
index 0000000..f59ec20
--- /dev/null
+++ b/tests/cmake/.gitignore
@@ -0,0 +1 @@
+*
\ No newline at end of file
diff --git a/tests/generate.c b/tests/generate.c
new file mode 100644
index 0000000..a235640
--- /dev/null
+++ b/tests/generate.c
@@ -0,0 +1,53 @@
+#define NUM_RECTANGLES 100
+/* allocates num frames in testdata: frame 0 gets noise plus random rectangles, each
+   further frame i is made from frames i-1 and i with getTestFrameTransform(i) */
+void generateFrames(TestData* testdata, int num){
+  int i;
+  for(i=0; i<num; i++){
+    vsFrameAllocate(&testdata->frames[i],&testdata->fi);
+  }
+  // first frame noise; width/2*height/2 would be ((width/2)*height)/2: too much for odd heights
+  fillArrayWithNoise(testdata->frames[0].data[0],
+                     testdata->fi.width*testdata->fi.height, 10);
+  fillArrayWithNoise(testdata->frames[0].data[1],
+                     (testdata->fi.width/2)*(testdata->fi.height/2), 5);
+  fillArrayWithNoise(testdata->frames[0].data[2],
+                     (testdata->fi.width/2)*(testdata->fi.height/2), 5);
+
+  // add rectangles
+  for(int k=0; k<NUM_RECTANGLES; k++){
+    paintRectangle(testdata->frames[0].data[0],&testdata->fi,
+                   randUpTo(testdata->fi.width), randUpTo(testdata->fi.height),
+                   randUpTo((testdata->fi.width>>4)+4),
+                   randUpTo((testdata->fi.height>>4)+4),randPixel());
+  }
+
+  VSTransformConfig conf = vsTransformGetDefaultConfig("test_generate");
+  conf.interpolType=VS_Zero;
+  VSTransformData td;
+  test_bool(vsTransformDataInit(&td, &conf, &testdata->fi, &testdata->fi) == VS_OK);
+
+  fprintf(stderr, "testframe transforms\n");
+
+  for(i=1; i<num; i++){
+    VSTransform t = getTestFrameTransform(i);
+    fprintf(stderr,"%i: ",i);
+    storeVSTransform(stderr,&t);
+
+    test_bool(vsTransformPrepare(&td,&testdata->frames[i-1],&testdata->frames[i])== VS_OK);
+    test_bool(transformPlanar_float(&td, t)== VS_OK);
+    test_bool(vsTransformFinish(&td)== VS_OK);
+  }
+  vsTransformDataCleanup(&td);
+}
+
+/*
+ * Local variables:
+ *   c-file-style: "stroustrup"
+ *   c-file-offsets: ((case-label . *) (statement-case-intro . *))
+ *   indent-tabs-mode: nil
+ *   c-basic-offset: 2 t
+ * End:
+ *
+ * vim: expandtab shiftwidth=2:
+ */
diff --git a/tests/orc_bug/CMakeLists.txt b/tests/orc_bug/CMakeLists.txt
new file mode 100644
index 0000000..71e35f5
--- /dev/null
+++ b/tests/orc_bug/CMakeLists.txt
@@ -0,0 +1,15 @@
+cmake_minimum_required (VERSION 2.6) 
+project (vid.stab.test) 
+
+SET(CMAKE_BUILTTYPE None) # NOTE(review): probably meant CMAKE_BUILD_TYPE; as spelled the line has no effect
+set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/") # FindOrc.cmake is searched in the top-level source dir
+
+find_package(Orc REQUIRED) 
+
+add_definitions(  -Wall -O0 -g  -DTESTING ${ORC_DEFINITIONS})
+include_directories (${ORC_INCLUDE_DIRS}) 
+
+add_executable (orc_bug orc_bug.c orc_bug_orc.c )
+
+target_link_libraries(orc_bug m ${ORC_LIBRARIES}) # link the math library and orc
+ 
diff --git a/tests/orc_bug/FindOrc.cmake b/tests/orc_bug/FindOrc.cmake
new file mode 100644
index 0000000..117fd34
--- /dev/null
+++ b/tests/orc_bug/FindOrc.cmake
@@ -0,0 +1,29 @@
+# find ORC
+# - Try to find LibOrc-0.4
+# Once done this will define
+#  ORC_FOUND - System has LibOrc
+#  ORC_INCLUDE_DIRS - The LibOrc include directories
+#  ORC_LIBRARIES - The libraries needed to use LibOrc
+#  ORC_DEFINITIONS - Compiler switches required for using LibOrc
+
+find_package(PkgConfig)
+pkg_check_modules(PC_ORC orc-0.4)
+set(ORC_DEFINITIONS ${PC_ORC_CFLAGS_OTHER})
+
+find_path(ORC_INCLUDE_DIR orc/orc.h
+          HINTS ${PC_ORC_INCLUDEDIR} ${PC_ORC_INCLUDE_DIRS}
+          PATH_SUFFIXES orc-0.4 orc)
+
+find_library(ORC_LIBRARY NAMES orc-0.4
+             HINTS ${PC_ORC_LIBDIR} ${PC_ORC_LIBRARY_DIRS} )
+
+set(ORC_LIBRARIES ${ORC_LIBRARY} )
+set(ORC_INCLUDE_DIRS ${ORC_INCLUDE_DIR} )
+include(FindPackageHandleStandardArgs)
+# handle the QUIETLY and REQUIRED arguments and set ORC_FOUND to TRUE if all listed
+# variables are TRUE; the name has to be the one used in find_package(Orc)
+find_package_handle_standard_args(Orc  DEFAULT_MSG
+                                  ORC_LIBRARY ORC_INCLUDE_DIR)
+
+mark_as_advanced(ORC_INCLUDE_DIR ORC_LIBRARY )
+# End find ORC
diff --git a/tests/orc_bug/orc_bug.c b/tests/orc_bug/orc_bug.c
new file mode 100644
index 0000000..d529fd2
--- /dev/null
+++ b/tests/orc_bug/orc_bug.c
@@ -0,0 +1,31 @@
+#include <stdint.h>
+#include <math.h>
+#include <stdlib.h>
+#include "orc_bug_orc.h"
+
+// cd ../ && orcc --implementation -o orc_bug_orc.c orc_bug_orc.orc && orcc --header -o orc_bug_orc.h orc_bug_orc.orc ; cd cmake
+
+int main(){
+  int x;
+  int N = 512;
+
+  // some random parameters
+  int32_t zcos_a = 12345;
+  int32_t zsin_a = 65432;
+  int32_t c_tx   = 250;
+  int32_t c_ty   = 6;
+  int32_t c_d_x  = 256;
+  int32_t y_d1 = 100;
+
+  int32_t* x_ss = (int32_t*)malloc(sizeof(int32_t)*N);
+  int32_t* y_ss = (int32_t*)malloc(sizeof(int32_t)*N);
+  int32_t* xs   = (int32_t*)malloc(sizeof(int32_t)*N);
+  if (!x_ss || !y_ss || !xs) return 1; // out of memory: the test cannot be run
+  for (x = 0; x < N; x++) xs[x]=x;     // source values 0..N-1
+
+  test_orc (x_ss, y_ss, xs,
+      y_d1, c_d_x, c_tx, c_ty, zcos_a, zsin_a, N);
+
+  free(x_ss); free(y_ss); free(xs);
+  return 0;
+}
diff --git a/tests/orc_bug/orc_bug_orc.c b/tests/orc_bug/orc_bug_orc.c
new file mode 100644
index 0000000..e77c7db
--- /dev/null
+++ b/tests/orc_bug/orc_bug_orc.c
@@ -0,0 +1,314 @@
+
+/* autogenerated from orc_bug_orc.orc */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#ifndef _ORC_INTEGER_TYPEDEFS_
+#define _ORC_INTEGER_TYPEDEFS_
+#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
+#include <stdint.h>
+typedef int8_t orc_int8;
+typedef int16_t orc_int16;
+typedef int32_t orc_int32;
+typedef int64_t orc_int64;
+typedef uint8_t orc_uint8;
+typedef uint16_t orc_uint16;
+typedef uint32_t orc_uint32;
+typedef uint64_t orc_uint64;
+#define ORC_UINT64_C(x) UINT64_C(x)
+#elif defined(_MSC_VER)
+typedef signed __int8 orc_int8;
+typedef signed __int16 orc_int16;
+typedef signed __int32 orc_int32;
+typedef signed __int64 orc_int64;
+typedef unsigned __int8 orc_uint8;
+typedef unsigned __int16 orc_uint16;
+typedef unsigned __int32 orc_uint32;
+typedef unsigned __int64 orc_uint64;
+#define ORC_UINT64_C(x) (x##Ui64)
+#define inline __inline
+#else
+#include <limits.h>
+typedef signed char orc_int8;
+typedef short orc_int16;
+typedef int orc_int32;
+typedef unsigned char orc_uint8;
+typedef unsigned short orc_uint16;
+typedef unsigned int orc_uint32;
+#if INT_MAX == LONG_MAX
+typedef long long orc_int64;
+typedef unsigned long long orc_uint64;
+#define ORC_UINT64_C(x) (x##ULL)
+#else
+typedef long orc_int64;
+typedef unsigned long orc_uint64;
+#define ORC_UINT64_C(x) (x##UL)
+#endif
+#endif
+typedef union { orc_int16 i; orc_int8 x2[2]; } orc_union16;
+typedef union { orc_int32 i; float f; orc_int16 x2[2]; orc_int8 x4[4]; } orc_union32;
+typedef union { orc_int64 i; double f; orc_int32 x2[2]; float x2f[2]; orc_int16 x4[4]; } orc_union64;
+#endif
+#ifndef ORC_RESTRICT
+#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
+#define ORC_RESTRICT restrict
+#elif defined(__GNUC__) && __GNUC__ >= 4
+#define ORC_RESTRICT __restrict__
+#else
+#define ORC_RESTRICT
+#endif
+#endif
+
+#ifndef DISABLE_ORC
+#include <orc/orc.h>
+#endif
+void test_orc (orc_int32 * ORC_RESTRICT d1, orc_int32 * ORC_RESTRICT d2, const orc_int32 * ORC_RESTRICT s1, int p1, int p2, int p3, int p4, int p5, int p6, int n);
+
+
+/* begin Orc C target preamble */
+#define ORC_CLAMP(x,a,b) ((x)<(a) ? (a) : ((x)>(b) ? (b) : (x)))
+#define ORC_ABS(a) ((a)<0 ? -(a) : (a))
+#define ORC_MIN(a,b) ((a)<(b) ? (a) : (b))
+#define ORC_MAX(a,b) ((a)>(b) ? (a) : (b))
+#define ORC_SB_MAX 127
+#define ORC_SB_MIN (-1-ORC_SB_MAX)
+#define ORC_UB_MAX 255
+#define ORC_UB_MIN 0
+#define ORC_SW_MAX 32767
+#define ORC_SW_MIN (-1-ORC_SW_MAX)
+#define ORC_UW_MAX 65535
+#define ORC_UW_MIN 0
+#define ORC_SL_MAX 2147483647
+#define ORC_SL_MIN (-1-ORC_SL_MAX)
+#define ORC_UL_MAX 4294967295U
+#define ORC_UL_MIN 0
+#define ORC_CLAMP_SB(x) ORC_CLAMP(x,ORC_SB_MIN,ORC_SB_MAX)
+#define ORC_CLAMP_UB(x) ORC_CLAMP(x,ORC_UB_MIN,ORC_UB_MAX)
+#define ORC_CLAMP_SW(x) ORC_CLAMP(x,ORC_SW_MIN,ORC_SW_MAX)
+#define ORC_CLAMP_UW(x) ORC_CLAMP(x,ORC_UW_MIN,ORC_UW_MAX)
+#define ORC_CLAMP_SL(x) ORC_CLAMP(x,ORC_SL_MIN,ORC_SL_MAX)
+#define ORC_CLAMP_UL(x) ORC_CLAMP(x,ORC_UL_MIN,ORC_UL_MAX)
+#define ORC_SWAP_W(x) ((((x)&0xff)<<8) | (((x)&0xff00)>>8))
+#define ORC_SWAP_L(x) ((((x)&0xff)<<24) | (((x)&0xff00)<<8) | (((x)&0xff0000)>>8) | (((x)&0xff000000)>>24))
+#define ORC_SWAP_Q(x) ((((x)&ORC_UINT64_C(0xff))<<56) | (((x)&ORC_UINT64_C(0xff00))<<40) | (((x)&ORC_UINT64_C(0xff0000))<<24) | (((x)&ORC_UINT64_C(0xff000000))<<8) | (((x)&ORC_UINT64_C(0xff00000000))>>8) | (((x)&ORC_UINT64_C(0xff0000000000))>>24) | (((x)&ORC_UINT64_C(0xff000000000000))>>40) | (((x)&ORC_UINT64_C(0xff00000000000000))>>56))
+#define ORC_PTR_OFFSET(ptr,offset) ((void *)(((unsigned char *)(ptr)) + (offset)))
+#define ORC_DENORMAL(x) ((x) & ((((x)&0x7f800000) == 0) ? 0xff800000 : 0xffffffff))
+#define ORC_ISNAN(x) ((((x)&0x7f800000) == 0x7f800000) && (((x)&0x007fffff) != 0))
+#define ORC_DENORMAL_DOUBLE(x) ((x) & ((((x)&ORC_UINT64_C(0x7ff0000000000000)) == 0) ? ORC_UINT64_C(0xfff0000000000000) : ORC_UINT64_C(0xffffffffffffffff)))
+#define ORC_ISNAN_DOUBLE(x) ((((x)&ORC_UINT64_C(0x7ff0000000000000)) == ORC_UINT64_C(0x7ff0000000000000)) && (((x)&ORC_UINT64_C(0x000fffffffffffff)) != 0))
+#ifndef ORC_RESTRICT
+#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
+#define ORC_RESTRICT restrict
+#elif defined(__GNUC__) && __GNUC__ >= 4
+#define ORC_RESTRICT __restrict__
+#else
+#define ORC_RESTRICT
+#endif
+#endif
+/* end Orc C target preamble */
+
+
+
+/* test_orc */
+#ifdef DISABLE_ORC
+void
+test_orc (orc_int32 * ORC_RESTRICT d1, orc_int32 * ORC_RESTRICT d2, const orc_int32 * ORC_RESTRICT s1, int p1, int p2, int p3, int p4, int p5, int p6, int n){
+  int i;
+  orc_union32 * ORC_RESTRICT ptr0;
+  orc_union32 * ORC_RESTRICT ptr1;
+  const orc_union32 * ORC_RESTRICT ptr4;
+  orc_union32 var35;
+  orc_union32 var36;
+  orc_union32 var37;
+  orc_union32 var38;
+  orc_union32 var39;
+  orc_union32 var40;
+  orc_union32 var41;
+  orc_union32 var42;
+  orc_union32 var43;
+  orc_union32 var44;
+  orc_union32 var45;
+  orc_union32 var46;
+  orc_union32 var47;
+  orc_union32 var48;
+  orc_union32 var49;
+
+  ptr0 = (orc_union32 *)d1;
+  ptr1 = (orc_union32 *)d2;
+  ptr4 = (orc_union32 *)s1;
+
+    /* 1: loadpl */
+    var36.i = p2;
+    /* 3: loadpl */
+    var37.i = p5;
+    /* 5: loadpl */
+    var38.i = p6;
+    /* 6: loadpl */
+    var39.i = p1;
+    /* 9: loadpl */
+    var40.i = p3;
+    /* 12: loadpl */
+    var42.i = p6;
+    /* 14: loadpl */
+    var43.i = p4;
+
+  for (i = 0; i < n; i++) {
+    /* 0: loadl */
+    var35 = ptr4[i];
+    /* 2: subl */
+    var45.i = var35.i - var36.i;
+    /* 4: mulll */
+    var46.i = (var37.i * var45.i) & 0xffffffff;
+    /* 7: mulll */
+    var47.i = (var38.i * var39.i) & 0xffffffff;
+    /* 8: addl */
+    var48.i = var46.i + var47.i;
+    /* 10: addl */
+    var41.i = var48.i + var40.i;
+    /* 11: storel */
+    ptr0[i] = var41;
+    /* 13: mulll */
+    var49.i = (var45.i * var42.i) & 0xffffffff;
+    /* 15: addl */
+    var44.i = var49.i + var43.i;
+    /* 16: storel */
+    ptr1[i] = var44;
+  }
+
+}
+
+#else
+static void
+_backup_test_orc (OrcExecutor * ORC_RESTRICT ex)
+{
+  int i;
+  int n = ex->n;
+  orc_union32 * ORC_RESTRICT ptr0;
+  orc_union32 * ORC_RESTRICT ptr1;
+  const orc_union32 * ORC_RESTRICT ptr4;
+  orc_union32 var35;
+  orc_union32 var36;
+  orc_union32 var37;
+  orc_union32 var38;
+  orc_union32 var39;
+  orc_union32 var40;
+  orc_union32 var41;
+  orc_union32 var42;
+  orc_union32 var43;
+  orc_union32 var44;
+  orc_union32 var45;
+  orc_union32 var46;
+  orc_union32 var47;
+  orc_union32 var48;
+  orc_union32 var49;
+
+  ptr0 = (orc_union32 *)ex->arrays[0];
+  ptr1 = (orc_union32 *)ex->arrays[1];
+  ptr4 = (orc_union32 *)ex->arrays[4];
+
+    /* 1: loadpl */
+    var36.i = ex->params[25];
+    /* 3: loadpl */
+    var37.i = ex->params[28];
+    /* 5: loadpl */
+    var38.i = ex->params[29];
+    /* 6: loadpl */
+    var39.i = ex->params[24];
+    /* 9: loadpl */
+    var40.i = ex->params[26];
+    /* 12: loadpl */
+    var42.i = ex->params[29];
+    /* 14: loadpl */
+    var43.i = ex->params[27];
+
+  for (i = 0; i < n; i++) {
+    /* 0: loadl */
+    var35 = ptr4[i];
+    /* 2: subl */
+    var45.i = var35.i - var36.i;
+    /* 4: mulll */
+    var46.i = (var37.i * var45.i) & 0xffffffff;
+    /* 7: mulll */
+    var47.i = (var38.i * var39.i) & 0xffffffff;
+    /* 8: addl */
+    var48.i = var46.i + var47.i;
+    /* 10: addl */
+    var41.i = var48.i + var40.i;
+    /* 11: storel */
+    ptr0[i] = var41;
+    /* 13: mulll */
+    var49.i = (var45.i * var42.i) & 0xffffffff;
+    /* 15: addl */
+    var44.i = var49.i + var43.i;
+    /* 16: storel */
+    ptr1[i] = var44;
+  }
+
+}
+
+void
+test_orc (orc_int32 * ORC_RESTRICT d1, orc_int32 * ORC_RESTRICT d2, const orc_int32 * ORC_RESTRICT s1, int p1, int p2, int p3, int p4, int p5, int p6, int n)
+{
+  OrcExecutor _ex, *ex = &_ex;
+  static int p_inited = 0;
+  static OrcCode *c = 0;
+  void (*func) (OrcExecutor *);
+
+  if (!p_inited) {
+    orc_once_mutex_lock ();
+    if (!p_inited) {
+      OrcProgram *p;
+
+      p = orc_program_new ();
+      orc_program_set_name (p, "test_orc");
+      orc_program_set_backup_function (p, _backup_test_orc);
+      orc_program_add_destination (p, 4, "d1");
+      orc_program_add_destination (p, 4, "d2");
+      orc_program_add_source (p, 4, "s1");
+      orc_program_add_parameter (p, 4, "p1");
+      orc_program_add_parameter (p, 4, "p2");
+      orc_program_add_parameter (p, 4, "p3");
+      orc_program_add_parameter (p, 4, "p4");
+      orc_program_add_parameter (p, 4, "p5");
+      orc_program_add_parameter (p, 4, "p6");
+      orc_program_add_temporary (p, 4, "t1");
+      orc_program_add_temporary (p, 4, "t2");
+      orc_program_add_temporary (p, 4, "t3");
+
+      orc_program_append_2 (p, "subl", 0, ORC_VAR_T1, ORC_VAR_S1, ORC_VAR_P2, ORC_VAR_D1);
+      orc_program_append_2 (p, "mulll", 0, ORC_VAR_T2, ORC_VAR_P5, ORC_VAR_T1, ORC_VAR_D1);
+      orc_program_append_2 (p, "mulll", 0, ORC_VAR_T3, ORC_VAR_P6, ORC_VAR_P1, ORC_VAR_D1);
+      orc_program_append_2 (p, "addl", 0, ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_T3, ORC_VAR_D1);
+      orc_program_append_2 (p, "addl", 0, ORC_VAR_D1, ORC_VAR_T2, ORC_VAR_P3, ORC_VAR_D1);
+      orc_program_append_2 (p, "mulll", 0, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_P6, ORC_VAR_D1);
+      orc_program_append_2 (p, "addl", 0, ORC_VAR_D2, ORC_VAR_T2, ORC_VAR_P4, ORC_VAR_D1);
+
+      orc_program_compile (p);
+      c = orc_program_take_code (p);
+      orc_program_free (p);
+    }
+    p_inited = TRUE;
+    orc_once_mutex_unlock ();
+  }
+  ex->arrays[ORC_VAR_A2] = c;
+  ex->program = 0;
+
+  ex->n = n;
+  ex->arrays[ORC_VAR_D1] = d1;
+  ex->arrays[ORC_VAR_D2] = d2;
+  ex->arrays[ORC_VAR_S1] = (void *)s1;
+  ex->params[ORC_VAR_P1] = p1;
+  ex->params[ORC_VAR_P2] = p2;
+  ex->params[ORC_VAR_P3] = p3;
+  ex->params[ORC_VAR_P4] = p4;
+  ex->params[ORC_VAR_P5] = p5;
+  ex->params[ORC_VAR_P6] = p6;
+
+  func = c->exec;
+  func (ex);
+}
+#endif
+
+
diff --git a/tests/orc_bug/orc_bug_orc.h b/tests/orc_bug/orc_bug_orc.h
new file mode 100644
index 0000000..2d5fc25
--- /dev/null
+++ b/tests/orc_bug/orc_bug_orc.h
@@ -0,0 +1,76 @@
+
+/* autogenerated from orc_bug_orc.orc */
+
+#ifndef _ORC_BUG_ORC_H_
+#define _ORC_BUG_ORC_H_
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+
+#ifndef _ORC_INTEGER_TYPEDEFS_
+#define _ORC_INTEGER_TYPEDEFS_
+#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
+#include <stdint.h>
+typedef int8_t orc_int8;
+typedef int16_t orc_int16;
+typedef int32_t orc_int32;
+typedef int64_t orc_int64;
+typedef uint8_t orc_uint8;
+typedef uint16_t orc_uint16;
+typedef uint32_t orc_uint32;
+typedef uint64_t orc_uint64;
+#define ORC_UINT64_C(x) UINT64_C(x)
+#elif defined(_MSC_VER)
+typedef signed __int8 orc_int8;
+typedef signed __int16 orc_int16;
+typedef signed __int32 orc_int32;
+typedef signed __int64 orc_int64;
+typedef unsigned __int8 orc_uint8;
+typedef unsigned __int16 orc_uint16;
+typedef unsigned __int32 orc_uint32;
+typedef unsigned __int64 orc_uint64;
+#define ORC_UINT64_C(x) (x##Ui64)
+#define inline __inline
+#else
+#include <limits.h>
+typedef signed char orc_int8;
+typedef short orc_int16;
+typedef int orc_int32;
+typedef unsigned char orc_uint8;
+typedef unsigned short orc_uint16;
+typedef unsigned int orc_uint32;
+#if INT_MAX == LONG_MAX
+typedef long long orc_int64;
+typedef unsigned long long orc_uint64;
+#define ORC_UINT64_C(x) (x##ULL)
+#else
+typedef long orc_int64;
+typedef unsigned long orc_uint64;
+#define ORC_UINT64_C(x) (x##UL)
+#endif
+#endif
+typedef union { orc_int16 i; orc_int8 x2[2]; } orc_union16;
+typedef union { orc_int32 i; float f; orc_int16 x2[2]; orc_int8 x4[4]; } orc_union32;
+typedef union { orc_int64 i; double f; orc_int32 x2[2]; float x2f[2]; orc_int16 x4[4]; } orc_union64;
+#endif
+#ifndef ORC_RESTRICT
+#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
+#define ORC_RESTRICT restrict
+#elif defined(__GNUC__) && __GNUC__ >= 4
+#define ORC_RESTRICT __restrict__
+#else
+#define ORC_RESTRICT
+#endif
+#endif
+void test_orc (orc_int32 * ORC_RESTRICT d1, orc_int32 * ORC_RESTRICT d2, const orc_int32 * ORC_RESTRICT s1, int p1, int p2, int p3, int p4, int p5, int p6, int n);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
+
diff --git a/tests/orc_bug/orc_bug_orc.orc b/tests/orc_bug/orc_bug_orc.orc
new file mode 100644
index 0000000..1cd7337
--- /dev/null
+++ b/tests/orc_bug/orc_bug_orc.orc
@@ -0,0 +1,20 @@
+.function test_orc
+.dest 4 x_ss int32_t    
+.dest 4 y_ss int32_t    
+.source 4 xs int32_t
+.param 4 y_d1 int32_t
+.param 4 c_d_x int32_t
+.param 4 c_tx int32_t   
+.param 4 c_ty int32_t   
+.param 4 zcos_a int32_t 
+.param 4 zsin_a int32_t 
+.temp 4 x_d1
+.temp 4 tmp1
+.temp 4 tmp2
+subl x_d1, xs, c_d_x # x_d1 = xs - c_d_x
+mulll tmp1 zcos_a x_d1 # tmp1 = low 32 bits of zcos_a * x_d1
+mulll tmp2 zsin_a y_d1 # tmp2 = low 32 bits of zsin_a * y_d1
+addl tmp1 tmp1 tmp2 # tmp1 = tmp1 + tmp2
+addl x_ss tmp1 c_tx # x_ss = tmp1 + c_tx
+mulll tmp1, x_d1, zsin_a # with this line I get a segfault
+addl y_ss, tmp1, c_ty # y_ss = tmp1 + c_ty
diff --git a/tests/test_boxblur.c b/tests/test_boxblur.c
new file mode 100644
index 0000000..5054a32
--- /dev/null
+++ b/tests/test_boxblur.c
@@ -0,0 +1,35 @@
+// Applies boxblurPlanar numruns times and returns the elapsed time in ms
+int runboxblur( VSFrame frame1, VSFrame dest,
+                VSFrameInfo fi, int numruns){
+  long start = timeOfDayinMS(); // long: keeps the full ms timestamp
+  int i;
+  boxblurPlanar(&dest, &frame1, 0, &fi, 15, BoxBlurColor); // first pass: frame1 -> dest
+  for(i=1; i<numruns; i++){
+    boxblurPlanar(&dest, &dest, 0, &fi, 15, BoxBlurColor); // later passes blur dest in place
+  }
+  long end = timeOfDayinMS();
+  return (int)(end-start); // subtract in long, then narrow the small difference
+}
+
+
+void test_boxblur(const TestData* testdata){ // times runboxblur on frame 4 and stores input and output as PGM
+  VSFrame blurred;
+  int runs=2;
+  int elapsed; //, timeref;
+  vsFrameAllocate(&blurred,&testdata->fi);
+  //    omp_set_dynamic( 0 );
+  //    omp_set_num_threads( 1 );
+  fprintf(stderr,"********** boxblur speedtest:\n");
+  elapsed = runboxblur(testdata->frames[4], blurred, testdata->fi, runs);
+  fprintf(stderr,"***C    time for %i runs: %i ms\n", runs, elapsed);
+  storePGMImage("boxblured.pgm", blurred.data[0], testdata->fi);
+  storePGMImage("orig4.pgm", testdata->frames[4].data[0], testdata->fi);
+  // timeref=elapsed;
+  /* omp_set_dynamic( 0 ); */
+  /* omp_set_num_threads( 2); */
+  /* elapsed = runboxblur(testdata->frames[4], blurred, testdata->fi, runs); */
+  /* fprintf(stderr,"***C (2)time for %i runs: %i ms, Speedup %f\n", runs, elapsed, */
+  /*       (double)timeref/elapsed); */
+  /* omp_set_dynamic( 1 ); */
+  vsFrameFree(&blurred);
+}
diff --git a/tests/test_compareimg.c b/tests/test_compareimg.c
new file mode 100644
index 0000000..08e0379
--- /dev/null
+++ b/tests/test_compareimg.c
@@ -0,0 +1,103 @@
+#define NUMCMP 2000
+
+int checkCompareImg(VSMotionDetect* md, const VSFrame* frame){
+  // Compares plane 0 of the frame with itself for x shifts -10..8 and prints each result.
+  Field field;
+  field.x=400;
+  field.y=400;
+  field.size=12;
+
+  uint8_t *plane = frame->data[0];
+  int stride = frame->linesize[0];
+  int shift;
+  int mismatch;
+
+  for(shift=-10; shift<10; shift+=2){
+    printf("\nCheck: shiftX = %i\n",shift);
+    mismatch = compareSubImg(plane, plane, &field,
+                             stride, stride, md->fi.height,
+                             1, shift, 0, INT_MAX);
+    fprintf(stderr,"mismatch %i: %i\n", shift, mismatch);
+  }
+  return 1;
+}
+
+void test_checkCompareImg(const TestData* testdata){ // runs checkCompareImg on frame 0 with a freshly initialised detector
+  VSMotionDetectConfig cfg = vsMotionDetectGetDefaultConfig("test_checkCompareImg");
+  cfg.accuracy=12;
+  cfg.shakiness=6;
+  VSMotionDetect detector;
+  test_bool(vsMotionDetectInit(&detector, &cfg, &testdata->fi) == VS_OK);
+  fflush(stdout);
+  test_bool(checkCompareImg(&detector,&testdata->frames[0]));
+  vsMotionDetectionCleanup(&detector);
+}
+
+
+typedef unsigned int (*cmpSubImgFunc)(unsigned char* const I1, unsigned char* const I2,
+                                      const Field* field,
+                                      int width1, int width2, int height, int bytesPerPixel,
+                                      int d_x, int d_y, unsigned int threshold);
+
+// Runs cmpsubfunc numruns times, stores each result in diffs and returns the
+//  elapsed time in ms; if diffsRef is given then the results are validated
+int runcompare( cmpSubImgFunc cmpsubfunc,
+                VSFrame frame1, VSFrame frame2, Field f,
+                VSFrameInfo fi, int* diffs, int* diffsRef, int numruns){
+  long start = timeOfDayinMS(); // long: keeps the full ms timestamp
+  int i;
+  for(i=0; i<numruns; i++){
+    diffs[i]=cmpsubfunc(frame1.data[0], frame2.data[0],
+                        &f, frame1.linesize[0], frame2.linesize[0], fi.height,
+                        2, i%200, i/200, INT_MAX); // offset (i%200, i/200) varies per run
+  }
+  long end = timeOfDayinMS(); // taken before validation so only the calls are timed
+  if(diffsRef)
+    for(i=0; i<numruns; i++){
+      if(diffs[i]!=diffsRef[i]){
+        fprintf(stderr, "ERROR! Ref difference %i, Opt difference %i\n",
+                diffsRef[i], diffs[i]);
+      }
+    }
+  return (int)(end-start); // subtract in long, then narrow the small difference
+}
+
+
+
+void test_compareImg_performance(const TestData* testdata){ // times compareSubImg_thr and each compiled-in optimised variant on frames 0 and 1
+  Field f;
+  f.size=128;
+  f.x = 400;
+  f.y = 300;
+  fprintf(stderr,"********** Compare speedtest:\n");
+
+  int numruns = NUMCMP;
+  int diffsC[numruns]; // results of the C run; passed as reference to the optimised runs
+  int diffsO[numruns]; // results of the optimised variant currently being timed
+  int timeC, timeO;
+  timeC=runcompare(compareSubImg_thr, testdata->frames[0], testdata->frames[1],
+                   f, testdata->fi, diffsC, 0, numruns);
+  fprintf(stderr,"***C        time for %i runs: %i ms ****\n", numruns, timeC);
+#ifdef USE_ORC
+  timeO=runcompare(compareSubImg_orc, testdata->frames[0], testdata->frames[1],
+                   f, testdata->fi, diffsO, diffsC, numruns);
+  fprintf(stderr,"***orc      time for %i runs: %i ms \tSpeedup %3.2f\n",
+          numruns, timeO, (double)timeC/timeO);
+  timeO=runcompare(compareSubImg_thr_orc, testdata->frames[0], testdata->frames[1],
+                   f, testdata->fi, diffsO, diffsC, numruns);
+  fprintf(stderr,"***thr_orc  time for %i runs: %i ms \tSpeedup %3.2f\n",
+          numruns, timeO, (double)timeC/timeO);
+#endif
+#ifdef USE_SSE2
+  timeO=runcompare(compareSubImg_thr_sse2, testdata->frames[0], testdata->frames[1],
+                   f, testdata->fi, diffsO, diffsC, numruns);
+  fprintf(stderr,"***thr_sse2 time for %i runs: %i ms \tSpeedup %3.2f\n",
+          numruns, timeO, (double)timeC/timeO);
+#endif
+#ifdef USE_SSE2_ASM
+  timeO=runcompare(compareSubImg_thr_sse2_asm, testdata->frames[0], testdata->frames[1],
+                   f, testdata->fi, diffsO, diffsC, numruns);
+  fprintf(stderr,"***thr_asm  time for %i runs: %i ms \tSpeedup %3.2f\n",
+          numruns, timeO, (double)timeC/timeO);
+#endif
+}
diff --git a/tests/test_contrast.c b/tests/test_contrast.c
new file mode 100644
index 0000000..668e759
--- /dev/null
+++ b/tests/test_contrast.c
@@ -0,0 +1,76 @@
+#define NUMCNTR 2000
+
+void test_contrastImg(const TestData* testdata){ // benchmarks the contrast routines and checks optimised results against the C ones
+  int i;
+  Field f;
+  // difference between michelson and absolute differences from mean
+  //  is large for 100x100 at 500,300
+  f.size=128;
+  f.x = 400;
+  f.y = 300;
+  fprintf(stderr,"********** Contrast:\n");
+  int numruns = NUMCNTR;
+  double contrastC[numruns]; // results of the C implementation (reference)
+  double contrastOpt[numruns]; // results of the ORC / SSE2 implementation
+  int timeC, timeOpt;
+#ifdef USE_ORC
+  fprintf(stderr,"********** Variance - based Contrast (with ORC):\n");
+  {
+    int start = timeOfDayinMS();
+    for(i=0; i<numruns; i++){
+      contrastC[i]=contrastSubImg_variance_C(testdata->frames[0],
+                                             &f, testdata->fi.width, testdata->fi.height);
+    }
+    int end = timeOfDayinMS();
+    timeC=end-start;
+    fprintf(stderr,"***C    time for %i runs: %i ms ****\n", numruns, timeC);
+  }
+  {
+    int start = timeOfDayinMS();
+    for(i=0; i<numruns; i++){
+      contrastOpt[i]=contrastSubImg_variance_orc(testdata->frames[0],
+                                                 &f, testdata->fi.width, testdata->fi.height);
+    }
+    int end = timeOfDayinMS();
+    timeOpt=end-start;
+    fprintf(stderr,"***Orc  time for %i runs: %i ms ****\n", numruns, timeOpt);
+  }
+  fprintf(stderr,"***Speedup %3.2f\n", (double)timeC/timeOpt); // %f needs a double; int/int was undefined and trapped on 0
+  for(i=0; i<numruns; i++){
+    if(i==0){
+      printf("Orc contrast %3.2f, C contrast %3.2f\n",contrastOpt[i], contrastC[i]);
+    }
+    test_bool(contrastC[i]==contrastOpt[i]);
+  }
+#endif
+  fprintf(stderr,"********** Michelson Contrast (with SSE2):\n");
+  {
+    int start = timeOfDayinMS();
+    for(i=0; i<numruns; i++){
+      contrastC[i]=contrastSubImg(testdata->frames[0].data[0],
+                                  &f, testdata->fi.width, testdata->fi.height,1);
+    }
+    int end = timeOfDayinMS();
+    timeC=end-start;
+    fprintf(stderr,"***C    time for %i runs: %i ms ****\n", numruns, timeC);
+  }
+#ifdef USE_SSE2
+  {
+    int start = timeOfDayinMS();
+    for(i=0; i<numruns; i++){
+      contrastOpt[i]=contrastSubImg1_SSE(testdata->frames[0].data[0],
+                                         &f, testdata->fi.width, testdata->fi.height);
+    }
+    int end = timeOfDayinMS();
+    timeOpt=end-start;
+    fprintf(stderr,"***SSE2 time for %i runs: %i ms ****\n", numruns, timeOpt);
+  }
+  fprintf(stderr,"***Speedup %3.2f\n", (float)timeC/(float)timeOpt);
+  for(i=0; i<numruns; i++){
+    if(i==0){
+      printf("SSE2 contrast %3.2f, C contrast %3.2f\n",contrastOpt[i], contrastC[i]);
+    }
+    test_bool(contrastC[i]==contrastOpt[i]);
+  }
+#endif
+}
diff --git a/tests/test_gradientoptimizer.c b/tests/test_gradientoptimizer.c
new file mode 100644
index 0000000..4283105
--- /dev/null
+++ b/tests/test_gradientoptimizer.c
@@ -0,0 +1,33 @@
+
+
+double square_test (VSArray p, void* _dat){
+  double total=0; // accumulates (idx+1)*3*(p.dat[idx]-idx)^2 over all entries
+  for(int idx=0; idx<p.len; idx++){
+    total+=(idx+1)*3*(p.dat[idx]-idx)*(p.dat[idx]-idx);
+  }
+  return total;
+}
+
+void test_gradientoptimizer(){ // runs vsGradientDescent on square_test for 1..10 dimensions and checks the residual
+  int numruns=10;
+
+  fprintf(stderr,"********** Gradient Optimizer Test:\n");
+
+  for(int i=0; i<numruns; i++){
+    double residual;
+    VSArray params = vs_array_new(i+1);
+    VSArray stepsizes = vs_array_new(i+1); // NOTE(review): never passed to vs_array_free here - confirm whether vsGradientDescent takes ownership
+    for(int k=0; k<i+1; k++){
+      params.dat[k]= 20-k; // start value; square_test is zero only at dat[k]==k
+      stepsizes.dat[k]= 0.1;
+    }
+
+    VSArray result = vsGradientDescent(square_test, params, NULL, 50, stepsizes, 1e-15, &residual);
+    fprintf(stderr,"** %iD: residual %lg :", i+1, residual);
+    vs_array_print(result, stderr);
+    fprintf(stderr,"***\n");
+    test_bool(residual<1e-10);
+    vs_array_free(result);
+    vs_array_free(params);
+  }
+}
diff --git a/tests/test_localmotion2transform.c b/tests/test_localmotion2transform.c
new file mode 100644
index 0000000..b5a7133
--- /dev/null
+++ b/tests/test_localmotion2transform.c
@@ -0,0 +1,45 @@
+void test_localmotion2transform(TestData* testdata){ // checks vsMotionsToTransform against the known test-frame transforms
+  VSMotionDetectConfig mdconf = vsMotionDetectGetDefaultConfig("test_localmotion2transform");
+  VSMotionDetect md;
+  test_bool(vsMotionDetectInit(&md, &mdconf, &testdata->fi) == VS_OK);
+
+  VSTransformConfig tdconf = vsTransformGetDefaultConfig("test_localmotion2transform-trans");
+  VSTransformData td;
+
+  test_bool(vsTransformDataInit(&td, &tdconf, &testdata->fi, &testdata->fi) == VS_OK);
+  fprintf(stderr,"MotionDetect:\n");
+  int numruns =5;
+  int i;
+  //int t;
+  //        for(t = 1; t <= 4; t++){
+  long start = timeOfDayinMS(); // long: keeps the full ms timestamp
+  //      omp_set_dynamic( 0 );
+  //      omp_set_num_threads( t );
+
+  for(i=0; i<numruns; i++){
+    LocalMotions localmotions;
+    VSTransform t;
+    test_bool(vsMotionDetection(&md, &localmotions,&testdata->frames[i])== VS_OK);
+    /* for(k=0; k < vs_vector_size(&localmotions); k++){ */
+    /*   localmotion_print(LMGet(&localmotions,k),stderr); */
+    /* } */
+    t = vsMotionsToTransform(&td, &localmotions, 0);
+
+    vs_vector_del(&localmotions);
+    fprintf(stderr,"%i: ",i);
+    storeVSTransform(stderr,&t);
+    VSTransform orig = mult_transform_(getTestFrameTransform(i),-1.0); // expected value: test-frame transform scaled by -1
+    VSTransform diff = sub_transforms(&t,&orig);
+    int tolerance = fabs(diff.x)<1 && fabs(diff.y)<1 && fabs(diff.alpha)<0.001;
+    if(!tolerance){
+      fprintf(stderr,"Difference: ");
+      storeVSTransform(stderr,&diff);
+    }
+    test_bool(tolerance);
+  }
+  long end = timeOfDayinMS();
+
+  fprintf(stderr,"\n*** elapsed time for %i runs: %i ms ****\n", numruns, (int)(end-start) );
+  vsTransformDataCleanup(&td); // td was initialised above and was never released
+  vsMotionDetectionCleanup(&md);
+}
diff --git a/tests/test_motiondetect.c b/tests/test_motiondetect.c
new file mode 100644
index 0000000..2364604
--- /dev/null
+++ b/tests/test_motiondetect.c
@@ -0,0 +1,45 @@
+void test_motionDetect(TestData* testdata){ // checks vsSimpleMotionsToTransform against the known test-frame transforms
+  VSMotionDetectConfig mdconf = vsMotionDetectGetDefaultConfig("test_motionDetect");
+  VSMotionDetect md;
+  test_bool(vsMotionDetectInit(&md, &mdconf, &testdata->fi) == VS_OK);
+
+  VSTransformConfig tdconf = vsTransformGetDefaultConfig("test_motionDetect-trans");
+  VSTransformData td;
+
+  test_bool(vsTransformDataInit(&td, &tdconf, &testdata->fi, &testdata->fi) == VS_OK);
+  fprintf(stderr,"MotionDetect:\n");
+  int numruns =5;
+  int i;
+  //int t;
+  //        for(t = 1; t <= 4; t++){
+  long start = timeOfDayinMS(); // long: keeps the full ms timestamp
+  //      omp_set_dynamic( 0 );
+  //      omp_set_num_threads( t );
+
+  for(i=0; i<numruns; i++){
+    LocalMotions localmotions;
+    VSTransform t;
+    test_bool(vsMotionDetection(&md, &localmotions,&testdata->frames[i])== VS_OK);
+    /* for(k=0; k < vs_vector_size(&localmotions); k++){ */
+    /*   localmotion_print(LMGet(&localmotions,k),stderr); */
+    /* } */
+    t = vsSimpleMotionsToTransform(td.fiSrc, td.conf.modName, &localmotions);
+
+    vs_vector_del(&localmotions);
+    fprintf(stderr,"%i: ",i);
+    storeVSTransform(stderr,&t);
+    VSTransform orig = mult_transform_(getTestFrameTransform(i),-1.0); // expected value: test-frame transform scaled by -1
+    VSTransform diff = sub_transforms(&t,&orig);
+    int success = fabs(diff.x)<2 && fabs(diff.y)<2 && fabs(diff.alpha)<0.005;
+    if(!success){
+      fprintf(stderr,"Difference: ");
+      storeVSTransform(stderr,&diff);
+    }
+    test_bool(success);
+  }
+  long end = timeOfDayinMS();
+
+  fprintf(stderr,"\n*** elapsed time for %i runs: %i ms ****\n", numruns, (int)(end-start) );
+  vsTransformDataCleanup(&td); // td was initialised above and was never released
+  vsMotionDetectionCleanup(&md);
+}
diff --git a/tests/test_omp.c b/tests/test_omp.c
new file mode 100644
index 0000000..666606e
--- /dev/null
+++ b/tests/test_omp.c
@@ -0,0 +1,39 @@
+#ifdef USE_OMP
+int openmptest(){ // sums sqrt(j) in 10 parallel iterations and returns the elapsed ms
+  long start = timeOfDayinMS(); // long: keeps the full ms timestamp
+  long int sum=0;
+  int i,j;
+
+#pragma omp parallel for shared(sum) private(j)
+  for (i=0; i<10;i++){
+    printf("num theads: %i\n",omp_get_thread_num());
+    long int k=0;
+    for (j=0; j<40000;j++){ // j is private per thread; shared it would be a data race
+      k+=sqrt(j);
+    }
+#pragma omp atomic
+    sum+=k;
+  }
+  long end = timeOfDayinMS();
+  fprintf(stderr, "Sum: %li\n",sum);
+  return (int)(end-start);
+}
+int openmp(){
+  int single_ms, dual_ms; // openmptest timings with one and with two threads
+  fprintf(stderr, "Processors: %i, Max # theads: %i\n", omp_get_num_procs(), omp_get_max_threads());
+
+  omp_set_dynamic( 0 );
+  omp_set_num_threads( 1 );
+  fprintf(stderr,"********** omp speedtest:\n");
+  single_ms = openmptest();
+  fprintf(stderr,"***C    time: %i ms\n",  single_ms);
+  // second pass: the same workload with two threads
+  omp_set_dynamic( 0 );
+  omp_set_num_threads( 2 );
+  dual_ms = openmptest();
+  fprintf(stderr,"***C (2)time: %i ms, Speedup %f\n", dual_ms,
+          (double)single_ms/dual_ms);
+  omp_set_dynamic( 1 );
+  return 1;
+}
+#endif
diff --git a/tests/test_store_restore.c b/tests/test_store_restore.c
new file mode 100644
index 0000000..961f80e
--- /dev/null
+++ b/tests/test_store_restore.c
@@ -0,0 +1,69 @@
+int compare_localmotions(const LocalMotions* lms1, const LocalMotions* lms2){
+  test_bool(vs_vector_size(lms1) == vs_vector_size(lms2));
+  int i; // compare only the common prefix so a size mismatch cannot index past lms2
+  for(i=0; i<vs_vector_size(lms1) && i<vs_vector_size(lms2); i++){
+    test_bool(LMGet(lms1,i)->v.x == LMGet(lms2,i)->v.x);
+    test_bool(LMGet(lms1,i)->v.y == LMGet(lms2,i)->v.y);
+  }
+  return 1;
+}
+
+int test_store_restore(TestData* testdata){ // round-trips local motions through the store/restore and file routines
+  VSMotionDetectConfig mdconf = vsMotionDetectGetDefaultConfig("test_motionDetect");
+  VSMotionDetect md;
+  test_bool(vsMotionDetectInit(&md, &mdconf, &testdata->fi) == VS_OK);
+
+  LocalMotions lms;
+  int i;
+  for(i=0; i<2; i++){
+    test_bool(vsMotionDetection(&md, &lms,&testdata->frames[i])== VS_OK);
+    if (i==0) vs_vector_del(&lms); // keep only the motions of the second frame
+  }
+
+  FILE* f = fopen("lmtest","w");
+  vsStoreLocalmotions(f,&lms);
+  fclose(f);
+  f = fopen("lmtest","r");
+  LocalMotions test = vsRestoreLocalmotions(f);
+  fclose(f);
+  vsStoreLocalmotions(stderr,&test);
+  compare_localmotions(&lms,&test);
+  fprintf(stderr,"\n** LM and LMS OKAY\n");
+
+  f = fopen("lmstest","w");
+  md.frameNum=1;
+  vsPrepareFile(&md,f);
+  vsWriteToFile(&md,f,&lms);
+  md.frameNum=2;
+  vsWriteToFile(&md,f,&test);
+  fclose(f);
+
+  f = fopen("lmstest","r");
+  test_bool(vsReadFileVersion(f)==1);
+  LocalMotions read1;
+  test_bool(vsReadFromFile(f,&read1)==1);
+  compare_localmotions(&lms,&read1);
+  LocalMotions read2;
+  test_bool(vsReadFromFile(f,&read2)==2);
+  compare_localmotions(&test,&read2);
+  fclose(f);
+  fprintf(stderr,"** Reading file stepwise OKAY\n");
+  vs_vector_del(&read1);
+  vs_vector_del(&read2);
+  vs_vector_del(&test);
+  vs_vector_del(&lms);
+
+  f = fopen("lmstest","r");
+  VSManyLocalMotions mlms;
+  test_bool(vsReadLocalMotionsFile(f,&mlms)==VS_OK);
+  test_bool(vs_vector_size(&mlms)==2);
+  fprintf(stderr,"** Entire file routine OKAY\n\n");
+  fclose(f); // this last handle was never closed
+  for(i=0; i< vs_vector_size(&mlms); i++){
+    if(VSMLMGet(&mlms,i))
+      vs_vector_del(VSMLMGet(&mlms,i));
+  }
+  vs_vector_del(&mlms);
+  vsMotionDetectionCleanup(&md); // md was initialised above and was never released
+  return 1;
+}
diff --git a/tests/test_transform.c b/tests/test_transform.c
new file mode 100644
index 0000000..a3e60d5
--- /dev/null
+++ b/tests/test_transform.c
@@ -0,0 +1,159 @@
+
+void testImageStripeYUV(int size, VSFrameInfo* fi, VSFrame* img){ // inits fi/img and fills the three planes with sine/cosine stripes
+  int i,j;
+  vsFrameInfoInit(fi, size, 4, PF_YUV420P);
+  vsFrameAllocate(img,fi);
+  memset(img->data[0],100,sizeof(uint8_t)*fi->width*fi->height);
+  for(j=0; j<fi->height; j++){
+    for(i=0; i<size; i++){
+      img->data[0][i+j*img->linesize[0]]= sin(((double)i)/size/(double)(j+1))*128+128; // j+1: row 0 would divide by zero
+    }
+  }
+  memset(img->data[1],100,sizeof(uint8_t)*(fi->width >> 1) *(fi->height>>1));
+  memset(img->data[2],100,sizeof(uint8_t)*(fi->width >> 1) *(fi->height>>1));
+  for(j=0; j<fi->height/2; j++){
+    for(i=0; i<size/2; i++){
+      img->data[1][i+j*img->linesize[1]]= sin(((double)i)/size/(j+1)*2.0)*128+128; // j+1: same guard for the chroma rows
+      img->data[2][i+j*img->linesize[2]]= cos(((double)i)/size/(j+1)*4.0)*128+128;
+    }
+  }
+}
+
+
+void test_transform_implementation(const TestData* testdata){
+  // Compares transformPlanar_float with transformPlanar for every interpolation type on a stripe image.
+  VSFrameInfo fi;
+  VSFrame src;
+  testImageStripeYUV(128,&fi,&src);
+  VSFrame dest;
+  vsFrameAllocate(&dest,&fi);
+  VSFrame cfinal;
+  vsFrameAllocate(&cfinal,&fi);
+  VSTransformData td;
+  VSTransformConfig conf = vsTransformGetDefaultConfig("test_transform_implementation");
+
+  fprintf(stderr,"--- Validate Interpolations ----\n");
+
+  int it;
+  int i;
+  int sum;
+  VSTransform t = null_transform(); // start from a defined transform; only x and alpha are set below
+  t.x = 10;
+  t.alpha = 2*M_PI/(180.0);
+
+  for(it=VS_Zero; it<=VS_BiCubic; it++){
+    vsFrameCopy(&dest, &src, &fi);
+    conf.interpolType=it;
+    test_bool(vsTransformDataInit(&td, &conf, &fi, &fi) == VS_OK);
+
+    fprintf(stderr,"Transform: %s\n", getInterpolationTypeName(it));
+    test_bool(vsTransformPrepare(&td,&dest,&dest)== VS_OK);
+    test_bool(transformPlanar_float(&td, t)== VS_OK);
+
+    vsFrameCopy(&cfinal,&td.dest,&fi); // keep the float result for comparison
+    vsTransformDataCleanup(&td);
+
+    vsFrameCopy(&dest, &src, &fi);
+    test_bool(vsTransformDataInit(&td, &conf, &fi, &fi) == VS_OK);
+    test_bool(vsTransformPrepare(&td,&dest,&dest)== VS_OK);
+    test_bool(transformPlanar(&td, t)== VS_OK);
+
+    // validate: sum up all plane-0 differences larger than 2
+    sum=0;
+    for(i=0; i<fi.width*fi.height; i++){
+      int diff = cfinal.data[0][i] - td.dest.data[0][i];
+      if(abs(diff)>2){
+        sum+=abs(diff);
+        printf("%i,%i: %i\n", i/fi.width, i%fi.width, diff);
+      }
+    }
+    vsTransformDataCleanup(&td);
+    printf("***Difference: %i\n", sum);
+    test_bool(sum==0);
+  }
+  vsFrameFree(&dest);
+  vsFrameFree(&cfinal);
+  vsFrameFree(&src);
+}
+
+void test_transform_performance(const TestData* testdata){
+  // Times the float and the fixed-point transform per interpolation type and compares their output.
+
+  VSTransformConfig conf = vsTransformGetDefaultConfig("test_transform_performance");
+  fprintf(stderr,"--- Performance of Transforms ----\n");
+  VSFrame dest;
+  VSFrame cfinal;
+  int it;
+  long start; int numruns; // start is long so the ms timestamp is not truncated
+  int timeC, timeCFP; //, timeOrc;
+  vsFrameAllocate(&dest, &testdata->fi);
+  vsFrameAllocate(&cfinal, &testdata->fi);
+  numruns = 5;
+  for(it=VS_Zero; it<=VS_BiCubic; it++){
+    VSTransformData td;
+    int i;
+    //// Float implementation
+    conf.interpolType=it;
+    test_bool(vsTransformDataInit(&td, &conf, &testdata->fi, &testdata->fi) == VS_OK);
+
+    fprintf(stderr,"Transform: %s", getInterpolationTypeName(it));
+    start = timeOfDayinMS();
+    for(i=0; i<numruns; i++){
+      VSTransform t = null_transform();
+      t.x = i*10+10;
+      t.alpha = (i+1)*2*M_PI/(180.0);
+      t.zoom = 0;
+      vsFrameCopy(&dest, &testdata->frames[0], &testdata->fi);
+      test_bool(vsTransformPrepare(&td,&dest,&dest)== VS_OK);
+      test_bool(transformPlanar_float(&td, t)== VS_OK);
+    }
+    timeC = (int)(timeOfDayinMS() - start);
+    fprintf(stderr,"\n***C   elapsed time for %i runs: %i ms ****\n",
+            numruns, timeC );
+
+    if(it==VS_BiLinear){
+      storePGMImage("transformed.pgm", td.dest.data[0], testdata->fi);
+      storePGMImage("transformed_u.pgm", td.dest.data[1], testdata->fi_color);
+      fprintf(stderr,"stored transformed.pgm\n");
+    }
+    vsFrameCopy(&cfinal,&td.dest,&testdata->fi); // keep the float result for comparison
+    vsTransformDataCleanup(&td);
+
+    //// fixed point implementation
+    test_bool(vsTransformDataInit(&td, &conf, &testdata->fi, &testdata->fi) == VS_OK);
+    start = timeOfDayinMS();
+    for(i=0; i<numruns; i++){
+      VSTransform t = null_transform();
+      t.x = i*10+10;
+      t.alpha = (i+1)*2*M_PI/(180.0);
+      t.zoom = 0;
+      vsFrameCopy(&dest, &testdata->frames[0], &testdata->fi);
+      test_bool(vsTransformPrepare(&td,&dest,&dest)== VS_OK);
+      test_bool(transformPlanar(&td, t)== VS_OK);
+    }
+    timeCFP = (int)(timeOfDayinMS() - start);
+    fprintf(stderr,"***FP  elapsed time for %i runs: %i ms ****\n",
+            numruns, timeCFP );
+    if(it==VS_BiLinear){
+      storePGMImage("transformed_FP.pgm", td.dest.data[0], testdata->fi);
+      storePGMImage("transformed_u_FP.pgm", td.dest.data[1], testdata->fi_color);
+      fprintf(stderr,"stored transformed_FP.pgm\n");
+    }
+    fprintf(stderr,"***Speedup %3.2f\n", (double)timeC/timeCFP);
+    // validate: sum up all plane-0 differences larger than 2
+    int sum=0;
+    for(i=0; i<testdata->fi.width*testdata->fi.height; i++){
+      int diff = cfinal.data[0][i] - td.dest.data[0][i];
+      if(abs(diff)>2){
+        sum+=abs(diff);
+        //printf("%i,%i: %i\n", i/fi.width, i%fi.width, diff);
+      }
+    }
+    printf("***Difference: %i\n", sum);
+    vsTransformDataCleanup(&td);
+    test_bool(sum==0);
+  }
+
+  vsFrameFree(&dest);
+  vsFrameFree(&cfinal);
+}
diff --git a/tests/testframework.c b/tests/testframework.c
new file mode 100644
index 0000000..a38851c
--- /dev/null
+++ b/tests/testframework.c
@@ -0,0 +1,69 @@
+#include <string.h>
+#include <sys/time.h>
+
+#include "testframework.h"
+
+// Non-zero once help output was requested. While set, contains() only prints
+// the option descriptions and UNIT() does not run its test.
+int help_mode=0;
+
+// Switches the framework into help mode (sets help_mode to 1).
+void unittest_help_mode(){
+  help_mode=1;
+}
+
+// Searches list[0..len-1] for str. Returns 0 if it is not found and otherwise
+// the index of the next element (a possible argument), i.e. match index + 1.
+// In help mode nothing is searched: "str: descr" is printed and 0 is returned.
+int contains(char **list, int len,  const char *str, const char* descr) {
+  int pos = 0;
+  if(help_mode) {
+    printf("\t%s:\t%s\n",str, descr);
+    return 0;
+  }
+  while(pos < len) {
+    // pos is advanced before returning, so a hit at index k yields k+1
+    if(!strcmp(list[pos++], str))
+      return pos;
+  }
+  return 0;
+}
+
+// Number of unit tests that passed / failed so far (updated by UNIT()).
+int units_success;
+int units_failed;
+
+// Resets both unit-test counters to zero.
+void unittest_init(){
+  units_success=0;
+  units_failed=0;
+}
+
+// Prints the overall summary to stderr (count in red if at least one unit
+// failed, in green otherwise). Returns 1 if no unit failed and 0 otherwise.
+int unittest_summary(){
+  const int total = units_success + units_failed;
+  const char* color = "\033[1;32m";
+  if(units_failed > 0)
+    color = "\033[1;31m";
+
+  fprintf(stderr, "*********** SUMMARY **************\n");
+  fprintf(stderr, "UNIT TESTs succeeded:\t %s%i/%i\033[0m\n",
+          color, units_success, total);
+  return !units_failed;
+}
+
+// Returns the current time of day as reported by gettimeofday(),
+// converted to milliseconds.
+long timeOfDayinMS() {
+  struct timeval now;
+  gettimeofday(&now, NULL);
+  return now.tv_sec*1000 + now.tv_usec/1000;
+}
+
+//// INTERNALS
+// Number of passed / failed checks inside the currently running unit
+// (incremented by test_bool() and test_fails()).
+int tests_success;
+int tests_failed;
+
+// Resets the per-unit check counters; UNIT() calls this before each unit.
+void tests_init(){
+  tests_success=0;
+  tests_failed=0;
+}
+
+
+// Prints how many checks of the current unit succeeded (no trailing newline,
+// UNIT() appends the verdict). Returns 1 if no check failed, else 0.
+int test_summary(){
+  const int checks = tests_success + tests_failed;
+  fprintf(stderr, "Tests checks succeeded: %i/%i", tests_success, checks);
+  return !tests_failed;
+}
+
+// Reports a failed check on stderr and counts it in tests_failed.
+//  __assertion: text of the failed expression
+//  __file, __line: source location of the check
+//  __function: enclosing function name; may be a null pointer when the
+//              compiler provides no __func__ (see ___FUNCTION)
+void test_fails (__const char *__assertion, __const char *__file,
+                 unsigned int __line, __const char *__function){
+  // terminate the report with a newline so the next message starts on its
+  // own line, and never hand a null pointer to %s
+  fprintf(stderr, "%s:%u: Test Failed: %s\n in Function %s\n", __file, __line,
+          __assertion, __function ? __function : "(unknown)");
+  tests_failed++;
+}
diff --git a/tests/testframework.h b/tests/testframework.h
new file mode 100644
index 0000000..6997f6a
--- /dev/null
+++ b/tests/testframework.h
@@ -0,0 +1,55 @@
+#ifndef __TESTFRAMEWORK_H
+#define __TESTFRAMEWORK_H
+
+#include <stdio.h>
+#include <features.h>
+
+int contains(char **list, int len,  const char *str, const char* descr);
+void unittest_init();
+int unittest_summary();
+void unittest_help_mode();
+
+long timeOfDayinMS();
+
+
+// Checks expr: counts a success in tests_success if it is true, otherwise
+// reports the failure (expression text, file, line, function) via test_fails.
+// Both branches have type void so that the conditional is valid ISO C.
+#define test_bool(expr)   \
+  ((expr)                 \
+   ? (void)(tests_success++)     \
+   : test_fails (__STRING(expr), __FILE__, __LINE__, ___FUNCTION))
+
+// Runs one unit test unless help mode is active: resets the per-unit check
+// counters, prints a header with the text of func, evaluates func, prints the
+// check summary and increments units_success or units_failed accordingly.
+// Note: expands to a bare if-statement (no do/while wrapper), so do not use
+// it as the unbraced body of an if that has an else branch.
+#define UNIT(func)                                                               \
+  if(!help_mode){tests_init();                                              \
+   fprintf(stderr,"\033[1;34m*** UNIT TEST %s ***\033[0m\n",__STRING(func));     \
+   (func);                                                                       \
+   fprintf(stderr,"---->\t");                                                     \
+   if(test_summary()){ fprintf(stderr, "\t\t\033[1;32m PASSED\033[0m\n");         \
+     units_success++; }                                                           \
+   else { fprintf(stderr, "\t\t\033[1;31m FAILED\033[0m !!!!!\n");               \
+     units_failed++;  }                                                           \
+   }
+
+// Name of the enclosing function for failure reports: __func__ when compiling
+// as C99 or later, otherwise a null pointer.
+#if defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L
+# define ___FUNCTION  __func__
+#else
+# define ___FUNCTION  ((__const char *) 0)
+#endif
+
+
+// INTERNALS
+extern int units_success;
+extern int units_failed;
+extern int tests_success;
+extern int tests_failed;
+extern int help_mode;
+
+
+void tests_init();
+
+int test_summary();
+
+void test_fails (__const char *__assertion, __const char *__file,
+                 unsigned int __line, __const char *__function);
+
+
+
+#endif
diff --git a/tests/tests.c b/tests/tests.c
new file mode 100644
index 0000000..e6ba398
--- /dev/null
+++ b/tests/tests.c
@@ -0,0 +1,133 @@
+
+#include <string.h>
+#include <stdio.h>
+#include <limits.h>
+#include <math.h>
+#include <features.h>
+
+#ifdef USE_OMP
+#include <omp.h>
+#endif
+
+#include "libvidstab.h"
+// load optimized functions
+#include "motiondetect_internal.h"
+#include "motiondetect_opt.h"
+#include "boxblur.h"
+#include "transformfixedpoint.h"
+#include "transformfloat.h"
+#include "transformtype_operations.h"
+
+#ifndef TESTING
+#error TESTING must be defined
+#endif
+
+#include "testframework.h"
+#include "testutils.h"
+
+#include "generate.c"
+
+#include "test_transform.c"
+#include "test_compareimg.c"
+#include "test_motiondetect.c"
+#include "test_store_restore.c"
+#include "test_contrast.c"
+#include "test_boxblur.c"
+#include "test_omp.c"
+#include "test_gradientoptimizer.c"
+#include "test_localmotion2transform.c"
+
+#define FRAMENUM 5
+
+/**
+ * Test driver: parses the command line, creates the test frames (generated,
+ * or loaded from raw files with --load), optionally stores them as PGM files
+ * (--store) and runs the selected unit tests (--all or individual --testXX).
+ * With -h the known options are listed and no test is executed.
+ * Returns 0 if no unit test failed and 1 otherwise.
+ */
+int main(int argc, char** argv){
+
+  if(contains(argv,argc,"-h", "help")!=0){
+    printf("Usage: %s [--store --load] [--all| --testX ...]\n", argv[0]);
+    unittest_help_mode();
+  }
+
+  unittest_init();
+
+  int all = contains(argv,argc,"--all", "Perform all tests")!=0;
+
+  TestData testdata;
+  // Zero everything first: in help mode no frame is ever allocated, and the
+  // vsFrameFree() calls at the end must not see uninitialized pointers.
+  // NOTE(review): assumes vsFrameFree() accepts an all-zero frame - confirm.
+  memset(&testdata, 0, sizeof(testdata));
+  vsFrameInfoInit(&testdata.fi,1280, 720, PF_YUV420P);
+  vsFrameInfoInit(&testdata.fi_color, 640, 360, PF_GRAY8);
+
+  if(contains(argv,argc,"--load",
+              "Load frames from files from frames/frame001.raw (def: generate)")!=0){
+    FILE* file;
+    char name[128];
+    int i;
+    for(i=0; i<FRAMENUM; i++){
+      vsFrameAllocate(&testdata.frames[i],&testdata.fi);
+      snprintf(name, sizeof(name), "../frames/frame%03i.raw",i+4);
+      fprintf(stderr, "load file %s\n", name);
+      file = fopen(name,"rb");
+      test_bool(file!=0);
+      if(!file)
+        continue; // the failure is already recorded, never read from NULL
+      fprintf(stderr,"read %lu bytes\n",
+              (unsigned long)fread(testdata.frames[i].data[0], 1,
+                                   testdata.fi.width*testdata.fi.height,file));
+      fclose(file);
+    }
+  }else{
+    UNIT(generateFrames(&testdata, FRAMENUM));
+  }
+  if(contains(argv,argc,"--store", "Store frames to files")!=0){
+    storePGMImage("test1.pgm", testdata.frames[0].data[0], testdata.fi);
+    storePGMImage("test2.pgm", testdata.frames[1].data[0], testdata.fi);
+    storePGMImage("test3.pgm", testdata.frames[2].data[0], testdata.fi);
+    storePGMImage("test4.pgm", testdata.frames[3].data[0], testdata.fi);
+    storePGMImage("test5.pgm", testdata.frames[4].data[0], testdata.fi);
+  }
+
+#ifdef USE_OMP
+  openmp();
+#endif
+
+  if(all || contains(argv,argc,"--testTI", "transform_implementation")){
+    UNIT(test_transform_implementation(&testdata));
+  }
+
+  if(all || contains(argv,argc,"--testTP", "transform_performance")){
+    UNIT(test_transform_performance(&testdata));
+  }
+
+  if(all || contains(argv,argc,"--testBB", "boxblur")){
+    UNIT(test_boxblur(&testdata));
+  }
+
+  if(all || contains(argv,argc,"--testCCI", "checkCompareImg")){
+    UNIT(test_checkCompareImg(&testdata));
+  }
+
+  if(all || contains(argv,argc,"--testCIP", "compareImg_performance")){
+    UNIT(test_compareImg_performance(&testdata));
+  }
+
+  if(all || contains(argv,argc,"--testMD", "motionDetect")){
+    UNIT(test_motionDetect(&testdata));
+  }
+
+  if(all || contains(argv,argc,"--testLM", "localmotion2transform")){
+    UNIT(test_localmotion2transform(&testdata));
+  }
+
+  if(all || contains(argv,argc,"--testSR", "store_restore")){
+    UNIT(test_store_restore(&testdata));
+  }
+
+  if(all || contains(argv,argc,"--testCT", "contrastImg")){
+    UNIT(test_contrastImg(&testdata));
+  }
+
+  if(all || contains(argv,argc,"--testGO", "gradient optimizer")){
+    UNIT(test_gradientoptimizer());
+  }
+
+  // free
+  for(int i=0; i<FRAMENUM; i++)
+    vsFrameFree(&testdata.frames[i]);
+
+  // unittest_summary() yields 1 on success; a process reports success with 0
+  return unittest_summary() ? 0 : 1;
+}
diff --git a/tests/testutils.c b/tests/testutils.c
new file mode 100644
index 0000000..2af4a7d
--- /dev/null
+++ b/tests/testutils.c
@@ -0,0 +1,152 @@
+#include <assert.h>
+
+#include "testutils.h"
+#include "libvidstab.h"
+#include "transformtype_operations.h"
+
+/// Fills the rectangle with upper left corner (x,y) and extent sizex x sizey
+/// in buffer with color; buffer rows are fi->width bytes apart.
+/// Nothing is painted unless x,y >= 0, x+sizex < width and y+sizey < height.
+void paintRectangle(unsigned char* buffer, const VSFrameInfo* fi, int x, int y, int sizex, int sizey, unsigned char color){
+  if(x<0 || y<0 || x+sizex >= fi->width || y+sizey >= fi->height)
+    return;
+
+  for(int row=y; row < y+sizey; row++){
+    unsigned char* line = buffer + row*fi->width;
+    for(int col=x; col < x+sizex; col++){
+      line[col] = color;
+    }
+  }
+}
+
+/// Fills buffer[0..length-1] with random values that are low-pass filtered:
+/// each value is an exponential average of the pixels drawn so far.
+/// corr: correlation length of noise (values below 1 are treated as 1)
+void fillArrayWithNoise(unsigned char* buffer, int length, float corr){
+  unsigned char avg=randPixel();
+  const float alpha = 1.0/(corr<1 ? 1 : corr);
+  unsigned char* out = buffer;
+  int remaining = length;
+  while(remaining-- > 0){
+    *out++ = avg;
+    avg = avg * (1.0-alpha) + randPixel()*alpha;
+  }
+}
+
+/// Returns the transform associated with test frame i: a shift of 5*i in x
+/// (negative for even i) and in y (positive if i is a multiple of 3), a
+/// rotation of i degrees (in radians) from frame 3 on, and zoom 0.
+VSTransform getTestFrameTransform(int i){
+  const int signX = (i%2)==0 ? -1 : 1;
+  const int signY = (i%3)==0 ?  1 : -1;
+  const int degrees = i<3 ? 0 : i;
+
+  VSTransform t = null_transform();
+  t.x = signX*i*5;
+  t.y = signY*i*5;
+  t.alpha = degrees*M_PI/(180.0);
+  t.zoom = 0;
+  return t;
+}
+
+/// Reads a run of decimal digits from f and returns its value (0 if there is
+/// no digit). The first non-digit character is pushed back to the stream;
+/// hitting the end of the file is logged as an error.
+static int readNumber (const char* filename, FILE *f)
+{
+  int value = 0;
+  int ch = fgetc(f);
+  while (ch >= '0' && ch <= '9') {
+    value = value*10 + (ch - '0');
+    ch = fgetc(f);
+  }
+  if (ch == EOF)
+    vs_log_error("TEST", "unexpected end of file in '%s'", filename);
+  ungetc (ch,f);
+  return value;
+}
+
+
+/// Skips white space (all characters <= ' ') and '#' comments, which extend
+/// to the end of the line. The first other character is pushed back to the
+/// stream. Reaching the end of the file is logged as an error and ends the
+/// scan (instead of looping forever on EOF).
+static void skipWhiteSpace (const char* filename, FILE *f)
+{
+  int c;
+  for(;;) {
+    c = fgetc(f);
+
+    // skip comments: consume everything up to the end of the line
+    if (c == '#') {
+      do {
+        c = fgetc(f);
+      } while (c != '\n' && c != EOF);
+    }
+
+    if (c == EOF) {
+      vs_log_error("TEST", "unexpected end of file in '%s'", filename);
+      return;
+    }
+
+    if (c > ' ') {
+      ungetc (c,f);
+      return;
+    }
+  }
+}
+
+/// Loads a binary (P5) PGM image with color range 255 from filename.
+/// Width and height are stored in fi and frame is allocated for them; the
+/// pixel data is read into frame->data[0].
+/// Returns 1 on success and 0 on any error (which is logged).
+/// NOTE(review): only width and height of fi are written here; fi is
+/// presumably expected to be otherwise initialized by the caller - confirm.
+int loadPGMImage(const char* filename, VSFrame* frame, VSFrameInfo* fi)
+{
+  FILE *f = fopen (filename,"rb");
+  if (!f) {
+    vs_log_error("TEST", "Can't open image file '%s'", filename);
+    return 0;
+  }
+
+  // read in header: the pixels are read as raw bytes below, so the magic
+  // number must be P5 (this is also what storePGMImage writes)
+  if (fgetc(f) != 'P' || fgetc(f) != '5') {
+    vs_log_error("TEST","image file ist not binary PGM (no P5 header) '%s'", filename);
+    fclose (f);
+    return 0;
+  }
+  skipWhiteSpace (filename,f);
+
+  // read in image parameters
+  fi->width = readNumber (filename,f);
+  skipWhiteSpace (filename,f);
+  fi->height = readNumber (filename,f);
+  skipWhiteSpace (filename,f);
+  int max_value = readNumber (filename,f);
+
+  // check values
+  if (fi->width < 1 || fi->height < 1) {
+    vs_log_error("TEST", "bad image file '%s'", filename);
+    fclose (f);
+    return 0;
+  }
+  if (max_value != 255) {
+    vs_log_error("TEST", "image file '%s' must have color range 255", filename);
+    fclose (f);
+    return 0;
+  }
+
+  // read either nothing, LF (10), or CR,LF (13,10)
+  int c = fgetc(f);
+  if (c == 13) {
+    // CR, optionally followed by LF
+    c = fgetc(f);
+    if (c != 10) ungetc (c,f);
+  }
+  else if (c != 10) {
+    // neither LF nor CR: this character already belongs to the data
+    ungetc (c,f);
+  }
+
+  // read in rest of data
+  vsFrameAllocate(frame,fi);
+  if (fread( frame->data[0], fi->width*fi->height, 1, f) != 1){
+    vs_log_error("TEST", "Can't read data from image file '%s'", filename);
+    fclose (f);
+    return 0;
+  }
+  fclose (f);
+  return 1;
+}
+
+
+/// Writes the fi.width x fi.height bytes at data as a binary (P5) PGM image
+/// with color range 255 to filename.
+/// Returns 1 on success and 0 on error (which is logged).
+int storePGMImage(const char* filename, const uint8_t* data, VSFrameInfo fi ) {
+  FILE *f = fopen (filename,"wb");
+  if (!f) {
+    vs_log_error("TEST", "Can't open image file '%s'",  filename);
+    return 0;
+  }
+
+  // write header
+  fprintf(f,"P5\n");
+  fprintf(f,"# CREATOR test suite of vid.stab\n");
+  fprintf(f,"%i %i\n", fi.width, fi.height);
+  fprintf(f,"255\n");
+
+  // write data
+  if (fwrite( data, fi.width*fi.height, 1, f) != 1){
+    vs_log_error("TEST", "Can't write to image file '%s'", filename);
+    fclose (f); // do not leak the stream on failure
+    return 0;
+  }
+  fclose (f);
+  return 1;
+}
+
diff --git a/tests/testutils.h b/tests/testutils.h
new file mode 100644
index 0000000..c7429a1
--- /dev/null
+++ b/tests/testutils.h
@@ -0,0 +1,33 @@
+#ifndef __TESTUTILS_H
+#define __TESTUTILS_H
+
+#include "libvidstab.h"
+
+/// Data shared by all unit tests.
+typedef struct _test_data {
+  VSFrameInfo fi;        // frame info used for the test frames
+  VSFrameInfo fi_color;  // frame info used when storing a chroma plane
+  VSFrame frames[5];     // the test frames (tests.c uses FRAMENUM = 5)
+} TestData;
+
+
+VSTransform getTestFrameTransform(int i);
+
+void fillArrayWithNoise(unsigned char* buffer, int length, float corr);
+
+void paintRectangle(unsigned char* buffer, const VSFrameInfo* fi, int x, int y,
+                    int sizex, int sizey, unsigned char color);
+
+/// Returns a pseudo-random pixel value in [0,255] drawn with rand().
+inline static unsigned char randPixel(){
+  const int value = rand() % 256;
+  return (unsigned char)value;
+}
+
+/// Returns a pseudo-random number in [0,max-1] drawn with rand();
+/// max must not be 0.
+inline static short randUpTo(short max){
+  const int value = rand() % max;
+  return (short)value;
+}
+
+
+int loadPGMImage(const char* filename, VSFrame* frame, VSFrameInfo* fi);
+
+int storePGMImage(const char* filename, const uint8_t* data, VSFrameInfo fi );
+
+#endif
diff --git a/transcode/CMakeLists.txt b/transcode/CMakeLists.txt
new file mode 100644
index 0000000..1d1335f
--- /dev/null
+++ b/transcode/CMakeLists.txt
@@ -0,0 +1,64 @@
+cmake_minimum_required (VERSION 2.6)
+project (vid.stab.transcode)
+
+# Default to the build type "None" (no flags added by CMake), but respect a
+# build type selected by the user.
+if(NOT CMAKE_BUILD_TYPE)
+  set(CMAKE_BUILD_TYPE None)
+endif()
+
+set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_CURRENT_SOURCE_DIR}/../CMakeModules/")
+
+# provides SSE2_FOUND (same detection as used by the top-level CMakeLists.txt)
+include (FindSSE)
+
+# set your transcode path here (or pass -DTRANSCODE_ROOT=<path> to cmake)!
+set(TRANSCODE_ROOT ../../transcode CACHE PATH "root of the transcode source tree")
+
+# NOTE(review): this switch only links the plugins against gomp (see the end
+# of this file); no OpenMP compile flags are added here.
+option(USE_OMP "link the plugins against the OpenMP runtime (gomp)" OFF)
+
+add_definitions( -Wall -O3 -Wno-pointer-sign -DTRANSCODE -std=gnu99)
+#add_definitions(  -Wall -O0 -g -Wno-pointer-sign )
+# I tried it with 0.4.14
+#  0.4.10 did not work (not all opcode implemented)
+# find_package(Orc)  // it actually not used by any active code
+if(ORC_FOUND)
+  add_definitions( -DUSE_ORC ${ORC_DEFINITIONS} )
+  include_directories( ${ORC_INCLUDE_DIRS} )
+else()
+  add_definitions( -DDISABLE_ORC )
+endif()
+
+# only enable the SSE2 code paths where SSE2 is actually available
+# our  -DUSE_SSE2_ASM code does not work with fpic
+if(SSE2_FOUND)
+  add_definitions( -DUSE_SSE2 -msse2 -ffast-math )
+endif()
+
+# Make sure the compiler can find include files from transcode
+include_directories (../src ${TRANSCODE_ROOT}/src ${TRANSCODE_ROOT}/ )
+
+# Create the stabilize and transform libraries
+add_library (filter_transform SHARED filter_transform.c
+  ../src/transformtype.c ../src/libvidstab.c ../src/transform.c
+  ../src/transformfixedpoint.c  ../src/vsvector.c ../src/serialize.c ../src/frameinfo.c
+  ../src/localmotion2transform.c) #  orc/transformorc.c)
+add_library (filter_stabilize SHARED filter_stabilize.c
+  ../src/transformtype.c ../src/libvidstab.c ../src/motiondetect.c
+  ../src/orc/motiondetectorc.c ../src/motiondetect_opt.c ../src/localmotion2transform.c
+  ../src/boxblur.c ../src/vsvector.c ../src/serialize.c  ../src/frameinfo.c)
+add_library (filter_deshake SHARED filter_deshake.c
+  ../src/transformtype.c ../src/libvidstab.c ../src/motiondetect.c
+  ../src/orc/motiondetectorc.c ../src/boxblur.c ../src/transform.c ../src/motiondetect_opt.c
+  ../src/transformfixedpoint.c ../src/vsvector.c ../src/serialize.c ../src/frameinfo.c
+  ../src/localmotion2transform.c)
+
+
+# transcode loads the plugins as filter_*.so, i.e. without the "lib" prefix
+set_target_properties(filter_stabilize PROPERTIES  PREFIX "" )
+set_target_properties(filter_transform PROPERTIES  PREFIX "" )
+set_target_properties(filter_deshake   PROPERTIES  PREFIX "" )
+
+target_link_libraries(filter_stabilize m )
+target_link_libraries(filter_transform m )
+target_link_libraries(filter_deshake   m )
+
+if(ORC_FOUND)
+  target_link_libraries(filter_stabilize ${ORC_LIBRARIES} )
+  target_link_libraries(filter_transform ${ORC_LIBRARIES} )
+  target_link_libraries(filter_deshake   ${ORC_LIBRARIES} )
+endif()
+if(USE_OMP)
+  target_link_libraries(filter_stabilize gomp )
+  target_link_libraries(filter_transform gomp )
+  target_link_libraries(filter_deshake   gomp )
+endif()
diff --git a/transcode/cmake/.gitignore b/transcode/cmake/.gitignore
new file mode 100644
index 0000000..4114d32
--- /dev/null
+++ b/transcode/cmake/.gitignore
@@ -0,0 +1,7 @@
+CMakeCache.txt
+cmake_install.cmake
+filter_deshake.so
+filter_stabilize.so
+filter_transform.so
+Makefile
+CMakeFiles/*
diff --git a/transcode/cmake/install.sh b/transcode/cmake/install.sh
new file mode 100755
index 0000000..1543542
--- /dev/null
+++ b/transcode/cmake/install.sh
@@ -0,0 +1,14 @@
+#!/bin/bash
+
+PATHS="/usr/lib/transcode /usr/local/lib/transcode $HOME/lib/transcode"
+for D in $PATHS; do
+    if [ -d "$D" ]; then
+        echo -n "Do you want to install the plugins to $D ? [y/N] ";
+        read choice
+        if [ "$choice" = "y" ]; then
+            echo  "Checking PATH variable: "
+            if [ ! -w "$D" ]; then SUDO=sudo; fi
+            ${SUDO} cp --backup=numbered ./filter_stabilize.so ./filter_transform.so ./filter_deshake.so "$D/"
+        fi
+    fi
+done
\ No newline at end of file
diff --git a/transcode/filter_deshake.c b/transcode/filter_deshake.c
new file mode 100644
index 0000000..b49f076
--- /dev/null
+++ b/transcode/filter_deshake.c
@@ -0,0 +1,465 @@
+/*
+ *  filter_deshake.c
+ *
+ *  Copyright (C) Georg Martius - November 2011
+ *   georg dot martius at web dot de
+ *
+ *   This program is free software; you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation; either version 2 of the License, or
+ *   (at your option) any later version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with this program; if not, write to the
+ *   Free Software Foundation, Inc.,
+ *   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+ *
+ */
+
+/* Typical call:
+ *  transcode -V -J deshake=shakiness=5:smoothing=10
+ *         -i inp.mpeg -y xvid,tc_aud -o out.avi
+ *  all parameters are optional
+ */
+
+#include "libvidstab.h"
+
+#define MOD_NAME    "filter_deshake.so"
+#define MOD_VERSION LIBVIDSTAB_VERSION
+#define MOD_CAP     "deshakes a video clip by extracting relative transformations\n\
+    of subsequent frames and transforms the high-frequency away\n\
+    This is a single pass verion of stabilize and transform plugin"
+#define MOD_AUTHOR  "Georg Martius"
+
+
+#define MOD_FEATURES                                    \
+  TC_MODULE_FEATURE_FILTER|TC_MODULE_FEATURE_VIDEO
+#define MOD_FLAGS          \
+  TC_MODULE_FLAG_RECONFIGURABLE | TC_MODULE_FLAG_DELAY
+
+#define DEFAULT_TRANS_FILE_NAME     "transforms.dat"
+
+
+#include <math.h>
+#include <libgen.h>
+
+#include "transcode.h"
+#include "filter.h"
+#include "libtc/libtc.h"
+#include "libtc/optstr.h"
+#include "libtc/tccodecs.h"
+#include "libtc/tcmodule-plugin.h"
+
+#include "transcode_specifics.h"
+
+/* private data structure of this filter */
+typedef struct _deshake_data {
+  VSMotionDetect md;      // motion detection part, set up in deshake_configure
+  VSTransformData td;     // transformation part, set up in deshake_configure
+  VSSlidingAvgTrans avg;  // sliding average passed to vsLowPassTransforms
+
+  double sharpen;     // amount of sharpening
+  vob_t* vob;  // pointer to information structure
+  char* result;  // name of the file the detected motions are written to
+  FILE* f;       // handle of the result file, opened in deshake_configure
+
+  char conf_str[TC_BUF_MIN];  // buffer for the answers of deshake_inspect
+} DeshakeData;
+
+
+/* Help text of the filter; shown for the option 'help' (see deshake_configure
+ * and deshake_inspect). */
+static const char deshake_help[] = ""
+  "Overview:\n"
+  "    Deshakes a video clip. It only uses past information, such that it is less\n"
+  "     powerful than the filters stabilize and transform. \n"
+  "     It also generates a file with relative transform information\n"
+  "     to be used by the transform filter separately.\n"
+  "Options\n"
+  "    'smoothing' number of frames*2 + 1 used for lowpass filtering \n"
+  "                used for stabilizing (def: 10)\n"
+  "    'shakiness'   how shaky is the video and how quick is the camera?\n"
+  "                  1: little (fast) 10: very strong/quick (slow) (def: 4)\n"
+  "    'accuracy'    accuracy of detection process (>=shakiness)\n"
+  "                  1: low (fast) 15: high (slow) (def: 4)\n"
+  "    'stepsize'    stepsize of search process, region around minimum \n"
+  "                  is scanned with 1 pixel resolution (def: 6)\n"
+  "    'algo'        0: brute force (translation only);\n"
+  "                  1: small measurement fields (def)\n"
+  "    'mincontrast' below this contrast a field is discarded (0-1) (def: 0.3)\n"
+  "    'result'      path to the file used to write the transforms\n"
+  "                  (def:inputfile.trf)\n"
+  "    'maxshift'    maximal number of pixels to translate image\n"
+  "                  (def: -1 no limit)\n"
+  "    'maxangle'    maximal angle in rad to rotate image (def: -1 no limit)\n"
+  "    'crop'        0: keep border (def), 1: black background\n"
+  "    'zoom'        percentage to zoom >0: zoom in, <0 zoom out (def: 2)\n"
+  "    'optzoom'     0: nothing, 1: determine optimal zoom (def)\n"
+  "    'interpol'    type of interpolation: 0: no interpolation, \n"
+  "                  1: linear (horizontal), 2: bi-linear (def), \n"
+  "                  3: bi-cubic\n"
+  "    'sharpen'     amount of sharpening: 0: no sharpening (def: 0.8)\n"
+  "                  uses filter unsharp with 5x5 matrix\n"
+  "    'help'        print this help message\n";
+
+/*************************************************************************/
+
+/* Module interface routines and data. */
+
+/*************************************************************************/
+
+/**
+ * deshake_init:  Initialize this instance of the module.  See
+ * tcmodule-data.h for function details.
+ *
+ * Allocates the zero-initialized private data, stores the vob pointer in it
+ * and attaches it to self->userdata.
+ * Returns TC_OK on success and TC_ERROR if the allocation fails or no vob
+ * is available.
+ */
+
+static int deshake_init(TCModuleInstance *self, uint32_t features)
+{
+  DeshakeData* sd = NULL;
+  TC_MODULE_SELF_CHECK(self, "init");
+  TC_MODULE_INIT_CHECK(self, MOD_FEATURES, features);
+
+  setLogFunctions();
+
+  sd = tc_zalloc(sizeof(DeshakeData)); // allocation with zero values
+  if (!sd) {
+    if (verbose > TC_INFO)
+      tc_log_error(MOD_NAME, "init: out of memory!");
+    return TC_ERROR;
+  }
+
+  sd->vob = tc_get_vob();
+  if (!sd->vob) {
+    // not yet attached to self, so nobody else could release it
+    tc_free(sd);
+    return TC_ERROR;
+  }
+
+  /**** Initialise private data structure */
+
+  self->userdata = sd;
+  if (verbose & TC_INFO){
+    tc_log_info(MOD_NAME, "%s %s", MOD_VERSION, MOD_CAP);
+  }
+
+  return TC_OK;
+}
+
+
+/*
+ * deshake_fini:  Clean up after this instance of the module.  See
+ * tcmodule-data.h for function details.
+ *
+ * Releases the private data attached to self and clears the pointer.
+ */
+static int deshake_fini(TCModuleInstance *self)
+{
+  TC_MODULE_SELF_CHECK(self, "fini");
+
+  tc_free(self->userdata);
+  self->userdata = NULL;
+  return TC_OK;
+}
+
+/*
+ * deshake_configure:  Configure this instance of the module.  See
+ * tcmodule-data.h for function details.
+ *
+ * Derives the default result file name from the input file name, reads the
+ * option string into the motion detection and transform configurations,
+ * initializes both parts and opens the result file for writing.
+ * Returns TC_OK on success, TC_ERROR on failure and TC_IMPORT_ERROR after
+ * the help text was printed. The vob argument is not used; the vob stored
+ * by deshake_init is used instead.
+ */
+static int deshake_configure(TCModuleInstance *self,
+           const char *options, vob_t *vob)
+{
+  DeshakeData *sd = NULL;
+  TC_MODULE_SELF_CHECK(self, "configure");
+  char* filenamecopy, *filebasename;
+
+  sd = self->userdata;
+
+  VSMotionDetect* md = &(sd->md);
+  VSTransformData* td = &(sd->td);
+
+  // init VSMotionDetect part
+  VSFrameInfo fi;
+  vsFrameInfoInit(&fi, sd->vob->ex_v_width, sd->vob->ex_v_height,
+                transcode2ourPF(sd->vob->im_v_codec));
+
+  VSMotionDetectConfig  mdconf = vsMotionDetectGetDefaultConfig(MOD_NAME);
+  VSTransformConfig tdconf     = vsTransformGetDefaultConfig(MOD_NAME);
+  tdconf.verbose=verbose;
+
+  sd->result = tc_malloc(TC_BUF_LINE);
+  filenamecopy = tc_strdup(sd->vob->video_in_file);
+  if (!sd->result || !filenamecopy) {
+    tc_log_error(MOD_NAME, "configure: out of memory!");
+    if (filenamecopy)
+      tc_free(filenamecopy);
+    // sd->result (if any) is released by deshake_stop
+    return TC_ERROR;
+  }
+  // basename() may return a pointer into filenamecopy, so the copy has to
+  // stay alive until the result name is built
+  filebasename = basename(filenamecopy);
+  if (strlen(filebasename) < TC_BUF_LINE - 4) {
+    tc_snprintf(sd->result, TC_BUF_LINE, "%s.trf", filebasename);
+  } else {
+    tc_log_warn(MOD_NAME, "input name too long, using default `%s'",
+    DEFAULT_TRANS_FILE_NAME);
+    tc_snprintf(sd->result, TC_BUF_LINE, DEFAULT_TRANS_FILE_NAME);
+  }
+  tc_free(filenamecopy);
+  filenamecopy = NULL;
+
+  // init transform part
+  VSFrameInfo fi_dest;
+  vsFrameInfoInit(&fi_dest, sd->vob->ex_v_width, sd->vob->ex_v_height,
+                transcode2ourPF(sd->vob->im_v_codec));
+
+  if (options != NULL) {
+    // for some reason this plugin is called in the old fashion
+    //  (not with inspect). Anyway we support both ways of getting help.
+    if(optstr_lookup(options, "help")) {
+      // the help text is data, never a format string
+      tc_log_info(MOD_NAME, "%s", deshake_help);
+      return(TC_IMPORT_ERROR);
+    }
+
+    // NOTE(review): "%[^:]" is unbounded; a result name longer than
+    // TC_BUF_LINE-1 would overflow sd->result - consider a width limit.
+    optstr_get(options, "result",     "%[^:]", sd->result);
+    optstr_get(options, "shakiness",  "%d", &mdconf.shakiness);
+    optstr_get(options, "accuracy",   "%d", &mdconf.accuracy);
+    optstr_get(options, "stepsize",   "%d", &mdconf.stepSize);
+    optstr_get(options, "algo",       "%d", &mdconf.algo);
+    optstr_get(options, "mincontrast","%lf",&mdconf.contrastThreshold);
+    mdconf.show = 0;
+
+    optstr_get(options, "maxshift",  "%d", &tdconf.maxShift);
+    optstr_get(options, "maxangle",  "%lf",&tdconf.maxAngle);
+    optstr_get(options, "smoothing", "%d", &tdconf.smoothing);
+    optstr_get(options, "crop"     , "%d", (int*)&tdconf.crop);
+    optstr_get(options, "zoom"     , "%lf",&tdconf.zoom);
+    optstr_get(options, "optzoom"  , "%d", &tdconf.optZoom);
+    optstr_get(options, "interpol" , "%d", (int*)(&tdconf.interpolType));
+    optstr_get(options, "sharpen"  , "%lf",&sd->sharpen);
+    // NOTE(review): these are only forced when an option string is given;
+    // confirm whether they should also apply without options.
+    tdconf.relative=1;
+    tdconf.invert=0;
+  }
+
+  if(vsMotionDetectInit(md, &mdconf, &fi) != VS_OK){
+    tc_log_error(MOD_NAME, "initialization of Motion Detection failed");
+    return TC_ERROR;
+  }
+  // read the configuration back for the log output below
+  vsMotionDetectGetConfig(&mdconf,md);
+
+  if(vsTransformDataInit(td, &tdconf, &fi, &fi_dest) != VS_OK){
+    tc_log_error(MOD_NAME, "initialization of VSTransformData failed");
+    return TC_ERROR;
+  }
+  vsTransformGetConfig(&tdconf, td);
+
+  if (verbose) {
+    tc_log_info(MOD_NAME, "Video Deshake  Settings:");
+    tc_log_info(MOD_NAME, "    smoothing = %d", tdconf.smoothing);
+    tc_log_info(MOD_NAME, "    shakiness = %d", mdconf.shakiness);
+    tc_log_info(MOD_NAME, "     accuracy = %d", mdconf.accuracy);
+    tc_log_info(MOD_NAME, "     stepsize = %d", mdconf.stepSize);
+    tc_log_info(MOD_NAME, "         algo = %d", mdconf.algo);
+    tc_log_info(MOD_NAME, "  mincontrast = %f", mdconf.contrastThreshold);
+    tc_log_info(MOD_NAME, "         show = %d", mdconf.show);
+    tc_log_info(MOD_NAME, "       result = %s", sd->result);
+    tc_log_info(MOD_NAME, "    maxshift  = %d", tdconf.maxShift);
+    tc_log_info(MOD_NAME, "    maxangle  = %f", tdconf.maxAngle);
+    tc_log_info(MOD_NAME, "         crop = %s",
+                tdconf.crop ? "Black" : "Keep");
+    tc_log_info(MOD_NAME, "         zoom = %f", tdconf.zoom);
+    tc_log_info(MOD_NAME, "      optzoom = %s",
+                tdconf.optZoom ? "On" : "Off");
+    tc_log_info(MOD_NAME, "     interpol = %s",
+                getInterpolationTypeName(tdconf.interpolType));
+    tc_log_info(MOD_NAME, "      sharpen = %f", sd->sharpen);
+
+  }
+
+  sd->avg.initialized=0;
+
+  sd->f = fopen(sd->result, "w");
+  if (sd->f == NULL) {
+    tc_log_error(MOD_NAME, "cannot open result file %s!\n", sd->result);
+    return TC_ERROR;
+  }
+
+  return TC_OK;
+}
+
+
+/**
+ * deshake_filter_video: performs the analysis of subsequent frames
+ * See tcmodule-data.h for function details.
+ *
+ * For every frame: detects the local motions, writes them to the result
+ * file, converts them into one transform, low-pass filters that transform
+ * and applies the result to the frame in place (source and destination of
+ * the transformation are the same frame).
+ * Returns TC_OK on success and TC_ERROR if a step fails.
+ */
+
+static int deshake_filter_video(TCModuleInstance *self,
+                                vframe_list_t *frame)
+{
+  DeshakeData *sd = NULL;
+
+  TC_MODULE_SELF_CHECK(self, "filter_video");
+  TC_MODULE_SELF_CHECK(frame, "filter_video");
+
+  sd = self->userdata;
+  VSMotionDetect* md = &(sd->md);
+  VSTransformData* td = &(sd->td);
+  LocalMotions localmotions;
+  VSTransform motion;
+  VSFrame vsFrame;
+  vsFrameFillFromBuffer(&vsFrame,frame->video_buf, &md->fi);
+
+  if(vsMotionDetection(md, &localmotions, &vsFrame)!= VS_OK){
+      tc_log_error(MOD_NAME, "motion detection failed");
+      return TC_ERROR;
+  }
+
+  if(vsWriteToFile(md, sd->f, &localmotions) != VS_OK){
+      // the detected motions are owned by us, release them on this path too
+      vs_vector_del(&localmotions);
+      tc_log_error(MOD_NAME, "cannot write to file!");
+      return TC_ERROR;
+  }
+  motion = vsSimpleMotionsToTransform(td->fiSrc, td->conf.modName, &localmotions);
+  vs_vector_del(&localmotions);
+
+  if(vsTransformPrepare(td, &vsFrame, &vsFrame) != VS_OK){
+      tc_log_error(MOD_NAME, "preparation of the transformation failed");
+      return TC_ERROR;
+  }
+
+  VSTransform t = vsLowPassTransforms(td, &sd->avg, &motion);
+  /* tc_log_info(MOD_NAME, "Trans: det: %f %f %f \n\t\t act: %f %f %f %f", */
+  /*             motion.x, motion.y, motion.alpha, */
+  /*             t.x, t.y, t.alpha, t.zoom); */
+
+  vsDoTransform(td, t);
+
+  vsTransformFinish(td);
+  return TC_OK;
+}
+
+/**
+ * deshake_stop:  Reset this instance of the module.  See tcmodule-data.h
+ * for function details.
+ *
+ * Closes the result file, cleans up the motion detection and transform
+ * parts and releases the result file name.
+ */
+
+static int deshake_stop(TCModuleInstance *self)
+{
+  TC_MODULE_SELF_CHECK(self, "stop");
+  DeshakeData *data = self->userdata;
+
+  // close the file holding the detected motions
+  if (data->f != NULL) {
+    fclose(data->f);
+    data->f = NULL;
+  }
+
+  vsMotionDetectionCleanup(&data->md);
+
+  if (data->result != NULL) {
+    tc_free(data->result);
+    data->result = NULL;
+  }
+
+  vsTransformDataCleanup(&data->td);
+
+  return TC_OK;
+}
+
+/* checks for parameter in function _inspect:
+ * if paramname is found in param, variable is printed with formatstring into
+ * sd->conf_str and *value is pointed at that buffer.
+ * Relies on the names sd, param and value of the calling function. */
+#define CHECKPARAM(paramname, formatstring, variable)   \
+  if (optstr_lookup(param, paramname)) {    \
+    tc_snprintf(sd->conf_str, sizeof(sd->conf_str),  \
+    formatstring, variable);    \
+    *value = sd->conf_str;        \
+  }
+
+/**
+ * deshake_inspect:  Return the value of an option in this instance of
+ * the module.  See tcmodule-data.h for function details.
+ *
+ * Reads the current motion detection and transform configuration and
+ * answers the request in param with a "name=value" string (or the help
+ * text) through *value. Always returns TC_OK.
+ */
+
+static int deshake_inspect(TCModuleInstance *self,
+         const char *param, const char **value)
+{
+  DeshakeData *sd = NULL;
+
+  TC_MODULE_SELF_CHECK(self, "inspect");
+  TC_MODULE_SELF_CHECK(param, "inspect");
+  TC_MODULE_SELF_CHECK(value, "inspect");
+  sd = self->userdata;
+
+  VSMotionDetectConfig mdconf;
+  vsMotionDetectGetConfig(&mdconf,&(sd->md));
+  VSTransformConfig tdconf;
+  vsTransformGetConfig(&tdconf,&sd->td);
+  if (optstr_lookup(param, "help")) {
+    *value = deshake_help;
+  }
+
+  // All answers share the single buffer sd->conf_str, so if several names
+  // match, the last matching check below determines *value.
+  // NOTE(review): if optstr_lookup matches substrings, a query for "optzoom"
+  // would also hit the "zoom" check that follows it - confirm.
+  CHECKPARAM("shakiness","shakiness=%d", mdconf.shakiness);
+  CHECKPARAM("accuracy", "accuracy=%d",  mdconf.accuracy);
+  CHECKPARAM("stepsize", "stepsize=%d",  mdconf.stepSize);
+  CHECKPARAM("algo",     "algo=%d",      mdconf.algo);
+  CHECKPARAM("result",   "result=%s",    sd->result);
+  CHECKPARAM("maxshift", "maxshift=%d",  tdconf.maxShift);
+  CHECKPARAM("maxangle", "maxangle=%f",  tdconf.maxAngle);
+  CHECKPARAM("smoothing","smoothing=%d", tdconf.smoothing);
+  CHECKPARAM("crop",     "crop=%d",      tdconf.crop);
+  CHECKPARAM("optzoom",  "optzoom=%i",   tdconf.optZoom);
+  CHECKPARAM("zoom",     "zoom=%f",      tdconf.zoom);
+  CHECKPARAM("sharpen",  "sharpen=%f",   sd->sharpen);
+
+  return TC_OK;
+}
+
+/* Codecs accepted as input; the list is terminated by TC_CODEC_ERROR. */
+static const TCCodecID deshake_codecs_in[] = {
+  TC_CODEC_YUV420P, TC_CODEC_YUV422P, TC_CODEC_RGB, TC_CODEC_ERROR
+};
+/* Codecs produced as output (same set as the input codecs). */
+static const TCCodecID deshake_codecs_out[] = {
+  TC_CODEC_YUV420P, TC_CODEC_YUV422P, TC_CODEC_RGB, TC_CODEC_ERROR
+};
+TC_MODULE_FILTER_FORMATS(deshake);
+
+TC_MODULE_INFO(deshake);
+
+/* Module class: connects the functions of this file to the module interface. */
+static const TCModuleClass deshake_class = {
+  TC_MODULE_CLASS_HEAD(deshake),
+
+  .init         = deshake_init,
+  .fini         = deshake_fini,
+  .configure    = deshake_configure,
+  .stop         = deshake_stop,
+  .inspect      = deshake_inspect,
+
+  .filter_video = deshake_filter_video,
+};
+
+TC_MODULE_ENTRY_POINT(deshake)
+
+/*************************************************************************/
+
+/* Old filter interface: writes the filter description (name, capabilities,
+ * version, author and the flag strings "VRY4" and "1") into options via
+ * optstr_filter_desc. Always returns TC_OK. */
+static int deshake_get_config(TCModuleInstance *self, char *options)
+{
+  TC_MODULE_SELF_CHECK(self, "get_config");
+
+  optstr_filter_desc(options, MOD_NAME, MOD_CAP, MOD_VERSION,
+         MOD_AUTHOR, "VRY4", "1");
+
+  return TC_OK;
+}
+
+/* Old filter interface: hands video frames tagged for the post-processing
+ * stage (TC_POST_S_PROCESS) to deshake_filter_video; every other frame is
+ * passed through untouched with TC_OK. */
+static int deshake_process(TCModuleInstance *self, frame_list_t *frame)
+{
+  TC_MODULE_SELF_CHECK(self, "process");
+
+  const int isPostVideo =
+    (frame->tag & TC_POST_S_PROCESS) && (frame->tag & TC_VIDEO);
+  if (!isPostVideo)
+    return TC_OK;
+
+  return deshake_filter_video(self, (vframe_list_t *)frame);
+}
+
+/*************************************************************************/
+
+TC_FILTER_OLDINTERFACE(deshake)
+
+/*************************************************************************/
+
+/*
+ * Local variables:
+ *   c-file-style: "stroustrup"
+ *   c-file-offsets: ((case-label . *) (statement-case-intro . *))
+ *   indent-tabs-mode: nil
+ *   c-basic-offset: 2 t
+ * End:
+ *
+ * vim: expandtab shiftwidth=2:
+ */
diff --git a/transcode/filter_stabilize.c b/transcode/filter_stabilize.c
new file mode 100644
index 0000000..264118d
--- /dev/null
+++ b/transcode/filter_stabilize.c
@@ -0,0 +1,376 @@
+/*
+ *  filter_stabilize.c
+ *
+ *  Copyright (C) Georg Martius - June 2007
+ *   georg dot martius at web dot de
+ *
+ *  This file is part of transcode, a video stream processing tool
+ *
+ *  transcode is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2, or (at your option)
+ *  any later version.
+ *
+ *  transcode is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with GNU Make; see the file COPYING.  If not, write to
+ *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ */
+
+/* Typical call:
+ *  transcode -V -J stabilize=shakiness=5:show=1,preview
+ *         -i inp.mpeg -y null,null -o dummy
+ *  all parameters are optional
+ */
+
+#include "libvidstab.h"
+
+#define MOD_NAME    "filter_stabilize.so"
+#define MOD_VERSION LIBVIDSTAB_VERSION
+#define MOD_CAP     "extracts relative transformations of \n\
+    subsequent frames (used for stabilization together with the\n\
+    transform filter in a second pass)"
+#define MOD_AUTHOR  "Georg Martius"
+
+/* Ideas:
+   - Try OpenCL/Cuda, this should work great
+   - use smoothing on the frames and then use gradient descent!
+   - stepsize could be adapted (maybe to check only one field with large
+   stepsize and use the maximally required for the other fields)
+*/
+
+#define MOD_FEATURES                                    \
+    TC_MODULE_FEATURE_FILTER|TC_MODULE_FEATURE_VIDEO
+#define MOD_FLAGS                                               \
+    TC_MODULE_FLAG_RECONFIGURABLE | TC_MODULE_FLAG_DELAY
+
+
+#define DEFAULT_TRANS_FILE_NAME     "transforms.dat"
+
+#include <math.h>
+#include <libgen.h>
+
+#include "transcode.h"
+#include "filter.h"
+#include "libtc/libtc.h"
+#include "libtc/optstr.h"
+#include "libtc/tccodecs.h"
+#include "libtc/tcmodule-plugin.h"
+
+#include "transcode_specifics.h"
+
+/* private data structure of this filter */
+typedef struct _stab_data {
+    VSMotionDetect md; // motion detection state (see vsMotionDetectInit)
+    vob_t* vob;  // pointer to information structure
+
+    char* result; // name of the result file; allocated in configure, freed in stop
+    FILE* f;      // result file opened for writing in configure, closed in stop
+
+    char conf_str[TC_BUF_MIN]; // scratch buffer for the answers of inspect
+} StabData;
+
+/*************************************************************************/
+
+/* Module interface routines and data. */
+
+/*************************************************************************/
+
+/**
+ * stabilize_init:  Allocate the private data of this instance and fetch the
+ * vob; returns TC_ERROR on failure.  See tcmodule-data.h for function details.
+ */
+
+static int stabilize_init(TCModuleInstance *self, uint32_t features)
+{
+    StabData* sd = NULL;
+    TC_MODULE_SELF_CHECK(self, "init");
+    TC_MODULE_INIT_CHECK(self, MOD_FEATURES, features);
+
+    setLogFunctions();
+
+    sd = tc_zalloc(sizeof(StabData)); // allocation with zero values
+    if (!sd) {
+        /* always report allocation failure, independent of verbosity */
+        tc_log_error(MOD_NAME, "init: out of memory!");
+        return TC_ERROR;
+    }
+
+    sd->vob = tc_get_vob();
+    if (!sd->vob) {
+        tc_free(sd); /* do not leak the private data on failure */
+        return TC_ERROR;
+    }
+
+    self->userdata = sd;
+    if (verbose & TC_INFO){
+        tc_log_info(MOD_NAME, "%s %s", MOD_VERSION, MOD_CAP);
+    }
+
+    return TC_OK;
+}
+
+
+/*
+ * stabilize_fini:  Release the private data attached to this instance and
+ * clear the userdata pointer.  See tcmodule-data.h for function details.
+ */
+static int stabilize_fini(TCModuleInstance *self)
+{
+    TC_MODULE_SELF_CHECK(self, "fini");
+
+    /* the StabData block itself is all that gets freed here */
+    tc_free(self->userdata);
+    self->userdata = NULL;
+
+    return TC_OK;
+}
+
+/*
+ * stabilize_configure:  Parse the options, initialise the motion detection and
+ * open the result file.  See tcmodule-data.h for function details.
+ */
+static int stabilize_configure(TCModuleInstance *self,
+                               const char *options, vob_t *vob)
+{
+    StabData *sd = NULL;
+    TC_MODULE_SELF_CHECK(self, "configure");
+    char* filenamecopy, *filebasename;
+
+    sd = self->userdata;
+    VSMotionDetect* md = &(sd->md);
+    VSFrameInfo fi;
+    vsFrameInfoInit(&fi, sd->vob->ex_v_width, sd->vob->ex_v_height,
+                  transcode2ourPF(vob->im_v_codec));
+    VSMotionDetectConfig conf = vsMotionDetectGetDefaultConfig(MOD_NAME);
+
+    sd->result = tc_malloc(TC_BUF_LINE);
+    filenamecopy = tc_strdup(sd->vob->video_in_file);
+    if (!sd->result || !filenamecopy) {
+        tc_log_error(MOD_NAME, "configure: out of memory!");
+        if (filenamecopy) tc_free(filenamecopy);
+        return TC_ERROR; /* a non-NULL sd->result is freed by stabilize_stop */
+    }
+    filebasename = basename(filenamecopy);
+    if (strlen(filebasename) < TC_BUF_LINE - 4) {
+        tc_snprintf(sd->result, TC_BUF_LINE, "%s.trf", filebasename);
+    } else {
+        tc_log_warn(MOD_NAME, "input name too long, using default `%s'", DEFAULT_TRANS_FILE_NAME);
+        tc_snprintf(sd->result, TC_BUF_LINE, DEFAULT_TRANS_FILE_NAME);
+    }
+    tc_free(filenamecopy); /* filebasename is not used past this point */
+
+    if (options != NULL) {
+        // for some reason this plugin is called in the old fashion
+        //  (not with inspect). Anyway we support both ways of getting help.
+        if(optstr_lookup(options, "help")) {
+            tc_log_info(MOD_NAME, "%s", vs_motiondetect_help);
+            return(TC_IMPORT_ERROR);
+        }
+
+        optstr_get(options, "result",     "%[^:]", sd->result);
+        optstr_get(options, "shakiness",  "%d", &conf.shakiness);
+        optstr_get(options, "accuracy",   "%d", &conf.accuracy);
+        optstr_get(options, "stepsize",   "%d", &conf.stepSize);
+        optstr_get(options, "algo",       "%d", &conf.algo);
+        optstr_get(options, "mincontrast","%lf",&conf.contrastThreshold);
+        optstr_get(options, "tripod",     "%d", &conf.virtualTripod);
+        optstr_get(options, "show",       "%d", &conf.show);
+    }
+
+    if(vsMotionDetectInit(md, &conf, &fi) != VS_OK){
+        tc_log_error(MOD_NAME, "initialization of Motion Detection failed");
+        return TC_ERROR;
+    }
+    vsMotionDetectGetConfig(&conf,md);
+
+    if (verbose) {
+        tc_log_info(MOD_NAME, "Image Stabilization Settings:");
+        tc_log_info(MOD_NAME, "     shakiness = %d", conf.shakiness);
+        tc_log_info(MOD_NAME, "      accuracy = %d", conf.accuracy);
+        tc_log_info(MOD_NAME, "      stepsize = %d", conf.stepSize);
+        tc_log_info(MOD_NAME, "          algo = %d", conf.algo);
+        tc_log_info(MOD_NAME, "   mincontrast = %f", conf.contrastThreshold);
+        tc_log_info(MOD_NAME, "        tripod = %d", conf.virtualTripod);
+        tc_log_info(MOD_NAME, "          show = %d", conf.show);
+        tc_log_info(MOD_NAME, "        result = %s", sd->result);
+    }
+
+    sd->f = fopen(sd->result, "w");
+    if (sd->f == NULL) {
+        tc_log_error(MOD_NAME, "cannot open result file %s!\n", sd->result);
+        return TC_ERROR;
+    }else{
+        if(vsPrepareFile(md, sd->f) != VS_OK){
+            tc_log_error(MOD_NAME, "cannot write to result file %s", sd->result);
+            return TC_ERROR;
+        }
+    }
+
+    return TC_OK;
+}
+
+
+/**
+ * stabilize_filter_video: runs the motion detection on the given frame and
+ * writes the detected local motions to the result file.
+ * See tcmodule-data.h for function details.
+ */
+static int stabilize_filter_video(TCModuleInstance *self,
+                                  vframe_list_t *frame)
+{
+    StabData *data = NULL;
+    VSMotionDetect *detector = NULL;
+    LocalMotions motions;
+    VSFrame input;
+    int status;
+
+    TC_MODULE_SELF_CHECK(self, "filter_video");
+    TC_MODULE_SELF_CHECK(frame, "filter_video");
+
+    data     = self->userdata;
+    detector = &(data->md);
+    vsFrameFillFromBuffer(&input, frame->video_buf, &detector->fi);
+
+    if (vsMotionDetection(detector, &motions, &input) != VS_OK) {
+        tc_log_error(MOD_NAME, "motion detection failed");
+        return TC_ERROR;
+    }
+
+    /* the motions are released whether or not they could be written */
+    status = (vsWriteToFile(detector, data->f, &motions) != VS_OK) ? TC_ERROR : TC_OK;
+    vs_vector_del(&motions);
+    return status;
+}
+
+/**
+ * stabilize_stop:  Closes the result file, cleans up the motion detection
+ * and frees the result file name.  See tcmodule-data.h for function details.
+ */
+
+static int stabilize_stop(TCModuleInstance *self)
+{
+    StabData *data = NULL;
+
+    TC_MODULE_SELF_CHECK(self, "stop");
+    data = self->userdata;
+
+    if (data->f != NULL) {
+        fclose(data->f);
+        data->f = NULL;
+    }
+    vsMotionDetectionCleanup(&data->md);
+    if (data->result != NULL) {
+        tc_free(data->result);
+        data->result = NULL;
+    }
+    return TC_OK;
+}
+
+/* CHECKPARAM: if optstr_lookup() finds paramname in `param', format variable into sd->conf_str and point *value at it (uses sd, param and value of the calling function) */
+#define CHECKPARAM(paramname, formatstring, variable)   \
+    if (optstr_lookup(param, paramname)) {              \
+        tc_snprintf(sd->conf_str, sizeof(sd->conf_str), \
+                    formatstring, variable);            \
+        *value = sd->conf_str;                          \
+    }
+
+/**
+ * stabilize_inspect:  Report the current value of the requested option as
+ * "name=value" through *value.  See tcmodule-data.h for function details.
+ */
+
+static int stabilize_inspect(TCModuleInstance *self,
+           const char *param, const char **value)
+{
+    StabData *sd = NULL;
+    TC_MODULE_SELF_CHECK(self, "inspect");
+    TC_MODULE_SELF_CHECK(param, "inspect");
+    TC_MODULE_SELF_CHECK(value, "inspect");
+    sd = self->userdata;
+    VSMotionDetect* md = &(sd->md);
+    if (optstr_lookup(param, "help")) {
+        *value = vs_motiondetect_help;
+    }
+    VSMotionDetectConfig conf;
+    vsMotionDetectGetConfig(&conf,md);
+
+    CHECKPARAM("shakiness","shakiness=%d", conf.shakiness);
+    CHECKPARAM("accuracy", "accuracy=%d",  conf.accuracy);
+    CHECKPARAM("stepsize", "stepsize=%d",  conf.stepSize);
+    CHECKPARAM("algo",     "algo=%d",      conf.algo);
+    CHECKPARAM("mincontrast","mincontrast=%f", conf.contrastThreshold); /* accepted by configure, so report it too */
+    CHECKPARAM("tripod",   "tripod=%d",    conf.virtualTripod);
+    CHECKPARAM("show",     "show=%d",      conf.show);
+    CHECKPARAM("result",   "result=%s",    sd->result ? sd->result : ""); /* NULL before configure / after stop */
+    return TC_OK;
+}
+
+static const TCCodecID stabilize_codecs_in[] = { /* NOTE(review): TC_CODEC_ERROR looks like the end-of-list marker - confirm */
+    TC_CODEC_YUV420P, TC_CODEC_YUV422P, TC_CODEC_RGB, TC_CODEC_ERROR
+};
+static const TCCodecID stabilize_codecs_out[] = { /* same three codecs as stabilize_codecs_in */
+    TC_CODEC_YUV420P, TC_CODEC_YUV422P, TC_CODEC_RGB, TC_CODEC_ERROR
+};
+TC_MODULE_FILTER_FORMATS(stabilize);
+
+TC_MODULE_INFO(stabilize);
+
+static const TCModuleClass stabilize_class = { /* callback table of the stabilize module */
+    TC_MODULE_CLASS_HEAD(stabilize),
+    /* instance set-up, tear-down and option handling */
+    .init         = stabilize_init,
+    .fini         = stabilize_fini,
+    .configure    = stabilize_configure,
+    .stop         = stabilize_stop,
+    .inspect      = stabilize_inspect,
+    /* video frame processing */
+    .filter_video = stabilize_filter_video,
+};
+
+TC_MODULE_ENTRY_POINT(stabilize)
+
+/*************************************************************************/
+
+static int stabilize_get_config(TCModuleInstance *self, char *options)
+{
+    TC_MODULE_SELF_CHECK(self, "get_config");
+    /* pass the module name, capability text, version and author to optstr_filter_desc() */
+    optstr_filter_desc(options, MOD_NAME, MOD_CAP, MOD_VERSION,
+                       MOD_AUTHOR, "VRY4", "1");
+
+    return TC_OK;
+}
+
+static int stabilize_process(TCModuleInstance *self, frame_list_t *frame)
+{
+    TC_MODULE_SELF_CHECK(self, "process");
+
+    /* frames lacking the TC_POST_S_PROCESS or the TC_VIDEO tag pass untouched */
+    if (!(frame->tag & TC_POST_S_PROCESS) || !(frame->tag & TC_VIDEO)) {
+        return TC_OK;
+    }
+    return stabilize_filter_video(self, (vframe_list_t *)frame);
+}
+
+/*************************************************************************/
+
+TC_FILTER_OLDINTERFACE(stabilize)
+
+/*************************************************************************/
+
+/*
+ * Local variables:
+ *   c-file-style: "stroustrup"
+ *   c-file-offsets: ((case-label . *) (statement-case-intro . *))
+ *   indent-tabs-mode: nil
+ *   c-basic-offset: 2 t
+ * End:
+ *
+ * vim: expandtab shiftwidth=2:
+ */
diff --git a/transcode/filter_transform.c b/transcode/filter_transform.c
new file mode 100644
index 0000000..b5f8311
--- /dev/null
+++ b/transcode/filter_transform.c
@@ -0,0 +1,389 @@
+/*
+ *  filter_transform.c
+ *
+ *  Copyright (C) Georg Martius - June 2007
+ *   georg dot martius at web dot de
+ *
+ *  This file is part of transcode, a video stream processing tool
+ *
+ *  transcode is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2, or (at your option)
+ *  any later version.
+ *
+ *  transcode is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with GNU Make; see the file COPYING.  If not, write to
+ *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * Typical call:
+ * transcode -J transform -i inp.mpeg -y xdiv,tcaud inp_stab.avi
+*/
+
+#include "libvidstab.h"
+
+#define MOD_NAME    "filter_transform.so"
+#define MOD_VERSION LIBVIDSTAB_VERSION
+#define MOD_CAP     "transforms each frame according to transformations\n\
+ given in an input file (e.g. translation, rotate) see also filter stabilize"
+#define MOD_AUTHOR  "Georg Martius"
+#define MOD_FEATURES \
+    TC_MODULE_FEATURE_FILTER|TC_MODULE_FEATURE_VIDEO
+#define MOD_FLAGS \
+    TC_MODULE_FLAG_RECONFIGURABLE
+
+#include <libgen.h>
+
+#include "transcode.h"
+#include "filter.h"
+#include "libtc/libtc.h"
+#include "libtc/optstr.h"
+#include "libtc/tccodecs.h"
+#include "libtc/tcmodule-plugin.h"
+#include "transcode_specifics.h"
+
+#define DEFAULT_TRANS_FILE_NAME     "transforms.dat"
+
+typedef struct { // private data of one transform filter instance
+    VSTransformData td;  // transformation state set up by vsTransformDataInit
+    vob_t* vob;          // pointer to information structure
+
+    VSTransformations trans; // transformations
+
+
+    double sharpen;     // amount of sharpening
+    char input[TC_BUF_LINE];   // name of the transforms file read in configure
+    char conf_str[TC_BUF_MIN]; // scratch buffer for the answers of inspect
+} FilterData;
+
+/**
+ * transform_init:  Allocates the private data with tc_zalloc and attaches it
+ * to the instance.  See tcmodule-data.h for function details.
+ */
+static int transform_init(TCModuleInstance *self, uint32_t features)
+{
+    FilterData *priv;
+
+    TC_MODULE_SELF_CHECK(self, "init");
+    TC_MODULE_INIT_CHECK(self, MOD_FEATURES, features);
+
+    setLogFunctions();
+
+    priv = tc_zalloc(sizeof(FilterData));
+    if (!priv) {
+        tc_log_error(MOD_NAME, "init: out of memory!");
+        return TC_ERROR;
+    }
+    self->userdata = priv;
+    if (verbose != 0) {
+        tc_log_info(MOD_NAME, "%s %s", MOD_VERSION, MOD_CAP);
+    }
+    return TC_OK;
+}
+
+
+/**
+ * transform_configure:  Parse the options, set up the transform data and load
+ * the transforms from the input file.  See tcmodule-data.h for function details.
+ */
+static int transform_configure(TCModuleInstance *self,
+             const char *options, vob_t *vob)
+{
+    FilterData *fd = NULL;
+    char* filenamecopy, *filebasename;
+    FILE* f;
+    TC_MODULE_SELF_CHECK(self, "configure");
+
+    fd = self->userdata;
+    VSTransformData* td = &(fd->td);
+
+    fd->vob = vob;
+    if (!fd->vob)
+        return TC_ERROR; /* cannot happen */
+
+    /**** Initialise private data structure */
+    VSFrameInfo fi_src, fi_dest;
+    vsFrameInfoInit(&fi_src, fd->vob->ex_v_width, fd->vob->ex_v_height,
+                  transcode2ourPF(fd->vob->im_v_codec));
+    vsFrameInfoInit(&fi_dest, fd->vob->ex_v_width, fd->vob->ex_v_height,
+                  transcode2ourPF(fd->vob->im_v_codec));
+
+    VSTransformConfig conf = vsTransformGetDefaultConfig(MOD_NAME);
+    conf.verbose = verbose;
+    fd->sharpen  = 0.8;
+
+    vsTransformationsInit(&fd->trans);
+
+    filenamecopy = tc_strdup(fd->vob->video_in_file);
+    if (filenamecopy == NULL) {
+        tc_log_error(MOD_NAME, "configure: out of memory!");
+        return TC_ERROR;
+    }
+    filebasename = basename(filenamecopy);
+    if (strlen(filebasename) < TC_BUF_LINE - 4) {
+        tc_snprintf(fd->input, TC_BUF_LINE, "%s.trf", filebasename);
+    } else {
+        tc_log_warn(MOD_NAME, "input name too long, using default `%s'",
+                    DEFAULT_TRANS_FILE_NAME);
+        tc_snprintf(fd->input, TC_BUF_LINE, DEFAULT_TRANS_FILE_NAME);
+    }
+    tc_free(filenamecopy); /* filebasename is not used past this point */
+
+    /* process remaining options */
+    if (options != NULL) {
+        // We support also the help option.
+        if(optstr_lookup(options, "help")) {
+            tc_log_info(MOD_NAME, "%s", vs_transform_help);
+            return(TC_IMPORT_ERROR);
+        }
+        optstr_get(options, "input",  "%[^:]", fd->input);
+        optstr_get(options, "maxshift",  "%d", &conf.maxShift);
+        optstr_get(options, "maxangle", "%lf", &conf.maxAngle);
+        optstr_get(options, "smoothing", "%d", &conf.smoothing);
+        optstr_get(options, "invert"   , "%d", &conf.invert);
+        optstr_get(options, "relative" , "%d", &conf.relative);
+        optstr_get(options, "zoom"     ,"%lf", &conf.zoom);
+        optstr_get(options, "optzoom"  , "%d", &conf.optZoom);
+        optstr_get(options, "zoomspeed", "%lf",&conf.zoomSpeed);
+        optstr_get(options, "interpol" , "%d", (int*)(&conf.interpolType));
+        optstr_get(options, "sharpen"  ,"%lf", &fd->sharpen);
+        if(optstr_lookup(options, "tripod")){
+            tc_log_info(MOD_NAME,"Virtual tripod mode: relative=False, smoothing=0");
+            conf.relative=0;
+            conf.smoothing=0;
+        }
+    }
+
+    if(vsTransformDataInit(td, &conf, &fi_src, &fi_dest) != VS_OK){
+        tc_log_error(MOD_NAME, "initialization of VSTransformData failed");
+        return TC_ERROR;
+    }
+    vsTransformGetConfig(&conf,td);
+
+    if (verbose) {
+        tc_log_info(MOD_NAME, "Image Transformation/Stabilization Settings:");
+        tc_log_info(MOD_NAME, "    input     = %s", fd->input);
+        tc_log_info(MOD_NAME, "    smoothing = %d", conf.smoothing);
+        tc_log_info(MOD_NAME, "    maxshift  = %d", conf.maxShift);
+        tc_log_info(MOD_NAME, "    maxangle  = %f", conf.maxAngle);
+        tc_log_info(MOD_NAME, "    crop      = %s",
+                        conf.crop ? "Black" : "Keep");
+        tc_log_info(MOD_NAME, "    relative  = %s",
+                    conf.relative ? "True": "False");
+        tc_log_info(MOD_NAME, "    invert    = %s",
+                    conf.invert ? "True" : "False");
+        tc_log_info(MOD_NAME, "    zoom      = %f", conf.zoom);
+        tc_log_info(MOD_NAME, "    optzoom   = %d", conf.optZoom);
+        if(conf.optZoom==2){
+            tc_log_info(MOD_NAME, "    zoomspeed = %f", conf.zoomSpeed);
+        }
+        tc_log_info(MOD_NAME, "    interpol  = %s",
+                    getInterpolationTypeName(conf.interpolType));
+        tc_log_info(MOD_NAME, "    sharpen   = %f", fd->sharpen);
+    }
+
+    f = fopen(fd->input, "r");
+    if (f == NULL) {
+        tc_log_error(MOD_NAME, "cannot open input file %s!\n", fd->input);
+        /* return (-1); when called using tcmodinfo this will fail */
+    } else {
+        VSManyLocalMotions mlms;
+        if(vsReadLocalMotionsFile(f,&mlms)==VS_OK){
+            // calculate the actual transforms from the localmotions
+            if(vsLocalmotions2Transforms(td, &mlms,&fd->trans)!=VS_OK)
+                tc_log_error(MOD_NAME, "calculating transformations failed!\n");
+        }else{ // try to read old format
+            if (!vsReadOldTransforms(td, f, &fd->trans)) { /* read input file */
+                tc_log_error(MOD_NAME, "error parsing input file %s!\n", fd->input);
+            }
+        }
+        fclose(f); /* only close a file that was actually opened */
+    }
+
+    if (vsPreprocessTransforms(td, &fd->trans)!= VS_OK ) {
+        tc_log_error(MOD_NAME, "error while preprocessing transforms!");
+        return TC_ERROR;
+    }
+
+    // sharpen is still in transcode...
+    /* Is this the right point to add the filter? Seems to be the case.*/
+    if(fd->sharpen>0){
+        /* load unsharp filter */
+        char unsharp_param[256];
+        tc_snprintf(unsharp_param, sizeof(unsharp_param), "luma=%f:%s:chroma=%f:%s",
+                fd->sharpen, "luma_matrix=5x5",
+                fd->sharpen/2, "chroma_matrix=5x5");
+        if (!tc_filter_add("unsharp", unsharp_param)) {
+            tc_log_warn(MOD_NAME, "cannot load unsharp filter!");
+        }
+    }
+
+    return TC_OK;
+}
+
+
+/**
+ * transform_filter_video: applies the next transformation to the frame, using
+ * its buffer as source and destination.  See tcmodule-data.h for details.
+ */
+static int transform_filter_video(TCModuleInstance *self,
+                                  vframe_list_t *frame)
+{
+    FilterData *priv = NULL;
+    VSTransformData *td = NULL;
+    VSFrame picture;
+
+    TC_MODULE_SELF_CHECK(self, "filter_video");
+    TC_MODULE_SELF_CHECK(frame, "filter_video");
+
+    priv = self->userdata;
+    td   = &priv->td;
+    vsFrameFillFromBuffer(&picture, frame->video_buf,
+                          vsTransformGetSrcFrameInfo(td));
+
+    /* the same frame is handed over as source and as destination */
+    vsTransformPrepare(td, &picture, &picture);
+    vsDoTransform(td, vsGetNextTransform(td, &priv->trans));
+    vsTransformFinish(td);
+    return TC_OK;
+}
+
+
+/**
+ * transform_fini:  Frees the private data of this instance and resets the
+ * userdata pointer.  See tcmodule-data.h for function details.
+ */
+static int transform_fini(TCModuleInstance *self)
+{
+    TC_MODULE_SELF_CHECK(self, "fini");
+
+    tc_free(self->userdata);
+    self->userdata = NULL;
+
+    return TC_OK;
+}
+
+
+/**
+ * transform_stop:  Cleans up the transform data and the transformations of
+ * this instance.  See tcmodule-data.h for function details.
+ */
+static int transform_stop(TCModuleInstance *self)
+{
+    FilterData *priv;
+
+    TC_MODULE_SELF_CHECK(self, "stop");
+    priv = self->userdata;
+    vsTransformDataCleanup(&priv->td);
+    vsTransformationsCleanup(&priv->trans);
+    return TC_OK;
+}
+
+/* CHECKPARAM: if optstr_lookup() finds paramname in `param', format variable into fd->conf_str and point *value at it (uses fd, param and value of the calling function) */
+#define CHECKPARAM(paramname, formatstring, variable)       \
+    if (optstr_lookup(param, paramname)) {                \
+        tc_snprintf(fd->conf_str, sizeof(fd->conf_str),   \
+                    formatstring, variable);              \
+        *value = fd->conf_str;                            \
+    }
+
+/**
+ * transform_inspect:  Report the current value of the requested option as
+ * "name=value" through *value.  See tcmodule-data.h for function details.
+ */
+static int transform_inspect(TCModuleInstance *self,
+                       const char *param, const char **value)
+{
+    FilterData *fd = NULL;
+    TC_MODULE_SELF_CHECK(self,  "inspect");
+    TC_MODULE_SELF_CHECK(param, "inspect");
+    TC_MODULE_SELF_CHECK(value, "inspect");
+
+    fd = self->userdata;
+
+    if (optstr_lookup(param, "help")) {
+        *value = vs_transform_help;
+    }
+    VSTransformConfig conf;
+    vsTransformGetConfig(&conf,&fd->td);
+    CHECKPARAM("maxshift", "maxshift=%d",  conf.maxShift);
+    CHECKPARAM("maxangle", "maxangle=%f",  conf.maxAngle);
+    CHECKPARAM("smoothing","smoothing=%d", conf.smoothing);
+    CHECKPARAM("crop",     "crop=%d",      conf.crop);
+    CHECKPARAM("relative", "relative=%d",  conf.relative);
+    CHECKPARAM("invert",   "invert=%i",    conf.invert);
+    CHECKPARAM("input",    "input=%s",     fd->input);
+    CHECKPARAM("optzoom",  "optzoom=%i",   conf.optZoom);
+    CHECKPARAM("zoom",     "zoom=%f",      conf.zoom);
+    CHECKPARAM("sharpen",  "sharpen=%f",   fd->sharpen);
+
+    return TC_OK;
+}
+
+
+static const TCCodecID transform_codecs_in[] = { /* NOTE(review): TC_CODEC_ERROR looks like the end-of-list marker - confirm */
+    TC_CODEC_YUV420P, TC_CODEC_YUV422P, TC_CODEC_RGB, TC_CODEC_ERROR
+};
+static const TCCodecID transform_codecs_out[] = { /* same three codecs as transform_codecs_in */
+    TC_CODEC_YUV420P, TC_CODEC_YUV422P, TC_CODEC_RGB, TC_CODEC_ERROR
+};
+TC_MODULE_FILTER_FORMATS(transform);
+
+TC_MODULE_INFO(transform);
+
+static const TCModuleClass transform_class = { /* callback table of the transform module */
+    TC_MODULE_CLASS_HEAD(transform),
+    /* instance set-up, tear-down and option handling */
+    .init         = transform_init,
+    .fini         = transform_fini,
+    .configure    = transform_configure,
+    .stop         = transform_stop,
+    .inspect      = transform_inspect,
+    /* video frame processing */
+    .filter_video = transform_filter_video,
+};
+
+TC_MODULE_ENTRY_POINT(transform)
+
+/*************************************************************************/
+
+static int transform_get_config(TCModuleInstance *self, char *options)
+{
+    TC_MODULE_SELF_CHECK(self, "get_config");
+    /* pass the module name, capability text, version and author to optstr_filter_desc() */
+    optstr_filter_desc(options, MOD_NAME, MOD_CAP, MOD_VERSION,
+                       MOD_AUTHOR, "VRY4", "1");
+
+    return TC_OK;
+}
+
+static int transform_process(TCModuleInstance *self, frame_list_t *frame)
+{
+    TC_MODULE_SELF_CHECK(self, "process");
+    /* frames lacking the TC_PRE_S_PROCESS or the TC_VIDEO tag pass untouched */
+    if (!(frame->tag & TC_PRE_S_PROCESS) || !(frame->tag & TC_VIDEO)) {
+        return TC_OK;
+    }
+    return transform_filter_video(self, (vframe_list_t *)frame);
+}
+
+/*************************************************************************/
+
+TC_FILTER_OLDINTERFACE(transform)
+
+/*************************************************************************/
+
+/*
+ * Local variables:
+ *   c-file-style: "stroustrup"
+ *   c-file-offsets: ((case-label . *) (statement-case-intro . *))
+ *   indent-tabs-mode: nil
+ *   c-basic-offset: 4 t
+ * End:
+ *
+ * vim: expandtab shiftwidth=4:
+ */
diff --git a/transcode/transcode_specifics.h b/transcode/transcode_specifics.h
new file mode 100644
index 0000000..60f0ee3
--- /dev/null
+++ b/transcode/transcode_specifics.h
@@ -0,0 +1,63 @@
+/*
+ *  transcode_specifics.h
+ *
+ *  Copyright (C) Georg Martius - February 2013
+ *   georg dot martius at web dot de
+ *
+ *  This file is part of transcode, a video stream processing tool
+ *
+ *  transcode is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2, or (at your option)
+ *  any later version.
+ *
+ *  transcode is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with GNU Make; see the file COPYING.  If not, write to
+ *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+*/
+
+#ifndef __TRANSCODE_SPECIFICS_H
+#define __TRANSCODE_SPECIFICS_H
+
+#include "vidstabdefines.h"
+#include "frameinfo.h"
+#include <transcode.h>
+
+/* Translates a transcode image codec id into the matching VSPixelFormat value */
+static VSPixelFormat transcode2ourPF(int tc_img_codec){
+  if (tc_img_codec == CODEC_RGB)
+    return PF_RGB24;
+  if (tc_img_codec == CODEC_YUV)
+    return PF_YUV420P;
+  if (tc_img_codec == CODEC_YUV422)
+    return PF_YUV422P;
+
+  /* anything else is not handled: complain and report PF_NONE */
+  tc_log_error(MOD_NAME, "cannot deal with img format %i!\n", tc_img_codec);
+  return PF_NONE;
+}
+
+/* Points the vid.stab log hook and its level/status values at transcode's.
+ * static: the definition lives in this header, so every includer gets its own copy. */
+static void setLogFunctions(void){
+  // we cannot map the memory functions because they are macros
+  //  with FILE and LINE expansion in transcode
+  VS_ERROR_TYPE = TC_LOG_ERR;
+  VS_WARN_TYPE  = TC_LOG_WARN;
+  VS_INFO_TYPE  = TC_LOG_INFO;
+  VS_MSG_TYPE   = TC_LOG_MSG;
+
+  // we need the cast because tc_log has first argument TCLogLevel
+  //  which is an enum and not an int
+  vs_log   = (vs_log_t)tc_log;
+  VS_ERROR = TC_ERROR;
+  VS_OK    = TC_OK;
+}
+
+#endif