Projects
Essentials
kvazaar
Sign Up
Log In
Username
Password
We truncated the diff of some files because they were too big. If you want to see the full diff for every file, click here.
Overview
Repositories
Revisions
Requests
Users
Attributes
Meta
Expand all
Collapse all
Changes of Revision 13
View file
kvazaar.changes
Changed
@@ -1,4 +1,70 @@ ------------------------------------------------------------------- +Fri Nov 17 14:01:40 UTC 2017 - aloisio@gmx.com + +- Update to version 1.2.0 + Features: + * Intra prediction mode encryption with + --crypto=intra_pred_modes (2b8ce5e) + * Adaptive QP for 360° video with --erp-aqp (26adef4) + * New selection algorithm for --owf=auto and --threads=auto + (8c4a347) + * Added an option to set the encryption key using --key (2e13091) + * Added an option to limit SAO to band offset or edge offset + only with --sao=band and --sao=edge (8674c0f) + Optimization: + * Reduced number of intra modes checked when using --rd=2 + (2cad317) + * Reduced inter-frame CTU dependencies caused by SAO (050e90d) + * Changed to a faster calculation for coefficient costs when + using --rd=0 (1ead9c0) + Fixes: + * Fixed long motion vectors not getting clipped (#158, 85e2a40) + * Fixed order of pictures in reconstruction debug output when + --gop=8 is used (#101, aae141f) + * Fixed a use-after-free when encoding very few frames with + --gop=8 (#161, 2991962) + * Fixed a crash when video size is not a multiple of the + smallest CU size (2f2405d) + * Fixed invalid bitstream when QP is too large (382636d) + * Fixed a race condition causing a deadlock (5f8e17d) + * Fixed a memory leak in encryption (8654b48) + * Fixed I-frames not being IRAP frames when using GOP (00c9f52, + 841597e) + * Fixed computing inter and intra costs with different metrics + (afc13f1) + * Fixed reliance on undefined behavior (b41f0fa, 924cf85) + * Fixed --mv-constraint=frametilemargin constraining motion + vectors too much (409d211) + * Fixed using --bipred with --tmvp (#160, 9974380) + User Interface: + * Changed type of kvz_config.roi.dqps from uint8_t* to int8_t*. + Delta QP values for --roi may now be negative. (79cb3a2) + * Changed PSNR display format (20d6444) + Building: + * Default to no -Werror. Run configure with --enable-werror to + enable it. 
(033bc6b) + * make check now runs valgrind tests that used to only run on + Travis. Programs ffmpeg, valgrind and TAppDecoderStatic should + be found from $PATH (6bbe5e1) + Refactoring: + * Removed duplicate code in inter MVP and merge candidate + selection (4fb0783) + * Removed duplicate code in intra reconstruction for luma and + chroma (e944416) + * Changed functions for writing the CU tree bitstream to use + luma pixel coordinates (610c91b, f5eef7f) + * Removed duplicate code in functions for writing intra CU + bitstream with and without encryption (525a518) + * Removed duplicate code in helper functions in search.c + (2c73476) + * Gathered function parameters for inter search functions into a + single struct (2fa3d82) + +- Refreshed kvazaar.memset.patch + +- Bumped library version to 4 + +------------------------------------------------------------------- Wed Feb 22 12:34:40 UTC 2017 - scarabeus@opensuse.org - Bit of spec cleanup
View file
kvazaar.spec
Changed
@@ -18,9 +18,9 @@ %define libname libkvazaar -%define libmver 3 +%define libmver 4 Name: kvazaar -Version: 1.1.0 +Version: 1.2.0 Release: 0 Summary: HEVC encoder License: LGPL-2.1
View file
kvazaar.memset.patch
Changed
@@ -1,10 +1,8 @@ -gcc7-7.1.1+r248152-1.2 -[ 112s] rdo.c: In function 'kvz_rdoq': -[ 112s] rdo.c:563:14: error: 'memset' used with length equal to number of elements without multiplication by element size [-Werror=memset-elt-size] -[ 112s] case 16: memset(sig_coeffgroup_flag, 0, 16 * sizeof(sig_coeffgroup_flag[0])); break; ---- a/src/rdo.c -+++ b/src/rdo.c -@@ -555,6 +555,7 @@ void kvz_rdoq(encoder_state_t * const st +Index: kvazaar-1.2.0/src/rdo.c +=================================================================== +--- kvazaar-1.2.0.orig/src/rdo.c ++++ kvazaar-1.2.0/src/rdo.c +@@ -593,6 +593,7 @@ void kvz_rdoq(encoder_state_t * const st uint32_t cg_num = width * height >> 4; @@ -12,8 +10,8 @@ // Explicitly tell the only possible numbers of elements to be zeroed. // Hope the compiler is able to utilize this information. switch (cg_num) { -@@ -564,6 +565,9 @@ void kvz_rdoq(encoder_state_t * const st - case 64: memset(sig_coeffgroup_flag, 0, 64 * sizeof(sig_coeffgroup_flag[0])); break; +@@ -602,6 +603,9 @@ void kvz_rdoq(encoder_state_t * const st + case 64: FILL_ARRAY(sig_coeffgroup_flag, 0, 64); break; default: assert(0 && "There should be 1, 4, 16 or 64 coefficient groups"); } +#else
View file
kvazaar-1.1.0.tar.gz/.travis-install.sh
Deleted
@@ -1,12 +0,0 @@ -#!/bin/sh -set -ev - -if [ -n "$VALGRIND_TEST" ]; then - wget http://ultravideo.cs.tut.fi/ffmpeg-release-32bit-static.tar.xz - 7z x ffmpeg-release-32bit-static.tar.xz - 7z x ffmpeg-release-32bit-static.tar - chmod +x ./ffmpeg-2.6.3-32bit-static/ffmpeg - ./ffmpeg-2.6.3-32bit-static/ffmpeg -f lavfi -i "mandelbrot=size=${TEST_DIM}:end_pts=10" -vframes $TEST_FRAMES -pix_fmt yuv420p mandelbrot_${TEST_DIM}.yuv - wget http://ultravideo.cs.tut.fi/ubuntu-12.04-hmdec-16.10.tgz - tar -xzvf ubuntu-12.04-hmdec-16.10.tgz -fi
View file
kvazaar-1.1.0.tar.gz/.travis-script.sh
Deleted
@@ -1,21 +0,0 @@ -#!/bin/sh -set -ev - -./autogen.sh -./configure $KVZ_CONFIGURE_ARGS -make --jobs=2 V=1 - -if [ -n "$VALGRIND_TEST" ]; then - libtool execute valgrind --leak-check=full --error-exitcode=1 -- \ - src/kvazaar -i mandelbrot_${TEST_DIM}.yuv --input-res=${TEST_DIM} \ - -o test.265 $VALGRIND_TEST - ./hmdec-16.10 -b test.265 -elif [ -n "$EXPECTED_STATUS" ]; then - set +e - libtool execute src/kvazaar $PARAMS - EXIT_STATUS=$? - set -e - [ "$EXIT_STATUS" = "$EXPECTED_STATUS" ] -else - make check -fi
View file
kvazaar-1.1.0.tar.gz/.gitignore -> kvazaar-1.2.0.tar.gz/.gitignore
Changed
@@ -41,6 +41,7 @@ *.la *.lo *.o +*.trs *.log .kdev4
View file
kvazaar-1.2.0.tar.gz/.travis-install.bash
Added
@@ -0,0 +1,25 @@ +#!/bin/bash + +# Download FFmpeg and HM decoder and place them in $PATH. + +set -euvo pipefail + +mkdir -p "${HOME}/bin" + +wget http://ultravideo.cs.tut.fi/ffmpeg-release-32bit-static.tar.xz +sha256sum -c - << EOF +4d3302ba0415e08ca10ca578dcd1f0acc48fadc9b803718283c8c670350c903e ffmpeg-release-32bit-static.tar.xz +EOF +tar xf ffmpeg-release-32bit-static.tar.xz +cp ffmpeg-2.6.3-32bit-static/ffmpeg "${HOME}/bin/ffmpeg" +chmod +x "${HOME}/bin/ffmpeg" + +wget http://ultravideo.cs.tut.fi/ubuntu-12.04-hmdec-16.10.tgz +sha256sum -c - << EOF +e00d61dd031a14aab1a03c0b23df315b8f6ec3fab66a0e2ae2162496153ccf92 ubuntu-12.04-hmdec-16.10.tgz +EOF +tar xf ubuntu-12.04-hmdec-16.10.tgz +cp hmdec-16.10 "${HOME}/bin/TAppDecoderStatic" +chmod +x "${HOME}/bin/TAppDecoderStatic" + +export PATH="${HOME}/bin:${PATH}"
View file
kvazaar-1.1.0.tar.gz/.travis.yml -> kvazaar-1.2.0.tar.gz/.travis.yml
Changed
@@ -1,137 +1,43 @@ language: c -env: - global: - - TEST_DIM=264x130 - - TEST_FRAMES=10 - -# Use container based infrastructure +# Use container based infrastructure. sudo: false -# Use this the global requirements list for valgrind tests, because those are the most numerous. addons: apt: sources: - - ubuntu-toolchain-r-test + - ubuntu-toolchain-r-test packages: - - autoconf - - libtool - - p7zip-full # to uncompress our own ffmpeg binary - - valgrind - - yasm + - autoconf + - gcc-4.8 + - libtool + - valgrind + - yasm matrix: fast_finish: true - + include: - compiler: clang - addons: - apt: - sources: - - ubuntu-toolchain-r-test - packages: - - autoconf - - libtool - - yasm - - compiler: gcc-4.8 - addons: - apt: - sources: - - ubuntu-toolchain-r-test - packages: - - autoconf - - gcc-4.8 - - libtool - - yasm # We have some Mac specific code and Mac sometimes has odd build issues. - os: osx compiler: clang # gcc is actually clang on Travis OS X - - # Check for external symbols without kvz_ prefix. - - compiler: gcc-4.8 + install: true script: - ./autogen.sh - - ./configure && make - - (! nm -go --defined-only src/.libs/libkvazaar.a | grep -v ' kvz_') || (echo 'ERROR Only symbols prefixed with kvz_ should be exported from libkvazaar.'; false) - addons: - apt: - sources: - - ubuntu-toolchain-r-test - packages: - - autoconf - - gcc-4.8 - - libtool - - yasm - - # Tests trying to use invalid input dimensions - - env: EXPECTED_STATUS=1 PARAMS="-i src/kvazaar --input-res=1x65 -o /dev/null" - addons: - apt: - sources: - - ubuntu-toolchain-r-test - packages: - - autoconf - - libtool - - yasm + - ./configure --enable-werror + - make --jobs=2 V=1 - # These valgrind tests are slow, so they are performed with the minimum - # number of small frames and fast settings. - - # Tests for interlace - - env: VALGRIND_TEST="--source-scan-type=tff -p0 --preset=ultrafast --threads=2 --owf=1 --wpp" - - # Tests for owf, wpp and tiles. 
There is lots of separate branches of - # code related to owf=0 and owf!=0, which is why all permutations are - # tried. - - env: VALGRIND_TEST="-p4 -r1 --owf=1 --threads=0 --rd=0 --no-rdoq --no-deblock --no-sao --no-signhide --subme=0 --pu-depth-inter=1-3 --pu-depth-intra=2-3" - - env: VALGRIND_TEST="-p4 -r1 --owf=0 --threads=0 --rd=0 --no-rdoq --no-deblock --no-sao --no-signhide --subme=0 --pu-depth-inter=1-3 --pu-depth-intra=2-3" - - env: VALGRIND_TEST="-p4 -r2 --owf=1 --threads=2 --wpp --rd=0 --no-rdoq --no-deblock --no-sao --no-signhide --subme=0 --pu-depth-inter=1-3 --pu-depth-intra=2-3" - - env: VALGRIND_TEST="-p4 -r2 --owf=0 --threads=2 --rd=0 --no-rdoq --no-deblock --no-sao --no-signhide --subme=0 --pu-depth-inter=1-3 --pu-depth-intra=2-3" - - env: VALGRIND_TEST="-p4 -r2 --owf=1 --threads=2 --tiles-height-split=u2 --no-wpp --rd=0 --no-rdoq --no-deblock --no-sao --no-signhide --subme=0 --pu-depth-inter=1-3 --pu-depth-intra=2-3" - - env: VALGRIND_TEST="-p4 -r2 --owf=0 --threads=2 --tiles-height-split=u2 --no-wpp --rd=0 --no-rdoq --no-deblock --no-sao --no-signhide --subme=0 --pu-depth-inter=1-3 --pu-depth-intra=2-3" - - # Tests for rdoq, sao, deblock and signhide and subme. - - env: VALGRIND_TEST="-p0 -r1 --threads=2 --wpp --owf=1 --rd=0 --no-rdoq --no-deblock --no-sao --no-signhide --subme=1 --pu-depth-intra=2-3" - - env: VALGRIND_TEST="-p0 -r1 --threads=2 --wpp --owf=1 --rd=0 --no-rdoq --no-signhide --subme=0" - - env: VALGRIND_TEST="-p0 -r1 --threads=2 --wpp --owf=1 --rd=0 --no-deblock --no-sao --subme=0" - - # Tests for all-intra. - - env: VALGRIND_TEST="-p1 --threads=2 --owf=1 --rd=1 --no-rdoq --no-deblock --no-sao --no-signhide" - - env: VALGRIND_TEST="-p1 --threads=2 --owf=1 --rd=2 --no-rdoq --no-deblock --no-sao --no-signhide --no-transform-skip" - - # Tests for SMP and AMP blocks. 
- - env: TEST_FRAMES=4 VALGRIND_TEST="--threads=2 --owf=1 --wpp --smp" - - env: TEST_FRAMES=4 VALGRIND_TEST="--threads=2 --owf=1 --wpp --amp" - - env: TEST_FRAMES=4 VALGRIND_TEST="--threads=2 --owf=1 --wpp --smp --amp" - - # Tests for rate control - - env: VALGRIND_TEST="--bitrate=500000 -p0 -r1 --owf=1 --threads=2 --rd=0 --no-rdoq --no-deblock --no-sao --no-signhide --subme=0 --pu-depth-inter=1-3 --pu-depth-intra=2-3" - - # Tests for GOP, with and without OWF. - - env: TEST_FRAMES=20 VALGRIND_TEST="--gop=8 -p0 --threads=2 --wpp --owf=1 --rd=0 --no-rdoq --no-deblock --no-sao --no-signhide --subme=0 --pu-depth-inter=1-3 --pu-depth-intra=2-3" - - env: TEST_FRAMES=10 VALGRIND_TEST="--gop=8 -p0 --threads=2 --wpp --owf=4 --rd=0 --no-rdoq --no-deblock --no-sao --no-signhide --subme=0 --pu-depth-inter=1-3 --pu-depth-intra=2-3" - - env: TEST_FRAMES=20 VALGRIND_TEST="--gop=8 -p0 --threads=2 --wpp --owf=0 --rd=0 --no-rdoq --no-deblock --no-sao --no-signhide --subme=0 --pu-depth-inter=1-3 --pu-depth-intra=2-3" - - # Tests for --mv-constraint - - env: VALGRIND_TEST="--threads=2 --owf=1 --preset=ultrafast --pu-depth-inter=0-3 --mv-constraint=frametilemargin" - - env: VALGRIND_TEST="--threads=2 --owf=1 --preset=ultrafast --subme=4 --mv-constraint=frametilemargin" - - # Tests for --slices - - env: TEST_DIM=512x256 VALGRIND_TEST="--threads=2 --owf=1 --preset=ultrafast --tiles=2x2 --slices=tiles" - - env: VALGRIND_TEST="--threads=2 --owf=1 --preset=ultrafast --slices=wpp" - - # Test weird shapes. 
- - env: TEST_DIM=16x16 VALGRIND_TEST="--threads=2 --owf=1 --preset=veryslow" - - env: TEST_DIM=256x16 VALGRIND_TEST="--threads=2 --owf=1 --preset=veryslow" - - env: TEST_DIM=16x256 VALGRIND_TEST="--threads=2 --owf=1 --preset=veryslow" - -install: - - source .travis-install.sh +install: bash .travis-install.bash script: - - source .travis-script.sh - + - ./autogen.sh + - ./configure --enable-werror + - make --jobs=2 V=1 + - make check VERBOSE=1 + after_script: - - set +e # Disable errors to work around Travis not knowing how to fix their stuff. + # Disable errors to work around Travis not knowing how to fix their stuff. + - set +e
View file
kvazaar-1.1.0.tar.gz/README.md -> kvazaar-1.2.0.tar.gz/README.md
Changed
@@ -100,6 +100,8 @@ delta QP values in raster order. The delta QP map can be any size or aspect ratio, and will be mapped to LCU's. + --(no-)erp-aqp : Use adaptive QP for 360 video with + equirectangular projection Compression tools: --deblock [<beta:tc>] : Deblocking @@ -226,26 +228,26 @@ placebo. The effects of the presets are listed in the following table, where the names have been abbreviated to fit the layout in GitHub. - | 0-uf | 1-sf | 2-vf | 3-fr | 4-f | 5-m | 6-s | 7-sr | 8-vs | 9-p --------------------- | ----- | ----- | ----- | ----- | ----- | ----- | ----- | ----- | ----- | ----- -rd | 0 | 0 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 -pu-depth-intra | 2-3 | 2-3 | 2-3 | 2-3 | 2-3 | 1-3 | 1-3 | 1-3 | 1-4 | 1-4 -pu-depth-inter | 2-3 | 2-3 | 2-3 | 1-3 | 1-3 | 1-3 | 1-3 | 0-3 | 0-3 | 0-3 -me | hexbs | hexbs | hexbs | hexbs | hexbs | hexbs | hexbs | hexbs | hexbs | tz -ref | 1 | 1 | 1 | 1 | 1 | 1 | 2 | 2 | 3 | 4 -deblock | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 -signhide | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 1 | 1 -subme | 0 | 0 | 2 | 2 | 4 | 4 | 4 | 4 | 4 | 4 -sao | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 -rdoq | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 1 | 1 | 1 -rdoq-skip | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 0 -transform-skip | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 -mv-rdo | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 -full-intra-search | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 -smp | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 -amp | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 -cu-split-termination | zero | zero | zero | zero | zero | zero | zero | zero | zero | off -me-early-termination | sens. | sens. | sens. | sens. 
| on | on | on | on | on | off +| | 0-uf | 1-sf | 2-vf | 3-fr | 4-f | 5-m | 6-s | 7-sr | 8-vs | 9-p | +| -------------------- | ----- | ----- | ----- | ----- | ----- | ----- | ----- | ----- | ----- | ----- | +| rd | 0 | 0 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | +| pu-depth-intra | 2-3 | 2-3 | 2-3 | 2-3 | 2-3 | 1-3 | 1-3 | 1-3 | 1-4 | 1-4 | +| pu-depth-inter | 2-3 | 2-3 | 2-3 | 1-3 | 1-3 | 1-3 | 1-3 | 0-3 | 0-3 | 0-3 | +| me | hexbs | hexbs | hexbs | hexbs | hexbs | hexbs | hexbs | hexbs | hexbs | tz | +| ref | 1 | 1 | 1 | 1 | 1 | 1 | 2 | 2 | 3 | 4 | +| deblock | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | +| signhide | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 1 | 1 | +| subme | 0 | 0 | 2 | 2 | 4 | 4 | 4 | 4 | 4 | 4 | +| sao | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | +| rdoq | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 1 | 1 | 1 | +| rdoq-skip | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 0 | +| transform-skip | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| mv-rdo | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| full-intra-search | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | +| smp | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| amp | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | +| cu-split-termination | zero | zero | zero | zero | zero | zero | zero | zero | zero | off | +| me-early-termination | sens. | sens. | sens. | sens. | on | on | on | on | on | off | ## Kvazaar library
View file
kvazaar-1.1.0.tar.gz/build/kvazaar_tests/kvazaar_tests.vcxproj -> kvazaar-1.2.0.tar.gz/build/kvazaar_tests/kvazaar_tests.vcxproj
Changed
@@ -97,6 +97,7 @@ </ProjectReference> </ItemGroup> <ItemGroup> + <ClCompile Include="..\..\tests\coeff_sum_tests.c" /> <ClCompile Include="..\..\tests\dct_tests.c" /> <ClCompile Include="..\..\tests\test_strategies.c" /> <ClCompile Include="..\..\tests\intra_sad_tests.c" />
View file
kvazaar-1.1.0.tar.gz/build/kvazaar_tests/kvazaar_tests.vcxproj.filters -> kvazaar-1.2.0.tar.gz/build/kvazaar_tests/kvazaar_tests.vcxproj.filters
Changed
@@ -39,6 +39,9 @@ <ClCompile Include="..\..\tests\dct_tests.c"> <Filter>Source Files</Filter> </ClCompile> + <ClCompile Include="..\..\tests\coeff_sum_tests.c"> + <Filter>Source Files</Filter> + </ClCompile> </ItemGroup> <ItemGroup> <ClInclude Include="..\..\tests\sad_tests.h">
View file
kvazaar-1.1.0.tar.gz/build/yasm/vsyasm.props -> kvazaar-1.2.0.tar.gz/build/yasm/vsyasm.props
Changed
@@ -9,16 +9,23 @@ <YASMDependsOn Condition="'$(ConfigurationType)' != 'Makefile'">_SelectedFiles;$(YASMDependsOn)</YASMDependsOn> </PropertyGroup> + <!-- Object format name for vsyasm must be in lower case. --> + <PropertyGroup Condition="'$(Platform)' == 'Win32'"> + <YASMFormat>win32</YASMFormat> + </PropertyGroup> + <PropertyGroup Condition="'$(Platform)' == 'x64'"> + <YASMFormat>win64</YASMFormat> + </PropertyGroup> <ItemDefinitionGroup> <YASM> <Debug>False</Debug> <ObjectFile>$(IntDir)</ObjectFile> <PreProc>0</PreProc> <Parser>0</Parser> - <CommandLineTemplate>vsyasm.exe -Xvc -f $(Platform) [AllOptions] [AdditionalOptions] [Inputs]</CommandLineTemplate> + <CommandLineTemplate>vsyasm.exe -Xvc -f $(YASMFormat) [AllOptions] [AdditionalOptions] [Inputs]</CommandLineTemplate> <Outputs>%(ObjectFile)</Outputs> <ExecutionDescription>Assembling %(Filename)%(Extension)</ExecutionDescription> <ShowOnlyRuleProperties>false</ShowOnlyRuleProperties> </YASM> </ItemDefinitionGroup> -</Project> \ No newline at end of file +</Project>
View file
kvazaar-1.1.0.tar.gz/configure.ac -> kvazaar-1.2.0.tar.gz/configure.ac
Changed
@@ -22,8 +22,8 @@ # - Increment when making new releases and major or minor was not changed since last release. # # Here is a somewhat sane guide to lib versioning: http://apr.apache.org/versioning.html -ver_major=3 -ver_minor=15 +ver_major=4 +ver_minor=0 ver_release=0 # Prevents configure from adding a lot of defines to the CFLAGS @@ -32,7 +32,7 @@ AC_CONFIG_MACRO_DIR([m4]) AC_CONFIG_AUX_DIR([build-aux]) -AM_INIT_AUTOMAKE([-Wall -Werror dist-bzip2 dist-xz foreign subdir-objects]) +AM_INIT_AUTOMAKE([-Wall dist-bzip2 dist-xz foreign subdir-objects]) AM_SILENT_RULES([yes]) AC_PROG_CC @@ -56,6 +56,10 @@ KVZ_CFLAGS="-Wall -Wtype-limits -Wvla -I$srcdir/src -I$srcdir/src/extras -ftree-vectorize -fvisibility=hidden" CFLAGS="$KVZ_CFLAGS $CFLAGS" +AC_SEARCH_LIBS([log], [m c], [], [exit 1]) +AC_SEARCH_LIBS([pow], [m c], [], [exit 1]) +AC_SEARCH_LIBS([sqrt], [m c], [], [exit 1]) + AC_ARG_WITH([cryptopp], AS_HELP_STRING([--with-cryptopp], [Build with cryptopp Enables selective encryption.])) @@ -76,21 +80,24 @@ CPPFLAGS="-DKVZ_DLL_EXPORTS $CPPFLAGS" -AC_SEARCH_LIBS([log], [m c], [], [exit 1]) -AC_SEARCH_LIBS([pow], [m c], [], [exit 1]) -AC_SEARCH_LIBS([sqrt], [m c], [], [exit 1]) - +# We need to force AX_PTHREAD to check -pthread -lpthread since otherwise +# it only outputs -pthread for GCC. Without -lpthread GCC does not link the +# shared library against the pthread library (even though it does link the +# executable). +PTHREAD_CFLAGS=-pthread +PTHREAD_LIBS=-lpthread # This does workarounds for pthreads on various compilers. -AX_PTHREAD +AX_PTHREAD([],[AC_MSG_ERROR([POSIX threads not found])]) + CFLAGS="$PTHREAD_CFLAGS $CFLAGS" LIBS="$PTHREAD_LIBS $LIBS" CC="$PTHREAD_CC" -# --disable-werror -AC_ARG_ENABLE([werror], [AS_HELP_STRING([--disable-werror], [don't treat warnings as errors [no]])], - [], [CFLAGS="-Werror $CFLAGS"] +# --enable-werror +AC_ARG_ENABLE([werror], [AS_HELP_STRING([--enable-werror], [treat warnings as errors [no]])], + [CFLAGS="-Werror $CFLAGS"], [] )
View file
kvazaar-1.1.0.tar.gz/doc/kvazaar.1 -> kvazaar-1.2.0.tar.gz/doc/kvazaar.1
Changed
@@ -1,4 +1,4 @@ -.TH KVAZAAR "1" "February 2017" "kvazaar v1.1.0" "User Commands" +.TH KVAZAAR "1" "November 2017" "kvazaar v1.2.0" "User Commands" .SH NAME kvazaar \- open source HEVC encoder .SH SYNOPSIS @@ -131,6 +131,10 @@ delta QP values in raster order. The delta QP map can be any size or aspect ratio, and will be mapped to LCU's. +.TP +\fB\-\-(no\-)erp\-aqp +Use adaptive QP for 360 video with +equirectangular projection .SS "Compression tools:" .TP
View file
kvazaar-1.1.0.tar.gz/src/Makefile.am -> kvazaar-1.2.0.tar.gz/src/Makefile.am
Changed
@@ -29,10 +29,21 @@ cli.c \ yuv_io.c \ yuv_io.h + kvazaar_LDADD = libkvazaar.la $(LIBS) kvazaar_CPPFLAGS = -DKVZ_VERSION="`$(srcdir)/../tools/version.sh`" +if USE_CRYPTOPP +kvazaar_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CXXLD) $(AM_CXXFLAGS) $(CXXFLAGS) \ + $(AM_LDFLAGS) $(LDFLAGS) -o $@ +else +kvazaar_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ + $(AM_LDFLAGS) $(LDFLAGS) -o $@ +endif + libkvazaar_la_SOURCES = \ bitstream.c \ bitstream.h \ @@ -144,15 +155,21 @@ libsse2.la \ libsse41.la +libkvazaar_la_LDFLAGS = $(AM_LDFLAGS) -no-undefined -version-number $(KVZ_API_VERSION) + if USE_CRYPTOPP libkvazaar_la_SOURCES += \ extras/crypto.h \ extras/crypto.cpp +libkvazaar_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CXXLD) $(AM_CXXFLAGS) $(CXXFLAGS) \ + $(libkvazaar_la_LDFLAGS) $(LDFLAGS) -o $@ +else +libkvazaar_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ + $(libkvazaar_la_LDFLAGS) $(LDFLAGS) -o $@ endif -libkvazaar_la_LDFLAGS = $(AM_LDFLAGS) -no-undefined -version-number $(KVZ_API_VERSION) - - libaltivec_la_SOURCES = \ strategies/altivec/picture-altivec.c \ strategies/altivec/picture-altivec.h @@ -170,7 +187,6 @@ strategies/avx2/quant-avx2.h \ strategies/avx2/sao-avx2.c \ strategies/avx2/sao-avx2.h - libsse2_la_SOURCES = \ strategies/sse2/picture-sse2.c \
View file
kvazaar-1.1.0.tar.gz/src/cabac.c -> kvazaar-1.2.0.tar.gz/src/cabac.c
Changed
@@ -297,9 +297,9 @@ //m_pcBinIf->encodeBinsEP(Suffix, r_param); if(r_param==1) { if(!(( base_level ==2 )&& (codeNumber==4 || codeNumber==5) ) ) { - uint32_t key = ff_get_key(&state->tile->dbs_g, 1); - state->tile->m_prev_pos = ( Suffix + ( state->tile->m_prev_pos^key ) ) & 1; - CABAC_BINS_EP(cabac, state->tile->m_prev_pos, 1, "coeff_abs_level_remaining"); + uint32_t key = kvz_crypto_get_key(state->crypto_hdl, 1); + state->crypto_prev_pos = ( Suffix + ( state->crypto_prev_pos^key ) ) & 1; + CABAC_BINS_EP(cabac, state->crypto_prev_pos, 1, "coeff_abs_level_remaining"); //m_pcBinIf->encodeBinsEP(m_prev_pos, 1); } else { CABAC_BINS_EP(cabac, Suffix, 1, "coeff_abs_level_remaining"); @@ -309,65 +309,65 @@ else if(r_param==2) { if( base_level ==1) { - uint32_t key =ff_get_key(&state->tile->dbs_g, 2); - state->tile->m_prev_pos = ( Suffix + ( state->tile->m_prev_pos^key ) ) & 3; - CABAC_BINS_EP(cabac, state->tile->m_prev_pos, 2, "coeff_abs_level_remaining"); + uint32_t key = kvz_crypto_get_key(state->crypto_hdl, 2); + state->crypto_prev_pos = ( Suffix + ( state->crypto_prev_pos^key ) ) & 3; + CABAC_BINS_EP(cabac, state->crypto_prev_pos, 2, "coeff_abs_level_remaining"); //m_pcBinIf->encodeBinsEP(m_prev_pos, 2); } else if( base_level ==2) { if(codeNumber<=7 || codeNumber>=12) { - uint32_t key = ff_get_key(&state->tile->dbs_g, 2); - state->tile->m_prev_pos = ( Suffix + ( state->tile->m_prev_pos^key ) ) & 3; - CABAC_BINS_EP(cabac, state->tile->m_prev_pos, 2, "coeff_abs_level_remaining"); + uint32_t key = kvz_crypto_get_key(state->crypto_hdl, 2); + state->crypto_prev_pos = ( Suffix + ( state->crypto_prev_pos^key ) ) & 3; + CABAC_BINS_EP(cabac, state->crypto_prev_pos, 2, "coeff_abs_level_remaining"); //m_pcBinIf->encodeBinsEP(m_prev_pos, 2); } else if(codeNumber<10) { - uint32_t key = ff_get_key(&state->tile->dbs_g, 1); - state->tile->m_prev_pos = (( (Suffix&1) + ( state->tile->m_prev_pos^key )) & 1); - CABAC_BINS_EP(cabac, state->tile->m_prev_pos, 2, "coeff_abs_level_remaining"); 
+ uint32_t key = kvz_crypto_get_key(state->crypto_hdl, 1); + state->crypto_prev_pos = (( (Suffix&1) + ( state->crypto_prev_pos^key )) & 1); + CABAC_BINS_EP(cabac, state->crypto_prev_pos, 2, "coeff_abs_level_remaining"); //m_pcBinIf->encodeBinsEP(m_prev_pos, 2); } else CABAC_BINS_EP(cabac, Suffix, 2, "coeff_abs_level_remaining"); //m_pcBinIf->encodeBinsEP(Suffix, 2); } else { //base_level=3 if(codeNumber<=7 || codeNumber>11) { - uint32_t key = ff_get_key(&state->tile->dbs_g, 2); - state->tile->m_prev_pos = (Suffix + ( state->tile->m_prev_pos^key ) ) & 3; - CABAC_BINS_EP(cabac, state->tile->m_prev_pos, 2, "coeff_abs_level_remaining"); + uint32_t key = kvz_crypto_get_key(state->crypto_hdl, 2); + state->crypto_prev_pos = (Suffix + ( state->crypto_prev_pos^key ) ) & 3; + CABAC_BINS_EP(cabac, state->crypto_prev_pos, 2, "coeff_abs_level_remaining"); //m_pcBinIf->encodeBinsEP(m_prev_pos, 2); } else { - uint32_t key = ff_get_key(&state->tile->dbs_g, 1); - state->tile->m_prev_pos = ((Suffix&2))+(( (Suffix&1) + ( state->tile->m_prev_pos^key)) & 1); - CABAC_BINS_EP(cabac, state->tile->m_prev_pos, 2, "coeff_abs_level_remaining"); + uint32_t key = kvz_crypto_get_key(state->crypto_hdl, 1); + state->crypto_prev_pos = ((Suffix&2))+(( (Suffix&1) + ( state->crypto_prev_pos^key)) & 1); + CABAC_BINS_EP(cabac, state->crypto_prev_pos, 2, "coeff_abs_level_remaining"); //m_pcBinIf->encodeBinsEP(m_prev_pos, 2); } } } else if(r_param==3) { if( base_level ==1) { - uint32_t key = ff_get_key(&state->tile->dbs_g, 3); - state->tile->m_prev_pos = ( Suffix + ( state->tile->m_prev_pos^key ) ) & 7; - CABAC_BINS_EP(cabac, state->tile->m_prev_pos, 3, "coeff_abs_level_remaining"); + uint32_t key = kvz_crypto_get_key(state->crypto_hdl, 3); + state->crypto_prev_pos = ( Suffix + ( state->crypto_prev_pos^key ) ) & 7; + CABAC_BINS_EP(cabac, state->crypto_prev_pos, 3, "coeff_abs_level_remaining"); //m_pcBinIf->encodeBinsEP(m_prev_pos, 3); } else if( base_level ==2) { if(codeNumber<=15 || codeNumber>23) { - 
uint32_t key = ff_get_key(&state->tile->dbs_g, 3); - state->tile->m_prev_pos = ( Suffix + ( state->tile->m_prev_pos^key ) ) & 7; - CABAC_BINS_EP(cabac, state->tile->m_prev_pos, 3, "coeff_abs_level_remaining"); + uint32_t key = kvz_crypto_get_key(state->crypto_hdl, 3); + state->crypto_prev_pos = ( Suffix + ( state->crypto_prev_pos^key ) ) & 7; + CABAC_BINS_EP(cabac, state->crypto_prev_pos, 3, "coeff_abs_level_remaining"); //m_pcBinIf->encodeBinsEP(m_prev_pos, 3); } else if(codeNumber<=19){ - uint32_t key = ff_get_key(&state->tile->dbs_g, 2); - state->tile->m_prev_pos = ((Suffix&4))+(( (Suffix&3) + (state->tile->m_prev_pos^key )) & 3); - CABAC_BINS_EP(cabac, state->tile->m_prev_pos, 3, "coeff_abs_level_remaining"); + uint32_t key = kvz_crypto_get_key(state->crypto_hdl, 2); + state->crypto_prev_pos = ((Suffix&4))+(( (Suffix&3) + (state->crypto_prev_pos^key )) & 3); + CABAC_BINS_EP(cabac, state->crypto_prev_pos, 3, "coeff_abs_level_remaining"); //m_pcBinIf->encodeBinsEP(m_prev_pos, 3); } else if(codeNumber<=21){ - uint32_t key = ff_get_key(&state->tile->dbs_g, 1); - state->tile->m_prev_pos = 4+(( (Suffix&1) + ( state->tile->m_prev_pos^key )) & 1); - CABAC_BINS_EP(cabac, state->tile->m_prev_pos, 3, "coeff_abs_level_remaining"); + uint32_t key = kvz_crypto_get_key(state->crypto_hdl, 1); + state->crypto_prev_pos = 4+(( (Suffix&1) + ( state->crypto_prev_pos^key )) & 1); + CABAC_BINS_EP(cabac, state->crypto_prev_pos, 3, "coeff_abs_level_remaining"); //m_pcBinIf->encodeBinsEP(m_prev_pos, 3); } else CABAC_BINS_EP(cabac, Suffix, 3, "coeff_abs_level_remaining"); @@ -376,82 +376,82 @@ CABAC_BINS_EP(cabac, Suffix, 3, "coeff_abs_level_remaining"); //m_pcBinIf->encodeBinsEP(Suffix, 3); if(codeNumber<=15 || codeNumber>23) { - uint32_t key = ff_get_key(&state->tile->dbs_g, 3); - state->tile->m_prev_pos = (Suffix + ( state->tile->m_prev_pos^key ) ) & 7; - CABAC_BINS_EP(cabac, state->tile->m_prev_pos, 3, "coeff_abs_level_remaining"); + uint32_t key = 
kvz_crypto_get_key(state->crypto_hdl, 3); + state->crypto_prev_pos = (Suffix + ( state->crypto_prev_pos^key ) ) & 7; + CABAC_BINS_EP(cabac, state->crypto_prev_pos, 3, "coeff_abs_level_remaining"); //m_pcBinIf->encodeBinsEP(m_prev_pos, 3); } else if(codeNumber<=19) { - uint32_t key = ff_get_key(&state->tile->dbs_g, 2); - state->tile->m_prev_pos = (( (Suffix&3) + ( state->tile->m_prev_pos^key )) &3); - CABAC_BINS_EP(cabac, state->tile->m_prev_pos, 3, "coeff_abs_level_remaining"); + uint32_t key = kvz_crypto_get_key(state->crypto_hdl, 2); + state->crypto_prev_pos = (( (Suffix&3) + ( state->crypto_prev_pos^key )) &3); + CABAC_BINS_EP(cabac, state->crypto_prev_pos, 3, "coeff_abs_level_remaining"); //m_pcBinIf->encodeBinsEP(m_prev_pos, 3); } else if(codeNumber<=23) { - uint32_t key = ff_get_key(&state->tile->dbs_g, 1); - state->tile->m_prev_pos = (Suffix&6)+(( (Suffix&1) + (state->tile->m_prev_pos^key )) & 1); - CABAC_BINS_EP(cabac, state->tile->m_prev_pos, 3, "coeff_abs_level_remaining"); + uint32_t key = kvz_crypto_get_key(state->crypto_hdl, 1); + state->crypto_prev_pos = (Suffix&6)+(( (Suffix&1) + (state->crypto_prev_pos^key )) & 1); + CABAC_BINS_EP(cabac, state->crypto_prev_pos, 3, "coeff_abs_level_remaining"); //m_pcBinIf->encodeBinsEP(m_prev_pos, 3); } } } else if(r_param==4) { if( base_level ==1) { - uint32_t key = ff_get_key(&state->tile->dbs_g, 4); - state->tile->m_prev_pos = ( Suffix + ( state->tile->m_prev_pos^key ) ) & 15; - CABAC_BINS_EP(cabac, state->tile->m_prev_pos, 4, "coeff_abs_level_remaining"); + uint32_t key = kvz_crypto_get_key(state->crypto_hdl, 4); + state->crypto_prev_pos = ( Suffix + ( state->crypto_prev_pos^key ) ) & 15; + CABAC_BINS_EP(cabac, state->crypto_prev_pos, 4, "coeff_abs_level_remaining"); //m_pcBinIf->encodeBinsEP(m_prev_pos, 4); } else if( base_level ==2) { if(codeNumber<=31 || codeNumber>47) { - uint32_t key = ff_get_key(&state->tile->dbs_g, 4); - state->tile->m_prev_pos = ( Suffix + ( state->tile->m_prev_pos^key ) ) & 15; - 
CABAC_BINS_EP(cabac, state->tile->m_prev_pos, r_param, "coeff_abs_level_remaining"); + uint32_t key = kvz_crypto_get_key(state->crypto_hdl, 4); + state->crypto_prev_pos = ( Suffix + ( state->crypto_prev_pos^key ) ) & 15; + CABAC_BINS_EP(cabac, state->crypto_prev_pos, r_param, "coeff_abs_level_remaining"); //m_pcBinIf->encodeBinsEP(m_prev_pos, r_param); } else if(codeNumber<=39) { - uint32_t key = ff_get_key(&state->tile->dbs_g, 3); - state->tile->m_prev_pos = (( (Suffix&7) + ( state->tile->m_prev_pos^key )) & 7); - CABAC_BINS_EP(cabac, state->tile->m_prev_pos, 4, "coeff_abs_level_remaining"); + uint32_t key = kvz_crypto_get_key(state->crypto_hdl, 3); + state->crypto_prev_pos = (( (Suffix&7) + ( state->crypto_prev_pos^key )) & 7); + CABAC_BINS_EP(cabac, state->crypto_prev_pos, 4, "coeff_abs_level_remaining"); //m_pcBinIf->encodeBinsEP(m_prev_pos, 4); } else if(codeNumber<=43) { - uint32_t key = ff_get_key(&state->tile->dbs_g, 2); - state->tile->m_prev_pos = 8+(( (Suffix&3) + ( state->tile->m_prev_pos^key )) & 3); - CABAC_BINS_EP(cabac, state->tile->m_prev_pos, 4, "coeff_abs_level_remaining"); + uint32_t key = kvz_crypto_get_key(state->crypto_hdl, 2); + state->crypto_prev_pos = 8+(( (Suffix&3) + ( state->crypto_prev_pos^key )) & 3); + CABAC_BINS_EP(cabac, state->crypto_prev_pos, 4, "coeff_abs_level_remaining"); //m_pcBinIf->encodeBinsEP(m_prev_pos, 4); } else if(codeNumber<=45){ - uint32_t key = ff_get_key(&state->tile->dbs_g, 1); - state->tile->m_prev_pos = 12+(( (Suffix&1) + ( state->tile->m_prev_pos^key )) & 1); - CABAC_BINS_EP(cabac, state->tile->m_prev_pos, 4, "coeff_abs_level_remaining"); + uint32_t key = kvz_crypto_get_key(state->crypto_hdl, 1); + state->crypto_prev_pos = 12+(( (Suffix&1) + ( state->crypto_prev_pos^key )) & 1); + CABAC_BINS_EP(cabac, state->crypto_prev_pos, 4, "coeff_abs_level_remaining"); //m_pcBinIf->encodeBinsEP(m_prev_pos, 4); } else CABAC_BINS_EP(cabac, Suffix, 4, "coeff_abs_level_remaining"); //m_pcBinIf->encodeBinsEP(Suffix, 4); } else 
{//base_level=3 if(codeNumber<=31 || codeNumber>47) { - uint32_t key = ff_get_key(&state->tile->dbs_g, 4); - state->tile->m_prev_pos = (Suffix + ( state->tile->m_prev_pos^key ) ) & 15; - CABAC_BINS_EP(cabac, state->tile->m_prev_pos, r_param, "coeff_abs_level_remaining"); + uint32_t key = kvz_crypto_get_key(state->crypto_hdl, 4); + state->crypto_prev_pos = (Suffix + ( state->crypto_prev_pos^key ) ) & 15; + CABAC_BINS_EP(cabac, state->crypto_prev_pos, r_param, "coeff_abs_level_remaining"); //m_pcBinIf->encodeBinsEP(m_prev_pos, r_param); } else if(codeNumber<=39) { - uint32_t key = ff_get_key(&state->tile->dbs_g, 3); - state->tile->m_prev_pos = (( (Suffix&7) + ( state->tile->m_prev_pos^key )) & 7);
View file
kvazaar-1.1.0.tar.gz/src/cfg.c -> kvazaar-1.2.0.tar.gz/src/cfg.c
Changed
@@ -44,7 +44,7 @@ cfg->deblock_enable = 1; cfg->deblock_beta = 0; cfg->deblock_tc = 0; - cfg->sao_enable = 1; + cfg->sao_type = 3; cfg->rdoq_enable = 1; cfg->rdoq_skip = 1; cfg->signhide_enable = true; @@ -119,8 +119,12 @@ cfg->roi.height = 0; cfg->roi.dqps = NULL; + cfg->erp_aqp = false; + cfg->slices = KVZ_SLICES_NONE; + cfg->optional_key = NULL; + return 1; } @@ -132,6 +136,7 @@ FREE_POINTER(cfg->tiles_height_split); FREE_POINTER(cfg->slice_addresses_in_ts); FREE_POINTER(cfg->roi.dqps); + FREE_POINTER(cfg->optional_key); } free(cfg); @@ -228,6 +233,54 @@ return 1; } +static int parse_uint8(const char *numstr,uint8_t* number,int min, int max) +{ + char *tail; + int d = strtol(numstr, &tail, 10); + if (*tail || d < min || d > max){ + fprintf(stderr, "Expected number between %d and %d\n", min, max); + if(number) + *number = 0; + return 0; + } else{ + if (number) + *number = (uint8_t) d; + return 1; + } +} + +static int parse_array(const char *array, uint8_t *coeff_key, int size, + int min, int max) +{ + char *key = strdup(array); + const char delim[] = ",;:"; + char *token; + int i = 0; + + token = strtok(key, delim); + while(token!=NULL&&i<size){ + if (!parse_uint8(token, &coeff_key[i], min, max)) + { + free(key); + return 0; + } + i++; + token = strtok(NULL, delim); + } + if(i>=size && (token != NULL)){ + fprintf(stderr, "parsing failed : too many members.\n"); + free(key); + return 0; + } + else if (i<size){ + fprintf(stderr, "parsing failed : too few members.\n"); + free(key); + return 0; + } + free(key); + return 1; +} + static int parse_slice_specification(const char* const arg, int32_t * const nslices, int32_t** const array) { const char* current_arg = NULL; int32_t current_value; @@ -309,10 +362,12 @@ static const char * const cu_split_termination_names[] = { "zero", "off", NULL }; static const char * const crypto_toggle_names[] = { "off", "on", NULL }; - static const char * const crypto_feature_names[] = { "mvs", "mv_signs", "trans_coeffs", 
"trans_coeff_signs", NULL }; + static const char * const crypto_feature_names[] = { "mvs", "mv_signs", "trans_coeffs", "trans_coeff_signs", "intra_pred_modes", NULL }; static const char * const me_early_termination_names[] = { "off", "on", "sensitive", NULL }; + static const char * const sao_names[] = { "off", "edge", "band", "full", NULL }; + static const char * const preset_values[11][20*2] = { { "ultrafast", @@ -324,7 +379,7 @@ "deblock", "0:0", "signhide", "0", "subme", "0", - "sao", "0", + "sao", "off", "rdoq", "0", "rdoq-skip", "1", "transform-skip", "0", @@ -347,7 +402,7 @@ "deblock", "0:0", "signhide", "0", "subme", "0", - "sao", "1", + "sao", "full", "rdoq", "0", "rdoq-skip", "1", "transform-skip", "0", @@ -370,7 +425,7 @@ "deblock", "0:0", "signhide", "0", "subme", "2", - "sao", "1", + "sao", "full", "rdoq", "0", "rdoq-skip", "1", "transform-skip", "0", @@ -393,7 +448,7 @@ "deblock", "0:0", "signhide", "0", "subme", "2", - "sao", "1", + "sao", "full", "rdoq", "0", "rdoq-skip", "1", "transform-skip", "0", @@ -416,7 +471,7 @@ "deblock", "0:0", "signhide", "0", "subme", "4", - "sao", "1", + "sao", "full", "rdoq", "0", "rdoq-skip", "1", "transform-skip", "0", @@ -439,7 +494,7 @@ "deblock", "0:0", "signhide", "0", "subme", "4", - "sao", "1", + "sao", "full", "rdoq", "1", "rdoq-skip", "1", "transform-skip", "0", @@ -462,7 +517,7 @@ "deblock", "0:0", "signhide", "1", "subme", "4", - "sao", "1", + "sao", "full", "rdoq", "1", "rdoq-skip", "1", "transform-skip", "0", @@ -485,7 +540,7 @@ "deblock", "0:0", "signhide", "1", "subme", "4", - "sao", "1", + "sao", "full", "rdoq", "1", "rdoq-skip", "1", "transform-skip", "0", @@ -508,7 +563,7 @@ "deblock", "0:0", "signhide", "1", "subme", "4", - "sao", "1", + "sao", "full", "rdoq", "1", "rdoq-skip", "1", "transform-skip", "0", @@ -531,7 +586,7 @@ "deblock", "0:0", "signhide", "1", "subme", "4", - "sao", "1", + "sao", "full", "rdoq", "1", "rdoq-skip", "0", "transform-skip", "1", @@ -599,8 +654,11 @@ cfg->deblock_enable = 
atobool(value); } } - else if OPT("sao") - cfg->sao_enable = atobool(value); + else if OPT("sao") { + int8_t sao_type = 0; + if (!parse_enum(value, sao_names, &sao_type)) sao_type = atobool(value) ? 3 : 0; + cfg->sao_type = sao_type; + }
View file
kvazaar-1.1.0.tar.gz/src/cli.c -> kvazaar-1.2.0.tar.gz/src/cli.c
Changed
@@ -47,7 +47,7 @@ { "input-fps", required_argument, NULL, 0 }, { "deblock", required_argument, NULL, 0 }, { "no-deblock", no_argument, NULL, 0 }, - { "sao", no_argument, NULL, 0 }, + { "sao", optional_argument, NULL, 0 }, { "no-sao", no_argument, NULL, 0 }, { "rdoq", no_argument, NULL, 0 }, { "no-rdoq", no_argument, NULL, 0 }, @@ -107,6 +107,7 @@ { "hash", required_argument, NULL, 0 }, {"cu-split-termination",required_argument, NULL, 0 }, { "crypto", required_argument, NULL, 0 }, + { "key", required_argument, NULL, 0 }, { "me-early-termination",required_argument, NULL, 0 }, { "lossless", no_argument, NULL, 0 }, { "no-lossless", no_argument, NULL, 0 }, @@ -119,6 +120,8 @@ { "implicit-rdpcm", no_argument, NULL, 0 }, { "no-implicit-rdpcm", no_argument, NULL, 0 }, { "roi", required_argument, NULL, 0 }, + { "erp-aqp", no_argument, NULL, 0 }, + { "no-erp-aqp", no_argument, NULL, 0 }, {0, 0, 0, 0} }; @@ -388,6 +391,8 @@ " delta QP values in raster order.\n" " The delta QP map can be any size or aspect\n" " ratio, and will be mapped to LCU's.\n" + " --(no-)erp-aqp : Use adaptive QP for 360 video with\n" + " equirectangular projection\n" "\n" /* Word wrap to this width to stay under 80 characters (including ") ************/ "Compression tools:\n" @@ -497,19 +502,23 @@ void print_frame_info(const kvz_frame_info *const info, const double frame_psnr[3], - const uint32_t bytes) + const uint32_t bytes, + const bool print_psnr) { - fprintf(stderr, "POC %4d QP %2d (%c-frame) %10d bits PSNR: %2.4f %2.4f %2.4f", + fprintf(stderr, "POC %4d QP %2d (%c-frame) %10d bits", info->poc, info->qp, "BPI"[info->slice_type % 3], - bytes << 3, - frame_psnr[0], frame_psnr[1], frame_psnr[2]); + bytes << 3); + if (print_psnr) { + fprintf(stderr, " PSNR Y %2.4f U %2.4f V %2.4f", + frame_psnr[0], frame_psnr[1], frame_psnr[2]); + } if (info->slice_type != KVZ_SLICE_I) { // Print reference picture lists fprintf(stderr, " [L0 "); - for (int j = info->ref_list_len[0] - 1; j >= 0; j--) { + for (int j = 0; 
j < info->ref_list_len[0]; j++) { fprintf(stderr, "%d ", info->ref_list[0][j]); } fprintf(stderr, "] [L1 ");
View file
kvazaar-1.1.0.tar.gz/src/cli.h -> kvazaar-1.2.0.tar.gz/src/cli.h
Changed
@@ -57,6 +57,7 @@ void print_help(void); void print_frame_info(const kvz_frame_info *const info, const double frame_psnr[3], - const uint32_t bytes); + const uint32_t bytes, + const bool print_psnr); #endif
View file
kvazaar-1.1.0.tar.gz/src/cu.c -> kvazaar-1.2.0.tar.gz/src/cu.c
Changed
@@ -78,33 +78,6 @@ }; -#define BLIT_COEFF_CASE(n) case n:\ - for (y = 0; y < n; ++y) {\ - memcpy(&dst[y*dst_stride], &orig[y*orig_stride], n * sizeof(coeff_t));\ - }\ - break; - -void kvz_coefficients_blit(const coeff_t * const orig, coeff_t * const dst, - const unsigned width, const unsigned height, - const unsigned orig_stride, const unsigned dst_stride) -{ - unsigned y; - - int nxn_width = (width == height) ? width : 0; - switch (nxn_width) { - BLIT_COEFF_CASE(4) - BLIT_COEFF_CASE(8) - BLIT_COEFF_CASE(16) - BLIT_COEFF_CASE(32) - BLIT_COEFF_CASE(64) - default: - for (y = 0; y < height; ++y) { - memcpy(&dst[y*dst_stride], &orig[y*orig_stride], width * sizeof(coeff_t)); - } - break; - } -} - cu_info_t* kvz_cu_array_at(cu_array_t *cua, unsigned x_px, unsigned y_px) { return (cu_info_t*) kvz_cu_array_at_const(cua, x_px, y_px); @@ -115,7 +88,7 @@ { assert(x_px < cua->width); assert(y_px < cua->height); - return &(cua)->data[(x_px >> 2) + (y_px >> 2) * ((cua)->width >> 2)]; + return &(cua)->data[(x_px >> 2) + (y_px >> 2) * ((cua)->stride >> 2)]; } @@ -125,82 +98,99 @@ * \param width width of the array in luma pixels * \param height height of the array in luma pixels */ -cu_array_t * kvz_cu_array_alloc(const int width, const int height) { +cu_array_t * kvz_cu_array_alloc(const int width, const int height) +{ cu_array_t *cua = MALLOC(cu_array_t, 1); - // Round up to a multiple of cell width and divide by cell width. - const int width_scu = (width + 15) >> 2; - const int height_scu = (height + 15) >> 2; - assert(width_scu * 16 >= width); - assert(height_scu * 16 >= height); + // Round up to a multiple of LCU width and divide by cell width. 
+ const int width_scu = CEILDIV(width, LCU_WIDTH) * LCU_WIDTH / SCU_WIDTH; + const int height_scu = CEILDIV(height, LCU_WIDTH) * LCU_WIDTH / SCU_WIDTH; const unsigned cu_array_size = width_scu * height_scu; - cua->data = calloc(cu_array_size, sizeof(cu_info_t)); - cua->width = width_scu << 2; - cua->height = height_scu << 2; + + cua->base = NULL; + cua->data = calloc(cu_array_size, sizeof(cu_info_t)); + cua->width = width_scu * SCU_WIDTH; + cua->height = height_scu * SCU_WIDTH; + cua->stride = cua->width; cua->refcount = 1; return cua; } -int kvz_cu_array_free(cu_array_t * const cua) +cu_array_t * kvz_cu_subarray(cu_array_t *base, + const unsigned x_offset, + const unsigned y_offset, + const unsigned width, + const unsigned height) +{ + assert(x_offset + width <= base->width); + assert(y_offset + height <= base->height); + + if (x_offset == 0 && + y_offset == 0 && + width == base->width && + height == base->height) + { + return kvz_cu_array_copy_ref(base); + } + + cu_array_t *cua = MALLOC(cu_array_t, 1); + + // Find the real base array. + cu_array_t *real_base = base; + while (real_base->base) { + real_base = real_base->base; + } + cua->base = kvz_cu_array_copy_ref(real_base); + cua->data = kvz_cu_array_at(base, x_offset, y_offset); + cua->width = width; + cua->height = height; + cua->stride = base->stride; + cua->refcount = 1; + + return cua; +} + +void kvz_cu_array_free(cu_array_t **cua_ptr) { - int32_t new_refcount; - if (!cua) return 1; + cu_array_t *cua = *cua_ptr; + if (cua == NULL) return; + *cua_ptr = NULL; + + int new_refcount = KVZ_ATOMIC_DEC(&cua->refcount); + if (new_refcount > 0) { + // Still we have some references, do nothing. 
+ return; + } - new_refcount = KVZ_ATOMIC_DEC(&(cua->refcount)); - //Still we have some references, do nothing - if (new_refcount > 0) return 1; + assert(new_refcount == 0); - FREE_POINTER(cua->data); - free(cua); + if (!cua->base) { + FREE_POINTER(cua->data); + } else { + kvz_cu_array_free(&cua->base); + cua->data = NULL; + } - return 1; + FREE_POINTER(cua); } /** - * \brief Copy part of a cu array to another cu array. - * - * All values are in luma pixels. + * \brief Get a new pointer to a cu array. * - * \param dst destination array - * \param dst_x x-coordinate of the left edge of the copied area in dst - * \param dst_y y-coordinate of the top edge of the copied area in dst - * \param src source array - * \param src_x x-coordinate of the left edge of the copied area in src - * \param src_y y-coordinate of the top edge of the copied area in src - * \param width width of the area to copy - * \param height height of the area to copy + * Increment reference count and return the cu array. */ -void kvz_cu_array_copy(cu_array_t* dst, int dst_x, int dst_y, - const cu_array_t* src, int src_x, int src_y, - int width, int height) +cu_array_t * kvz_cu_array_copy_ref(cu_array_t* cua) { - // Convert values from pixel coordinates to array indices. - int src_stride = src->width >> 2; - int dst_stride = dst->width >> 2; - const cu_info_t* src_ptr = &src->data[(src_x >> 2) + (src_y >> 2) * src_stride]; - cu_info_t* dst_ptr = &dst->data[(dst_x >> 2) + (dst_y >> 2) * dst_stride]; - - // Number of bytes to copy per row. 
- const size_t row_size = sizeof(cu_info_t) * (width >> 2); - - width = MIN(width, MIN(src->width - src_x, dst->width - dst_x)); - height = MIN(height, MIN(src->height - src_y, dst->height - dst_y)); - - assert(src_x + width <= src->width); - assert(src_y + height <= src->height); - assert(dst_x + width <= dst->width); - assert(dst_y + height <= dst->height); - - for (int i = 0; i < (height >> 2); ++i) { - memcpy(dst_ptr, src_ptr, row_size); - src_ptr += src_stride; - dst_ptr += dst_stride; - } + // The caller should have had another reference. + assert(cua->refcount > 0); + KVZ_ATOMIC_INC(&cua->refcount); + return cua; } + /** * \brief Copy an lcu to a cu array. * @@ -213,7 +203,7 @@ */ void kvz_cu_array_copy_from_lcu(cu_array_t* dst, int dst_x, int dst_y, const lcu_t *src)
View file
kvazaar-1.1.0.tar.gz/src/cu.h -> kvazaar-1.2.0.tar.gz/src/cu.h
Changed
@@ -138,10 +138,13 @@ int8_t mode; int8_t mode_chroma; int8_t tr_skip; //!< \brief transform skip flag +#if KVZ_SEL_ENCRYPTION + int8_t mode_encry; +#endif } intra; struct { int16_t mv[2][2]; // \brief Motion vectors for L0 and L1 - uint8_t mv_ref[2]; // \brief Index of the encoder_control.ref array. + uint8_t mv_ref[2]; // \brief Index of the L0 and L1 array. uint8_t mv_cand0 : 3; // \brief selected MV candidate uint8_t mv_cand1 : 3; // \brief selected MV candidate uint8_t mv_dir : 2; // \brief Probably describes if mv_ref is L0, L1 or both (bi-pred) @@ -178,20 +181,26 @@ (cu).inter.cost, (cu).inter.bitcost, (cu).inter.mv[0], (cu).inter.mv[1], (cu).inter.mvd[0], (cu).inter.mvd[1], \ (cu).inter.mv_cand, (cu).inter.mv_ref, (cu).inter.mv_dir, (cu).inter.mode) -typedef struct { - cu_info_t *data; //!< \brief cu array +typedef struct cu_array_t { + struct cu_array_t *base; //!< \brief base cu array or NULL + cu_info_t *data; //!< \brief cu array int32_t width; //!< \brief width of the array in pixels int32_t height; //!< \brief height of the array in pixels + int32_t stride; //!< \brief stride of the array in pixels int32_t refcount; //!< \brief number of references to this cu_array } cu_array_t; -cu_array_t * kvz_cu_array_alloc(int width, int height); -int kvz_cu_array_free(cu_array_t *cua); cu_info_t* kvz_cu_array_at(cu_array_t *cua, unsigned x_px, unsigned y_px); const cu_info_t* kvz_cu_array_at_const(const cu_array_t *cua, unsigned x_px, unsigned y_px); -void kvz_cu_array_copy(cu_array_t* dst, int dst_x, int dst_y, - const cu_array_t* src, int src_x, int src_y, - int width, int height); + +cu_array_t * kvz_cu_array_alloc(const int width, const int height); +cu_array_t * kvz_cu_subarray(cu_array_t *base, + const unsigned x_offset, + const unsigned y_offset, + const unsigned width, + const unsigned height); +void kvz_cu_array_free(cu_array_t **cua_ptr); +cu_array_t * kvz_cu_array_copy_ref(cu_array_t* cua); /** @@ -221,7 +230,54 @@ kvz_pixel v[LCU_REF_PX_WIDTH / 2 + 
1]; } lcu_ref_px_t; -typedef struct { +/** + * \brief Coefficients of an LCU + * + * Coefficients inside a single TU are stored in row-major order. TUs + * themselves are stored in a zig-zag order, so that the coefficients of + * a TU are contiguous in memory. + * + * Example storage order for a 32x32 pixel TU tree + * + \verbatim + + +------+------+------+------+---------------------------+ + | 0 | 16 | 64 | 80 | | + | - | - | - | - | | + | 15 | 31 | 79 | 95 | | + +------+------+------+------+ | + | 32 | 48 | 96 | 112 | | + | - | - | - | - | | + | 47 | 63 | 111 | 127 | | + +------+------+------+------+ 256 - 511 | + | 128 | 144 | 192 | 208 | | + | - | - | - | - | | + | 143 | 159 | 207 | 223 | | + +------+------+------+------+ | + | 160 | 176 | 224 | 240 | | + | - | - | - | - | | + | 175 | 191 | 239 | 255 | | + +------+------+------+------+-------------+------+------+ + | 512 | 528 | | | 832 | 848 | + | - | - | | | - | - | + | 527 | 543 | | | 847 | 863 | + +------+------+ 576 - 639 | 768 - 831 +------+------+ + | 544 | 560 | | | 864 | 880 | + | - | - | | | - | - | + | 559 | 575 | | | 879 | 895 | + +------+------+-------------+-------------+------+------+ + | | | | | + | | | | | + | | | | | + | 640 - 703 | 704 - 767 | 896 - 959 | 960 - 1023 | + | | | | | + | | | | | + | | | | | + +-------------+-------------+-------------+-------------+ + + \endverbatim + */ +typedef ALIGNED(8) struct { coeff_t y[LCU_LUMA_SIZE]; coeff_t u[LCU_CHROMA_SIZE]; coeff_t v[LCU_CHROMA_SIZE]; @@ -287,6 +343,72 @@ #define LCU_GET_CU_AT_PX(lcu, x_px, y_px) \ (&(lcu)->cu[LCU_CU_OFFSET + ((x_px) >> 2) + ((y_px) >> 2) * LCU_T_CU_WIDTH]) + +/** + * \brief Copy a part of a coeff_t array to another. + * + * \param width Size of the block to be copied in pixels. + * \param src Pointer to the source array. + * \param dest Pointer to the destination array. 
+ */ +static INLINE void copy_coeffs(const coeff_t *__restrict src, + coeff_t *__restrict dest, + size_t width) +{ + memcpy(dest, src, width * width * sizeof(coeff_t)); +} + + +/** + * \brief Convert (x, y) coordinates to z-order index. + * + * Only works for widths and coordinates divisible by four. Width must be + * a power of two in range [4..64]. + * + * \param width size of the containing block + * \param x x-coordinate + * \param y y-coordinate + * \return index in z-order + */ +static INLINE unsigned xy_to_zorder(unsigned width, unsigned x, unsigned y) +{ + assert(width % 4 == 0 && width >= 4 && width <= 64); + assert(x % 4 == 0 && x < width); + assert(y % 4 == 0 && y < width); + + unsigned result = 0; + + switch (width) { + case 64: + result += x / 32 * (32*32); + result += y / 32 * (64*32); + x %= 32; + y %= 32; + // fallthrough + case 32: + result += x / 16 * (16*16); + result += y / 16 * (32*16); + x %= 16; + y %= 16; + // fallthrough + case 16: + result += x / 8 * ( 8*8); + result += y / 8 * (16*8); + x %= 8; + y %= 8; + // fallthrough + case 8: + result += x / 4 * (4*4); + result += y / 4 * (8*4); + // fallthrough + case 4: + break; + } + + return result; +} + + #define CHECKPOINT_LCU(prefix_str, lcu) do { \ CHECKPOINT_CU(prefix_str " cu[0]", (lcu).cu[0]); \ CHECKPOINT_CU(prefix_str " cu[1]", (lcu).cu[1]); \ @@ -373,10 +495,6 @@ } while(0) -void kvz_coefficients_blit(const coeff_t *orig, coeff_t *dst, - unsigned width, unsigned height, - unsigned orig_stride, unsigned dst_stride); - #define NUM_CBF_DEPTHS 5 static const uint16_t cbf_masks[NUM_CBF_DEPTHS] = { 0x1f, 0x0f, 0x07, 0x03, 0x1 };
View file
kvazaar-1.1.0.tar.gz/src/encmain.c -> kvazaar-1.2.0.tar.gz/src/encmain.c
Changed
@@ -83,11 +83,11 @@ } } -#if KVZ_BIT_DEPTH == 8 -#define PSNRMAX (255.0 * 255.0) -#else - #define PSNRMAX ((double)PIXEL_MAX * (double)PIXEL_MAX) -#endif +/** + * \brief Value that is printed instead of PSNR when SSE is zero. + */ +static const double MAX_PSNR = 999.99; +static const double MAX_SQUARED_ERROR = (double)PIXEL_MAX * (double)PIXEL_MAX; /** * \brief Calculates image PSNR value @@ -105,28 +105,31 @@ int32_t pixels = src->width * src->height; int colors = rec->chroma_format == KVZ_CSP_400 ? 1 : 3; + double sse[3] = { 0.0 }; for (int32_t c = 0; c < colors; ++c) { int32_t num_pixels = pixels; if (c != COLOR_Y) { num_pixels >>= 2; } - psnr[c] = 0; for (int32_t i = 0; i < num_pixels; ++i) { const int32_t error = src->data[c][i] - rec->data[c][i]; - psnr[c] += error * error; + sse[c] += error * error; } // Avoid division by zero - if (psnr[c] == 0) psnr[c] = 99.0; - psnr[c] = 10 * log10((num_pixels * PSNRMAX) / ((double)psnr[c]));; + if (sse[c] == 0.0) { + psnr[c] = MAX_PSNR; + } else { + psnr[c] = 10.0 * log10(num_pixels * MAX_SQUARED_ERROR / sse[c]); + } } } typedef struct { - // Mutexes for synchronization. - pthread_mutex_t* input_mutex; - pthread_mutex_t* main_thread_mutex; + // Semaphores for synchronization. + kvz_sem_t* available_input_slots; + kvz_sem_t* filled_input_slots; // Parameters passed from main thread to input thread. FILE* input; @@ -141,9 +144,6 @@ int retval; } input_handler_args; -#define PTHREAD_LOCK(l) if (pthread_mutex_lock((l)) != 0) { fprintf(stderr, "pthread_mutex_lock(%s) failed!\n", #l); assert(0); return 0; } -#define PTHREAD_UNLOCK(l) if (pthread_mutex_unlock((l)) != 0) { fprintf(stderr, "pthread_mutex_unlock(%s) failed!\n", #l); assert(0); return 0; } - #define RETVAL_RUNNING 0 #define RETVAL_FAILURE 1 #define RETVAL_EOF 2 @@ -193,7 +193,7 @@ // Set PTS to make sure we pass it on correctly. 
frame_in->pts = frames_read; - bool read_success = yuv_io_read(args->input, + bool read_success = yuv_io_read(args->input, args->opts->config->width, args->opts->config->height, args->encoder->cfg.input_bitdepth, @@ -242,30 +242,65 @@ } // Wait until main thread is ready to receive the next frame. - PTHREAD_LOCK(args->input_mutex); + kvz_sem_wait(args->available_input_slots); args->img_in = frame_in; args->retval = retval; // Unlock main_thread_mutex to notify main thread that the new img_in // and retval have been placed to args. - PTHREAD_UNLOCK(args->main_thread_mutex); + kvz_sem_post(args->filled_input_slots); frame_in = NULL; } done: // Wait until main thread is ready to receive the next frame. - PTHREAD_LOCK(args->input_mutex); + kvz_sem_wait(args->available_input_slots); args->img_in = NULL; args->retval = retval; // Unlock main_thread_mutex to notify main thread that the new img_in // and retval have been placed to args. - PTHREAD_UNLOCK(args->main_thread_mutex); + kvz_sem_post(args->filled_input_slots); // Do some cleaning up. args->api->picture_free(frame_in); pthread_exit(NULL); - return 0; + return NULL; +} + + +void output_recon_pictures(const kvz_api *const api, + FILE *recout, + kvz_picture *buffer[KVZ_MAX_GOP_LENGTH], + int *buffer_size, + uint64_t *next_pts, + unsigned width, + unsigned height) +{ + bool picture_written; + do { + picture_written = false; + for (int i = 0; i < *buffer_size; i++) { + + kvz_picture *pic = buffer[i]; + if (pic->pts == *next_pts) { + // Output the picture and remove it. + if (!yuv_io_write(recout, pic, width, height)) { + fprintf(stderr, "Failed to write reconstructed picture!\n"); + } + api->picture_free(pic); + picture_written = true; + (*next_pts)++; + + // Move rest of the pictures one position backward. 
+ for (i++; i < *buffer_size; i++) { + buffer[i - 1] = buffer[i]; + buffer[i] = NULL; + } + (*buffer_size)--; + } + } + } while (picture_written); } @@ -287,15 +322,37 @@ clock_t start_time = clock(); clock_t encoding_start_cpu_time; KVZ_CLOCK_T encoding_start_real_time; - + clock_t encoding_end_cpu_time; KVZ_CLOCK_T encoding_end_real_time; + // PTS of the reconstructed picture that should be output next. + // Only used with --debug. + uint64_t next_recon_pts = 0; + // Buffer for storing reconstructed pictures that are not to be output + // yet (i.e. in wrong order because GOP is used). + // Only used with --debug. + kvz_picture *recon_buffer[KVZ_MAX_GOP_LENGTH] = { NULL }; + int recon_buffer_size = 0; + + // Semaphores for synchronizing the input reader thread and the main + // thread. + // + // available_input_slots tells whether the main thread is currently using + // input_handler_args.img_in. (0 = in use, 1 = not in use) + // + // filled_input_slots tells whether there is a new input picture (or NULL + // if the input has ended) in input_handler_args.img_in placed by the + // input reader thread. (0 = no new image, 1 = one new image) + // + kvz_sem_t *available_input_slots = NULL; + kvz_sem_t *filled_input_slots = NULL; + #ifdef _WIN32 // Stderr needs to be text mode to convert \n to \r\n in Windows. setmode( _fileno( stderr ), _O_TEXT ); #endif - + CHECKPOINTS_INIT(); const kvz_api * const api = kvz_api_get(8); @@ -379,17 +436,15 @@ pthread_t input_thread; - pthread_mutex_t input_mutex = PTHREAD_MUTEX_INITIALIZER; - pthread_mutex_t main_thread_mutex = PTHREAD_MUTEX_INITIALIZER; - - // Lock both mutexes at startup - PTHREAD_LOCK(&main_thread_mutex); - PTHREAD_LOCK(&input_mutex); + available_input_slots = calloc(1, sizeof(kvz_sem_t)); + filled_input_slots = calloc(1, sizeof(kvz_sem_t)); + kvz_sem_init(available_input_slots, 0); + kvz_sem_init(filled_input_slots, 0);
View file
kvazaar-1.1.0.tar.gz/src/encode_coding_tree.c -> kvazaar-1.2.0.tar.gz/src/encode_coding_tree.c
Changed
@@ -46,13 +46,11 @@ * This method encodes the X and Y component within a block of the last * significant coefficient. */ -static void encode_last_significant_xy(encoder_state_t * const state, +static void encode_last_significant_xy(cabac_data_t * const cabac, uint8_t lastpos_x, uint8_t lastpos_y, uint8_t width, uint8_t height, uint8_t type, uint8_t scan) { - cabac_data_t * const cabac = &state->cabac; - const int index = kvz_math_floor_log2(width) - 2; uint8_t ctx_offset = type ? 0 : (index * 3 + (index + 1) / 4); uint8_t shift = type ? index : (index + 3) / 4; @@ -103,14 +101,14 @@ } void kvz_encode_coeff_nxn(encoder_state_t * const state, - coeff_t *coeff, + cabac_data_t * const cabac, + const coeff_t *coeff, uint8_t width, uint8_t type, int8_t scan_mode, int8_t tr_skip) { const encoder_control_t * const encoder = state->encoder_control; - cabac_data_t * const cabac = &state->cabac; int c1 = 1; uint8_t last_coeff_x = 0; uint8_t last_coeff_y = 0; @@ -183,8 +181,13 @@ last_coeff_y = (uint8_t)(pos_last >> log2_block_size); // Code last_coeff_x and last_coeff_y - encode_last_significant_xy(state, last_coeff_x, last_coeff_y, width, width, - type, scan_mode); + encode_last_significant_xy(cabac, + last_coeff_x, + last_coeff_y, + width, + width, + type, + scan_mode); scan_pos_sig = scan_pos_last; @@ -300,15 +303,15 @@ } if (be_valid && sign_hidden) { coeff_signs = coeff_signs >> 1; - if(!state->cabac.only_count) - if (state->encoder_control->cfg.crypto_features & KVZ_CRYPTO_TRANSF_COEFF_SIGNS) { - coeff_signs = coeff_signs ^ ff_get_key(&state->tile->dbs_g, num_non_zero-1); + if (!cabac->only_count) + if (encoder->cfg.crypto_features & KVZ_CRYPTO_TRANSF_COEFF_SIGNS) { + coeff_signs = coeff_signs ^ kvz_crypto_get_key(state->crypto_hdl, num_non_zero-1); } CABAC_BINS_EP(cabac, coeff_signs , (num_non_zero - 1), "coeff_sign_flag"); } else { - if(!state->cabac.only_count) - if (state->encoder_control->cfg.crypto_features & KVZ_CRYPTO_TRANSF_COEFF_SIGNS) - coeff_signs = 
coeff_signs ^ ff_get_key(&state->tile->dbs_g, num_non_zero); + if (!cabac->only_count) + if (encoder->cfg.crypto_features & KVZ_CRYPTO_TRANSF_COEFF_SIGNS) + coeff_signs = coeff_signs ^ kvz_crypto_get_key(state->crypto_hdl, num_non_zero); CABAC_BINS_EP(cabac, coeff_signs, num_non_zero, "coeff_sign_flag"); } @@ -319,9 +322,9 @@ int32_t base_level = (idx < C1FLAG_NUMBER) ? (2 + first_coeff2) : 1; if (abs_coeff[idx] >= base_level) { - if(!state->cabac.only_count) { - if (state->encoder_control->cfg.crypto_features & KVZ_CRYPTO_TRANSF_COEFFS) - kvz_cabac_write_coeff_remain_encry(state, cabac, abs_coeff[idx] - base_level, go_rice_param, base_level); + if (!cabac->only_count) { + if (encoder->cfg.crypto_features & KVZ_CRYPTO_TRANSF_COEFFS) + kvz_cabac_write_coeff_remain_encry(state, cabac, abs_coeff[idx] - base_level, go_rice_param, base_level); else kvz_cabac_write_coeff_remain(cabac, abs_coeff[idx] - base_level, go_rice_param); } else @@ -342,7 +345,7 @@ } static void encode_transform_unit(encoder_state_t * const state, - int x_pu, int y_pu, int depth) + int x, int y, int depth) { assert(depth >= 1 && depth <= MAX_PU_DEPTH); @@ -350,79 +353,60 @@ const uint8_t width = LCU_WIDTH >> depth; const uint8_t width_c = (depth == MAX_PU_DEPTH ? 
width : width / 2); - const cu_info_t *cur_pu = kvz_cu_array_at_const(frame->cu_array, x_pu << 2, y_pu << 2); - - const int x_cu = x_pu / 2; - const int y_cu = y_pu / 2; - const cu_info_t *cur_cu = kvz_videoframe_get_cu_const(frame, x_cu, y_cu); - - coeff_t coeff_y[LCU_WIDTH*LCU_WIDTH+1]; - coeff_t coeff_u[LCU_WIDTH*LCU_WIDTH>>2]; - coeff_t coeff_v[LCU_WIDTH*LCU_WIDTH>>2]; - int32_t coeff_stride = frame->width; + const cu_info_t *cur_pu = kvz_cu_array_at_const(frame->cu_array, x, y); int8_t scan_idx = kvz_get_scan_order(cur_pu->type, cur_pu->intra.mode, depth); int cbf_y = cbf_is_set(cur_pu->cbf, depth, COLOR_Y); if (cbf_y) { - int x = x_pu * (LCU_WIDTH >> MAX_PU_DEPTH); - int y = y_pu * (LCU_WIDTH >> MAX_PU_DEPTH); - coeff_t *orig_pos = &frame->coeff_y[x + y * frame->width]; - for (y = 0; y < width; y++) { - for (x = 0; x < width; x++) { - coeff_y[x+y*width] = orig_pos[x]; - } - orig_pos += coeff_stride; - } - } - - // CoeffNxN - // Residual Coding - if (cbf_y) { - kvz_encode_coeff_nxn(state, coeff_y, width, 0, scan_idx, cur_pu->intra.tr_skip); + int x_local = x % LCU_WIDTH; + int y_local = y % LCU_WIDTH; + const coeff_t *coeff_y = &state->coeff->y[xy_to_zorder(LCU_WIDTH, x_local, y_local)]; + + // CoeffNxN + // Residual Coding + kvz_encode_coeff_nxn(state, + &state->cabac, + coeff_y, + width, + 0, + scan_idx, + cur_pu->intra.tr_skip); } - if (depth == MAX_DEPTH + 1 && !(x_pu % 2 && y_pu % 2)) { + if (depth == MAX_DEPTH + 1) { // For size 4x4 luma transform the corresponding chroma transforms are - // also of size 4x4 covering 8x8 luma pixels. The residual is coded - // in the last transform unit so for the other ones, don't do anything. - return; + // also of size 4x4 covering 8x8 luma pixels. The residual is coded in + // the last transform unit. + if (x % 8 == 0 || y % 8 == 0) { + // Not the last luma transform block so there is nothing more to do. + return; + } else { + // Time to to code the chroma transform blocks. 
Move to the top-left + // corner of the block. + x -= 4; + y -= 4; + cur_pu = kvz_cu_array_at_const(frame->cu_array, x, y); + } } - bool chroma_cbf_set = cbf_is_set(cur_cu->cbf, depth, COLOR_U) || - cbf_is_set(cur_cu->cbf, depth, COLOR_V); + bool chroma_cbf_set = cbf_is_set(cur_pu->cbf, depth, COLOR_U) || + cbf_is_set(cur_pu->cbf, depth, COLOR_V); if (chroma_cbf_set) { - int x, y; - coeff_t *orig_pos_u, *orig_pos_v; - - if (depth <= MAX_DEPTH) { - x = x_pu * (LCU_WIDTH >> (MAX_PU_DEPTH + 1)); - y = y_pu * (LCU_WIDTH >> (MAX_PU_DEPTH + 1)); - } else { - // for 4x4 select top left pixel of the CU. - x = x_cu * (LCU_WIDTH >> (MAX_DEPTH + 1)); - y = y_cu * (LCU_WIDTH >> (MAX_DEPTH + 1)); - } - orig_pos_u = &frame->coeff_u[x + y * (frame->width >> 1)]; - orig_pos_v = &frame->coeff_v[x + y * (frame->width >> 1)]; - for (y = 0; y < (width_c); y++) { - for (x = 0; x < (width_c); x++) { - coeff_u[x+y*(width_c)] = orig_pos_u[x]; - coeff_v[x+y*(width_c)] = orig_pos_v[x]; - } - orig_pos_u += coeff_stride>>1; - orig_pos_v += coeff_stride>>1; - } + int x_local = (x >> 1) % LCU_WIDTH_C; + int y_local = (y >> 1) % LCU_WIDTH_C; + scan_idx = kvz_get_scan_order(cur_pu->type, cur_pu->intra.mode_chroma, depth); - scan_idx = kvz_get_scan_order(cur_cu->type, cur_cu->intra.mode_chroma, depth); + const coeff_t *coeff_u = &state->coeff->u[xy_to_zorder(LCU_WIDTH_C, x_local, y_local)]; + const coeff_t *coeff_v = &state->coeff->v[xy_to_zorder(LCU_WIDTH_C, x_local, y_local)]; - if (cbf_is_set(cur_cu->cbf, depth, COLOR_U)) { - kvz_encode_coeff_nxn(state, coeff_u, width_c, 2, scan_idx, 0); + if (cbf_is_set(cur_pu->cbf, depth, COLOR_U)) {
View file
kvazaar-1.1.0.tar.gz/src/encode_coding_tree.h -> kvazaar-1.2.0.tar.gz/src/encode_coding_tree.h
Changed
@@ -34,8 +34,9 @@ uint16_t y_ctb, uint8_t depth); -void kvz_encode_coeff_nxn(encoder_state_t *state, - coeff_t *coeff, +void kvz_encode_coeff_nxn(encoder_state_t * const state, + cabac_data_t * const cabac, + const coeff_t *coeff, uint8_t width, uint8_t type, int8_t scan_mode,
View file
kvazaar-1.1.0.tar.gz/src/encoder.c -> kvazaar-1.2.0.tar.gz/src/encoder.c
Changed
@@ -20,6 +20,9 @@ #include "encoder.h" +// This define is required for M_PI on Windows. +#define _USE_MATH_DEFINES +#include <math.h> #include <stdio.h> #include <stdlib.h> @@ -27,90 +30,170 @@ #include "strategyselector.h" +/** + * \brief Strength of QP adjustments when using adaptive QP for 360 video. + * + * Determined empirically. + */ +static const double ERP_AQP_STRENGTH = 3.0; + + static int encoder_control_init_gop_layer_weights(encoder_control_t * const); -static int size_of_wpp_ends(int threads) +static unsigned cfg_num_threads(void) { - // Based on the shape of the area where all threads can't yet run in parallel. - return 4 * threads * threads - 2 * threads; + if (kvz_g_hardware_flags.logical_cpu_count == 0) { + // Default to 4 if we don't know the number of CPUs. + return 4; + } + + return kvz_g_hardware_flags.logical_cpu_count; } -static int select_owf_auto(const kvz_config *const cfg) + +static int get_max_parallelism(const encoder_control_t *const encoder) { - if (cfg->intra_period == 1) { - if (cfg->wpp) { - // If wpp is on, select owf such that less than 15% of the - // frame is covered by the are threads can not work at the same time. - const int lcu_width = CEILDIV(cfg->width, LCU_WIDTH); - const int lcu_height = CEILDIV(cfg->height, LCU_WIDTH); - - // Find the largest number of threads per frame that satifies the - // the condition: wpp start/stop inefficiency takes up less than 15% - // of frame area. 
- int threads_per_frame = 1; - const int wpp_treshold = lcu_width * lcu_height * 15 / 100; - while ((threads_per_frame + 1) * 2 < lcu_width && - threads_per_frame + 1 < lcu_height && - size_of_wpp_ends(threads_per_frame + 1) < wpp_treshold) { - ++threads_per_frame; - } + const int width_lcu = CEILDIV(encoder->cfg.width, LCU_WIDTH); + const int height_lcu = CEILDIV(encoder->cfg.height, LCU_WIDTH); + const int wpp_limit = MIN(height_lcu, CEILDIV(width_lcu, 2)); + const int par_frames = encoder->cfg.owf + 1; - const int threads = MAX(cfg->threads, 1); - const int frames = CEILDIV(threads, threads_per_frame); + int parallelism = 0; - // Convert from number of parallel frames to number of additional frames. - return CLIP(0, threads - 1, frames - 1); + if (encoder->cfg.intra_period == 1) { + int threads_per_frame; + if (encoder->cfg.wpp) { + // Usually limited by width because starting to code a CTU requires + // that the next two CTUs in the row above have been completed. + threads_per_frame = wpp_limit; } else { - // If wpp is not on, select owf such that there is enough - // tiles for twice the number of threads. - - int tiles_per_frame = cfg->tiles_width_count * cfg->tiles_height_count; - int threads = (cfg->threads > 1 ? cfg->threads : 1); - int frames = CEILDIV(threads * 4, tiles_per_frame); - - // Limit number of frames to 1.25x the number of threads for the case - // where there is only 1 tile per frame. - frames = CLIP(1, threads * 4 / 3, frames); - return frames - 1; + // One thread for each tile. + threads_per_frame = encoder->cfg.tiles_width_count * + encoder->cfg.tiles_height_count; } + // Divide by two since all frames cannot achieve the maximum + // parallelism all the time. + parallelism = par_frames * threads_per_frame / 2; + } else { - // Try and estimate a good number of parallel frames for inter. 
- const int lcu_width = CEILDIV(cfg->width, LCU_WIDTH); - const int lcu_height = CEILDIV(cfg->height, LCU_WIDTH); - int threads_per_frame = MIN(lcu_width / 2, lcu_height); - int threads = cfg->threads; - - // If all threads fit into one frame, at least two parallel frames should - // be used to reduce the effect of WPP spin-up and wind-down. - int frames = 1; - - while (threads > 0 && threads_per_frame > 0) { - frames += 1; - threads -= threads_per_frame; - threads_per_frame -= 2; - } + if (encoder->cfg.wpp) { + const int last_diagonal = (width_lcu - 1) + (height_lcu - 1) * 2; + + // Index of a diagonal. The diagonal contains CTUs whose coordinates + // satisfy x + 2*y == diagonal. We start the sum from the longest + // diagonal. + int diagonal = CEILDIV(last_diagonal, 2); + + // Difference between diagonal indices in consecutive frames. + const int frame_delay = 1 + encoder->max_inter_ref_lcu.right + + 2 * encoder->max_inter_ref_lcu.down; + int step = frame_delay; + int direction = -1; + + // Compute number of threads for each parallel frame. + for (int num_frames = 0; num_frames < par_frames; num_frames++) { + if (diagonal < 0 || diagonal > last_diagonal) { + // No room for more threads. + break; + } - if (cfg->gop_len && cfg->gop_lowdelay && cfg->gop_lp_definition.t > 1) { - // Temporal skipping makes every other frame very fast to encode so - // more parallel frames should be used. - frames *= 2; + // Count number of CTUs on the diagonal. 
+ if (diagonal < MIN(2 * height_lcu, width_lcu)) { + parallelism += 1 + diagonal / 2; + } else { + parallelism += MIN( + wpp_limit, + height_lcu + CEILDIV(width_lcu, 2) - 1 - CEILDIV(diagonal, 2) + ); + } + diagonal += direction * step; + step += frame_delay; + direction = -direction; + } + + } else { + parallelism = encoder->cfg.tiles_width_count * + encoder->cfg.tiles_height_count; } - return CLIP(0, cfg->threads * 2 - 1, frames - 1); } + + return parallelism; } -static unsigned cfg_num_threads(void) +/** + * \brief Return weight for 360 degree ERP video + * + * Returns the scaling factor of area from equirectangular projection to + * spherical surface. + * + * \param y y-coordinate of the pixel + * \param h height of the picture + */ +static double ws_weight(int y, int h) +{ + return cos((y - 0.5 * h + 0.5) * (M_PI / h)); +} + + + +/** + * \brief Update ROI QPs for 360 video with equirectangular projection. + * + * Writes updated ROI parameters to encoder->cfg.roi. + * + * \param encoder encoder control + * \param orig_roi original delta QPs or NULL + * \param orig_width width of orig_roi + * \param orig_height height of orig_roi + */ +static void init_erp_aqp_roi(encoder_control_t* encoder, + int8_t *orig_roi, + int32_t orig_width, + int32_t orig_height) { - unsigned cpus = kvz_g_hardware_flags.physical_cpu_count; - unsigned fake_cpus = kvz_g_hardware_flags.logical_cpu_count - cpus; + // Update ROI with WS-PSNR delta QPs. + int height = encoder->in.height_in_lcu; + int width = orig_roi ? orig_width : 1; + + int frame_height = encoder->in.real_height;
View file
kvazaar-1.1.0.tar.gz/src/encoder.h -> kvazaar-1.2.0.tar.gz/src/encoder.h
Changed
@@ -118,11 +118,21 @@ //! Picture weights when GOP is used. double gop_layer_weights[MAX_GOP_LAYERS]; + bool lcu_dqp_enabled; + + int tr_depth_inter; + //! pic_parameter_set struct { uint8_t dependent_slice_segments_enabled_flag; } pps; + //! Maximum motion vector distance as number of LCUs. + struct { + int right; + int down; + } max_inter_ref_lcu; + } encoder_control_t; encoder_control_t* kvz_encoder_control_init(const kvz_config *cfg);
View file
kvazaar-1.1.0.tar.gz/src/encoder_state-bitstream.c -> kvazaar-1.2.0.tar.gz/src/encoder_state-bitstream.c
Changed
@@ -389,7 +389,7 @@ WRITE_UE(stream, MAX_DEPTH, "log2_diff_max_min_coding_block_size"); WRITE_UE(stream, 0, "log2_min_transform_block_size_minus2"); // 4x4 WRITE_UE(stream, 3, "log2_diff_max_min_transform_block_size"); // 4x4...32x32 - WRITE_UE(stream, TR_DEPTH_INTER, "max_transform_hierarchy_depth_inter"); + WRITE_UE(stream, encoder->tr_depth_inter, "max_transform_hierarchy_depth_inter"); WRITE_UE(stream, encoder->cfg.tr_depth_intra, "max_transform_hierarchy_depth_intra"); // scaling list @@ -401,7 +401,7 @@ WRITE_U(stream, (encoder->cfg.amp_enable ? 1 : 0), 1, "amp_enabled_flag"); - WRITE_U(stream, encoder->cfg.sao_enable ? 1 : 0, 1, + WRITE_U(stream, encoder->cfg.sao_type ? 1 : 0, 1, "sample_adaptive_offset_enabled_flag"); WRITE_U(stream, ENABLE_PCM, 1, "pcm_enabled_flag"); #if ENABLE_PCM == 1 @@ -455,7 +455,7 @@ WRITE_U(stream, 0, 1, "constrained_intra_pred_flag"); WRITE_U(stream, encoder->cfg.trskip_enable, 1, "transform_skip_enabled_flag"); - if (encoder->cfg.target_bitrate > 0 || encoder->cfg.roi.dqps != NULL) { + if (encoder->lcu_dqp_enabled) { // Use separate QP for each LCU when rate control is enabled. WRITE_U(stream, 1, 1, "cu_qp_delta_enabled_flag"); WRITE_UE(stream, 0, "diff_cu_qp_delta_depth"); @@ -544,7 +544,7 @@ s += sprintf(s, " %dx%d", cfg->width, cfg->height); s += sprintf(s, " deblock=%d:%d:%d", cfg->deblock_enable, cfg->deblock_beta, cfg->deblock_tc); - s += sprintf(s, " sao=%d", cfg->sao_enable); + s += sprintf(s, " sao=%d", cfg->sao_type); s += sprintf(s, " intra_period=%d", cfg->intra_period); s += sprintf(s, " qp=%d", cfg->qp); s += sprintf(s, " ref=%d", cfg->ref_frames); @@ -731,7 +731,7 @@ WRITE_UE(stream, encoder->cfg.gop_len?delta_poc - last_poc - 1:0, "delta_poc_s0_minus1"); last_poc = delta_poc; - WRITE_U(stream,1,1, "used_by_curr_pic_s0_flag"); + WRITE_U(stream, !state->frame->is_irap, 1, "used_by_curr_pic_s0_flag"); } last_poc = 0; poc_shift = 0; @@ -758,12 +758,12 @@ WRITE_UE(stream, encoder->cfg.gop_len ? 
delta_poc - last_poc - 1 : 0, "delta_poc_s1_minus1"); last_poc = delta_poc; - WRITE_U(stream, 1, 1, "used_by_curr_pic_s1_flag"); + WRITE_U(stream, !state->frame->is_irap, 1, "used_by_curr_pic_s1_flag"); } //WRITE_UE(stream, 0, "short_term_ref_pic_set_idx"); if (state->encoder_control->cfg.tmvp_enable) { - WRITE_U(stream, ref_negative?1:0, 1, "slice_temporal_mvp_enabled_flag"); + WRITE_U(stream, ref_negative ? 1 : 0, 1, "slice_temporal_mvp_enabled_flag"); } } @@ -771,7 +771,7 @@ //end if - if (encoder->cfg.sao_enable) { + if (encoder->cfg.sao_type) { WRITE_U(stream, 1, 1, "slice_sao_luma_flag"); if (encoder->chroma_format != KVZ_CSP_400) { WRITE_U(stream, 1, 1, "slice_sao_chroma_flag"); @@ -942,9 +942,7 @@ encoder_state_t * state, bool independent) { - uint8_t nal_type = (state->frame->is_idr_frame ? KVZ_NAL_IDR_W_RADL : KVZ_NAL_TRAIL_R); - - kvz_nal_write(stream, nal_type, 0, state->frame->first_nal); + kvz_nal_write(stream, state->frame->pictype, 0, state->frame->first_nal); state->frame->first_nal = false; kvz_encoder_state_write_bitstream_slice_header(stream, state, independent); @@ -1018,19 +1016,13 @@ kvz_bitstream_add_rbsp_trailing_bits(stream); } - { - PERFORMANCE_MEASURE_START(KVZ_PERF_FRAME); - encoder_state_write_bitstream_children(state); - PERFORMANCE_MEASURE_END(KVZ_PERF_FRAME, encoder->threadqueue, "type=write_bitstream_append,frame=%d,encoder_type=%c", state->frame->num, state->type); - } - + encoder_state_write_bitstream_children(state); + if (state->encoder_control->cfg.hash != KVZ_HASH_NONE) { - PERFORMANCE_MEASURE_START(KVZ_PERF_FRAME); // Calculate checksum add_checksum(state); - PERFORMANCE_MEASURE_END(KVZ_PERF_FRAME, encoder->threadqueue, "type=write_bitstream_checksum,frame=%d,encoder_type=%c", state->frame->num, state->type); } - + //Get bitstream length for stats uint64_t newpos = kvz_bitstream_tell(stream); state->stats_bitstream_length = (newpos >> 3) - (curpos >> 3);
View file
kvazaar-1.1.0.tar.gz/src/encoder_state-ctors_dtors.c -> kvazaar-1.2.0.tar.gz/src/encoder_state-ctors_dtors.c
Changed
@@ -29,7 +29,6 @@ #include "encoder.h" #include "encoder_state-geometry.h" #include "encoderstate.h" -#include "extras/crypto.h" #include "image.h" #include "imagelist.h" #include "kvazaar.h" @@ -82,16 +81,12 @@ printf("Error allocating videoframe!\r\n"); return 0; } - - // Init coeff data table - //FIXME: move them - state->tile->frame->coeff_y = MALLOC(coeff_t, width * height); - state->tile->frame->coeff_u = MALLOC(coeff_t, (width * height) >> 2); - state->tile->frame->coeff_v = MALLOC(coeff_t, (width * height) >> 2); - + state->tile->lcu_offset_x = lcu_offset_x; state->tile->lcu_offset_y = lcu_offset_y; - + state->tile->offset_x = lcu_offset_x * LCU_WIDTH; + state->tile->offset_y = lcu_offset_y * LCU_WIDTH; + state->tile->lcu_offset_in_ts = encoder->tiles_ctb_addr_rs_to_ts[lcu_offset_x + lcu_offset_y * encoder->in.width_in_lcu]; // hor_buf_search and ver_buf_search store single row/col from each LCU row/col. @@ -105,13 +100,15 @@ state->tile->hor_buf_search = kvz_yuv_t_alloc(luma_size, chroma_size_hor); state->tile->ver_buf_search = kvz_yuv_t_alloc(luma_size, chroma_size_ver); - - if (encoder->cfg.sao_enable) { + + if (encoder->cfg.sao_type) { state->tile->hor_buf_before_sao = kvz_yuv_t_alloc(luma_size, chroma_size_hor); + state->tile->ver_buf_before_sao = kvz_yuv_t_alloc(luma_size, chroma_size_ver); } else { state->tile->hor_buf_before_sao = NULL; + state->tile->ver_buf_before_sao = NULL; } - + if (encoder->cfg.wpp) { int num_jobs = state->tile->frame->width_in_lcu * state->tile->frame->height_in_lcu; state->tile->wf_jobs = MALLOC(threadqueue_job_t*, num_jobs); @@ -132,21 +129,27 @@ static void encoder_state_config_tile_finalize(encoder_state_t * const state) { if (state->tile == NULL) return; - if (state->tile->hor_buf_before_sao) kvz_yuv_t_free(state->tile->hor_buf_before_sao); - kvz_yuv_t_free(state->tile->hor_buf_search); kvz_yuv_t_free(state->tile->ver_buf_search); - + kvz_yuv_t_free(state->tile->hor_buf_before_sao); + 
kvz_yuv_t_free(state->tile->ver_buf_before_sao); + + if (state->encoder_control->cfg.wpp) { + int num_jobs = state->tile->frame->width_in_lcu * state->tile->frame->height_in_lcu; + for (int i = 0; i < num_jobs; ++i) { + kvz_threadqueue_free_job(&state->tile->wf_jobs[i]); + } + } + kvz_videoframe_free(state->tile->frame); state->tile->frame = NULL; - if (state->encoder_control->cfg.crypto_features && state->tile->dbs_g) { - DeleteCryptoC(state->tile->dbs_g); - } FREE_POINTER(state->tile->wf_jobs); } -static int encoder_state_config_slice_init(encoder_state_t * const state, - const int start_address_in_ts, const int end_address_in_ts) { +static int encoder_state_config_slice_init(encoder_state_t * const state, + const int start_address_in_ts, + const int end_address_in_ts) +{ state->slice->id = -1; for (int i = 0; i < state->encoder_control->slice_count; ++i) { if (state->encoder_control->slice_addresses_in_ts[i] == start_address_in_ts) { @@ -308,6 +311,7 @@ child_state->parent = parent_state; child_state->children = MALLOC(encoder_state_t, 1); child_state->children[0].encoder_control = NULL; + child_state->crypto_hdl = NULL; child_state->tqj_bitstream_written = NULL; child_state->tqj_recon_done = NULL; @@ -326,7 +330,6 @@ return 0; } - child_state->tile->dbs_g = NULL; // Not used. The used state is in the sub-tile. 
child_state->slice = MALLOC(encoder_state_config_slice_t, 1); if (!child_state->slice || !encoder_state_config_slice_init(child_state, 0, encoder->in.width_in_lcu * encoder->in.height_in_lcu - 1)) { fprintf(stderr, "Could not initialize encoder_state->slice!\n"); @@ -461,9 +464,6 @@ new_child->type = ENCODER_STATE_TYPE_TILE; new_child->frame = child_state->frame; new_child->tile = MALLOC(encoder_state_config_tile_t, 1); - if (child_state->encoder_control->cfg.crypto_features) { - new_child->tile->dbs_g = CreateC(); - } new_child->slice = child_state->slice; new_child->wfrow = child_state->wfrow; @@ -706,4 +706,7 @@ } kvz_bitstream_finalize(&state->stream); + + kvz_threadqueue_free_job(&state->tqj_recon_done); + kvz_threadqueue_free_job(&state->tqj_bitstream_written); }
View file
kvazaar-1.1.0.tar.gz/src/encoderstate.c -> kvazaar-1.2.0.tar.gz/src/encoderstate.c
Changed
@@ -35,6 +35,10 @@ #include "sao.h" #include "search.h" #include "tables.h" +#include "threadqueue.h" + +#define SAO_BUF_WIDTH (LCU_WIDTH + SAO_DELAY_PX + 2) +#define SAO_BUF_WIDTH_C (SAO_BUF_WIDTH / 2) int kvz_encoder_state_match_children_of_previous_frame(encoder_state_t * const state) { @@ -48,7 +52,127 @@ return 1; } -static void encoder_state_recdata_to_bufs(encoder_state_t * const state, const lcu_order_element_t * const lcu, yuv_t * const hor_buf, yuv_t * const ver_buf) { +/** + * \brief Save edge pixels before SAO to buffers. + * + * Copies pixels at the edges of the area that will be filtered with SAO to + * the given buffers. If deblocking is enabled, the pixels must have been + * deblocked before this. + * + * The saved pixels will be needed later when doing SAO for the neighboring + * areas. + */ +static void encoder_state_recdata_before_sao_to_bufs( + encoder_state_t * const state, + const lcu_order_element_t * const lcu, + yuv_t * const hor_buf, + yuv_t * const ver_buf) +{ + videoframe_t* const frame = state->tile->frame; + + if (hor_buf && lcu->below) { + // Copy the bottommost row that will be filtered with SAO to the + // horizontal buffer. + vector2d_t pos = { + .x = lcu->position_px.x, + .y = lcu->position_px.y + LCU_WIDTH - SAO_DELAY_PX - 1, + }; + // Copy all pixels that have been deblocked. + int length = lcu->size.x - DEBLOCK_DELAY_PX; + + if (!lcu->right) { + // If there is no LCU to the right, the last pixels will be + // filtered too. + length += DEBLOCK_DELAY_PX; + } + + if (lcu->left) { + // The rightmost pixels of the CTU to the left will also be filtered. + pos.x -= DEBLOCK_DELAY_PX; + length += DEBLOCK_DELAY_PX; + } + + const unsigned from_index = pos.x + pos.y * frame->rec->stride; + // NOTE: The horizontal buffer is indexed by + // x_px + y_lcu * frame->width + // where x_px is in pixels and y_lcu in number of LCUs. 
+ const unsigned to_index = pos.x + lcu->position.y * frame->width; + + kvz_pixels_blit(&frame->rec->y[from_index], + &hor_buf->y[to_index], + length, 1, + frame->rec->stride, + frame->width); + + if (state->encoder_control->chroma_format != KVZ_CSP_400) { + const unsigned from_index_c = (pos.x / 2) + (pos.y / 2) * frame->rec->stride / 2; + const unsigned to_index_c = (pos.x / 2) + lcu->position.y * frame->width / 2; + + kvz_pixels_blit(&frame->rec->u[from_index_c], + &hor_buf->u[to_index_c], + length / 2, 1, + frame->rec->stride / 2, + frame->width / 2); + kvz_pixels_blit(&frame->rec->v[from_index_c], + &hor_buf->v[to_index_c], + length / 2, 1, + frame->rec->stride / 2, + frame->width / 2); + } + } + + if (ver_buf && lcu->right) { + // Copy the rightmost column that will be filtered with SAO to the + // vertical buffer. + vector2d_t pos = { + .x = lcu->position_px.x + LCU_WIDTH - SAO_DELAY_PX - 1, + .y = lcu->position_px.y, + }; + int length = lcu->size.y - DEBLOCK_DELAY_PX; + + if (!lcu->below) { + // If there is no LCU below, the last pixels will be filtered too. + length += DEBLOCK_DELAY_PX; + } + + if (lcu->above) { + // The bottommost pixels of the CTU above will also be filtered. + pos.y -= DEBLOCK_DELAY_PX; + length += DEBLOCK_DELAY_PX; + } + + const unsigned from_index = pos.x + pos.y * frame->rec->stride; + // NOTE: The vertical buffer is indexed by + // x_lcu * frame->height + y_px + // where x_lcu is in number of LCUs and y_px in pixels. 
+ const unsigned to_index = lcu->position.x * frame->height + pos.y; + + kvz_pixels_blit(&frame->rec->y[from_index], + &ver_buf->y[to_index], + 1, length, + frame->rec->stride, 1); + + if (state->encoder_control->chroma_format != KVZ_CSP_400) { + const unsigned from_index_c = (pos.x / 2) + (pos.y / 2) * frame->rec->stride / 2; + const unsigned to_index_c = lcu->position.x * frame->height / 2 + pos.y / 2; + + kvz_pixels_blit(&frame->rec->u[from_index_c], + &ver_buf->u[to_index_c], + 1, length / 2, + frame->rec->stride / 2, 1); + kvz_pixels_blit(&frame->rec->v[from_index_c], + &ver_buf->v[to_index_c], + 1, length / 2, + frame->rec->stride / 2, 1); + } + } +} + +static void encoder_state_recdata_to_bufs(encoder_state_t * const state, + const lcu_order_element_t * const lcu, + yuv_t * const hor_buf, + yuv_t * const ver_buf) +{ videoframe_t* const frame = state->tile->frame; if (hor_buf) { @@ -107,6 +231,209 @@ } +/** + * \brief Do SAO reconstuction for all available pixels. + * + * Does SAO reconstruction for all pixels that are available after the + * given LCU has been deblocked. This means the following pixels: + * - bottom-right block of SAO_DELAY_PX times SAO_DELAY_PX in the lcu to + * the left and up + * - the rightmost SAO_DELAY_PX pixels of the LCU to the left (excluding + * the bottommost pixel) + * - the bottommost SAO_DELAY_PX pixels of the LCU above (excluding the + * rightmost pixels) + * - all pixels inside the LCU, excluding the rightmost SAO_DELAY_PX and + * bottommost SAO_DELAY_PX + */ +static void encoder_sao_reconstruct(const encoder_state_t *const state, + const lcu_order_element_t *const lcu) +{ + videoframe_t *const frame = state->tile->frame; + + // Temporary buffers for SAO input pixels. + kvz_pixel sao_buf_y_array[SAO_BUF_WIDTH * SAO_BUF_WIDTH]; + kvz_pixel sao_buf_u_array[SAO_BUF_WIDTH_C * SAO_BUF_WIDTH_C]; + kvz_pixel sao_buf_v_array[SAO_BUF_WIDTH_C * SAO_BUF_WIDTH_C]; + + // Pointers to the top-left pixel of the LCU in the buffers. 
+ kvz_pixel *const sao_buf_y = &sao_buf_y_array[(SAO_DELAY_PX + 1) * (SAO_BUF_WIDTH + 1)]; + kvz_pixel *const sao_buf_u = &sao_buf_u_array[(SAO_DELAY_PX/2 + 1) * (SAO_BUF_WIDTH_C + 1)]; + kvz_pixel *const sao_buf_v = &sao_buf_v_array[(SAO_DELAY_PX/2 + 1) * (SAO_BUF_WIDTH_C + 1)]; + + const int x_offsets[3] = { + // If there is an lcu to the left, we need to filter its rightmost + // pixels. + lcu->left ? -SAO_DELAY_PX : 0, + 0, + // If there is an lcu to the right, the rightmost pixels of this LCU + // are filtered when filtering that LCU. Otherwise we filter them now. + lcu->size.x - (lcu->right ? SAO_DELAY_PX : 0), + }; + + const int y_offsets[3] = { + // If there is an lcu above, we need to filter its bottommost pixels. + lcu->above ? -SAO_DELAY_PX : 0, + 0, + // If there is an lcu below, the bottommost pixels of this LCU are + // filtered when filtering that LCU. Otherwise we filter them now. + lcu->size.y - (lcu->below ? SAO_DELAY_PX : 0), + }; + + // Number of pixels around the block that need to be copied to the + // buffers. + const int border_left = lcu->left ? 1 : 0; + const int border_right = lcu->right ? 1 : 0; + const int border_above = lcu->above ? 1 : 0; + const int border_below = lcu->below ? 1 : 0; + + // Index of the pixel at the intersection of the top and left borders.
View file
kvazaar-1.1.0.tar.gz/src/encoderstate.h -> kvazaar-1.2.0.tar.gz/src/encoderstate.h
Changed
@@ -81,6 +81,7 @@ int32_t num; /*!< \brief Frame number */ int32_t poc; /*!< \brief Picture order count */ int8_t gop_offset; /*!< \brief Offset in the gop structure */ + int32_t irap_poc; /*!< \brief POC of the associated IRAP picture */ /** * \brief Frame-level quantization parameter @@ -91,17 +92,16 @@ //! \brief quantization factor double QP_factor; - //Current picture available references + //! Current pictures available for references image_list_t *ref; int8_t ref_list; - struct { - int32_t poc; - int8_t list; - int8_t idx; - } refmap[16]; - - bool is_idr_frame; + //! L0 and L1 reference index list + uint8_t ref_LX[2][16]; + //! L0 reference index list size + uint8_t ref_LX_size[2]; + + bool is_irap; uint8_t pictype; enum kvz_slice_type slicetype; @@ -153,11 +153,15 @@ videoframe_t *frame; int32_t id; - + //Tile: offset in LCU for current encoder_state in global coordinates int32_t lcu_offset_x; int32_t lcu_offset_y; - + + //Tile: offset in pixels + int32_t offset_x; + int32_t offset_y; + //Position of the first element in tile scan in global coordinates int32_t lcu_offset_in_ts; @@ -169,18 +173,20 @@ // LCU-column. They are packed such that each LCU-column index maps to the // x-coordinate. yuv_t *ver_buf_search; - - // This is a buffer for the deblocked bottom pixels of every LCU-row in the - // tile. They are packed such that each LCU-row index maps to the y-coordinate. + + // This is a buffer for the deblocked bottom pixels of every LCU in the + // tile. They are packed such that each LCU-row index maps to the + // y-coordinate. yuv_t *hor_buf_before_sao; - + + // This is a buffer for the deblocked right pixels of every LCU in the + // tile. They are packed such that each LCU-column index maps to the + // x-coordinate. + yuv_t *ver_buf_before_sao; + //Jobs for each individual LCU of a wavefront row. 
threadqueue_job_t **wf_jobs; - // Instance of encryption generator by tile - Crypto_Handle dbs_g; - uint32_t m_prev_pos; - } encoder_state_config_tile_t; typedef struct encoder_state_config_slice_t { @@ -243,6 +249,10 @@ bitstream_t stream; cabac_data_t cabac; + // Crypto stuff + crypto_handle_t *crypto_hdl; + uint32_t crypto_prev_pos; + uint32_t stats_bitstream_length; //Bitstream length written in bytes //! \brief Lambda for SSE @@ -263,6 +273,11 @@ */ int8_t ref_qp; + /** + * \brief Coeffs for the LCU. + */ + lcu_coeff_t *coeff; + //Jobs to wait for threadqueue_job_t * tqj_recon_done; //Reconstruction is done threadqueue_job_t * tqj_bitstream_written; //Bitstream is written @@ -277,9 +292,7 @@ coeff_scan_order_t kvz_get_scan_order(int8_t cu_type, int intra_mode, int depth); -void kvz_encoder_get_ref_lists(const encoder_state_t *const state, - int ref_list_len_out[2], - int ref_list_poc_out[2][16]); +void kvz_encoder_create_ref_lists(const encoder_state_t *const state); lcu_stats_t* kvz_get_lcu_stats(encoder_state_t *state, int lcu_x, int lcu_y);
View file
kvazaar-1.1.0.tar.gz/src/extras/crypto.cpp -> kvazaar-1.2.0.tar.gz/src/extras/crypto.cpp
Changed
@@ -1,132 +1,140 @@ #include <extras/crypto.h> #ifndef KVZ_SEL_ENCRYPTION -extern int kvz_make_vs_ignore_crypto_not_having_symbols; int kvz_make_vs_ignore_crypto_not_having_symbols = 0; #else + #include <cryptopp/aes.h> #include <cryptopp/modes.h> #include <cryptopp/osrng.h> -typedef struct AESDecoder { + #if AESEncryptionStreamMode - CryptoPP::CFB_Mode<CryptoPP::AES>::Encryption *CFBdec; + typedef CryptoPP::CFB_Mode<CryptoPP::AES>::Encryption cipher_t; #else - CryptoPP::CFB_Mode<CryptoPP::AES>::Decryption *CFBdec; + typedef CryptoPP::CFB_Mode<CryptoPP::AES>::Decryption cipher_t; #endif - byte key[CryptoPP::AES::DEFAULT_KEYLENGTH], iv[CryptoPP::AES::BLOCKSIZE], out_stream_counter[CryptoPP::AES::BLOCKSIZE], counter[CryptoPP::AES::BLOCKSIZE]; - int couter_avail, counter_index, counter_index_pos; -} AESDecoder; +struct crypto_handle_t { + cipher_t *cipher; + byte key[CryptoPP::AES::DEFAULT_KEYLENGTH]; + byte iv[CryptoPP::AES::BLOCKSIZE]; + byte out_stream_counter[CryptoPP::AES::BLOCKSIZE]; + byte counter[CryptoPP::AES::BLOCKSIZE]; + int couter_avail; + int counter_index; + int counter_index_pos; +}; -AESDecoder* Create() { - AESDecoder * AESdecoder = (AESDecoder *)malloc(sizeof(AESDecoder)); - return AESdecoder; -} -void Init(AESDecoder* AESdecoder) { - int init_val[32] = {201, 75, 219, 152, 6, 245, 237, 107, 179, 194, 81, 29, 66, 98, 198, 0, 16, 213, 27, 56, 255, 127, 242, 112, 97, 126, 197, 204, 25, 59, 38, 30}; - for(int i=0;i<16; i++) { - AESdecoder->iv [i] = init_val[i]; - AESdecoder->counter[i] = init_val[5+i]; - AESdecoder->key[i] = init_val[i+16]; - } -#if AESEncryptionStreamMode - AESdecoder->CFBdec = new CryptoPP::CFB_Mode<CryptoPP::AES >::Encryption(AESdecoder->key, CryptoPP::AES::DEFAULT_KEYLENGTH, AESdecoder->iv); -#else - AESdecoder->CFBdec = new CryptoPP::CFB_Mode<CryptoPP::AES >::Decryption(AESdecoder->key, CryptoPP::AES::DEFAULT_KEYLENGTH, AESdecoder->iv); -#endif - AESdecoder->couter_avail = 0; - AESdecoder->counter_index = 0; - 
AESdecoder->counter_index_pos = 0; -} +static uint8_t default_IV[16] = {201, 75, 219, 152, 6, 245, 237, 107, 179, 194, 81, 29, 66, 98, 198, 0}; +static uint8_t default_key[16] = {16, 213, 27, 56, 255, 127, 242, 112, 97, 126, 197, 204, 25, 59, 38, 30}; -void DeleteCrypto(AESDecoder * AESdecoder) { - if(AESdecoder) - free(AESdecoder); -} -void Decrypt(AESDecoder *AESdecoder, const unsigned char *in_stream, int size_bits, unsigned char *out_stream) { - int nb_bytes = ceil((double)size_bits/8); - AESdecoder->CFBdec->ProcessData(out_stream, in_stream, nb_bytes); - if(size_bits&7) - AESdecoder->CFBdec->SetKeyWithIV(AESdecoder->key, CryptoPP::AES::DEFAULT_KEYLENGTH, AESdecoder->iv); - -} -void Incr_counter (unsigned char *counter) { - counter[0]++; -} +crypto_handle_t* kvz_crypto_create(const kvz_config *cfg) +{ + crypto_handle_t* hdl = (crypto_handle_t*)calloc(1, sizeof(crypto_handle_t)); -#if AESEncryptionStreamMode -void Decrypt_counter(AESDecoder * AESdecoder) { - AESdecoder->CFBdec->ProcessData(AESdecoder->out_stream_counter, AESdecoder->counter, 16); - AESdecoder->couter_avail = 128; - AESdecoder->counter_index = 15; - AESdecoder->counter_index_pos = 8; - Incr_counter(AESdecoder->counter); -} -#endif + uint8_t *key; + if(cfg->optional_key!=NULL) + key = cfg->optional_key; + else + key = default_key; -#if AESEncryptionStreamMode -unsigned int get_key (AESDecoder * AESdecoder, int nb_bits) { - unsigned int key_ = 0; - if(nb_bits > 32) { - printf("The Generator can not generate more than 32 bit %d \n", nb_bits); - return 0; - } - if( !nb_bits ) - return 0; - if(!AESdecoder->couter_avail) - Decrypt_counter(AESdecoder); - - if(AESdecoder->couter_avail >= nb_bits) - AESdecoder->couter_avail -= nb_bits; - else - AESdecoder->couter_avail = 0; - int nb = 0; - while( nb_bits ) { - if( nb_bits >= AESdecoder->counter_index_pos ) - nb = AESdecoder->counter_index_pos; - else - nb = nb_bits; - key_ <<= nb; - key_ += (AESdecoder->out_stream_counter[AESdecoder->counter_index] & 
((1<<nb)-1)); - AESdecoder->out_stream_counter[AESdecoder->counter_index] >>= nb; - nb_bits -= nb; - - if(AESdecoder->counter_index && nb == AESdecoder->counter_index_pos ) { - AESdecoder->counter_index--; - AESdecoder->counter_index_pos = 8; - } else { - AESdecoder->counter_index_pos -= nb; - if(nb_bits) { - Decrypt_counter(AESdecoder); - AESdecoder->couter_avail -= nb_bits; - } - } - } - return key_; -} -#endif -Crypto_Handle CreateC() { - AESDecoder* AESdecoder = Create(); - return AESdecoder; + for (int i = 0; i < 16; i++) { + hdl->iv [i] = default_IV[i]; + hdl->counter[i] = (i<11)? default_IV[5+i] : key[i-11]; + hdl->key[i] = key[i]; + } + + hdl->cipher = new cipher_t(hdl->key, CryptoPP::AES::DEFAULT_KEYLENGTH, hdl->iv); + + hdl->couter_avail = 0; + hdl->counter_index = 0; + hdl->counter_index_pos = 0; + + return hdl; } -void InitC(Crypto_Handle hdl) { - Init((AESDecoder*)hdl); +void kvz_crypto_delete(crypto_handle_t **hdl) +{ + if (*hdl) { + delete (*hdl)->cipher; + (*hdl)->cipher = NULL; + } + FREE_POINTER(*hdl); } +void kvz_crypto_decrypt(crypto_handle_t* hdl, + const uint8_t *in_stream, + int size_bits, + uint8_t *out_stream) +{ + int num_bytes = ceil((double)size_bits/8); + hdl->cipher->ProcessData(out_stream, in_stream, num_bytes); + if (size_bits & 7) { + hdl->cipher->SetKeyWithIV(hdl->key, CryptoPP::AES::DEFAULT_KEYLENGTH, hdl->iv); + } +} #if AESEncryptionStreamMode -unsigned int ff_get_key (Crypto_Handle *hdl, int nb_bits) { - return get_key ((AESDecoder*)*hdl, nb_bits); +static void increment_counter(unsigned char *counter) +{ + counter[0]++; } -#endif -void DecryptC(Crypto_Handle hdl, const unsigned char *in_stream, int size_bits, unsigned char *out_stream) { - Decrypt((AESDecoder*)hdl, in_stream, size_bits, out_stream); + +static void decrypt_counter(crypto_handle_t *hdl) +{ + hdl->cipher->ProcessData(hdl->out_stream_counter, hdl->counter, 16); + hdl->couter_avail = 128; + hdl->counter_index = 15; + hdl->counter_index_pos = 8; + 
increment_counter(hdl->counter); } -void DeleteCryptoC(Crypto_Handle hdl) { - DeleteCrypto((AESDecoder *)hdl); +unsigned kvz_crypto_get_key(crypto_handle_t *hdl, int nb_bits) +{ + unsigned key = 0; + if (nb_bits > 32) {
View file
kvazaar-1.1.0.tar.gz/src/extras/crypto.h -> kvazaar-1.2.0.tar.gz/src/extras/crypto.h
Changed
@@ -2,6 +2,10 @@ #define CRYPTO_H_ #include "global.h" +#include "../cfg.h" + +#include <stdio.h> +#include <math.h> #ifdef KVZ_SEL_ENCRYPTION #define STUBBED extern @@ -9,73 +13,60 @@ #define STUBBED static #endif -#include <stdio.h> -#include <math.h> -#define AESEncryptionStreamMode 1 +#define AESEncryptionStreamMode 1 + #ifdef __cplusplus extern "C" { #endif - typedef void* Crypto_Handle; - STUBBED Crypto_Handle CreateC(); - STUBBED void InitC(Crypto_Handle hdl); - STUBBED void DecryptC(Crypto_Handle hdl, const unsigned char *in_stream, int size_bits, unsigned char *out_stream); + +typedef struct crypto_handle_t crypto_handle_t; + +STUBBED crypto_handle_t* kvz_crypto_create(const kvz_config *cfg); +STUBBED void kvz_crypto_decrypt(crypto_handle_t* hdl, + const uint8_t *in_stream, + int size_bits, + uint8_t *out_stream); +STUBBED void kvz_crypto_delete(crypto_handle_t **hdl); + #if AESEncryptionStreamMode - STUBBED unsigned int ff_get_key(Crypto_Handle *hdl, int nb_bits); +STUBBED unsigned kvz_crypto_get_key(crypto_handle_t *hdl, int num_bits); #endif - STUBBED void DeleteCryptoC(Crypto_Handle hdl); #ifdef __cplusplus } #endif +#undef STUBBED + #ifndef KVZ_SEL_ENCRYPTION -// Provide static stubs to allow linking without libcryptopp and allows us to -// avoid sprinkling ifdefs everywhere and having a bunch of code that's not -// compiled during normal development. +// Provide static stubs to allow linking without libcryptopp and allows us +// to avoid sprinkling ifdefs everywhere and having a bunch of code that's +// not compiled during normal development. // Provide them in the header so we can avoid compiling the cpp file, which // means we don't need a C++ compiler when crypto is not enabled. 
-#include <stdio.h> -#include <stdint.h> -#include <inttypes.h> - -static uintptr_t handle_id = 1; - -static INLINE Crypto_Handle CreateC() { - printf("Crypto CreateC %" PRIuPTR "\n", handle_id); - return (void*)(handle_id++); -} -static INLINE void InitC(Crypto_Handle hdl) { - printf("Crypto InitC %" PRIuPTR "\n", (uintptr_t)hdl); -} - -static INLINE void DecryptC(Crypto_Handle hdl, const unsigned char *in_stream, - int size_bits, unsigned char *out_stream) +static INLINE crypto_handle_t* kvz_crypto_create(const kvz_config *cfg) { - // Stub. - printf("Crypto DecryptC %" PRIuPTR "\n", (uintptr_t)hdl); + return NULL; } +static INLINE void kvz_crypto_decrypt(crypto_handle_t* hdl, + const uint8_t *in_stream, + int size_bits, + uint8_t *out_stream) +{} + +static INLINE void kvz_crypto_delete(crypto_handle_t **hdl) +{} + #if AESEncryptionStreamMode -static INLINE unsigned int ff_get_key(Crypto_Handle *hdl, int nb_bits) +static INLINE unsigned kvz_crypto_get_key(crypto_handle_t *hdl, int num_bits) { - // Stub. - static Crypto_Handle ff_get_key_last_hdl = 0; - if (*hdl != ff_get_key_last_hdl) { - printf("Crypto ff_get_key %" PRIuPTR "\n", (uintptr_t)*hdl); - } - ff_get_key_last_hdl = *hdl; return 0; } #endif -static INLINE void DeleteCryptoC(Crypto_Handle hdl) -{ - // Stub. - printf("Crypto DeleteCryptoC %" PRIuPTR "\n", (uintptr_t)hdl); -} - #endif // KVZ_SEL_ENCRYPTION #endif // CRYPTO_H_
View file
kvazaar-1.1.0.tar.gz/src/filter.c -> kvazaar-1.2.0.tar.gz/src/filter.c
Changed
@@ -168,7 +168,7 @@ int16_t m4 = src[0]; int16_t m5 = src[offset]; - delta = CLIP(-tc,tc, (((m4 - m3) << 2) + m2 - m5 + 4 ) >> 3); + delta = CLIP(-tc,tc, (((m4 - m3) * 4) + m2 - m5 + 4 ) >> 3); if(!part_P_nofilter) { src[-offset] = CLIP(0, (1 << encoder->bitdepth) - 1, m3 + delta); } @@ -262,9 +262,7 @@ static int8_t get_qp_y_pred(const encoder_state_t* state, int x, int y, edge_dir dir) { - if (state->encoder_control->cfg.target_bitrate <= 0 - && state->encoder_control->cfg.roi.dqps == NULL) - { + if (!state->encoder_control->lcu_dqp_enabled) { return state->qp; } @@ -403,10 +401,13 @@ // Non-zero residual/coeffs and transform boundary // Neither CU is intra so tr_depth <= MAX_DEPTH. strength = 1; - } else if (cu_p->inter.mv_dir != 3 && cu_q->inter.mv_dir != 3 && ((abs(cu_q->inter.mv[cu_q->inter.mv_dir - 1][0] - cu_p->inter.mv[cu_p->inter.mv_dir - 1][0]) >= 4) || (abs(cu_q->inter.mv[cu_q->inter.mv_dir - 1][1] - cu_p->inter.mv[cu_p->inter.mv_dir - 1][1]) >= 4))) { + } else if (cu_p->inter.mv_dir != 3 && cu_q->inter.mv_dir != 3 && + ((abs(cu_q->inter.mv[cu_q->inter.mv_dir - 1][0] - cu_p->inter.mv[cu_p->inter.mv_dir - 1][0]) >= 4) || + (abs(cu_q->inter.mv[cu_q->inter.mv_dir - 1][1] - cu_p->inter.mv[cu_p->inter.mv_dir - 1][1]) >= 4))) { // Absolute motion vector diff between blocks >= 1 (Integer pixel) strength = 1; - } else if (cu_p->inter.mv_dir != 3 && cu_q->inter.mv_dir != 3 && cu_q->inter.mv_ref[cu_q->inter.mv_dir - 1] != cu_p->inter.mv_ref[cu_p->inter.mv_dir - 1]) { + } else if (cu_p->inter.mv_dir != 3 && cu_q->inter.mv_dir != 3 && + cu_q->inter.mv_ref[cu_q->inter.mv_dir - 1] != cu_p->inter.mv_ref[cu_p->inter.mv_dir - 1]) { strength = 1; } @@ -431,10 +432,10 @@ cu_p->inter.mv[1][0] = 0; cu_p->inter.mv[1][1] = 0; } - const int refP0 = (cu_p->inter.mv_dir & 1) ? cu_p->inter.mv_ref[0] : -1; - const int refP1 = (cu_p->inter.mv_dir & 2) ? cu_p->inter.mv_ref[1] : -1; - const int refQ0 = (cu_q->inter.mv_dir & 1) ? 
cu_q->inter.mv_ref[0] : -1; - const int refQ1 = (cu_q->inter.mv_dir & 2) ? cu_q->inter.mv_ref[1] : -1; + const int refP0 = (cu_p->inter.mv_dir & 1) ? state->frame->ref_LX[0][cu_p->inter.mv_ref[0]] : -1; + const int refP1 = (cu_p->inter.mv_dir & 2) ? state->frame->ref_LX[1][cu_p->inter.mv_ref[1]] : -1; + const int refQ0 = (cu_q->inter.mv_dir & 1) ? state->frame->ref_LX[0][cu_q->inter.mv_ref[0]] : -1; + const int refQ1 = (cu_q->inter.mv_dir & 2) ? state->frame->ref_LX[1][cu_q->inter.mv_ref[1]] : -1; const int16_t* mvQ0 = cu_q->inter.mv[0]; const int16_t* mvQ1 = cu_q->inter.mv[1];
View file
kvazaar-1.1.0.tar.gz/src/global.h -> kvazaar-1.2.0.tar.gz/src/global.h
Changed
@@ -117,10 +117,6 @@ //! Search is started at depth 0 and goes in Z-order to MAX_PU_DEPTH, see search_cu() #define MAX_PU_DEPTH 4 -//! Minimum log2 transform sizes. -//! spec: max_transform_hierarchy_depth_inter -#define TR_DEPTH_INTER 2 - //! spec: pcm_enabled_flag, Setting to 1 will enable using PCM blocks (current intra-search does not consider PCM) #define ENABLE_PCM 0 @@ -150,6 +146,28 @@ #define LCU_LUMA_SIZE (LCU_WIDTH * LCU_WIDTH) #define LCU_CHROMA_SIZE (LCU_WIDTH * LCU_WIDTH >> 2) +/** + * \brief Number of pixels to delay deblocking. + * + * Number of pixels at the bottom and right side of the LCU that are not + * deblocked until when filtering the neighboring LCU. The last four chroma + * pixels of the horizontal edges within the LCU are deblocked with the LCU + * to the right. Therefore, DEBLOCK_DELAY_PX is set to 8 pixels. + */ +#define DEBLOCK_DELAY_PX 8 + +/** + * \brief Number of pixels to delay SAO in horizontal and vertical + * directions. + * + * Number of pixels at the bottom and right side of the LCU that are not + * filtered with SAO until when filtering the neighboring LCU. SAO + * reconstruction requires that a one pixels border has been deblocked for + * both luma and chroma. Therefore, SAO_DELAY_PX is set to + * DEBLOCK_DELAY_PX + 2. + */ +#define SAO_DELAY_PX (DEBLOCK_DELAY_PX + 2) + #define MAX_REF_PIC_COUNT 16 #define AMVP_MAX_NUM_CANDS 2 @@ -162,6 +180,7 @@ #define MIN(a,b) (((a)<(b))?(a):(b)) #define CLIP(low,high,value) MAX((low),MIN((high),(value))) #define CLIP_TO_PIXEL(value) CLIP(0, PIXEL_MAX, (value)) +#define CLIP_TO_QP(value) CLIP(0, 51, (value)) #define SWAP(a,b,swaptype) { swaptype tempval; tempval = a; a = b; b = tempval; } #define CU_WIDTH_FROM_DEPTH(depth) (LCU_WIDTH >> depth) #define WITHIN(val, min_val, max_val) ((min_val) <= (val) && (val) <= (max_val)) @@ -181,7 +200,7 @@ // NOTE: When making a release, check to see if incrementing libversion in // configure.ac is necessary. 
#ifndef KVZ_VERSION -#define KVZ_VERSION 1.1.0 +#define KVZ_VERSION 1.2.0 #endif #define VERSION_STRING QUOTE_EXPAND(KVZ_VERSION) @@ -206,6 +225,12 @@ #define SIMD_ALIGNMENT 32 #ifdef _MSC_VER + #define ALIGNED(alignment) __declspec(align(alignment)) +#else + #define ALIGNED(alignment) __attribute__((aligned (alignment))) +#endif + +#ifdef _MSC_VER // Buggy VS2010 throws intellisense warnings if void* is not casted. #define MALLOC(type, num) (type *)malloc(sizeof(type) * (num)) #else @@ -219,7 +244,11 @@ // Fill a structure or a static array with val bytes. #define FILL(var, val) memset(&(var), (val), sizeof(var)) // Fill a number of elements in an array with val bytes. -#define FILL_ARRAY(ar, val, size) memset((ar), (val), (size) * sizeof(*(ar))) +#define FILL_ARRAY(ar, val, size) \ +{\ + void *temp_ptr = (void*)(ar);\ + memset((temp_ptr), (val), (size) * sizeof(*(ar)));\ +} #define FREE_POINTER(pointer) { free((void*)pointer); pointer = NULL; } #define MOVE_POINTER(dst_pointer,src_pointer) { dst_pointer = src_pointer; src_pointer = NULL; }
View file
kvazaar-1.1.0.tar.gz/src/image.c -> kvazaar-1.2.0.tar.gz/src/image.c
Changed
@@ -23,6 +23,7 @@ #include <limits.h> #include <stdlib.h> +#include "strategies/strategies-ipol.h" #include "strategies/strategies-picture.h" #include "threads.h" @@ -191,12 +192,14 @@ return yuv; } -void kvz_yuv_t_free(yuv_t * yuv) +void kvz_yuv_t_free(yuv_t *yuv) { - free(yuv->y); - free(yuv->u); - free(yuv->v); - free(yuv); + if (yuv) { + FREE_POINTER(yuv->y); + FREE_POINTER(yuv->u); + FREE_POINTER(yuv->v); + } + FREE_POINTER(yuv); } hi_prec_buf_t * kvz_hi_prec_buf_t_alloc(int luma_size) @@ -447,21 +450,19 @@ * \param pic Image for the block we are trying to find. * \param ref Image where we are trying to find the block. * -* \returns +* \returns Sum of absolute differences */ -unsigned kvz_image_calc_sad(const kvz_picture *pic, const kvz_picture *ref, int pic_x, int pic_y, int ref_x, int ref_y, - int block_width, int block_height, int max_px_below_lcu) { +unsigned kvz_image_calc_sad(const kvz_picture *pic, + const kvz_picture *ref, + int pic_x, + int pic_y, + int ref_x, + int ref_y, + int block_width, + int block_height) +{ assert(pic_x >= 0 && pic_x <= pic->width - block_width); assert(pic_y >= 0 && pic_y <= pic->height - block_height); - - // Check that we are not referencing pixels that are not final. - if (max_px_below_lcu >= 0) { - int next_lcu_row_px = ((pic_y >> LOG2_LCU_WIDTH) + 1) << LOG2_LCU_WIDTH; - int px_below_lcu = ref_y + block_height - next_lcu_row_px; - if (px_below_lcu > max_px_below_lcu) { - return INT_MAX; - } - } if (ref_x >= 0 && ref_x <= ref->width - block_width && ref_y >= 0 && ref_y <= ref->height - block_height) @@ -479,6 +480,74 @@ /** +* \brief Calculate interpolated SATD between two blocks. +* +* \param pic Image for the block we are trying to find. +* \param ref Image where we are trying to find the block. 
+*/ +unsigned kvz_image_calc_satd(const kvz_picture *pic, + const kvz_picture *ref, + int pic_x, + int pic_y, + int ref_x, + int ref_y, + int block_width, + int block_height) +{ + assert(pic_x >= 0 && pic_x <= pic->width - block_width); + assert(pic_y >= 0 && pic_y <= pic->height - block_height); + + if (ref_x >= 0 && ref_x <= ref->width - block_width && + ref_y >= 0 && ref_y <= ref->height - block_height) + { + // Reference block is completely inside the frame, so just calculate the + // SAD directly. This is the most common case, which is why it's first. + const kvz_pixel *pic_data = &pic->y[pic_y * pic->stride + pic_x]; + const kvz_pixel *ref_data = &ref->y[ref_y * ref->stride + ref_x]; + return kvz_satd_any_size(block_width, + block_height, + pic_data, + pic->stride, + ref_data, + ref->stride) >> (KVZ_BIT_DEPTH - 8); + } else { + // Extrapolate pixels from outside the frame. + kvz_extended_block block; + kvz_get_extended_block(pic_x, + pic_y, + ref_x - pic_x, + ref_y - pic_y, + 0, + 0, + ref->y, + ref->width, + ref->height, + 0, + block_width, + block_height, + &block); + + const kvz_pixel *pic_data = &pic->y[pic_y * pic->stride + pic_x]; + + unsigned satd = kvz_satd_any_size(block_width, + block_height, + pic_data, + pic->stride, + block.buffer, + block.stride) >> (KVZ_BIT_DEPTH - 8); + + if (block.malloc_used) { + FREE_POINTER(block.buffer); + } + + return satd; + } +} + + + + +/** * \brief BLock Image Transfer from one buffer to another. * * It's a stupidly simple loop that copies pixels.
View file
kvazaar-1.1.0.tar.gz/src/image.h -> kvazaar-1.2.0.tar.gz/src/image.h
Changed
@@ -74,8 +74,24 @@ //Algorithms -unsigned kvz_image_calc_sad(const kvz_picture *pic, const kvz_picture *ref, int pic_x, int pic_y, int ref_x, int ref_y, - int block_width, int block_height, int max_lcu_below); +unsigned kvz_image_calc_sad(const kvz_picture *pic, + const kvz_picture *ref, + int pic_x, + int pic_y, + int ref_x, + int ref_y, + int block_width, + int block_height); + + +unsigned kvz_image_calc_satd(const kvz_picture *pic, + const kvz_picture *ref, + int pic_x, + int pic_y, + int ref_x, + int ref_y, + int block_width, + int block_height); void kvz_pixels_blit(const kvz_pixel* orig, kvz_pixel *dst,
View file
kvazaar-1.1.0.tar.gz/src/imagelist.c -> kvazaar-1.2.0.tar.gz/src/imagelist.c
Changed
@@ -36,9 +36,10 @@ { image_list_t *list = (image_list_t *)malloc(sizeof(image_list_t)); list->size = size; - list->images = malloc(sizeof(kvz_picture*) * size); - list->cu_arrays = malloc(sizeof(cu_array_t*) * size); - list->pocs = malloc(sizeof(int32_t) * size); + list->images = malloc(sizeof(kvz_picture*) * size); + list->cu_arrays = malloc(sizeof(cu_array_t*) * size); + list->pocs = malloc(sizeof(int32_t) * size); + list->ref_LXs = malloc(sizeof(*list->ref_LXs) * size); list->used_size = 0; return list; @@ -55,6 +56,7 @@ list->images = (kvz_picture**)realloc(list->images, sizeof(kvz_picture*) * size); list->cu_arrays = (cu_array_t**)realloc(list->cu_arrays, sizeof(cu_array_t*) * size); list->pocs = realloc(list->pocs, sizeof(int32_t) * size); + list->ref_LXs = realloc(list->ref_LXs, sizeof(*list->ref_LXs) * size); list->size = size; return size == 0 || (list->images && list->cu_arrays && list->pocs); } @@ -71,9 +73,13 @@ for (i = 0; i < list->used_size; ++i) { kvz_image_free(list->images[i]); list->images[i] = NULL; - kvz_cu_array_free(list->cu_arrays[i]); + kvz_cu_array_free(&list->cu_arrays[i]); list->cu_arrays[i] = NULL; list->pocs[i] = 0; + for (int j = 0; j < 16; j++) { + list->ref_LXs[i][0][j] = 0; + list->ref_LXs[i][1][j] = 0; + } } } @@ -81,10 +87,12 @@ free(list->images); free(list->cu_arrays); free(list->pocs); + free(list->ref_LXs); } list->images = NULL; list->cu_arrays = NULL; list->pocs = NULL; + list->ref_LXs = NULL; free(list); return 1; } @@ -95,7 +103,7 @@ * \param picture_list list to use * \return 1 on success */ -int kvz_image_list_add(image_list_t *list, kvz_picture *im, cu_array_t *cua, int32_t poc) +int kvz_image_list_add(image_list_t *list, kvz_picture *im, cu_array_t *cua, int32_t poc, uint8_t ref_LX[2][16]) { int i = 0; if (KVZ_ATOMIC_INC(&(im->refcount)) == 1) { @@ -119,11 +127,19 @@ list->images[i] = list->images[i - 1]; list->cu_arrays[i] = list->cu_arrays[i - 1]; list->pocs[i] = list->pocs[i - 1]; + for (int j = 0; j < 16; j++) { + 
list->ref_LXs[i][0][j] = list->ref_LXs[i - 1][0][j]; + list->ref_LXs[i][1][j] = list->ref_LXs[i - 1][1][j]; + } } list->images[0] = im; list->cu_arrays[0] = cua; list->pocs[0] = poc; + for (int j = 0; j < 16; j++) { + list->ref_LXs[0][0][j] = ref_LX[0][j]; + list->ref_LXs[0][1][j] = ref_LX[1][j]; + } list->used_size++; return 1; @@ -145,17 +161,17 @@ kvz_image_free(list->images[n]); - if (!kvz_cu_array_free(list->cu_arrays[n])) { - fprintf(stderr, "Could not free cu_array!\n"); - assert(0); //Stop here - return 0; - } + kvz_cu_array_free(&list->cu_arrays[n]); // The last item is easy to remove if (n == list->used_size - 1) { list->images[n] = NULL; list->cu_arrays[n] = NULL; list->pocs[n] = 0; + for (int j = 0; j < 16; j++) { + list->ref_LXs[n][0][j] = 0; + list->ref_LXs[n][1][j] = 0; + } list->used_size--; } else { int i = n; @@ -164,10 +180,18 @@ list->images[i] = list->images[i + 1]; list->cu_arrays[i] = list->cu_arrays[i + 1]; list->pocs[i] = list->pocs[i + 1]; + for (int j = 0; j < 16; j++) { + list->ref_LXs[i][0][j] = list->ref_LXs[i + 1][0][j]; + list->ref_LXs[i][1][j] = list->ref_LXs[i + 1][1][j]; + } } list->images[list->used_size - 1] = NULL; list->cu_arrays[list->used_size - 1] = NULL; list->pocs[list->used_size - 1] = 0; + for (int j = 0; j < 16; j++) { + list->ref_LXs[list->used_size - 1][0][j] = 0; + list->ref_LXs[list->used_size - 1][1][j] = 0; + } list->used_size--; } @@ -181,7 +205,7 @@ } for (i = source->used_size - 1; i >= 0; --i) { - kvz_image_list_add(target, source->images[i], source->cu_arrays[i], source->pocs[i]); + kvz_image_list_add(target, source->images[i], source->cu_arrays[i], source->pocs[i], source->ref_LXs[i]); } return 1; }
View file
kvazaar-1.1.0.tar.gz/src/imagelist.h -> kvazaar-1.2.0.tar.gz/src/imagelist.h
Changed
@@ -39,14 +39,17 @@ struct kvz_picture* *images; //!< \brief Pointer to array of picture pointers. cu_array_t* *cu_arrays; int32_t *pocs; + uint8_t (*ref_LXs)[2][16]; //!< L0 and L1 reference index list for each image uint32_t size; //!< \brief Array size. uint32_t used_size; + + } image_list_t; image_list_t * kvz_image_list_alloc(int size); int kvz_image_list_resize(image_list_t *list, unsigned size); int kvz_image_list_destroy(image_list_t *list); -int kvz_image_list_add(image_list_t *list, kvz_picture *im, cu_array_t* cua, int32_t poc); +int kvz_image_list_add(image_list_t *list, kvz_picture *im, cu_array_t* cua, int32_t poc, uint8_t ref_LX[2][16]); int kvz_image_list_rem(image_list_t *list, unsigned n); int kvz_image_list_copy_contents(image_list_t *target, image_list_t *source);
View file
kvazaar-1.1.0.tar.gz/src/input_frame_buffer.c -> kvazaar-1.2.0.tar.gz/src/input_frame_buffer.c
Changed
@@ -58,8 +58,6 @@ const int gop_buf_size = 3 * cfg->gop_len; - assert(state->frame->num >= 0); - if (cfg->gop_len == 0 || cfg->gop_lowdelay) { // No reordering of output pictures necessary. @@ -69,12 +67,14 @@ state->frame->gop_offset = 0; if (cfg->gop_len > 0) { // Using a low delay GOP structure. - state->frame->gop_offset = (state->frame->num - 1) % cfg->gop_len; - if (state->frame->gop_offset < 0) { - // Set gop_offset of IDR as the highest quality picture. - state->frame->gop_offset += cfg->gop_len; + uint64_t frame_num = buf->num_out; + if (cfg->intra_period) { + frame_num %= cfg->intra_period; } + state->frame->gop_offset = (frame_num + cfg->gop_len - 1) % cfg->gop_len; } + buf->num_in++; + buf->num_out++; return kvz_image_copy_ref(img_in); }
View file
kvazaar-1.1.0.tar.gz/src/inter.c -> kvazaar-1.2.0.tar.gz/src/inter.c
Changed
@@ -31,6 +31,14 @@ #include "videoframe.h" +typedef struct { + const cu_info_t *a[2]; + const cu_info_t *b[3]; + const cu_info_t *c3; + const cu_info_t *h; +} merge_candidates_t; + + static void inter_recon_frac_luma(const encoder_state_t * const state, const kvz_picture * const ref, int32_t xpos, @@ -53,8 +61,8 @@ ypos, mv_param[0] >> 2, mv_param[1] >> 2, - state->tile->lcu_offset_x * LCU_WIDTH, - state->tile->lcu_offset_y * LCU_WIDTH, + state->tile->offset_x, + state->tile->offset_y, ref->y, ref->width, ref->height, @@ -98,8 +106,8 @@ ypos, mv_param[0] >> 2, mv_param[1] >> 2, - state->tile->lcu_offset_x * LCU_WIDTH, - state->tile->lcu_offset_y * LCU_WIDTH, + state->tile->offset_x, + state->tile->offset_y, ref->y, ref->width, ref->height, @@ -146,14 +154,34 @@ kvz_extended_block src_v = { 0, 0, 0, 0 }; //Fractional chroma U - kvz_get_extended_block(xpos, ypos, (mv_param[0] >> 2) >> 1, (mv_param[1] >> 2) >> 1, state->tile->lcu_offset_x * LCU_WIDTH_C, state->tile->lcu_offset_y * LCU_WIDTH_C, - ref->u, ref->width >> 1, ref->height >> 1, FILTER_SIZE_C, block_width, block_height, &src_u); + kvz_get_extended_block(xpos, ypos, + (mv_param[0] >> 2) >> 1, + (mv_param[1] >> 2) >> 1, + state->tile->offset_x >> 1, + state->tile->offset_y >> 1, + ref->u, + ref->width >> 1, + ref->height >> 1, + FILTER_SIZE_C, + block_width, + block_height, + &src_u); kvz_sample_octpel_chroma(state->encoder_control, src_u.orig_topleft, src_u.stride, block_width, block_height, lcu->rec.u + (ypos % LCU_WIDTH_C)*LCU_WIDTH_C + (xpos % LCU_WIDTH_C), LCU_WIDTH_C, mv_frac_x, mv_frac_y, mv_param); //Fractional chroma V - kvz_get_extended_block(xpos, ypos, (mv_param[0] >> 2) >> 1, (mv_param[1] >> 2) >> 1, state->tile->lcu_offset_x * LCU_WIDTH_C, state->tile->lcu_offset_y * LCU_WIDTH_C, - ref->v, ref->width >> 1, ref->height >> 1, FILTER_SIZE_C, block_width, block_height, &src_v); + kvz_get_extended_block(xpos, ypos, + (mv_param[0] >> 2) >> 1, + (mv_param[1] >> 2) >> 1, + state->tile->offset_x >> 1, + 
state->tile->offset_y >> 1, + ref->v, + ref->width >> 1, + ref->height >> 1, + FILTER_SIZE_C, + block_width, + block_height, + &src_v); kvz_sample_octpel_chroma(state->encoder_control, src_v.orig_topleft, src_v.stride, block_width, block_height, lcu->rec.v + (ypos % LCU_WIDTH_C)*LCU_WIDTH_C + (xpos % LCU_WIDTH_C), LCU_WIDTH_C, mv_frac_x, mv_frac_y, mv_param); @@ -190,8 +218,8 @@ ypos, (mv_param[0] >> 2) >> 1, (mv_param[1] >> 2) >> 1, - state->tile->lcu_offset_x * LCU_WIDTH_C, - state->tile->lcu_offset_y * LCU_WIDTH_C, + state->tile->offset_x >> 1, + state->tile->offset_y >> 1, ref->u, ref->width >> 1, ref->height >> 1, @@ -215,8 +243,8 @@ ypos, (mv_param[0] >> 2) >> 1, (mv_param[1] >> 2) >> 1, - state->tile->lcu_offset_x * LCU_WIDTH_C, - state->tile->lcu_offset_y * LCU_WIDTH_C, + state->tile->offset_x >> 1, + state->tile->offset_y >> 1, ref->v, ref->width >> 1, ref->height >> 1, @@ -300,17 +328,13 @@ lcu_t *lcu, hi_prec_buf_t *hi_prec_out) { - const vector2d_t tile_in_frame = { - state->tile->lcu_offset_x * LCU_WIDTH, - state->tile->lcu_offset_y * LCU_WIDTH - }; const vector2d_t pu_in_tile = { xpos, ypos }; const vector2d_t pu_in_lcu = { xpos % LCU_WIDTH, ypos % LCU_WIDTH }; const vector2d_t mv_in_pu = { mv_param[0] >> 2, mv_param[1] >> 2 }; const vector2d_t mv_in_frame = { - mv_in_pu.x + pu_in_tile.x + tile_in_frame.x, - mv_in_pu.y + pu_in_tile.y + tile_in_frame.y + mv_in_pu.x + pu_in_tile.x + state->tile->offset_x, + mv_in_pu.y + pu_in_tile.y + state->tile->offset_y }; const bool mv_is_outside_frame = mv_in_frame.x < 0 || @@ -642,24 +666,26 @@ /** -* \brief Get merge candidates for current block -* \param encoder encoder control struct to use -* \param x block x position in SCU -* \param y block y position in SCU -* \param width current block width -* \param height current block height -* \param H candidate H -* \param C1 candidate C1 -*/ -static void kvz_inter_get_temporal_merge_candidates(const encoder_state_t * const state, - int32_t x, - int32_t y, - int32_t 
width, - int32_t height, - cu_info_t **C3, - cu_info_t **H, - uint8_t ref_list, - uint8_t ref_idx) { + * \brief Get merge candidates for current block + * + * \param state encoder control state to use + * \param x block x position in SCU + * \param y block y position in SCU + * \param width current block width + * \param height current block height + * \param ref_list which reference list, L0 is 1 and L1 is 2 + * \param ref_idx index in the reference list + * \param cand_out will be filled with C3 and H candidates + */ +static void get_temporal_merge_candidates(const encoder_state_t * const state, + int32_t x, + int32_t y, + int32_t width, + int32_t height, + uint8_t ref_list, + uint8_t ref_idx, + merge_candidates_t *cand_out) +{ /* Predictor block locations _________ @@ -670,22 +696,19 @@ |H| */ - *C3 = NULL; - *H = NULL; + cand_out->c3 = cand_out->h = NULL; // Find temporal reference if (state->frame->ref->used_size) { - uint32_t colocated_ref = UINT_MAX; + uint32_t colocated_ref; // Select L0/L1 ref_idx reference - for (int temporal_cand = 0; temporal_cand < state->frame->ref->used_size; temporal_cand++) { - if (state->frame->refmap[temporal_cand].list == ref_list && state->frame->refmap[temporal_cand].idx == ref_idx) { - colocated_ref = temporal_cand; - break; - } + if (state->frame->ref_LX_size[ref_list-1] > ref_idx) { + colocated_ref = state->frame->ref_LX[ref_list - 1][ref_idx]; + } else { + // not found + return; } - - if (colocated_ref == UINT_MAX) return; cu_array_t *ref_cu_array = state->frame->ref->cu_arrays[colocated_ref]; int cu_per_width = ref_cu_array->width / SCU_WIDTH; @@ -707,7 +730,7 @@ if (H_offset >= 0) { // Only use when it's inter block if (ref_cu_array->data[H_offset].type == CU_INTER) { - *H = &ref_cu_array->data[H_offset]; + cand_out->h = &ref_cu_array->data[H_offset]; }
View file
kvazaar-1.1.0.tar.gz/src/inter.h -> kvazaar-1.2.0.tar.gz/src/inter.h
Changed
@@ -35,7 +35,7 @@ typedef struct { uint8_t dir; - uint8_t ref[2]; + uint8_t ref[2]; // index to L0/L1 int16_t mv[2][2]; } inter_merge_cand_t; @@ -85,6 +85,5 @@ int32_t width, int32_t height, bool use_a1, bool use_b1, inter_merge_cand_t mv_cand[MRG_MAX_NUM_CANDS], - lcu_t *lcu, - uint8_t ref_idx); + lcu_t *lcu); #endif
View file
kvazaar-1.1.0.tar.gz/src/intra.c -> kvazaar-1.2.0.tar.gz/src/intra.c
Changed
@@ -114,6 +114,52 @@ return 1; } +#if KVZ_SEL_ENCRYPTION +int8_t kvz_intra_get_dir_luma_predictor_encry( + const uint32_t x, + const uint32_t y, + int8_t *preds, + const cu_info_t *const cur_pu, + const cu_info_t *const left_pu, + const cu_info_t *const above_pu) +{ + // The default mode if block is not coded yet is INTRA_DC. + int8_t left_intra_dir = 1; + if (left_pu && left_pu->type == CU_INTRA) { + left_intra_dir = left_pu->intra.mode_encry ; + } + + int8_t above_intra_dir = 1; + if (above_pu && above_pu->type == CU_INTRA && y % LCU_WIDTH != 0) { + above_intra_dir = above_pu->intra.mode_encry; + } + + // If the predictions are the same, add new predictions + if (left_intra_dir == above_intra_dir) { + if (left_intra_dir > 1) { // angular modes + preds[0] = left_intra_dir; + preds[1] = ((left_intra_dir + 29) % 32) + 2; + preds[2] = ((left_intra_dir - 1 ) % 32) + 2; + } else { //non-angular + preds[0] = 0;//PLANAR_IDX; + preds[1] = 1;//DC_IDX; + preds[2] = 26;//VER_IDX; + } + } else { // If we have two distinct predictions + preds[0] = left_intra_dir; + preds[1] = above_intra_dir; + + // add planar mode if it's not yet present + if (left_intra_dir && above_intra_dir ) { + preds[2] = 0; // PLANAR_IDX; + } else { // Add DC mode if it's not present, otherwise 26. + preds[2] = (left_intra_dir+above_intra_dir)<2? 
26 : 1; + } + } + + return 1; +} +#endif static void intra_filter_reference( int_fast8_t log2_width, @@ -541,126 +587,120 @@ } } -void kvz_intra_recon_lcu_luma( +static void intra_recon_tb_leaf( encoder_state_t *const state, int x, int y, int depth, int8_t intra_mode, - cu_info_t *cur_cu, - lcu_t *lcu) + lcu_t *lcu, + color_t color) { - const vector2d_t lcu_px = { SUB_SCU(x), SUB_SCU(y) }; - if (cur_cu == NULL) { - cur_cu = LCU_GET_CU_AT_PX(lcu, lcu_px.x, lcu_px.y); - } - const int8_t width = LCU_WIDTH >> depth; - - if (depth == 0 || cur_cu->tr_depth > depth) { - int offset = width / 2; - - kvz_intra_recon_lcu_luma(state, x, y, depth+1, intra_mode, NULL, lcu); - kvz_intra_recon_lcu_luma(state, x + offset, y, depth+1, intra_mode, NULL, lcu); - kvz_intra_recon_lcu_luma(state, x, y + offset, depth+1, intra_mode, NULL, lcu); - kvz_intra_recon_lcu_luma(state, x + offset, y + offset, depth+1, intra_mode, NULL, lcu); - - if (depth < MAX_DEPTH) { - uint16_t child_cbfs[3] = { - LCU_GET_CU_AT_PX(lcu, lcu_px.x + offset, lcu_px.y )->cbf, - LCU_GET_CU_AT_PX(lcu, lcu_px.x, lcu_px.y + offset)->cbf, - LCU_GET_CU_AT_PX(lcu, lcu_px.x + offset, lcu_px.y + offset)->cbf, - }; - cbf_set_conditionally(&cur_cu->cbf, child_cbfs, depth, COLOR_Y); - } + const kvz_config *cfg = &state->encoder_control->cfg; + const int shift = color == COLOR_Y ? 0 : 1; - return; + int log2width = LOG2_LCU_WIDTH - depth; + if (color != COLOR_Y && depth < MAX_PU_DEPTH) { + // Chroma width is half of luma width, when not at maximum depth. + log2width -= 1; } + const int width = 1 << log2width; + const int lcu_width = LCU_WIDTH >> shift; + + const vector2d_t luma_px = { x, y }; + const vector2d_t pic_px = { + state->tile->frame->width, + state->tile->frame->height, + }; + const vector2d_t lcu_px = { SUB_SCU(x) >> shift, SUB_SCU(y) >> shift}; - // Perform intra prediction and put the result in correct place lcu. 
- vector2d_t pic_px = { state->tile->frame->width, state->tile->frame->height }; - vector2d_t luma_px = { x, y }; kvz_intra_references refs; - const int_fast8_t log2_width = kvz_g_convert_to_bit[width] + 2; - kvz_intra_build_reference(log2_width, COLOR_Y, &luma_px, &pic_px, lcu, &refs); + kvz_intra_build_reference(log2width, color, &luma_px, &pic_px, lcu, &refs); kvz_pixel pred[32 * 32]; - const kvz_config *cfg = &state->encoder_control->cfg; - bool filter_boundary = !(cfg->lossless && cfg->implicit_rdpcm); - kvz_intra_predict(&refs, log2_width, intra_mode, COLOR_Y, pred, filter_boundary); - - kvz_pixel *block_in_lcu = &lcu->rec.y[lcu_px.x + lcu_px.y * LCU_WIDTH]; - kvz_pixels_blit(pred, block_in_lcu, width, width, width, LCU_WIDTH); - - kvz_quantize_lcu_luma_residual(state, x, y, depth, cur_cu, lcu); + const bool filter_boundary = color == COLOR_Y && !(cfg->lossless && cfg->implicit_rdpcm); + kvz_intra_predict(&refs, log2width, intra_mode, color, pred, filter_boundary); + + const int index = lcu_px.x + lcu_px.y * lcu_width; + kvz_pixel *block = NULL; + switch (color) { + case COLOR_Y: + block = &lcu->rec.y[index]; + break; + case COLOR_U: + block = &lcu->rec.u[index]; + break; + case COLOR_V: + block = &lcu->rec.v[index]; + break; + } + kvz_pixels_blit(pred, block , width, width, width, lcu_width); } - -void kvz_intra_recon_lcu_chroma( +/** + * \brief Reconstruct an intra CU + * + * \param state encoder state + * \param x x-coordinate of the CU in luma pixels + * \param y y-coordinate of the CU in luma pixels + * \param depth depth in the CU tree + * \param mode_luma intra mode for luma, or -1 to skip luma recon + * \param mode_chroma intra mode for chroma, or -1 to skip chroma recon + * \param cur_cu pointer to the CU, or NULL to fetch CU from LCU + * \param lcu containing LCU + */ +void kvz_intra_recon_cu( encoder_state_t *const state, int x, int y, int depth, - int8_t intra_mode, + int8_t mode_luma, + int8_t mode_chroma, cu_info_t *cur_cu, lcu_t *lcu) { const 
vector2d_t lcu_px = { SUB_SCU(x), SUB_SCU(y) }; const int8_t width = LCU_WIDTH >> depth; - const int8_t width_c = (depth == MAX_PU_DEPTH ? width : width / 2); - if (cur_cu == NULL) { cur_cu = LCU_GET_CU_AT_PX(lcu, lcu_px.x, lcu_px.y); } if (depth == 0 || cur_cu->tr_depth > depth) { - int offset = width / 2; - - kvz_intra_recon_lcu_chroma(state, x, y, depth+1, intra_mode, NULL, lcu); - kvz_intra_recon_lcu_chroma(state, x + offset, y, depth+1, intra_mode, NULL, lcu); - kvz_intra_recon_lcu_chroma(state, x, y + offset, depth+1, intra_mode, NULL, lcu); - kvz_intra_recon_lcu_chroma(state, x + offset, y + offset, depth+1, intra_mode, NULL, lcu); - - if (depth <= MAX_DEPTH) { - uint16_t child_cbfs[3] = { - LCU_GET_CU_AT_PX(lcu, lcu_px.x + offset, lcu_px.y )->cbf, - LCU_GET_CU_AT_PX(lcu, lcu_px.x, lcu_px.y + offset)->cbf, - LCU_GET_CU_AT_PX(lcu, lcu_px.x + offset, lcu_px.y + offset)->cbf, - }; + const int offset = width / 2; + const int32_t x2 = x + offset; + const int32_t y2 = y + offset; + + kvz_intra_recon_cu(state, x, y, depth + 1, mode_luma, mode_chroma, NULL, lcu); + kvz_intra_recon_cu(state, x2, y, depth + 1, mode_luma, mode_chroma, NULL, lcu); + kvz_intra_recon_cu(state, x, y2, depth + 1, mode_luma, mode_chroma, NULL, lcu);
View file
kvazaar-1.1.0.tar.gz/src/intra.h -> kvazaar-1.2.0.tar.gz/src/intra.h
Changed
@@ -62,6 +62,26 @@ const cu_info_t *const left_pu, const cu_info_t *const above_pu); +#if KVZ_SEL_ENCRYPTION +/** +* \brief Function for deriving intra luma predictions with encryption +* \param x x-coordinate of the PU in pixels +* \param y y-coordinate of the PU in pixels +* \param preds output buffer for 3 predictions +* \param cur_pu PU to check +* \param left_pu PU to the left of cur_pu +* \param above_pu PU above cur_pu +* \returns 1 if predictions are found, otherwise 0 +*/ +int8_t kvz_intra_get_dir_luma_predictor_encry( +const uint32_t x, +const uint32_t y, +int8_t *preds, +const cu_info_t *const cur_pu, +const cu_info_t *const left_pu, +const cu_info_t *const above_pu); +#endif + /** * \brief Generage angular predictions. * \param width Width in pixels, range 4..32. @@ -97,27 +117,13 @@ kvz_pixel *dst, bool filter_boundary); -/** - * \brief Do a full intra prediction cycle on a CU in lcu for luma. - */ -void kvz_intra_recon_lcu_luma( - encoder_state_t *const state, - int x, - int y, - int depth, - int8_t intra_mode, - cu_info_t *cur_cu, - lcu_t *lcu); - -/** -* \brief Do a full intra prediction cycle on a CU in lcu for chroma. -*/ -void kvz_intra_recon_lcu_chroma( +void kvz_intra_recon_cu( encoder_state_t *const state, int x, int y, int depth, - int8_t intra_mode, + int8_t mode_luma, + int8_t mode_chroma, cu_info_t *cur_cu, lcu_t *lcu);
View file
kvazaar-1.1.0.tar.gz/src/kvazaar.c -> kvazaar-1.2.0.tar.gz/src/kvazaar.c
Changed
@@ -43,7 +43,21 @@ static void kvazaar_close(kvz_encoder *encoder) { if (encoder) { + // The threadqueue must be stopped before freeing states. + if (encoder->control) { + kvz_threadqueue_stop(encoder->control->threadqueue); + } + if (encoder->states) { + // Flush input frame buffer. + kvz_picture *pic = NULL; + while ((pic = kvz_encoder_feed_frame(&encoder->input_buffer, + &encoder->states[0], + NULL)) != NULL) { + kvz_image_free(pic); + pic = NULL; + } + for (unsigned i = 0; i < encoder->num_encoder_states; ++i) { kvz_encoder_state_finalize(&encoder->states[i]); } @@ -127,7 +141,20 @@ info->qp = state->frame->QP; info->nal_unit_type = state->frame->pictype; info->slice_type = state->frame->slicetype; - kvz_encoder_get_ref_lists(state, info->ref_list_len, info->ref_list); + + memset(info->ref_list[0], 0, 16); + memset(info->ref_list[1], 0, 16); + + for (size_t i = 0; i < state->frame->ref_LX_size[0]; i++) { + info->ref_list[0][i] = state->frame->ref->pocs[state->frame->ref_LX[0][i]]; + } + + for (size_t i = 0; i < state->frame->ref_LX_size[1]; i++) { + info->ref_list[1][i] = state->frame->ref->pocs[state->frame->ref_LX[1][i]]; + } + + info->ref_list_len[0] = state->frame->ref_LX_size[0]; + info->ref_list_len[1] = state->frame->ref_LX_size[1]; } @@ -244,7 +271,7 @@ kvz_threadqueue_waitfor(enc->control->threadqueue, output_state->tqj_bitstream_written); // The job pointer must be set to NULL here since it won't be usable after // the next frame is done. - output_state->tqj_bitstream_written = NULL; + kvz_threadqueue_free_job(&output_state->tqj_bitstream_written); // Get stream length before taking chunks since that clears the stream. if (len_out) *len_out = kvz_bitstream_tell(&output_state->stream) / 8;
View file
kvazaar-1.1.0.tar.gz/src/kvazaar.h -> kvazaar-1.2.0.tar.gz/src/kvazaar.h
Changed
@@ -149,7 +149,8 @@ KVZ_CRYPTO_MV_SIGNS = (1 << 1), KVZ_CRYPTO_TRANSF_COEFFS = (1 << 2), KVZ_CRYPTO_TRANSF_COEFF_SIGNS = (1 << 3), - KVZ_CRYPTO_ON = (1 << 4) - 1, + KVZ_CRYPTO_INTRA_MODE = (1 << 4), + KVZ_CRYPTO_ON = (1 << 5) - 1, }; /** @@ -198,6 +199,13 @@ KVZ_SLICES_WPP = (1 << 1), /*!< \brief Put each row in a slice. */ }; +enum kvz_sao { + KVZ_SAO_OFF = 0, + KVZ_SAO_EDGE = 1, + KVZ_SAO_BAND = 2, + KVZ_SAO_FULL = 3 +}; + // Map from input format to chroma format. #define KVZ_FORMAT2CSP(format) ((enum kvz_chroma_format)"\0\1\2\3"[format]) @@ -245,7 +253,7 @@ int32_t framerate_num; /*!< \brief Framerate numerator */ int32_t framerate_denom; /*!< \brief Framerate denominator */ int32_t deblock_enable; /*!< \brief Flag to enable deblocking filter */ - int32_t sao_enable; /*!< \brief Flag to enable sample adaptive offset filter */ + enum kvz_sao sao_type; /*!< \brief Flag to enable sample adaptive offset filter */ int32_t rdoq_enable; /*!< \brief Flag to enable RD optimized quantization. */ int32_t signhide_enable; /*!< \brief Flag to enable sign hiding. */ int32_t smp_enable; /*!< \brief Flag to enable SMP blocks. */ @@ -311,6 +319,7 @@ enum kvz_cu_split_termination cu_split_termination; /*!< \since 3.8.0 \brief Mode of cu split termination. */ enum kvz_crypto_features crypto_features; /*!< \since 3.7.0 */ + uint8_t *optional_key; enum kvz_me_early_termination me_early_termination; /*!< \since 3.8.0 \brief Mode of me early termination. */ @@ -333,10 +342,15 @@ struct { int32_t width; int32_t height; - uint8_t *dqps; + int8_t *dqps; } roi; /*!< \since 3.14.0 \brief Map of delta QPs for region of interest coding. */ unsigned slices; /*!< \since 3.15.0 \brief How to map slices to frame. */ + + /** + * \brief Use adaptive QP for 360 video with equirectangular projection. + */ + int32_t erp_aqp; } kvz_config; /**
View file
kvazaar-1.1.0.tar.gz/src/rate_control.c -> kvazaar-1.2.0.tar.gz/src/rate_control.c
Changed
@@ -170,7 +170,7 @@ static int8_t lambda_to_qp(const double lambda) { const int8_t qp = 4.2005 * log(lambda) + 13.7223 + 0.5; - return CLIP(0, 51, qp); + return CLIP_TO_QP(qp); } static double qp_to_lamba(encoder_state_t * const state, int qp) @@ -240,10 +240,10 @@ kvz_gop_config const * const gop = &ctrl->cfg.gop[state->frame->gop_offset]; const int gop_len = ctrl->cfg.gop_len; - state->frame->QP = ctrl->cfg.qp; - if (gop_len > 0 && state->frame->slicetype != KVZ_SLICE_I) { - state->frame->QP += gop->qp_offset; + state->frame->QP = CLIP_TO_QP(ctrl->cfg.qp + gop->qp_offset); + } else { + state->frame->QP = ctrl->cfg.qp; } state->frame->lambda = qp_to_lamba(state, state->frame->QP); @@ -291,7 +291,7 @@ }; int roi_index = roi.x + roi.y * ctrl->cfg.roi.width; int dqp = ctrl->cfg.roi.dqps[roi_index]; - state->qp = state->frame->QP + dqp; + state->qp = CLIP_TO_QP(state->frame->QP + dqp); state->lambda = qp_to_lamba(state, state->qp); state->lambda_sqrt = sqrt(state->frame->lambda);
View file
kvazaar-1.1.0.tar.gz/src/rdo.c -> kvazaar-1.2.0.tar.gz/src/rdo.c
Changed
@@ -33,12 +33,16 @@ #include "tables.h" #include "transform.h" +#include "strategies/strategies-quant.h" + #define QUANT_SHIFT 14 #define SCAN_SET_SIZE 16 #define LOG2_SCAN_SET_SIZE 4 #define SBH_THRESHOLD 4 +static const double COEFF_SUM_MULTIPLIER = 1.9; + const uint32_t kvz_g_go_rice_range[5] = { 7, 14, 26, 46, 78 }; const uint32_t kvz_g_go_rice_prefix_len[5] = { 8, 7, 6, 5, 4 }; @@ -140,48 +144,82 @@ }; -/** Calculate actual (or really close to actual) bitcost for coding coefficients +/** + * \brief Calculate actual (or really close to actual) bitcost for coding + * coefficients. + * * \param coeff coefficient array * \param width coeff block width * \param type data type (0 == luma) + * * \returns bits needed to code input coefficients */ -int32_t kvz_get_coeff_cost(const encoder_state_t * const state, coeff_t *coeff, int32_t width, int32_t type, int8_t scan_mode) +static INLINE uint32_t get_coeff_cabac_cost( + const encoder_state_t * const state, + const coeff_t *coeff, + int32_t width, + int32_t type, + int8_t scan_mode) { - int32_t cost = 0; - int i; - int found = 0; - encoder_state_t state_copy; - // Make sure there are coeffs present - for(i = 0; i < width*width; i++) { + bool found = false; + for (int i = 0; i < width*width; i++) { if (coeff[i] != 0) { found = 1; break; } } + if (!found) return 0; - if(!found) return 0; - - // Store cabac state and contexts - memcpy(&state_copy,state,sizeof(encoder_state_t)); + // Take a copy of the CABAC so that we don't overwrite the contexts when + // counting the bits. + cabac_data_t cabac_copy; + memcpy(&cabac_copy, &state->cabac, sizeof(cabac_copy)); // Clear bytes and bits and set mode to "count" - state_copy.cabac.only_count = 1; - state_copy.cabac.num_buffered_bytes = 0; - state_copy.cabac.bits_left = 23; + cabac_copy.only_count = 1; + cabac_copy.num_buffered_bytes = 0; + cabac_copy.bits_left = 23; + + // Execute the coding function. 
+ // It is safe to drop the const modifier since state won't be modified + // when cabac.only_count is set. + kvz_encode_coeff_nxn((encoder_state_t*) state, + &cabac_copy, + coeff, + width, + type, + scan_mode, + 0); + + return (23 - cabac_copy.bits_left) + (cabac_copy.num_buffered_bytes << 3); +} - // Execute the coding function - kvz_encode_coeff_nxn(&state_copy, coeff, width, type, scan_mode, 0); - // Store bitcost before restoring cabac - cost = (23-state_copy.cabac.bits_left) + (state_copy.cabac.num_buffered_bytes << 3); +/** + * \brief Estimate bitcost for coding coefficients. + * + * \param coeff coefficient array + * \param width coeff block width + * \param type data type (0 == luma) + * + * \returns number of bits needed to code coefficients + */ +uint32_t kvz_get_coeff_cost(const encoder_state_t * const state, + const coeff_t *coeff, + int32_t width, + int32_t type, + int8_t scan_mode) +{ + if (state->encoder_control->cfg.rdo > 0) { + return get_coeff_cabac_cost(state, coeff, width, type, scan_mode); - return cost; + } else { + return COEFF_SUM_MULTIPLIER * kvz_coeff_abs_sum(coeff, width * width) + 0.5; + } } - #define COEF_REMAIN_BIN_REDUCTION 3 /** Calculates the cost for specific absolute transform level * \param abs_level scaled quantized level @@ -191,7 +229,7 @@ * \returns cost of given absolute transform level * From HM 12.0 */ -int32_t kvz_get_ic_rate(encoder_state_t * const state, +INLINE int32_t kvz_get_ic_rate(encoder_state_t * const state, uint32_t abs_level, uint16_t ctx_num_one, uint16_t ctx_num_abs, @@ -211,14 +249,14 @@ int32_t length; if (symbol < (COEF_REMAIN_BIN_REDUCTION << abs_go_rice)) { length = symbol>>abs_go_rice; - rate += (length+1+abs_go_rice) << CTX_FRAC_BITS; + rate += (length+1+abs_go_rice) * (1 << CTX_FRAC_BITS); } else { length = abs_go_rice; symbol = symbol - ( COEF_REMAIN_BIN_REDUCTION << abs_go_rice); while (symbol >= (1<<length)) { symbol -= (1<<(length++)); } - rate += 
(COEF_REMAIN_BIN_REDUCTION+length+1-abs_go_rice+length) << CTX_FRAC_BITS; + rate += (COEF_REMAIN_BIN_REDUCTION+length+1-abs_go_rice+length) * (1 << CTX_FRAC_BITS); } if (c1_idx < C1FLAG_NUMBER) { rate += CTX_ENTROPY_BITS(&base_one_ctx[ctx_num_one],1); @@ -255,7 +293,7 @@ * This method calculates the best quantized transform level for a given scan position. * From HM 12.0 */ -uint32_t kvz_get_coded_level ( encoder_state_t * const state, double *coded_cost, double *coded_cost0, double *coded_cost_sig, +INLINE uint32_t kvz_get_coded_level ( encoder_state_t * const state, double *coded_cost, double *coded_cost0, double *coded_cost_sig, int32_t level_double, uint32_t max_abs_level, uint16_t ctx_num_sig, uint16_t ctx_num_one, uint16_t ctx_num_abs, uint16_t abs_go_rice, @@ -283,7 +321,7 @@ min_abs_level = ( max_abs_level > 1 ? max_abs_level - 1 : 1 ); for (abs_level = max_abs_level; abs_level >= min_abs_level ; abs_level-- ) { - double err = (double)(level_double - ( abs_level << q_bits ) ); + double err = (double)(level_double - ( abs_level * (1 << q_bits) ) ); double cur_cost = err * err * temp + state->lambda * kvz_get_ic_rate( state, abs_level, ctx_num_one, ctx_num_abs, abs_go_rice, c1_idx, c2_idx, type); @@ -450,8 +488,8 @@ dec_bits -= 4 * CTX_FRAC_ONE_BIT; } - inc_bits = -quant_cost_in_bits + (inc_bits << PRECISION_INC); - dec_bits = quant_cost_in_bits + (dec_bits << PRECISION_INC); + inc_bits = -quant_cost_in_bits + inc_bits * (1 << PRECISION_INC); + dec_bits = quant_cost_in_bits + dec_bits * (1 << PRECISION_INC); if (inc_bits < dec_bits) { current.change = 1; @@ -472,7 +510,7 @@ // Add sign bit, other bits and sig_coeff goes to one. 
int bits = CTX_FRAC_ONE_BIT + sh_rates->inc[current.pos] + sh_rates->sig_coeff_inc[current.pos]; - current.cost = -llabs(quant_cost_in_bits) + (bits << PRECISION_INC); + current.cost = -llabs(quant_cost_in_bits) + bits * (1 << PRECISION_INC); current.change = 1; if (coeff_scan < first_nz_scan) { @@ -558,10 +596,10 @@ // Explicitly tell the only possible numbers of elements to be zeroed. // Hope the compiler is able to utilize this information. switch (cg_num) { - case 1: memset(sig_coeffgroup_flag, 0, 1 * sizeof(sig_coeffgroup_flag[0])); break; - case 4: memset(sig_coeffgroup_flag, 0, 4 * sizeof(sig_coeffgroup_flag[0])); break; - case 16: memset(sig_coeffgroup_flag, 0, 16 * sizeof(sig_coeffgroup_flag[0])); break; - case 64: memset(sig_coeffgroup_flag, 0, 64 * sizeof(sig_coeffgroup_flag[0])); break; + case 1: FILL_ARRAY(sig_coeffgroup_flag, 0, 1); break; + case 4: FILL_ARRAY(sig_coeffgroup_flag, 0, 4); break; + case 16: FILL_ARRAY(sig_coeffgroup_flag, 0, 16); break; + case 64: FILL_ARRAY(sig_coeffgroup_flag, 0, 64); break; default: assert(0 && "There should be 1, 4, 16 or 64 coefficient groups"); }
View file
kvazaar-1.1.0.tar.gz/src/rdo.h -> kvazaar-1.2.0.tar.gz/src/rdo.h
Changed
@@ -39,7 +39,11 @@ void kvz_rdoq(encoder_state_t *state, coeff_t *coef, coeff_t *dest_coeff, int32_t width, int32_t height, int8_t type, int8_t scan_mode, int8_t block_type, int8_t tr_depth); -int32_t kvz_get_coeff_cost(const encoder_state_t *state, coeff_t *coeff, int32_t width, int32_t type, int8_t scan_mode); +uint32_t kvz_get_coeff_cost(const encoder_state_t *state, + const coeff_t *coeff, + int32_t width, + int32_t type, + int8_t scan_mode); int32_t kvz_get_ic_rate(encoder_state_t *state, uint32_t abs_level, uint16_t ctx_num_one, uint16_t ctx_num_abs, uint16_t abs_go_rice, uint32_t c1_idx, uint32_t c2_idx, int8_t type); @@ -52,7 +56,9 @@ kvz_mvd_cost_func kvz_calc_mvd_cost_cabac; -uint32_t kvz_get_mvd_coding_cost_cabac(encoder_state_t * const state, vector2d_t *mvd, const cabac_data_t* cabac); +uint32_t kvz_get_mvd_coding_cost_cabac(const encoder_state_t *state, + vector2d_t *mvd, + const cabac_data_t* cabac); // Number of fixed point fractional bits used in the fractional bit table. #define CTX_FRAC_BITS 15
View file
kvazaar-1.1.0.tar.gz/src/sao.c -> kvazaar-1.2.0.tar.gz/src/sao.c
Changed
@@ -262,182 +262,81 @@ /** - * \brief Calculate dimensions of the buffer used by sao reconstruction. - - * \param pic Picture. - * \param sao Sao parameters. - * \param rec Top-left corner of the LCU + * \brief Reconstruct SAO. + * + * \param encoder encoder state + * \param buffer Buffer containing the deblocked input pixels. The + * area to filter starts at index 0. + * \param stride stride of buffer + * \param frame_x x-coordinate of the top-left corner in pixels + * \param frame_y y-coordinate of the top-left corner in pixels + * \param width width of the area to filter + * \param height height of the area to filter + * \param sao SAO information + * \param color color plane index */ -static void sao_calc_band_block_dims(const videoframe_t *frame, color_t color_i, - vector2d_t *rec, vector2d_t *block) +void kvz_sao_reconstruct(const encoder_state_t *state, + const kvz_pixel *buffer, + int stride, + int frame_x, + int frame_y, + int width, + int height, + const sao_info_t *sao, + color_t color) { - const int is_chroma = (color_i != COLOR_Y ? 1 : 0); - int width = frame->width >> is_chroma; - int height = frame->height >> is_chroma; - int block_width = LCU_WIDTH >> is_chroma; - + const encoder_control_t *const ctrl = state->encoder_control; + videoframe_t *const frame = state->tile->frame; + const int shift = color == COLOR_Y ? 0 : 1; - // Handle right and bottom, taking care of non-LCU sized CUs. 
- if (rec->y + block_width >= height) { - if (rec->y + block_width >= height) { - block->y = height - rec->y; - } - } - if (rec->x + block_width >= width) { - if (rec->x + block_width > width) { - block->x = width - rec->x; - } - } + const int frame_width = frame->width >> shift; + const int frame_height = frame->height >> shift; + const int frame_stride = frame->rec->stride >> shift; + kvz_pixel *output = &frame->rec->data[color][frame_x + frame_y * frame_stride]; - rec->x = 0; rec->y = 0; -} + if (sao->type == SAO_TYPE_EDGE) { + const vector2d_t *offset = g_sao_edge_offsets[sao->eo_class]; -/** - * \brief Calculate dimensions of the buffer used by sao reconstruction. - * - * This function calculates 4 vectors that can be used to make the temporary - * buffers required by sao_reconstruct_color. - * - * Vector block is the area affected by sao. Vectors tr and br are top-left - * margin and bottom-right margin, which contain pixels that are not modified - * by the reconstruction of this LCU but are needed by the reconstruction. - * Vector rec is the offset from the CU to the required pixel area. - * - * The margins are always either 0 or 1, depending on the direction of the - * edge offset class. - * - * This also takes into account borders of the picture and non-LCU sized - * CU's at the bottom and right of the picture. - * - * \ CU + rec - * +------+ - * |\ tl | - * | +--+ | - * | |\ block - * | | \| | - * | +--+ | - * | \ br - * +------+ - * - * \param pic Picture. - * \param sao Sao parameters. - * \param rec Top-left corner of the LCU, modified to be top-left corner of - */ -static void sao_calc_edge_block_dims(const videoframe_t * const frame, color_t color_i, - const sao_info_t *sao, vector2d_t *rec, - vector2d_t *tl, vector2d_t *br, - vector2d_t *block) -{ - vector2d_t a_ofs = g_sao_edge_offsets[sao->eo_class][0]; - vector2d_t b_ofs = g_sao_edge_offsets[sao->eo_class][1]; - const int is_chroma = (color_i != COLOR_Y ? 
1 : 0); - int width = frame->width >> is_chroma; - int height = frame->height >> is_chroma; - int block_width = LCU_WIDTH >> is_chroma; - - // Handle top and left. - if (rec->y == 0) { - tl->y = 0; - if (a_ofs.y == -1 || b_ofs.y == -1) { - block->y -= 1; - tl->y += 1; + if (frame_x + width + offset[0].x > frame_width || + frame_x + width + offset[1].x > frame_width) + { + // Nothing to do for the rightmost column. + width -= 1; } - } - if (rec->x == 0) { - tl->x = 0; - if (a_ofs.x == -1 || b_ofs.x == -1) { - block->x -= 1; - tl->x += 1; + if (frame_x + offset[0].x < 0 || frame_x + offset[1].x < 0) { + // Nothing to do for the leftmost column. + buffer += 1; + output += 1; + width -= 1; } - } - - // Handle right and bottom, taking care of non-LCU sized CUs. - if (rec->y + block_width >= height) { - br->y = 0; - block->y -= block_width + rec->y - height; - if (a_ofs.y == 1 || b_ofs.y == 1) { - block->y -= 1; - br->y += 1; + if (frame_y + height + offset[0].y > frame_height || + frame_y + height + offset[1].y > frame_height) + { + // Nothing to do for the bottommost row. + height -= 1; } - } - if (rec->x + block_width >= width) { - br->x = 0; - block->x -= block_width + rec->x - width; - if (a_ofs.x == 1 || b_ofs.x == 1) { - block->x -= 1; - br->x += 1; + if (frame_y + offset[0].y < 0 || frame_y + offset[1].y < 0) { + // Nothing to do for the topmost row. + buffer += stride; + output += frame_stride; + height -= 1; } } - rec->y = (rec->y == 0 ? 0 : -1); - rec->x = (rec->x == 0 ? 0 : -1); -} - -void kvz_sao_reconstruct(const encoder_control_t * const encoder, videoframe_t * frame, const kvz_pixel *old_rec, - unsigned x_ctb, unsigned y_ctb, - const sao_info_t *sao, color_t color_i) -{ - const int is_chroma = (color_i != COLOR_Y ? 
1 : 0); - const int pic_stride = frame->width >> is_chroma; - const int lcu_stride = LCU_WIDTH >> is_chroma; - const int buf_stride = lcu_stride + 2; - - kvz_pixel *recdata = frame->rec->data[color_i]; - kvz_pixel buf_rec[(LCU_WIDTH + 2) * (LCU_WIDTH + 2)]; - kvz_pixel new_rec[LCU_WIDTH * LCU_WIDTH]; - // Calling CU_TO_PIXEL with depth 1 is the same as using block size of 32. - kvz_pixel *lcu_rec = &recdata[CU_TO_PIXEL(x_ctb, y_ctb, is_chroma, frame->rec->stride>>is_chroma)]; - const kvz_pixel *old_lcu_rec = &old_rec[CU_TO_PIXEL(x_ctb, y_ctb, is_chroma, pic_stride)]; - - vector2d_t ofs; - vector2d_t tl = { 1, 1 }; - vector2d_t br = { 1, 1 }; - vector2d_t block; - - if (sao->type == SAO_TYPE_NONE) { - return; - } - - ofs.x = x_ctb * lcu_stride; - ofs.y = y_ctb * lcu_stride; - block.x = lcu_stride; - block.y = lcu_stride; - if (sao->type == SAO_TYPE_BAND) { - tl.x = 0; tl.y = 0; - br.x = 0; br.y = 0; - sao_calc_band_block_dims(frame, color_i, &ofs, &block); - } - else {
View file
kvazaar-1.1.0.tar.gz/src/sao.h -> kvazaar-1.2.0.tar.gz/src/sao.h
Changed
@@ -72,10 +72,16 @@ (sao).offsets[0], (sao).offsets[1], (sao).offsets[2], (sao).offsets[3], (sao).offsets[4]) -void kvz_sao_reconstruct(const encoder_control_t * encoder, videoframe_t *frame, const kvz_pixel *old_rec, - unsigned x_ctb, unsigned y_ctb, - const sao_info_t *sao, color_t color_i); -void kvz_sao_reconstruct_frame(encoder_state_t *state); +void kvz_sao_reconstruct(const encoder_state_t *state, + const kvz_pixel *buffer, + int stride, + int frame_x, + int frame_y, + int width, + int height, + const sao_info_t *sao, + color_t color); + void kvz_sao_search_lcu(const encoder_state_t* const state, int lcu_x, int lcu_y); void kvz_calc_sao_offset_array(const encoder_control_t * const encoder, const sao_info_t *sao, int *offset, color_t color_i);
View file
kvazaar-1.1.0.tar.gz/src/search.c -> kvazaar-1.2.0.tar.gz/src/search.c
Changed
@@ -36,6 +36,7 @@ #include "transform.h" #include "videoframe.h" #include "strategies/strategies-picture.h" +#include "strategies/strategies-quant.h" #define IN_FRAME(x, y, width, height, block_width, block_height) \ @@ -43,11 +44,8 @@ && (x) + (block_width) <= (width) \ && (y) + (block_height) <= (height)) -// Cost treshold for doing intra search in inter frames with --rd=0. -#ifndef INTRA_TRESHOLD -# define INTRA_TRESHOLD 20 -#endif - +// Cost threshold for doing intra search in inter frames with --rd=0. +static const int INTRA_THRESHOLD = 8; // Modify weight of luma SSD. #ifndef LUMA_MULT @@ -58,216 +56,133 @@ # define CHROMA_MULT 1.5 #endif - -/** - * Copy all non-reference CU data from depth+1 to depth. - */ -static void work_tree_copy_up(int x_px, int y_px, int depth, lcu_t work_tree[MAX_PU_DEPTH + 1]) +static INLINE void copy_cu_info(int x_local, int y_local, int width, lcu_t *from, lcu_t *to) { - assert(depth >= 0 && depth < MAX_PU_DEPTH); - - // Copy non-reference CUs. - { - const int x_orig = SUB_SCU(x_px); - const int y_orig = SUB_SCU(y_px); - const int width_cu = LCU_WIDTH >> depth; - for (int y = y_orig; y < y_orig + width_cu; y += SCU_WIDTH) { - for (int x = x_orig; x < x_orig + width_cu; x += SCU_WIDTH) { - const cu_info_t *from_cu = LCU_GET_CU_AT_PX(&work_tree[depth + 1], x, y); - cu_info_t *to_cu = LCU_GET_CU_AT_PX(&work_tree[depth], x, y); - memcpy(to_cu, from_cu, sizeof(*to_cu)); - } - } - } - - // Copy reconstructed pixels. 
- { - const int x = SUB_SCU(x_px); - const int y = SUB_SCU(y_px); - const int width_px = LCU_WIDTH >> depth; - const int luma_index = x + y * LCU_WIDTH; - const int chroma_index = (x / 2) + (y / 2) * (LCU_WIDTH / 2); - - const lcu_yuv_t *from = &work_tree[depth + 1].rec; - lcu_yuv_t *to = &work_tree[depth].rec; - - const lcu_coeff_t *from_coeff = &work_tree[depth + 1].coeff; - lcu_coeff_t *to_coeff = &work_tree[depth].coeff; - - kvz_pixels_blit(&from->y[luma_index], &to->y[luma_index], - width_px, width_px, LCU_WIDTH, LCU_WIDTH); - if (from->chroma_format != KVZ_CSP_400) { - kvz_pixels_blit(&from->u[chroma_index], &to->u[chroma_index], - width_px / 2, width_px / 2, LCU_WIDTH / 2, LCU_WIDTH / 2); - kvz_pixels_blit(&from->v[chroma_index], &to->v[chroma_index], - width_px / 2, width_px / 2, LCU_WIDTH / 2, LCU_WIDTH / 2); - } - - // Copy coefficients up. They do not have to be copied down because they - // are not used for the search. - kvz_coefficients_blit(&from_coeff->y[luma_index], &to_coeff->y[luma_index], - width_px, width_px, LCU_WIDTH, LCU_WIDTH); - if (from->chroma_format != KVZ_CSP_400) { - kvz_coefficients_blit(&from_coeff->u[chroma_index], &to_coeff->u[chroma_index], - width_px / 2, width_px / 2, LCU_WIDTH / 2, LCU_WIDTH / 2); - kvz_coefficients_blit(&from_coeff->v[chroma_index], &to_coeff->v[chroma_index], - width_px / 2, width_px / 2, LCU_WIDTH / 2, LCU_WIDTH / 2); + for (int y = y_local; y < y_local + width; y += SCU_WIDTH) { + for (int x = x_local; x < x_local + width; x += SCU_WIDTH) { + *LCU_GET_CU_AT_PX(to, x, y) = *LCU_GET_CU_AT_PX(from, x, y); } } } - -/** - * Copy all non-reference CU data from depth to depth+1..MAX_PU_DEPTH. 
- */ -static void work_tree_copy_down(int x_px, int y_px, int depth, lcu_t work_tree[MAX_PU_DEPTH + 1]) +static INLINE void copy_cu_pixels(int x_local, int y_local, int width, lcu_t *from, lcu_t *to) { - assert(depth >= 0 && depth < MAX_PU_DEPTH); - - // TODO: clean up to remove the copy pasta - const int width_px = LCU_WIDTH >> depth; + const int luma_index = x_local + y_local * LCU_WIDTH; + const int chroma_index = (x_local / 2) + (y_local / 2) * (LCU_WIDTH / 2); - int d; - - for (d = depth + 1; d < MAX_PU_DEPTH + 1; ++d) { - const int x_orig = SUB_SCU(x_px); - const int y_orig = SUB_SCU(y_px); - - for (int y = y_orig; y < y_orig + width_px; y += SCU_WIDTH) { - for (int x = x_orig; x < x_orig + width_px; x += SCU_WIDTH) { - const cu_info_t *from_cu = LCU_GET_CU_AT_PX(&work_tree[depth], x, y); - cu_info_t *to_cu = LCU_GET_CU_AT_PX(&work_tree[d], x, y); - memcpy(to_cu, from_cu, sizeof(*to_cu)); - } - } + kvz_pixels_blit(&from->rec.y[luma_index], &to->rec.y[luma_index], + width, width, LCU_WIDTH, LCU_WIDTH); + if (from->rec.chroma_format != KVZ_CSP_400) { + kvz_pixels_blit(&from->rec.u[chroma_index], &to->rec.u[chroma_index], + width / 2, width / 2, LCU_WIDTH / 2, LCU_WIDTH / 2); + kvz_pixels_blit(&from->rec.v[chroma_index], &to->rec.v[chroma_index], + width / 2, width / 2, LCU_WIDTH / 2, LCU_WIDTH / 2); } +} - // Copy reconstructed pixels. 
- for (d = depth + 1; d < MAX_PU_DEPTH + 1; ++d) { - const int x = SUB_SCU(x_px); - const int y = SUB_SCU(y_px); - - const int luma_index = x + y * LCU_WIDTH; - const int chroma_index = (x / 2) + (y / 2) * (LCU_WIDTH / 2); - - lcu_yuv_t *from = &work_tree[depth].rec; - lcu_yuv_t *to = &work_tree[d].rec; +static INLINE void copy_cu_coeffs(int x_local, int y_local, int width, lcu_t *from, lcu_t *to) +{ + const int luma_z = xy_to_zorder(LCU_WIDTH, x_local, y_local); + copy_coeffs(&from->coeff.y[luma_z], &to->coeff.y[luma_z], width); - kvz_pixels_blit(&from->y[luma_index], &to->y[luma_index], - width_px, width_px, LCU_WIDTH, LCU_WIDTH); - if (from->chroma_format != KVZ_CSP_400) { - kvz_pixels_blit(&from->u[chroma_index], &to->u[chroma_index], - width_px / 2, width_px / 2, LCU_WIDTH / 2, LCU_WIDTH / 2); - kvz_pixels_blit(&from->v[chroma_index], &to->v[chroma_index], - width_px / 2, width_px / 2, LCU_WIDTH / 2, LCU_WIDTH / 2); - } + if (from->rec.chroma_format != KVZ_CSP_400) { + const int chroma_z = xy_to_zorder(LCU_WIDTH_C, x_local >> 1, y_local >> 1); + copy_coeffs(&from->coeff.u[chroma_z], &to->coeff.u[chroma_z], width >> 1); + copy_coeffs(&from->coeff.v[chroma_z], &to->coeff.v[chroma_z], width >> 1); } } - -void kvz_lcu_set_trdepth(lcu_t *lcu, int x_px, int y_px, int depth, int tr_depth) +/** + * Copy all non-reference CU data from next level to current level. + */ +static void work_tree_copy_up(int x_local, int y_local, int depth, lcu_t *work_tree) { const int width = LCU_WIDTH >> depth; - const vector2d_t lcu_cu = { SUB_SCU(x_px), SUB_SCU(y_px) }; - - // Depth 4 doesn't go inside the loop. Set the top-left CU. 
- LCU_GET_CU_AT_PX(lcu, lcu_cu.x, lcu_cu.y)->tr_depth = tr_depth; - - for (unsigned y = 0; y < width; y += SCU_WIDTH) { - for (unsigned x = 0; x < width; x += SCU_WIDTH) { - cu_info_t *cu = LCU_GET_CU_AT_PX(lcu, lcu_cu.x + x, lcu_cu.y + y); - cu->tr_depth = tr_depth; - } - } + copy_cu_info (x_local, y_local, width, &work_tree[depth + 1], &work_tree[depth]); + copy_cu_pixels(x_local, y_local, width, &work_tree[depth + 1], &work_tree[depth]); + copy_cu_coeffs(x_local, y_local, width, &work_tree[depth + 1], &work_tree[depth]); } -static void lcu_set_intra_mode(lcu_t *lcu, int x_px, int y_px, int depth, int pred_mode, int chroma_mode, int part_mode) +/** + * Copy all non-reference CU data from current level to all lower levels. + */ +static void work_tree_copy_down(int x_local, int y_local, int depth, lcu_t *work_tree) { const int width = LCU_WIDTH >> depth; - const int x_cu = SUB_SCU(x_px); - const int y_cu = SUB_SCU(y_px); - - if (part_mode == SIZE_NxN) { - assert(depth == MAX_DEPTH + 1); - assert(width == SCU_WIDTH); + for (int i = depth + 1; i <= MAX_PU_DEPTH; i++) { + copy_cu_info (x_local, y_local, width, &work_tree[depth], &work_tree[i]); + copy_cu_pixels(x_local, y_local, width, &work_tree[depth], &work_tree[i]); } +} - if (depth > MAX_DEPTH) {
View file
kvazaar-1.1.0.tar.gz/src/search_inter.c -> kvazaar-1.2.0.tar.gz/src/search_inter.c
Changed
@@ -35,68 +35,199 @@ #include "videoframe.h" +typedef struct { + encoder_state_t *state; + + /** + * \brief Current frame + */ + const kvz_picture *pic; + /** + * \brief Reference frame + */ + const kvz_picture *ref; + + /** + * \brief Index of the reference frame + */ + int32_t ref_idx; + + /** + * \brief Top-left corner of the PU + */ + const vector2d_t origin; + int32_t width; + int32_t height; + + int16_t mv_cand[2][2]; + inter_merge_cand_t merge_cand[MRG_MAX_NUM_CANDS]; + int32_t num_merge_cand; + + kvz_mvd_cost_func *mvd_cost_func; + + /** + * \brief Best motion vector among the ones tested so far + */ + vector2d_t best_mv; + /** + * \brief Cost of best_mv + */ + uint32_t best_cost; + /** + * \brief Bit cost of best_mv + */ + uint32_t best_bitcost; +} inter_search_info_t; + + /** * \return True if referred block is within current tile. */ -static INLINE bool fracmv_within_tile(const encoder_state_t *state, const vector2d_t* orig, int x, int y, int width, int height, int wpp_limit) +static INLINE bool fracmv_within_tile(const inter_search_info_t *info, int x, int y) { - if (state->encoder_control->cfg.mv_constraint == KVZ_MV_CONSTRAIN_NONE) { - return (wpp_limit == -1 || y + (height << 2) <= (wpp_limit << 2)); - }; + const encoder_control_t *ctrl = info->state->encoder_control; + + const bool is_frac_luma = x % 4 != 0 || y % 4 != 0; + const bool is_frac_chroma = x % 8 != 0 || y % 8 != 0; + + if (ctrl->cfg.owf && ctrl->cfg.wpp) { + // Check that the block does not reference pixels that are not final. + + // Margin as luma pixels. + int margin = 0; + if (is_frac_luma) { + // Fractional motion estimation needs up to 4 pixels outside the + // block. + margin = 4; + } else if (is_frac_chroma) { + // Odd chroma interpolation needs up to 2 luma pixels outside the + // block. + margin = 2; + } + + if (ctrl->cfg.sao_type) { + // Make sure we don't refer to pixels for which SAO reconstruction + // has not been done. 
+ margin += SAO_DELAY_PX; + } else if (ctrl->cfg.deblock_enable) { + // Make sure we don't refer to pixels that have not been deblocked. + margin += DEBLOCK_DELAY_PX; + } + + // Coordinates of the top-left corner of the containing LCU. + const vector2d_t orig_lcu = { + .x = info->origin.x / LCU_WIDTH, + .y = info->origin.y / LCU_WIDTH, + }; + // Difference between the coordinates of the LCU containing the + // bottom-left corner of the referenced block and the LCU containing + // this block. + const vector2d_t mv_lcu = { + ((info->origin.x + info->width + margin) * 4 + x) / (LCU_WIDTH << 2) - orig_lcu.x, + ((info->origin.y + info->height + margin) * 4 + y) / (LCU_WIDTH << 2) - orig_lcu.y, + }; + if (mv_lcu.y > ctrl->max_inter_ref_lcu.down) { + return false; + } + + if (mv_lcu.x + mv_lcu.y > + ctrl->max_inter_ref_lcu.down + ctrl->max_inter_ref_lcu.right) + { + return false; + } + } + + if (ctrl->cfg.mv_constraint == KVZ_MV_CONSTRAIN_NONE) { + return true; + } + + // Margin as luma quater pixels. int margin = 0; - if (state->encoder_control->cfg.mv_constraint == KVZ_MV_CONSTRAIN_FRAME_AND_TILE_MARGIN) { - // Enforce a distance of 8 from any tile boundary. - margin = 4 * 4; + if (ctrl->cfg.mv_constraint == KVZ_MV_CONSTRAIN_FRAME_AND_TILE_MARGIN) { + if (is_frac_luma) { + margin = 4 << 2; + } else if (is_frac_chroma) { + margin = 2 << 2; + } } // TODO implement KVZ_MV_CONSTRAIN_FRAM and KVZ_MV_CONSTRAIN_TILE. - const vector2d_t abs_mv = { (orig->x << 2) + x, (orig->y << 2) + y }; + const vector2d_t abs_mv = { + info->origin.x * 4 + x, + info->origin.y * 4 + y, + }; - // Check that both margin and wpp_limit constraints are satisfied. 
- if (abs_mv.x >= margin && abs_mv.x + (width << 2) <= (state->tile->frame->width << 2) - margin && - abs_mv.y >= margin && abs_mv.y + (height << 2) <= (state->tile->frame->height << 2) - margin && - (wpp_limit == -1 || y + (height << 2) <= (wpp_limit << 2))) - { - return true; - } else { - return false; - } + // Check that both margin constraints are satisfied. + const int from_right = + (info->state->tile->frame->width << 2) - (abs_mv.x + (info->width << 2)); + const int from_bottom = + (info->state->tile->frame->height << 2) - (abs_mv.y + (info->height << 2)); + + return abs_mv.x >= margin && + abs_mv.y >= margin && + from_right >= margin && + from_bottom >= margin; } -static INLINE int get_wpp_limit(const encoder_state_t *state, const vector2d_t* orig) +/** + * \return True if referred block is within current tile. + */ +static INLINE bool intmv_within_tile(const inter_search_info_t *info, int x, int y) { - const encoder_control_t *ctrl = state->encoder_control; - if (ctrl->cfg.owf && ctrl->cfg.wpp) { - // Limit motion vectors to the LCU-row below this row. - // To avoid fractional pixel interpolation depending on things outside - // this range, add a margin of 4 pixels. - // - fme needs 4 pixels - // - odd chroma interpolation needs 4 pixels - int wpp_limit = 2 * LCU_WIDTH - 4 - orig->y % LCU_WIDTH; - if (ctrl->cfg.deblock_enable && !ctrl->cfg.sao_enable) { - // As a special case, when deblocking is enabled but SAO is not, we have - // to avoid the possibility of interpolation filters reaching the - // non-deblocked pixels. The deblocking for the horizontal edge on the - // LCU boundary can reach 4 pixels. If SAO is enabled, this WPP-row - // depends on the SAO job, which depends on the deblocking having - // already been done. - wpp_limit -= 4; - } - return wpp_limit; - } else { - return -1; - } + return fracmv_within_tile(info, x * 4, y * 4); } /** - * \return True if referred block is within current tile. 
+ * \brief Calculate cost for an integer motion vector. + * + * Updates info->best_mv, info->best_cost and info->best_bitcost to the new + * motion vector if it yields a lower cost than the current one. + * + * If the motion vector violates the MV constraints for tiles or WPP, the + * cost is not set. + * + * \return true if info->best_mv was changed, false otherwise */
View file
kvazaar-1.1.0.tar.gz/src/search_inter.h -> kvazaar-1.2.0.tar.gz/src/search_inter.h
Changed
@@ -50,14 +50,14 @@ HPEL_POS_DIA = 2 }; -typedef int kvz_mvd_cost_func(encoder_state_t * const state, - int x, int y, - int mv_shift, - int16_t mv_cand[2][2], - inter_merge_cand_t merge_cand[MRG_MAX_NUM_CANDS], - int16_t num_cand, - int32_t ref_idx, - uint32_t *bitcost); +typedef uint32_t kvz_mvd_cost_func(const encoder_state_t *state, + int x, int y, + int mv_shift, + int16_t mv_cand[2][2], + inter_merge_cand_t merge_cand[MRG_MAX_NUM_CANDS], + int16_t num_cand, + int32_t ref_idx, + uint32_t *bitcost); void kvz_search_cu_inter(encoder_state_t * const state, int x, int y, int depth, @@ -73,4 +73,10 @@ double *inter_cost, uint32_t *inter_bitcost); + +unsigned kvz_inter_satd_cost(const encoder_state_t* state, + const lcu_t *lcu, + int x, + int y); + #endif // SEARCH_INTER_H_
View file
kvazaar-1.1.0.tar.gz/src/search_intra.c -> kvazaar-1.2.0.tar.gz/src/search_intra.c
Changed
@@ -220,15 +220,20 @@ nosplit_cost = 0.0; cbf_clear(&pred_cu->cbf, depth, COLOR_Y); - - kvz_intra_recon_lcu_luma(state, x_px, y_px, depth, intra_mode, pred_cu, lcu); - nosplit_cost += kvz_cu_rd_cost_luma(state, lcu_px.x, lcu_px.y, depth, pred_cu, lcu); - if (reconstruct_chroma) { cbf_clear(&pred_cu->cbf, depth, COLOR_U); cbf_clear(&pred_cu->cbf, depth, COLOR_V); + } - kvz_intra_recon_lcu_chroma(state, x_px, y_px, depth, intra_mode, pred_cu, lcu); + const int8_t chroma_mode = reconstruct_chroma ? intra_mode : -1; + kvz_intra_recon_cu(state, + x_px, y_px, + depth, + intra_mode, chroma_mode, + pred_cu, lcu); + + nosplit_cost += kvz_cu_rd_cost_luma(state, lcu_px.x, lcu_px.y, depth, pred_cu, lcu); + if (reconstruct_chroma) { nosplit_cost += kvz_cu_rd_cost_chroma(state, lcu_px.x, lcu_px.y, depth, pred_cu, lcu); } @@ -697,7 +702,11 @@ for (int8_t chroma_mode_i = 0; chroma_mode_i < num_modes; ++chroma_mode_i) { chroma.mode = modes[chroma_mode_i]; - kvz_intra_recon_lcu_chroma(state, x_px, y_px, depth, chroma.mode, NULL, lcu); + kvz_intra_recon_cu(state, + x_px, y_px, + depth, + -1, chroma.mode, // skip luma + NULL, lcu); chroma.cost = kvz_cu_rd_cost_chroma(state, lcu_px.x, lcu_px.y, depth, tr_cu, lcu); double mode_bits = kvz_chroma_mode_bits(state, chroma.mode, intra_mode); @@ -836,7 +845,7 @@ // Set transform depth to current depth, meaning no transform splits. kvz_lcu_set_trdepth(lcu, x_px, y_px, depth, depth); - + double best_rough_cost = costs[select_best_mode_index(modes, costs, number_of_modes)]; // Refine results with slower search or get some results if rough search was skipped. const int32_t rdo_level = state->encoder_control->cfg.rdo; if (rdo_level >= 2 || skip_rough_search) { @@ -844,7 +853,7 @@ if (rdo_level == 3) { number_of_modes_to_search = 35; } else if (rdo_level == 2) { - number_of_modes_to_search = (cu_width <= 8) ? 8 : 3; + number_of_modes_to_search = (cu_width == 4) ? 3 : 2; } else { // Check only the predicted modes. 
number_of_modes_to_search = 0; @@ -863,5 +872,5 @@ uint8_t best_mode_i = select_best_mode_index(modes, costs, number_of_modes); *mode_out = modes[best_mode_i]; - *cost_out = costs[best_mode_i]; + *cost_out = skip_rough_search ? costs[best_mode_i]:best_rough_cost; }
View file
kvazaar-1.1.0.tar.gz/src/strategies/avx2/ipol-avx2.c -> kvazaar-1.2.0.tar.gz/src/strategies/avx2/ipol-avx2.c
Changed
@@ -1384,7 +1384,9 @@ int sample_out_of_bounds = out_of_bounds_y || out_of_bounds_x; if (sample_out_of_bounds){ - out->buffer = MALLOC(kvz_pixel, (width + filter_size) * (height + filter_size)); + // Alloc 5 pixels more than we actually use because AVX2 filter + // functions read up to 5 pixels past the last pixel. + out->buffer = MALLOC(kvz_pixel, (width + filter_size) * (height + filter_size) + 5); if (!out->buffer){ fprintf(stderr, "Memory allocation failed!\n"); assert(0);
View file
kvazaar-1.1.0.tar.gz/src/strategies/avx2/quant-avx2.c -> kvazaar-1.2.0.tar.gz/src/strategies/avx2/quant-avx2.c
Changed
@@ -343,7 +343,7 @@ * \param color Color. * \param scan_order Coefficient scan order. * \param use_trskip Whether transform skip is used. -* \param stride Stride for ref_in, pred_in rec_out and coeff_out. +* \param stride Stride for ref_in, pred_in and rec_out. * \param ref_in Reference pixels. * \param pred_in Predicted pixels. * \param rec_out Reconstructed pixels. @@ -360,7 +360,6 @@ { // Temporary arrays to pass data to and from kvz_quant and transform functions. int16_t residual[TR_MAX_WIDTH * TR_MAX_WIDTH]; - coeff_t quant_coeff[TR_MAX_WIDTH * TR_MAX_WIDTH]; coeff_t coeff[TR_MAX_WIDTH * TR_MAX_WIDTH]; int has_coeffs = 0; @@ -379,35 +378,32 @@ kvz_transform2d(state->encoder_control, residual, coeff, width, (color == COLOR_Y ? 0 : 65535)); } - // Quantize coeffs. (coeff -> quant_coeff) + // Quantize coeffs. (coeff -> coeff_out) if (state->encoder_control->cfg.rdoq_enable && (width > 4 || !state->encoder_control->cfg.rdoq_skip)) { int8_t tr_depth = cur_cu->tr_depth - cur_cu->depth; tr_depth += (cur_cu->part_size == SIZE_NxN ? 1 : 0); - kvz_rdoq(state, coeff, quant_coeff, width, width, (color == COLOR_Y ? 0 : 2), + kvz_rdoq(state, coeff, coeff_out, width, width, (color == COLOR_Y ? 0 : 2), scan_order, cur_cu->type, tr_depth); } else { - kvz_quant(state, coeff, quant_coeff, width, width, (color == COLOR_Y ? 0 : 2), + kvz_quant(state, coeff, coeff_out, width, width, (color == COLOR_Y ? 0 : 2), scan_order, cur_cu->type); } // Check if there are any non-zero coefficients. for (int i = 0; i < width * width; i += 8) { - __m128i v_quant_coeff = _mm_loadu_si128((__m128i*)&(quant_coeff[i])); + __m128i v_quant_coeff = _mm_loadu_si128((__m128i*)&(coeff_out[i])); has_coeffs = !_mm_testz_si128(_mm_set1_epi8(0xFF), v_quant_coeff); if(has_coeffs) break; } - // Copy coefficients to coeff_out. - kvz_coefficients_blit(quant_coeff, coeff_out, width, width, width, out_stride); - // Do the inverse quantization and transformation and the reconstruction to // rec_out. 
if (has_coeffs) { - // Get quantized residual. (quant_coeff -> coeff -> residual) - kvz_dequant(state, quant_coeff, coeff, width, width, (color == COLOR_Y ? 0 : (color == COLOR_U ? 2 : 3)), cur_cu->type); + // Get quantized residual. (coeff_out -> coeff -> residual) + kvz_dequant(state, coeff_out, coeff, width, width, (color == COLOR_Y ? 0 : (color == COLOR_U ? 2 : 3)), cur_cu->type); if (use_trskip) { kvz_itransformskip(state->encoder_control, residual, coeff, width); } @@ -506,8 +502,29 @@ } } -#endif //COMPILE_INTEL_AVX2 && defined X86_64 +static uint32_t coeff_abs_sum_avx2(const coeff_t *coeffs, const size_t length) +{ + assert(length % 8 == 0); + + __m256i total = _mm256_abs_epi32(_mm256_cvtepi16_epi32(_mm_loadu_si128((__m128i*) coeffs))); + + for (int i = 8; i < length; i += 8) { + __m256i temp = _mm256_abs_epi32(_mm256_cvtepi16_epi32(_mm_loadu_si128((__m128i*) &coeffs[i]))); + total = _mm256_add_epi32(total, temp); + } + __m128i result128 = _mm_add_epi32( + _mm256_castsi256_si128(total), + _mm256_extractf128_si256(total, 1) + ); + + uint32_t parts[4]; + _mm_storeu_si128((__m128i*) parts, result128); + + return parts[0] + parts[1] + parts[2] + parts[3]; +} + +#endif //COMPILE_INTEL_AVX2 && defined X86_64 int kvz_strategy_register_quant_avx2(void* opaque, uint8_t bitdepth) { @@ -519,6 +536,7 @@ success &= kvz_strategyselector_register(opaque, "quantize_residual", "avx2", 40, &kvz_quantize_residual_avx2); success &= kvz_strategyselector_register(opaque, "dequant", "avx2", 40, &kvz_dequant_avx2); } + success &= kvz_strategyselector_register(opaque, "coeff_abs_sum", "avx2", 0, &coeff_abs_sum_avx2); #endif //COMPILE_INTEL_AVX2 && defined X86_64 return success;
View file
kvazaar-1.1.0.tar.gz/src/strategies/avx2/sao-avx2.c -> kvazaar-1.2.0.tar.gz/src/strategies/avx2/sao-avx2.c
Changed
@@ -36,18 +36,13 @@ // is difficult to understand. -static INLINE __m256i load_6_offsets(const int* offsets){ - - return _mm256_inserti128_si256(_mm256_castsi128_si256(_mm_loadu_si128((__m128i*) offsets)), _mm_loadl_epi64((__m128i*)&(offsets[4])), 1); -} - -static INLINE __m128i load_6_pixels(const kvz_pixel* data){ - +static INLINE __m128i load_6_pixels(const kvz_pixel* data) +{ return _mm_insert_epi16(_mm_cvtsi32_si128(*(int32_t*)&(data[0])), *(int16_t*)&(data[4]), 2); } -static INLINE __m256i load_5_offsets(const int* offsets){ - +static INLINE __m256i load_5_offsets(const int* offsets) +{ return _mm256_inserti128_si256(_mm256_castsi128_si256(_mm_loadu_si128((__m128i*) offsets)), _mm_insert_epi32(_mm_setzero_si128(), offsets[4], 0), 1); } @@ -73,9 +68,12 @@ } -int kvz_sao_edge_ddistortion_avx2(const kvz_pixel *orig_data, const kvz_pixel *rec_data, - int block_width, int block_height, - int eo_class, int offsets[NUM_SAO_EDGE_CATEGORIES]) +static int sao_edge_ddistortion_avx2(const kvz_pixel *orig_data, + const kvz_pixel *rec_data, + int block_width, + int block_height, + int eo_class, + int offsets[NUM_SAO_EDGE_CATEGORIES]) { int y, x; int sum = 0; @@ -96,7 +94,7 @@ __m256i v_cat = _mm256_cvtepu8_epi32(sao_calc_eo_cat_avx2(&v_a, &v_b, &v_c)); - __m256i v_offset = _mm256_loadu_si256((__m256i*) offsets); + __m256i v_offset = load_5_offsets(offsets); v_offset = _mm256_permutevar8x32_epi32(v_offset, v_cat); __m256i v_diff = _mm256_cvtepu8_epi32(_mm_loadl_epi64((__m128i*)&(orig_data[y * block_width + x]))); @@ -117,7 +115,7 @@ __m256i v_cat = _mm256_cvtepu8_epi32(sao_calc_eo_cat_avx2(&v_a, &v_b, &v_c)); - __m256i v_offset = load_6_offsets(offsets); + __m256i v_offset = load_5_offsets(offsets); v_offset = _mm256_permutevar8x32_epi32(v_offset, v_cat); const kvz_pixel* orig_ptr = &(orig_data[y * block_width + x]); @@ -139,7 +137,12 @@ } -static INLINE void accum_count_eo_cat_avx2(__m256i* __restrict v_diff_accum, __m256i* __restrict v_count, __m256i* __restrict v_cat, 
__m256i* __restrict v_diff, int eo_cat){ +static INLINE void accum_count_eo_cat_avx2(__m256i* __restrict v_diff_accum, + __m256i* __restrict v_count, + __m256i* __restrict v_cat, + __m256i* __restrict v_diff, + int eo_cat) +{ __m256i v_mask = _mm256_cmpeq_epi32(*v_cat, _mm256_set1_epi32(eo_cat)); *v_diff_accum = _mm256_add_epi32(*v_diff_accum, _mm256_and_si256(*v_diff, v_mask)); *v_count = _mm256_sub_epi32(*v_count, v_mask); @@ -151,9 +154,12 @@ accum_count_eo_cat_avx2(&(v_diff_accum[ EO_CAT ]), &(v_count[ EO_CAT ]), &V_CAT , &v_diff, EO_CAT); -void kvz_calc_sao_edge_dir_avx2(const kvz_pixel *orig_data, const kvz_pixel *rec_data, - int eo_class, int block_width, int block_height, - int cat_sum_cnt[2][NUM_SAO_EDGE_CATEGORIES]) +static void calc_sao_edge_dir_avx2(const kvz_pixel *orig_data, + const kvz_pixel *rec_data, + int eo_class, + int block_width, + int block_height, + int cat_sum_cnt[2][NUM_SAO_EDGE_CATEGORIES]) { int y, x; vector2d_t a_ofs = g_sao_edge_offsets[eo_class][0]; @@ -240,30 +246,29 @@ } -void kvz_sao_reconstruct_color_avx2(const encoder_control_t * const encoder, - const kvz_pixel *rec_data, kvz_pixel *new_rec_data, - const sao_info_t *sao, - int stride, int new_stride, - int block_width, int block_height, - color_t color_i) +static void sao_reconstruct_color_avx2(const encoder_control_t * const encoder, + const kvz_pixel *rec_data, kvz_pixel *new_rec_data, + const sao_info_t *sao, + int stride, int new_stride, + int block_width, int block_height, + color_t color_i) { - int y, x; // Arrays orig_data and rec_data are quarter size for chroma. int offset_v = color_i == COLOR_V ? 
5 : 0; - if(sao->type == SAO_TYPE_BAND) { - int offsets[1<<KVZ_BIT_DEPTH]; + if (sao->type == SAO_TYPE_BAND) { + int offsets[1 << KVZ_BIT_DEPTH]; kvz_calc_sao_offset_array(encoder, sao, offsets, color_i); - for (y = 0; y < block_height; ++y) { - for (x = 0; x < block_width; ++x) { + for (int y = 0; y < block_height; ++y) { + for (int x = 0; x < block_width; ++x) { new_rec_data[y * new_stride + x] = offsets[rec_data[y * stride + x]]; } } } else { // Don't sample the edge pixels because this function doesn't have access to // their neighbours. - for (y = 0; y < block_height; ++y) { - for (x = 0; x < block_width; x+=8) { + for (int y = 0; y < block_height; ++y) { + for (int x = 0; x < block_width; x+=8) { vector2d_t a_ofs = g_sao_edge_offsets[sao->eo_class][0]; vector2d_t b_ofs = g_sao_edge_offsets[sao->eo_class][1]; const kvz_pixel *c_data = &rec_data[y * stride + x]; @@ -299,9 +304,13 @@ } -int kvz_sao_band_ddistortion_avx2(const encoder_state_t * const state, const kvz_pixel *orig_data, const kvz_pixel *rec_data, - int block_width, int block_height, - int band_pos, int sao_bands[4]) +static int sao_band_ddistortion_avx2(const encoder_state_t * const state, + const kvz_pixel *orig_data, + const kvz_pixel *rec_data, + int block_width, + int block_height, + int band_pos, + int sao_bands[4]) { int y, x; int shift = state->encoder_control->bitdepth-5; @@ -348,10 +357,10 @@ bool success = true; #if COMPILE_INTEL_AVX2 if (bitdepth == 8) { - success &= kvz_strategyselector_register(opaque, "sao_edge_ddistortion", "avx2", 40, &kvz_sao_edge_ddistortion_avx2); - success &= kvz_strategyselector_register(opaque, "calc_sao_edge_dir", "avx2", 40, &kvz_calc_sao_edge_dir_avx2); - success &= kvz_strategyselector_register(opaque, "sao_reconstruct_color", "avx2", 40, &kvz_sao_reconstruct_color_avx2); - success &= kvz_strategyselector_register(opaque, "sao_band_ddistortion", "avx2", 40, &kvz_sao_band_ddistortion_avx2); + success &= kvz_strategyselector_register(opaque, 
"sao_edge_ddistortion", "avx2", 40, &sao_edge_ddistortion_avx2); + success &= kvz_strategyselector_register(opaque, "calc_sao_edge_dir", "avx2", 40, &calc_sao_edge_dir_avx2); + success &= kvz_strategyselector_register(opaque, "sao_reconstruct_color", "avx2", 40, &sao_reconstruct_color_avx2); + success &= kvz_strategyselector_register(opaque, "sao_band_ddistortion", "avx2", 40, &sao_band_ddistortion_avx2); } #endif //COMPILE_INTEL_AVX2 return success;
View file
kvazaar-1.1.0.tar.gz/src/strategies/generic/quant-generic.c -> kvazaar-1.2.0.tar.gz/src/strategies/generic/quant-generic.c
Changed
@@ -169,7 +169,7 @@ * \param color Color. * \param scan_order Coefficient scan order. * \param use_trskip Whether transform skip is used. -* \param stride Stride for ref_in, pred_in rec_out and coeff_out. +* \param stride Stride for ref_in, pred_in and rec_out. * \param ref_in Reference pixels. * \param pred_in Predicted pixels. * \param rec_out Reconstructed pixels. @@ -186,7 +186,6 @@ { // Temporary arrays to pass data to and from kvz_quant and transform functions. int16_t residual[TR_MAX_WIDTH * TR_MAX_WIDTH]; - coeff_t quant_coeff[TR_MAX_WIDTH * TR_MAX_WIDTH]; coeff_t coeff[TR_MAX_WIDTH * TR_MAX_WIDTH]; int has_coeffs = 0; @@ -212,16 +211,16 @@ kvz_transform2d(state->encoder_control, residual, coeff, width, (color == COLOR_Y ? 0 : 65535)); } - // Quantize coeffs. (coeff -> quant_coeff) + // Quantize coeffs. (coeff -> coeff_out) if (state->encoder_control->cfg.rdoq_enable && (width > 4 || !state->encoder_control->cfg.rdoq_skip)) { int8_t tr_depth = cur_cu->tr_depth - cur_cu->depth; tr_depth += (cur_cu->part_size == SIZE_NxN ? 1 : 0); - kvz_rdoq(state, coeff, quant_coeff, width, width, (color == COLOR_Y ? 0 : 2), + kvz_rdoq(state, coeff, coeff_out, width, width, (color == COLOR_Y ? 0 : 2), scan_order, cur_cu->type, tr_depth); } else { - kvz_quant(state, coeff, quant_coeff, width, width, (color == COLOR_Y ? 0 : 2), + kvz_quant(state, coeff, coeff_out, width, width, (color == COLOR_Y ? 0 : 2), scan_order, cur_cu->type); } @@ -229,23 +228,20 @@ { int i; for (i = 0; i < width * width; ++i) { - if (quant_coeff[i] != 0) { + if (coeff_out[i] != 0) { has_coeffs = 1; break; } } } - // Copy coefficients to coeff_out. - kvz_coefficients_blit(quant_coeff, coeff_out, width, width, width, out_stride); - // Do the inverse quantization and transformation and the reconstruction to // rec_out. if (has_coeffs) { int y, x; - // Get quantized residual. (quant_coeff -> coeff -> residual) - kvz_dequant(state, quant_coeff, coeff, width, width, (color == COLOR_Y ? 0 : (color == COLOR_U ? 
2 : 3)), cur_cu->type); + // Get quantized residual. (coeff_out -> coeff -> residual) + kvz_dequant(state, coeff_out, coeff, width, width, (color == COLOR_Y ? 0 : (color == COLOR_U ? 2 : 3)), cur_cu->type); if (use_trskip) { kvz_itransformskip(state->encoder_control, residual, coeff, width); } @@ -324,6 +320,15 @@ } } +static uint32_t coeff_abs_sum_generic(const coeff_t *coeffs, size_t length) +{ + uint32_t sum = 0; + for (int i = 0; i < length; i++) { + sum += abs(coeffs[i]); + } + return sum; +} + int kvz_strategy_register_quant_generic(void* opaque, uint8_t bitdepth) { bool success = true; @@ -331,6 +336,7 @@ success &= kvz_strategyselector_register(opaque, "quant", "generic", 0, &kvz_quant_generic); success &= kvz_strategyselector_register(opaque, "quantize_residual", "generic", 0, &kvz_quantize_residual_generic); success &= kvz_strategyselector_register(opaque, "dequant", "generic", 0, &kvz_dequant_generic); + success &= kvz_strategyselector_register(opaque, "coeff_abs_sum", "generic", 0, &coeff_abs_sum_generic); return success; }
View file
kvazaar-1.1.0.tar.gz/src/strategies/generic/sao-generic.c -> kvazaar-1.2.0.tar.gz/src/strategies/generic/sao-generic.c
Changed
@@ -40,9 +40,12 @@ } -int kvz_sao_edge_ddistortion_generic(const kvz_pixel *orig_data, const kvz_pixel *rec_data, - int block_width, int block_height, - int eo_class, int offsets[NUM_SAO_EDGE_CATEGORIES]) +static int sao_edge_ddistortion_generic(const kvz_pixel *orig_data, + const kvz_pixel *rec_data, + int block_width, + int block_height, + int eo_class, + int offsets[NUM_SAO_EDGE_CATEGORIES]) { int y, x; int sum = 0; @@ -76,9 +79,12 @@ * \param dir_offsets * \param is_chroma 0 for luma, 1 for chroma. Indicates */ -void kvz_calc_sao_edge_dir_generic(const kvz_pixel *orig_data, const kvz_pixel *rec_data, - int eo_class, int block_width, int block_height, - int cat_sum_cnt[2][NUM_SAO_EDGE_CATEGORIES]) +static void calc_sao_edge_dir_generic(const kvz_pixel *orig_data, + const kvz_pixel *rec_data, + int eo_class, + int block_width, + int block_height, + int cat_sum_cnt[2][NUM_SAO_EDGE_CATEGORIES]) { int y, x; vector2d_t a_ofs = g_sao_edge_offsets[eo_class][0]; @@ -103,30 +109,32 @@ } -void kvz_sao_reconstruct_color_generic(const encoder_control_t * const encoder, - const kvz_pixel *rec_data, kvz_pixel *new_rec_data, - const sao_info_t *sao, - int stride, int new_stride, - int block_width, int block_height, - color_t color_i) +static void sao_reconstruct_color_generic(const encoder_control_t * const encoder, + const kvz_pixel *rec_data, + kvz_pixel *new_rec_data, + const sao_info_t *sao, + int stride, + int new_stride, + int block_width, + int block_height, + color_t color_i) { - int y, x; // Arrays orig_data and rec_data are quarter size for chroma. int offset_v = color_i == COLOR_V ? 
5 : 0; - if(sao->type == SAO_TYPE_BAND) { + if (sao->type == SAO_TYPE_BAND) { int offsets[1<<KVZ_BIT_DEPTH]; kvz_calc_sao_offset_array(encoder, sao, offsets, color_i); - for (y = 0; y < block_height; ++y) { - for (x = 0; x < block_width; ++x) { + for (int y = 0; y < block_height; ++y) { + for (int x = 0; x < block_width; ++x) { new_rec_data[y * new_stride + x] = offsets[rec_data[y * stride + x]]; } } } else { // Don't sample the edge pixels because this function doesn't have access to // their neighbours. - for (y = 0; y < block_height; ++y) { - for (x = 0; x < block_width; ++x) { + for (int y = 0; y < block_height; ++y) { + for (int x = 0; x < block_width; ++x) { vector2d_t a_ofs = g_sao_edge_offsets[sao->eo_class][0]; vector2d_t b_ofs = g_sao_edge_offsets[sao->eo_class][1]; const kvz_pixel *c_data = &rec_data[y * stride + x]; @@ -144,9 +152,13 @@ } -int kvz_sao_band_ddistortion_generic(const encoder_state_t * const state, const kvz_pixel *orig_data, const kvz_pixel *rec_data, - int block_width, int block_height, - int band_pos, int sao_bands[4]) +static int sao_band_ddistortion_generic(const encoder_state_t * const state, + const kvz_pixel *orig_data, + const kvz_pixel *rec_data, + int block_width, + int block_height, + int band_pos, + int sao_bands[4]) { int y, x; int shift = state->encoder_control->bitdepth-5; @@ -174,11 +186,11 @@ int kvz_strategy_register_sao_generic(void* opaque, uint8_t bitdepth) { bool success = true; - - success &= kvz_strategyselector_register(opaque, "sao_edge_ddistortion", "generic", 0, &kvz_sao_edge_ddistortion_generic); - success &= kvz_strategyselector_register(opaque, "calc_sao_edge_dir", "generic", 0, &kvz_calc_sao_edge_dir_generic); - success &= kvz_strategyselector_register(opaque, "sao_reconstruct_color", "generic", 0, &kvz_sao_reconstruct_color_generic); - success &= kvz_strategyselector_register(opaque, "sao_band_ddistortion", "generic", 0, &kvz_sao_band_ddistortion_generic); + + success &= 
kvz_strategyselector_register(opaque, "sao_edge_ddistortion", "generic", 0, &sao_edge_ddistortion_generic); + success &= kvz_strategyselector_register(opaque, "calc_sao_edge_dir", "generic", 0, &calc_sao_edge_dir_generic); + success &= kvz_strategyselector_register(opaque, "sao_reconstruct_color", "generic", 0, &sao_reconstruct_color_generic); + success &= kvz_strategyselector_register(opaque, "sao_band_ddistortion", "generic", 0, &sao_band_ddistortion_generic); return success; }
View file
kvazaar-1.1.0.tar.gz/src/strategies/strategies-quant.c -> kvazaar-1.2.0.tar.gz/src/strategies/strategies-quant.c
Changed
@@ -29,6 +29,7 @@ quant_func *kvz_quant; quant_residual_func *kvz_quantize_residual; dequant_func *kvz_dequant; +coeff_abs_sum_func *kvz_coeff_abs_sum; int kvz_strategy_register_quant(void* opaque, uint8_t bitdepth) { @@ -40,4 +41,4 @@ success &= kvz_strategy_register_quant_avx2(opaque, bitdepth); } return success; -} \ No newline at end of file +}
View file
kvazaar-1.1.0.tar.gz/src/strategies/strategies-quant.h -> kvazaar-1.2.0.tar.gz/src/strategies/strategies-quant.h
Changed
@@ -45,10 +45,13 @@ typedef unsigned (dequant_func)(const encoder_state_t * const state, coeff_t *q_coef, coeff_t *coef, int32_t width, int32_t height, int8_t type, int8_t block_type); +typedef uint32_t (coeff_abs_sum_func)(const coeff_t *coeffs, size_t length); + // Declare function pointers. extern quant_func * kvz_quant; extern quant_residual_func * kvz_quantize_residual; extern dequant_func *kvz_dequant; +extern coeff_abs_sum_func *kvz_coeff_abs_sum; int kvz_strategy_register_quant(void* opaque, uint8_t bitdepth); @@ -57,6 +60,7 @@ {"quant", (void**) &kvz_quant}, \ {"quantize_residual", (void**) &kvz_quantize_residual}, \ {"dequant", (void**) &kvz_dequant}, \ + {"coeff_abs_sum", (void**) &kvz_coeff_abs_sum}, \
View file
kvazaar-1.1.0.tar.gz/src/threadqueue.c -> kvazaar-1.2.0.tar.gz/src/threadqueue.c
Changed
@@ -30,668 +30,601 @@ #include "threads.h" -typedef struct { - threadqueue_queue_t * threadqueue; - int worker_id; -} threadqueue_worker_spec; +/** + * \file + * + * Lock acquisition order: + * + * 1. When locking a job and its dependency, the dependecy must be locked + * first and then the job depending on it. + * + * 2. When locking a job and the thread queue, the thread queue must be + * locked first and then the job. + * + * 3. When accessing threadqueue_job_t.next, the thread queue must be + * locked. + */ #define THREADQUEUE_LIST_REALLOC_SIZE 32 -//#define PTHREAD_COND_SIGNAL(c) fprintf(stderr, "%s:%d pthread_cond_signal(%s=%p)\n", __FUNCTION__, __LINE__, #c, c); if (pthread_cond_signal((c)) != 0) { fprintf(stderr, "pthread_cond_signal(%s=%p) failed!\n", #c, c); assert(0); return 0; } -//#define PTHREAD_COND_BROADCAST(c) fprintf(stderr, "%s:%d pthread_cond_broadcast(%s=%p)\n", __FUNCTION__, __LINE__, #c, c); if (pthread_cond_broadcast((c)) != 0) { fprintf(stderr, "pthread_cond_broadcast(%s=%p) failed!\n", #c, c); assert(0); return 0; } -//#define PTHREAD_COND_WAIT(c,l) fprintf(stderr, "%s:%d pthread_cond_wait(%s=%p, %s=%p)\n", __FUNCTION__, __LINE__, #c, c, #l, l); if (pthread_cond_wait((c),(l)) != 0) { fprintf(stderr, "pthread_cond_wait(%s=%p, %s=%p) failed!\n", #c, c, #l, l); assert(0); return 0; } else {fprintf(stderr, "%s:%d pthread_cond_wait(%s=%p, %s=%p) (done)\n", __FUNCTION__, __LINE__, #c, c, #l, l);} -//#define PTHREAD_LOCK(l) fprintf(stderr, "%s:%d pthread_mutex_lock(%s=%p) (try)\n", __FUNCTION__, __LINE__, #l, l); if (pthread_mutex_lock((l)) != 0) { fprintf(stderr, "pthread_mutex_lock(%s=%p) failed!\n", #l, l); assert(0); return 0; } else {fprintf(stderr, "%s:%d pthread_mutex_lock(%s=%p)\n", __FUNCTION__, __LINE__, #l, l);} -//#define PTHREAD_UNLOCK(l) if (pthread_mutex_unlock((l)) != 0) { fprintf(stderr, "pthread_mutex_unlock(%s=%p) failed!\n", #l, l); assert(0); return 0; } else {fprintf(stderr, "%s:%d pthread_mutex_unlock(%s=%p)\n", 
__FUNCTION__, __LINE__, #l, l);} - - -#define PTHREAD_COND_SIGNAL(c) if (pthread_cond_signal((c)) != 0) { fprintf(stderr, "pthread_cond_signal(%s=%p) failed!\n", #c, c); assert(0); return 0; } -#define PTHREAD_COND_BROADCAST(c) if (pthread_cond_broadcast((c)) != 0) { fprintf(stderr, "pthread_cond_broadcast(%s=%p) failed!\n", #c, c); assert(0); return 0; } - -#ifndef _PTHREAD_DUMP -#define PTHREAD_COND_WAIT(c,l) if (pthread_cond_wait((c),(l)) != 0) { fprintf(stderr, "pthread_cond_wait(%s=%p, %s=%p) failed!\n", #c, c, #l, l); assert(0); return 0; } -#define PTHREAD_LOCK(l) if (pthread_mutex_lock((l)) != 0) { fprintf(stderr, "pthread_mutex_lock(%s) failed!\n", #l); assert(0); return 0; } -#define PTHREAD_UNLOCK(l) if (pthread_mutex_unlock((l)) != 0) { fprintf(stderr, "pthread_mutex_unlock(%s) failed!\n", #l); assert(0); return 0; } - -#else //PTHREAD_DUMP -#define PTHREAD_LOCK(l) do { \ - PERFORMANCE_MEASURE_START(); \ - if (pthread_mutex_lock((l)) != 0) { fprintf(stderr, "pthread_mutex_lock(%s) failed!\n", #l); assert(0); return 0; } \ - PERFORMANCE_MEASURE_END(NULL, "pthread_mutex_lock(%s=%p)@%s:%d",#l,l,__FUNCTION__, __LINE__); \ -} while (0); - -#define PTHREAD_UNLOCK(l) do { \ - PERFORMANCE_MEASURE_START(); \ - if (pthread_mutex_unlock((l)) != 0) { fprintf(stderr, "pthread_mutex_unlock(%s) failed!\n", #l); assert(0); return 0; } \ - PERFORMANCE_MEASURE_END(NULL, "pthread_mutex_unlock(%s=%p)@%s:%d",#l,l,__FUNCTION__, __LINE__); \ -} while (0); - -#define PTHREAD_COND_WAIT(c,l) do { \ - PERFORMANCE_MEASURE_START(); \ - if (pthread_cond_wait((c),(l)) != 0) { fprintf(stderr, "pthread_cond_wait(%s=%p, %s=%p) failed!\n", #c, c, #l, l); assert(0); return 0;} \ - PERFORMANCE_MEASURE_END(NULL, "pthread_cond_wait(%s=%p, %s=%p)@%s:%d",#c, c, #l, l,__FUNCTION__, __LINE__); \ -} while (0); -#endif //PTHREAD_DUMP - -static void* threadqueue_worker(void* threadqueue_worker_spec_opaque) +#define PTHREAD_COND_SIGNAL(c) \ + if (pthread_cond_signal((c)) != 0) { \ + fprintf(stderr, 
"pthread_cond_signal(%s=%p) failed!\n", #c, c); \ + assert(0); \ + return 0; \ + } + +#define PTHREAD_COND_BROADCAST(c) \ + if (pthread_cond_broadcast((c)) != 0) { \ + fprintf(stderr, "pthread_cond_broadcast(%s=%p) failed!\n", #c, c); \ + assert(0); \ + return 0; \ + } + +#define PTHREAD_COND_WAIT(c,l) \ + if (pthread_cond_wait((c),(l)) != 0) { \ + fprintf(stderr, "pthread_cond_wait(%s=%p, %s=%p) failed!\n", #c, c, #l, l); \ + assert(0); \ + return 0; \ + } + +#define PTHREAD_LOCK(l) \ + if (pthread_mutex_lock((l)) != 0) { \ + fprintf(stderr, "pthread_mutex_lock(%s) failed!\n", #l); \ + assert(0); \ + return 0; \ + } + +#define PTHREAD_UNLOCK(l) \ + if (pthread_mutex_unlock((l)) != 0) { \ + fprintf(stderr, "pthread_mutex_unlock(%s) failed!\n", #l); \ + assert(0); \ + return 0; \ + } + + +typedef enum { + /** + * \brief Job has been submitted, but is not allowed to run yet. + */ + THREADQUEUE_JOB_STATE_PAUSED, + + /** + * \brief Job is waiting for dependencies. + */ + THREADQUEUE_JOB_STATE_WAITING, + + /** + * \brief Job is ready to run. + */ + THREADQUEUE_JOB_STATE_READY, + + /** + * \brief Job is running. + */ + THREADQUEUE_JOB_STATE_RUNNING, + + /** + * \brief Job is completed. + */ + THREADQUEUE_JOB_STATE_DONE, + +} threadqueue_job_state; + + +struct threadqueue_job_t { + pthread_mutex_t lock; + + threadqueue_job_state state; + + /** + * \brief Number of dependencies that have not been completed yet. + */ + int ndepends; + + /** + * \brief Reverse dependencies. + * + * Array of pointers to jobs that depend on this one. They have to exist + * when the thread finishes, because they cannot be run before. + */ + struct threadqueue_job_t **rdepends; + + /** + * \brief Number of elements in rdepends. + */ + int rdepends_count; + + /** + * \brief Allocated size of rdepends. + */ + int rdepends_size; + + /** + * \brief Reference count + */ + int refcount; + + /** + * \brief Pointer to the function to execute. 
+ */ + void (*fptr)(void *arg); + + /** + * \brief Argument for fptr. + */ + void *arg; + + /** + * \brief Pointer to the next job in the queue. + */ + struct threadqueue_job_t *next; + +}; + + +struct threadqueue_queue_t { + pthread_mutex_t lock; + + /** + * \brief Job available condition variable + * + * Signalled when there is a new job to do. + */ + pthread_cond_t job_available; + + /** + * \brief Job done condition variable + * + * Signalled when a job has been completed. + */ + pthread_cond_t job_done; + + /** + * Array containing spawned threads + */ + pthread_t *threads; + + /**
View file
kvazaar-1.1.0.tar.gz/src/threadqueue.h -> kvazaar-1.2.0.tar.gz/src/threadqueue.h
Changed
@@ -30,140 +30,22 @@ #include "global.h" // IWYU pragma: keep -typedef enum { - THREADQUEUE_JOB_STATE_QUEUED = 0, - THREADQUEUE_JOB_STATE_RUNNING = 1, - THREADQUEUE_JOB_STATE_DONE = 2 -} threadqueue_job_state; +typedef struct threadqueue_job_t threadqueue_job_t; +typedef struct threadqueue_queue_t threadqueue_queue_t; -typedef struct threadqueue_job_t { - pthread_mutex_t lock; - - threadqueue_job_state state; - - unsigned int ndepends; //Number of active dependencies that this job wait for - - struct threadqueue_job_t **rdepends; //array of pointer to jobs that depend on this one. They have to exist when the thread finishes, because they cannot be run before. - unsigned int rdepends_count; //number of rdepends - unsigned int rdepends_size; //allocated size of rdepends - - //Job function and state to use - void (*fptr)(void *arg); - void *arg; - -#ifdef KVZ_DEBUG - const char* debug_description; - - int debug_worker_id; - - KVZ_CLOCK_T debug_clock_enqueue; - KVZ_CLOCK_T debug_clock_start; - KVZ_CLOCK_T debug_clock_stop; - KVZ_CLOCK_T debug_clock_dequeue; -#endif -} threadqueue_job_t; +threadqueue_queue_t * kvz_threadqueue_init(int thread_count); +threadqueue_job_t * kvz_threadqueue_job_create(void (*fptr)(void *arg), void *arg); +int kvz_threadqueue_submit(threadqueue_queue_t * threadqueue, threadqueue_job_t *job); - +int kvz_threadqueue_job_dep_add(threadqueue_job_t *job, threadqueue_job_t *dependency); -typedef struct { - pthread_mutex_t lock; - pthread_cond_t cond; - pthread_cond_t cb_cond; - - pthread_t *threads; - int threads_count; - int threads_running; +threadqueue_job_t *kvz_threadqueue_copy_ref(threadqueue_job_t *job); - int stop; //=>1: threads should stop asap - - int fifo; - - threadqueue_job_t **queue; - unsigned int queue_start; - unsigned int queue_count; - unsigned int queue_size; - unsigned int queue_waiting_execution; //Number of jobs without any dependency which could be run - unsigned int queue_waiting_dependency; //Number of jobs waiting for a 
dependency to complete - unsigned int queue_running; //Number of jobs running - -#ifdef KVZ_DEBUG - //Format: pointer <tab> worker id <tab> time enqueued <tab> time started <tab> time stopped <tab> time dequeued <tab> job description - //For threads, pointer = "" and job description == "thread", time enqueued and time dequeued are equal to "-" - //For flush, pointer = "" and job description == "FLUSH", time enqueued, time dequeued and time started are equal to "-" - //Each time field, except the first one in the line be expressed in a relative way, by prepending the number of seconds by +. - //Dependencies: pointer -> pointer +void kvz_threadqueue_free_job(threadqueue_job_t **job_ptr); - FILE *debug_log; - - KVZ_CLOCK_T *debug_clock_thread_start; - KVZ_CLOCK_T *debug_clock_thread_end; -#endif -} threadqueue_queue_t; - -//Init a threadqueue (if fifo, then behave as a FIFO with dependencies, otherwise as a LIFO with dependencies) -int kvz_threadqueue_init(threadqueue_queue_t * threadqueue, int thread_count, int fifo); - -//Add a job to the queue, and returs a threadqueue_job handle. If wait == 1, one has to run kvz_threadqueue_job_unwait_job in order to have it run -threadqueue_job_t * kvz_threadqueue_submit(threadqueue_queue_t * threadqueue, void (*fptr)(void *arg), void *arg, int wait, const char* debug_description); - -int kvz_threadqueue_job_unwait_job(threadqueue_queue_t * threadqueue, threadqueue_job_t *job); - -//Add a dependency between two jobs. -int kvz_threadqueue_job_dep_add(threadqueue_job_t *job, threadqueue_job_t *depends_on); - -//Blocking call until the queue is empty. Previously set threadqueue_job handles should not be used anymore -int kvz_threadqueue_flush(threadqueue_queue_t * threadqueue); - -//Blocking call until job is executed. Job handles submitted before job should not be used any more as they are removed from the queue. 
int kvz_threadqueue_waitfor(threadqueue_queue_t * threadqueue, threadqueue_job_t * job); +int kvz_threadqueue_stop(threadqueue_queue_t * threadqueue); +void kvz_threadqueue_free(threadqueue_queue_t * threadqueue); -//Free ressources in a threadqueue -int kvz_threadqueue_finalize(threadqueue_queue_t * threadqueue); - -#ifdef KVZ_DEBUG -int threadqueue_log(threadqueue_queue_t * threadqueue, const KVZ_CLOCK_T *start, const KVZ_CLOCK_T *stop, const char* debug_description); - -// Bitmasks for PERFORMANCE_MEASURE_START and PERFORMANCE_MEASURE_END. -#define KVZ_PERF_FRAME (1 << 0) -#define KVZ_PERF_JOB (1 << 1) -#define KVZ_PERF_LCU (1 << 2) -#define KVZ_PERF_SAOREC (1 << 3) -#define KVZ_PERF_BSLEAF (1 << 4) -#define KVZ_PERF_SEARCHCU (1 << 5) - -#define IMPL_PERFORMANCE_MEASURE_START(mask) KVZ_CLOCK_T start, stop; if ((KVZ_DEBUG) & mask) { KVZ_GET_TIME(&start); } -#define IMPL_PERFORMANCE_MEASURE_END(mask, threadqueue, str, ...) { if ((KVZ_DEBUG) & mask) { KVZ_GET_TIME(&stop); {char job_description[256]; sprintf(job_description, (str), __VA_ARGS__); threadqueue_log((threadqueue), &start, &stop, job_description);}} } \ - -#ifdef _MSC_VER -// Disable VS conditional expression warning from debug code. -# define WITHOUT_CONSTANT_EXP_WARNING(macro) \ - __pragma(warning(push)) \ - __pragma(warning(disable:4127)) \ - macro \ - __pragma(warning(pop)) -# define PERFORMANCE_MEASURE_START(mask) \ - WITHOUT_CONSTANT_EXP_WARNING(IMPL_PERFORMANCE_MEASURE_START(mask)) -# define PERFORMANCE_MEASURE_END(mask, threadqueue, str, ...) \ - WITHOUT_CONSTANT_EXP_WARNING(IMPL_PERFORMANCE_MEASURE_END(mask, threadqueue, str, ##__VA_ARGS__)) -#else -# define PERFORMANCE_MEASURE_START(mask) \ - IMPL_PERFORMANCE_MEASURE_START(mask) -# define PERFORMANCE_MEASURE_END(mask, threadqueue, str, ...) \ - IMPL_PERFORMANCE_MEASURE_END(mask, threadqueue, str, ##__VA_ARGS__) -#endif - -#else -#define PERFORMANCE_MEASURE_START(mask) -#define PERFORMANCE_MEASURE_END(mask, threadqueue, str, ...) 
-#endif - -/* Constraints: - * - * - Always first lock threadqueue, than a job inside it - * - When job A depends on job B, always lock first job B and then job A - * - Jobs should be submitted in an order which is compatible with serial execution. - * - * */ - -#endif //THREADQUEUE_H_ +#endif // THREADQUEUE_H_
View file
kvazaar-1.1.0.tar.gz/src/threads.h -> kvazaar-1.2.0.tar.gz/src/threads.h
Changed
@@ -30,10 +30,6 @@ #include <pthread.h> -#define E3 1000 -#define E9 1000000000 -#define FILETIME_TO_EPOCH 0x19DB1DED53E8000LL - #if defined(__GNUC__) && !defined(__MINGW32__) #include <unistd.h> // IWYU pragma: export #include <time.h> // IWYU pragma: export @@ -76,7 +72,64 @@ #endif //__GNUC__ -#undef E9 -#undef E3 +#ifdef __APPLE__ +// POSIX semaphores are deprecated on Mac so we use Grand Central Dispatch +// semaphores instead. +#include <dispatch/dispatch.h> +typedef dispatch_semaphore_t kvz_sem_t; + +static INLINE void kvz_sem_init(kvz_sem_t *sem, int value) +{ + assert(value >= 0); + *sem = dispatch_semaphore_create(value); +} + +static INLINE void kvz_sem_wait(kvz_sem_t *sem) +{ + dispatch_semaphore_wait(*sem, DISPATCH_TIME_FOREVER); +} + +static INLINE void kvz_sem_post(kvz_sem_t *sem) +{ + dispatch_semaphore_signal(*sem); +} + + +static INLINE void kvz_sem_destroy(kvz_sem_t *sem) +{ + // Do nothing for GCD semaphores. +} + +#else +// Use POSIX semaphores. +#include <semaphore.h> + +typedef sem_t kvz_sem_t; + +static INLINE void kvz_sem_init(kvz_sem_t *sem, int value) +{ + assert(value >= 0); + // Pthreads-w32 does not support process-shared semaphores, so pshared + // must always be zero. + int pshared = 0; + sem_init(sem, pshared, value); +} + +static INLINE void kvz_sem_wait(kvz_sem_t *sem) +{ + sem_wait(sem); +} + +static INLINE void kvz_sem_post(kvz_sem_t *sem) +{ + sem_post(sem); +} + +static INLINE void kvz_sem_destroy(kvz_sem_t *sem) +{ + sem_destroy(sem); +} + +#endif #endif //THREADS_H_
View file
kvazaar-1.1.0.tar.gz/src/transform.c -> kvazaar-1.2.0.tar.gz/src/transform.c
Changed
@@ -62,7 +62,7 @@ * * \param width Transform width. * \param in_stride Stride for ref_in and pred_in - * \param out_stride Stride for rec_out and coeff_out. + * \param out_stride Stride for rec_out. * \param ref_in Reference pixels. * \param pred_in Predicted pixels. * \param rec_out Returns the reconstructed pixels. @@ -82,14 +82,15 @@ for (int y = 0; y < width; ++y) { for (int x = 0; x < width; ++x) { - int32_t in_idx = x + y * in_stride; - int32_t out_idx = x + y * out_stride; + int32_t in_idx = x + y * in_stride; + int32_t out_idx = x + y * out_stride; + int32_t coeff_idx = x + y * width; // The residual must be computed before writing to rec_out because // pred_in and rec_out may point to the same array. - coeff_t coeff = (coeff_t)(ref_in[in_idx] - pred_in[in_idx]); - coeff_out[out_idx] = coeff; - rec_out[out_idx] = ref_in[in_idx]; + coeff_t coeff = (coeff_t)(ref_in[in_idx] - pred_in[in_idx]); + coeff_out[coeff_idx] = coeff; + rec_out[out_idx] = ref_in[in_idx]; nonzero_coeffs |= (coeff != 0); } @@ -102,22 +103,20 @@ * Apply DPCM to residual. * * \param width width of the block - * \param stride stride of coeff array * \param dir RDPCM direction * \param coeff coefficients (residual) to filter */ static void rdpcm(const int width, - const int stride, const rdpcm_dir dir, coeff_t *coeff) { - const int offset = (dir == RDPCM_HOR) ? 1 : stride; + const int offset = (dir == RDPCM_HOR) ? 1 : width; const int min_x = (dir == RDPCM_HOR) ? 1 : 0; const int min_y = (dir == RDPCM_HOR) ? 0 : 1; for (int y = width - 1; y >= min_y; y--) { for (int x = width - 1; x >= min_x; x--) { - const int index = x + y * stride; + const int index = x + y * width; coeff[index] -= coeff[index - offset]; } } @@ -209,7 +208,7 @@ * \param color Color. * \param scan_order Coefficient scan order. * \param trskip_out Whether transform skip is used. - * \param stride Stride for ref_in, pred_in rec_out and coeff_out. + * \param stride Stride for ref_in, pred_in and rec_out. 
* \param ref_in Reference pixels. * \param pred_in Predicted pixels. * \param rec_out Reconstructed pixels. @@ -261,19 +260,142 @@ // we can skip this. kvz_pixels_blit(best->rec, rec_out, width, width, 4, out_stride); } - kvz_coefficients_blit(best->coeff, coeff_out, width, width, 4, out_stride); + copy_coeffs(best->coeff, coeff_out, width); return best->has_coeffs; } +/** + * Calculate the residual coefficients for a single TU. + */ +static void quantize_tr_residual(encoder_state_t * const state, + const color_t color, + const int32_t x, + const int32_t y, + const uint8_t depth, + cu_info_t *cur_pu, + lcu_t* lcu) +{ + const kvz_config *cfg = &state->encoder_control->cfg; + const int32_t shift = color == COLOR_Y ? 0 : 1; + const vector2d_t lcu_px = { SUB_SCU(x) >> shift, SUB_SCU(y) >> shift }; + + // If luma is 4x4, do chroma for the 8x8 luma area when handling the top + // left PU because the coordinates are correct. + bool handled_elsewhere = color != COLOR_Y && + depth > MAX_DEPTH && + (lcu_px.x % 4 != 0 || lcu_px.y % 4 != 0); + if (handled_elsewhere) { + return; + } + + // Clear coded block flag structures for depths lower than current depth. + // This should ensure that the CBF data doesn't get corrupted if this function + // is called more than once. + cbf_clear(&cur_pu->cbf, depth, color); + + int32_t tr_width; + if (color == COLOR_Y) { + tr_width = LCU_WIDTH >> depth; + } else { + const int chroma_depth = (depth == MAX_PU_DEPTH ? depth - 1 : depth); + tr_width = LCU_WIDTH_C >> chroma_depth; + } + const int32_t lcu_width = LCU_WIDTH >> shift; + const int8_t mode = + (color == COLOR_Y) ? cur_pu->intra.mode : cur_pu->intra.mode_chroma; + const coeff_scan_order_t scan_idx = + kvz_get_scan_order(cur_pu->type, mode, depth); + const int offset = lcu_px.x + lcu_px.y * lcu_width; + const int z_index = xy_to_zorder(lcu_width, lcu_px.x, lcu_px.y); + + // Pointers to current location in arrays with prediction. The + // reconstruction will be written to this array. 
+ kvz_pixel *pred = NULL; + // Pointers to current location in arrays with reference. + const kvz_pixel *ref = NULL; + // Pointers to current location in arrays with quantized coefficients. + coeff_t *coeff = NULL; + + switch (color) { + case COLOR_Y: + pred = &lcu->rec.y[offset]; + ref = &lcu->ref.y[offset]; + coeff = &lcu->coeff.y[z_index]; + break; + case COLOR_U: + pred = &lcu->rec.u[offset]; + ref = &lcu->ref.u[offset]; + coeff = &lcu->coeff.u[z_index]; + break; + case COLOR_V: + pred = &lcu->rec.v[offset]; + ref = &lcu->ref.v[offset]; + coeff = &lcu->coeff.v[z_index]; + break; + } + + const bool can_use_trskip = tr_width == 4 && + color == COLOR_Y && + cfg->trskip_enable; + + bool has_coeffs; + + if (cfg->lossless) { + has_coeffs = bypass_transquant(tr_width, + lcu_width, // in stride + lcu_width, // out stride + ref, + pred, + pred, + coeff); + if (cfg->implicit_rdpcm && cur_pu->type == CU_INTRA) { + // implicit rdpcm for horizontal and vertical intra modes + if (mode == 10) { + rdpcm(tr_width, RDPCM_HOR, coeff); + } else if (mode == 26) { + rdpcm(tr_width, RDPCM_VER, coeff); + } + } + + } else if (can_use_trskip) { + // Try quantization with trskip and use it if it's better. + has_coeffs = kvz_quantize_residual_trskip(state, + cur_pu, + tr_width, + color, + scan_idx, + &cur_pu->intra.tr_skip, + lcu_width, + lcu_width, + ref, + pred, + pred, + coeff); + } else { + has_coeffs = kvz_quantize_residual(state, + cur_pu, + tr_width, + color, + scan_idx, + false, // tr skip + lcu_width, + lcu_width, + ref, + pred, + pred, + coeff); + } + + if (has_coeffs) { + cbf_set(&cur_pu->cbf, depth, color); + } +}
View file
kvazaar-1.1.0.tar.gz/src/transform.h -> kvazaar-1.2.0.tar.gz/src/transform.h
Changed
@@ -43,7 +43,13 @@ int32_t kvz_get_scaled_qp(int8_t type, int8_t qp, int8_t qp_offset); -void kvz_quantize_lcu_luma_residual(encoder_state_t *state, int32_t x, int32_t y, uint8_t depth, cu_info_t *cur_cu, lcu_t* lcu); -void kvz_quantize_lcu_chroma_residual(encoder_state_t *state, int32_t x, int32_t y, uint8_t depth, cu_info_t *cur_cu, lcu_t* lcu); +void kvz_quantize_lcu_residual(encoder_state_t *state, + bool luma, + bool chroma, + int32_t x, + int32_t y, + uint8_t depth, + cu_info_t *cur_cu, + lcu_t* lcu); #endif
View file
kvazaar-1.1.0.tar.gz/src/videoframe.c -> kvazaar-1.2.0.tar.gz/src/videoframe.c
Changed
@@ -35,26 +35,13 @@ int32_t height, enum kvz_chroma_format chroma_format) { - videoframe_t *frame = MALLOC(videoframe_t, 1); - + videoframe_t *frame = calloc(1, sizeof(videoframe_t)); if (!frame) return 0; - FILL(*frame, 0); - frame->width = width; frame->height = height; - frame->width_in_lcu = frame->width / LCU_WIDTH; - if (frame->width_in_lcu * LCU_WIDTH < frame->width) frame->width_in_lcu++; - frame->height_in_lcu = frame->height / LCU_WIDTH; - if (frame->height_in_lcu * LCU_WIDTH < frame->height) frame->height_in_lcu++; - - { - unsigned cu_array_width = frame->width_in_lcu * LCU_WIDTH; - unsigned cu_array_height = frame->height_in_lcu * LCU_WIDTH; - frame->cu_array = kvz_cu_array_alloc(cu_array_width, cu_array_height); - } - - frame->coeff_y = NULL; frame->coeff_u = NULL; frame->coeff_v = NULL; + frame->width_in_lcu = CEILDIV(frame->width, LCU_WIDTH); + frame->height_in_lcu = CEILDIV(frame->height, LCU_WIDTH); frame->sao_luma = MALLOC(sao_info_t, frame->width_in_lcu * frame->height_in_lcu); if (chroma_format != KVZ_CSP_400) { @@ -76,11 +63,7 @@ kvz_image_free(frame->rec); frame->rec = NULL; - kvz_cu_array_free(frame->cu_array); - - FREE_POINTER(frame->coeff_y); - FREE_POINTER(frame->coeff_u); - FREE_POINTER(frame->coeff_v); + kvz_cu_array_free(&frame->cu_array); FREE_POINTER(frame->sao_luma); FREE_POINTER(frame->sao_chroma); @@ -93,17 +76,3 @@ void kvz_videoframe_set_poc(videoframe_t * const frame, const int32_t poc) { frame->poc = poc; } - -const cu_info_t* kvz_videoframe_get_cu_const(const videoframe_t * const frame, - unsigned int x_in_scu, - unsigned int y_in_scu) -{ - return kvz_cu_array_at_const(frame->cu_array, x_in_scu << 3, y_in_scu << 3); -} - -cu_info_t* kvz_videoframe_get_cu(videoframe_t * const frame, - const unsigned int x_in_scu, - const unsigned int y_in_scu) -{ - return kvz_cu_array_at(frame->cu_array, x_in_scu << 3, y_in_scu << 3); -}
View file
kvazaar-1.1.0.tar.gz/src/videoframe.h -> kvazaar-1.2.0.tar.gz/src/videoframe.h
Changed
@@ -39,10 +39,6 @@ kvz_picture *source; //!< \brief Source image. kvz_picture *rec; //!< \brief Reconstructed image. - coeff_t* coeff_y; //!< \brief coefficient pointer Y - coeff_t* coeff_u; //!< \brief coefficient pointer U - coeff_t* coeff_v; //!< \brief coefficient pointer V - int32_t width; //!< \brief Luma pixel array width. int32_t height; //!< \brief Luma pixel array height. int32_t height_in_lcu; //!< \brief Picture height in number of LCUs. @@ -60,7 +56,4 @@ void kvz_videoframe_set_poc(videoframe_t * frame, int32_t poc); -const cu_info_t* kvz_videoframe_get_cu_const(const videoframe_t * const frame, unsigned int x_in_scu, unsigned int y_in_scu); -cu_info_t* kvz_videoframe_get_cu(videoframe_t * const frame, const unsigned int x_in_scu, const unsigned int y_in_scu); - #endif
View file
kvazaar-1.1.0.tar.gz/tests/Makefile.am -> kvazaar-1.2.0.tar.gz/tests/Makefile.am
Changed
@@ -1,9 +1,22 @@ -TESTS = $(check_PROGRAMS) +TESTS = $(check_PROGRAMS) \ + test_external_symbols.sh \ + test_gop.sh \ + test_interlace.sh \ + test_intra.sh \ + test_invalid_input.sh \ + test_mv_constraint.sh \ + test_owf_wpp_tiles.sh \ + test_rate_control.sh \ + test_slices.sh \ + test_smp.sh \ + test_tools.sh \ + test_weird_shapes.sh check_PROGRAMS = kvazaar_tests kvazaar_tests_SOURCES = \ + coeff_sum_tests.c \ dct_tests.c \ intra_sad_tests.c \ mv_cand_tests.c \ @@ -18,3 +31,15 @@ kvazaar_tests_CFLAGS = -I$(srcdir) -I$(top_srcdir) -I$(top_srcdir)/src kvazaar_tests_LDFLAGS = -static $(top_builddir)/src/libkvazaar.la $(LIBS) +# This makes sure that CXXLD gets defined. +nodist_EXTRA_kvazaar_tests_SOURCES = cpp.cpp + +if USE_CRYPTOPP +kvazaar_tests_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CXX $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CXXLD) $(kvazaar_tests_CFLAGS) $(CXXFLAGS) \ + $(kvazaar_tests_LDFLAGS) $(LDFLAGS) -o $@ +else +kvazaar_tests_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CCLD) $(kvazaar_tests_CFLAGS) $(CFLAGS) \ + $(kvazaar_tests_LDFLAGS) $(LDFLAGS) -o $@ +endif
View file
kvazaar-1.2.0.tar.gz/tests/coeff_sum_tests.c
Added
@@ -0,0 +1,63 @@ +/***************************************************************************** + * This file is part of Kvazaar HEVC encoder. + * + * Copyright (C) 2017 Tampere University of Technology and others (see + * COPYING file). + * + * Kvazaar is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License version 2.1 as + * published by the Free Software Foundation. + * + * Kvazaar is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Kvazaar. If not, see <http://www.gnu.org/licenses/>. + ****************************************************************************/ + +#include "greatest/greatest.h" + +#include "test_strategies.h" + +#include <string.h> + +static coeff_t coeff_test_data[64 * 64]; +static uint32_t expected_test_result; + +static void setup() +{ + // Fill test data. + coeff_t value = INT16_MIN; + for (int i = 0; i < 64 * 64; i++) { + coeff_test_data[i] = value; + value += 16; + } + + // Calculate expected result using the formula for an arithmetic sum. + expected_test_result = + 2048 * (16 - INT16_MIN) / 2 + + 2048 * 2047 * 16 / 2; +} + +TEST test_coeff_abs_sum() +{ + uint32_t sum = kvz_coeff_abs_sum(coeff_test_data, 64 * 64); + ASSERT_EQ(sum, expected_test_result); + PASS(); +} + +SUITE(coeff_sum_tests) +{ + setup(); + + for (volatile int i = 0; i < strategies.count; ++i) { + if (strcmp(strategies.strategies[i].type, "coeff_abs_sum") != 0) { + continue; + } + + kvz_coeff_abs_sum = strategies.strategies[i].fptr; + RUN_TEST(test_coeff_abs_sum); + } +}
View file
kvazaar-1.1.0.tar.gz/tests/intra_sad_tests.c -> kvazaar-1.2.0.tar.gz/tests/intra_sad_tests.c
Changed
@@ -177,7 +177,7 @@ // Loop through all strategies picking out the intra sad ones and run // selected strategies through all tests. - for (unsigned i = 0; i < strategies.count; ++i) { + for (volatile unsigned i = 0; i < strategies.count; ++i) { const char * type = strategies.strategies[i].type; if (strcmp(type, "sad_4x4") == 0) {
View file
kvazaar-1.1.0.tar.gz/tests/mv_cand_tests.c -> kvazaar-1.2.0.tar.gz/tests/mv_cand_tests.c
Changed
@@ -31,22 +31,19 @@ lcu.cu[i].type = CU_INTER; } - cu_info_t *mv_cand[5] = { NULL }; + merge_candidates_t cand = { {0, 0}, {0, 0, 0}, 0, 0 }; + get_spatial_merge_candidates(64 + 32, 64, // x, y 32, 24, // width, height 1920, 1080, // picture size - &mv_cand[0], // b0 - &mv_cand[1], // b1 - &mv_cand[2], // b2 - &mv_cand[3], // a0 - &mv_cand[4], // a1 - &lcu); - - ASSERT_EQ(mv_cand[0], &lcu.cu[289]); // b0 - ASSERT_EQ(mv_cand[1], &lcu.cu[ 16]); // b1 - ASSERT_EQ(mv_cand[2], &lcu.cu[ 8]); // b2 - ASSERT_EQ(mv_cand[3], &lcu.cu[127]); // a0 - ASSERT_EQ(mv_cand[4], &lcu.cu[110]); // a1 + &lcu, + &cand); + + ASSERT_EQ(cand.b[0], &lcu.cu[289]); + ASSERT_EQ(cand.b[1], &lcu.cu[ 16]); + ASSERT_EQ(cand.b[2], &lcu.cu[ 8]); + ASSERT_EQ(cand.a[0], &lcu.cu[127]); + ASSERT_EQ(cand.a[1], &lcu.cu[110]); PASS(); }
View file
kvazaar-1.1.0.tar.gz/tests/sad_tests.c -> kvazaar-1.2.0.tar.gz/tests/sad_tests.c
Changed
@@ -31,7 +31,7 @@ ////////////////////////////////////////////////////////////////////////// // DEFINES -#define TEST_SAD(X, Y) kvz_image_calc_sad(g_pic, g_ref, 0, 0, (X), (Y), 8, 8, -1) +#define TEST_SAD(X, Y) kvz_image_calc_sad(g_pic, g_ref, 0, 0, (X), (Y), 8, 8) ////////////////////////////////////////////////////////////////////////// // GLOBALS @@ -378,7 +378,7 @@ sad_test_env.tested_func = strategies.strategies[i].fptr; sad_test_env.strategy = &strategies.strategies[i]; int num_dim_tests = sizeof(tested_dims) / sizeof(tested_dims[0]); - for (int dim_test = 0; dim_test < num_dim_tests; ++dim_test) { + for (volatile int dim_test = 0; dim_test < num_dim_tests; ++dim_test) { sad_test_env.width = tested_dims[dim_test].width; sad_test_env.height = tested_dims[dim_test].height; RUN_TEST(test_reg_sad);
View file
kvazaar-1.1.0.tar.gz/tests/satd_tests.c -> kvazaar-1.2.0.tar.gz/tests/satd_tests.c
Changed
@@ -167,7 +167,7 @@ // Loop through all strategies picking out the intra sad ones and run // selected strategies through all tests. - for (unsigned i = 0; i < strategies.count; ++i) { + for (volatile unsigned i = 0; i < strategies.count; ++i) { const char * type = strategies.strategies[i].type; if (strcmp(type, "satd_4x4") == 0) {
View file
kvazaar-1.1.0.tar.gz/tests/speed_tests.c -> kvazaar-1.2.0.tar.gz/tests/speed_tests.c
Changed
@@ -405,7 +405,7 @@ int num_tested_dims = sizeof(tested_dims) / sizeof(*tested_dims); // Call reg_sad with all the sizes it is actually called with. - for (int dim_i = 0; dim_i < num_tested_dims; ++dim_i) { + for (volatile int dim_i = 0; dim_i < num_tested_dims; ++dim_i) { test_env.width = tested_dims[dim_i].x; test_env.height = tested_dims[dim_i].y; RUN_TEST(inter_sad);
View file
kvazaar-1.2.0.tar.gz/tests/test_external_symbols.sh
Added
@@ -0,0 +1,10 @@ +#!/bin/sh + +# Check for external symbols without kvz_ prefix. + +set -eu${BASH+o pipefail} + +if nm -go --defined-only ../src/.libs/libkvazaar.a | grep -v ' kvz_'; then + printf '%s\n' 'Only symbols prefixed with "kvz_" should be exported from libkvazaar.' + false +fi
View file
kvazaar-1.2.0.tar.gz/tests/test_gop.sh
Added
@@ -0,0 +1,12 @@ +#!/bin/sh + +# Test GOP, with and without OWF. + +set -eu +. "${0%/*}/util.sh" + +common_args='-p0 --threads=2 --wpp --rd=0 --no-rdoq --no-deblock --no-sao --no-signhide --subme=0 --pu-depth-inter=1-3 --pu-depth-intra=2-3' +valgrind_test 264x130 10 $common_args --gop=8 -p0 --owf=1 +valgrind_test 264x130 10 $common_args --gop=8 -p0 --owf=4 +valgrind_test 264x130 20 $common_args --gop=8 -p16 --owf=0 +valgrind_test 264x130 10 $common_args --gop=lp-g4d3t1 -p5 --owf=4
View file
kvazaar-1.2.0.tar.gz/tests/test_interlace.sh
Added
@@ -0,0 +1,6 @@ +#!/bin/sh + +set -eu +. "${0%/*}/util.sh" + +valgrind_test 264x130 10 --source-scan-type=tff -p0 --preset=ultrafast --threads=2 --owf=1 --wpp
View file
kvazaar-1.2.0.tar.gz/tests/test_intra.sh
Added
@@ -0,0 +1,11 @@ +#!/bin/sh + +# Test all-intra coding. + +set -eu + +. "${0%/*}/util.sh" + +common_args='264x130 10 -p1 --threads=2 --owf=1 --no-rdoq --no-deblock --no-sao --no-signhide' +valgrind_test $common_args --rd=1 +valgrind_test $common_args --rd=2 --no-transform-skip
View file
kvazaar-1.2.0.tar.gz/tests/test_invalid_input.sh
Added
@@ -0,0 +1,8 @@ +#!/bin/sh + +# Test trying to use invalid input dimensions. + +set -eu +. "${0%/*}/util.sh" + +encode_test 1x65 1 1
View file
kvazaar-1.2.0.tar.gz/tests/test_mv_constraint.sh
Added
@@ -0,0 +1,7 @@ +#!/bin/sh + +set -eu +. "${0%/*}/util.sh" + +valgrind_test 264x130 10 --threads=2 --owf=1 --preset=ultrafast --pu-depth-inter=0-3 --mv-constraint=frametilemargin +valgrind_test 264x130 10 --threads=2 --owf=1 --preset=ultrafast --subme=4 --mv-constraint=frametilemargin
View file
kvazaar-1.2.0.tar.gz/tests/test_owf_wpp_tiles.sh
Added
@@ -0,0 +1,18 @@ +#!/bin/sh + +# Test OWF, WPP and tiles. There are lots of separate branches of code +# related to owf == 0 and owf != 0, which is why all permutations are +# tried. + +set -eu +. "${0%/*}/util.sh" + +common_args='-p4 --rd=0 --no-rdoq --no-signhide --subme=0 --deblock --sao --pu-depth-inter=1-3 --pu-depth-intra=2-3' +valgrind_test 264x130 10 $common_args -r1 --owf=1 --threads=0 --no-wpp +valgrind_test 264x130 10 $common_args -r1 --owf=0 --threads=0 --no-wpp +valgrind_test 264x130 10 $common_args -r2 --owf=1 --threads=2 --wpp +valgrind_test 264x130 10 $common_args -r2 --owf=0 --threads=2 --no-wpp +valgrind_test 264x130 10 $common_args -r2 --owf=1 --threads=2 --tiles-height-split=u2 --no-wpp +valgrind_test 264x130 10 $common_args -r2 --owf=0 --threads=2 --tiles-height-split=u2 --no-wpp +valgrind_test 512x512 3 $common_args -r2 --owf=1 --threads=2 --tiles=2x2 --no-wpp +valgrind_test 512x512 3 $common_args -r2 --owf=0 --threads=2 --tiles=2x2 --no-wpp
View file
kvazaar-1.2.0.tar.gz/tests/test_rate_control.sh
Added
@@ -0,0 +1,6 @@ +#!/bin/sh + +set -eu +. "${0%/*}/util.sh" + +valgrind_test 264x130 10 --bitrate=500000 -p0 -r1 --owf=1 --threads=2 --rd=0 --no-rdoq --no-deblock --no-sao --no-signhide --subme=0 --pu-depth-inter=1-3 --pu-depth-intra=2-3
View file
kvazaar-1.2.0.tar.gz/tests/test_slices.sh
Added
@@ -0,0 +1,7 @@ +#!/bin/sh + +set -eu +. "${0%/*}/util.sh" + +valgrind_test 512x256 10 --threads=2 --owf=1 --preset=ultrafast --tiles=2x2 --slices=tiles +valgrind_test 264x130 10 --threads=2 --owf=1 --preset=ultrafast --slices=wpp
View file
kvazaar-1.2.0.tar.gz/tests/test_smp.sh
Added
@@ -0,0 +1,10 @@ +#!/bin/sh + +# Test SMP and AMP blocks. + +set -eu +. "${0%/*}/util.sh" + +valgrind_test 264x130 4 --threads=2 --owf=1 --wpp --smp +valgrind_test 264x130 4 --threads=2 --owf=1 --wpp --amp +valgrind_test 264x130 4 --threads=2 --owf=1 --wpp --smp --amp
View file
kvazaar-1.1.0.tar.gz/tests/test_strategies.c -> kvazaar-1.2.0.tar.gz/tests/test_strategies.c
Changed
@@ -44,4 +44,9 @@ fprintf(stderr, "strategy_register_dct failed!\n"); return; } + + if (!kvz_strategy_register_quant(&strategies, KVZ_BIT_DEPTH)) { + fprintf(stderr, "strategy_register_quant failed!\n"); + return; + } }
View file
kvazaar-1.2.0.tar.gz/tests/test_tools.sh
Added
@@ -0,0 +1,12 @@ +#!/bin/sh + +# Test RDOQ, SAO, deblock, signhide and subme. + +set -eu +. "${0%/*}/util.sh" + +common_args='264x130 10 -p0 -r1 --threads=2 --wpp --owf=1 --rd=0' + +valgrind_test $common_args --no-rdoq --no-deblock --no-sao --no-signhide --subme=1 --pu-depth-intra=2-3 +valgrind_test $common_args --no-rdoq --no-signhide --subme=0 +valgrind_test $common_args --rdoq --no-deblock --no-sao --subme=0
View file
kvazaar-1.2.0.tar.gz/tests/test_weird_shapes.sh
Added
@@ -0,0 +1,8 @@ +#!/bin/sh + +set -eu +. "${0%/*}/util.sh" + +valgrind_test 16x16 10 --threads=2 --owf=1 --preset=veryslow +valgrind_test 256x16 10 --threads=2 --owf=1 --preset=veryslow +valgrind_test 16x256 10 --threads=2 --owf=1 --preset=veryslow
View file
kvazaar-1.1.0.tar.gz/tests/tests_main.c -> kvazaar-1.2.0.tar.gz/tests/tests_main.c
Changed
@@ -30,6 +30,7 @@ extern SUITE(dct_tests); #endif //KVZ_BIT_DEPTH == 8 +extern SUITE(coeff_sum_tests); extern SUITE(mv_cand_tests); int main(int argc, char **argv) @@ -52,6 +53,8 @@ printf("10-bit tests are not yet supported\n"); #endif //KVZ_BIT_DEPTH == 8 + RUN_SUITE(coeff_sum_tests); + RUN_SUITE(mv_cand_tests); GREATEST_MAIN_END();
View file
kvazaar-1.2.0.tar.gz/tests/util.sh
Added
@@ -0,0 +1,65 @@ +#!/bin/sh + +# Helper functions for test scripts. + +set -eu${BASH+o pipefail} + +# Temporary files for encoder input and output. +yuvfile="$(mktemp)" +hevcfile="$(mktemp)" + +cleanup() { + rm -rf "${yuvfile}" "${hevcfile}" +} +trap cleanup EXIT + +print_and_run() { + printf '\n\n$ %s\n' "$*" + "$@" +} + +prepare() { + cleanup + print_and_run \ + ffmpeg -f lavfi -i "mandelbrot=size=${1}" \ + -vframes "${2}" -pix_fmt yuv420p -f yuv4mpegpipe \ + "${yuvfile}" +} + +valgrind_test() { + dimensions="$1" + shift + frames="$1" + shift + + prepare "${dimensions}" "${frames}" + + print_and_run \ + libtool execute \ + valgrind --leak-check=full --error-exitcode=1 -- \ + ../src/kvazaar -i "${yuvfile}" "--input-res=${dimensions}" -o "${hevcfile}" "$@" + + print_and_run \ + TAppDecoderStatic -b "${hevcfile}" + + cleanup +} + +encode_test() { + dimensions="$1" + shift + frames="$1" + shift + expected_status="$1" + shift + + prepare "${dimensions}" "${frames}" + + set +e + print_and_run \ + libtool execute \ + ../src/kvazaar -i "${yuvfile}" "--input-res=${dimensions}" -o "${hevcfile}" "$@" + actual_status="$?" + set -e + [ ${actual_status} -eq ${expected_status} ] +}
Locations
Projects
Search
Status Monitor
Help
Open Build Service
OBS Manuals
API Documentation
OBS Portal
Reporting a Bug
Contact
Mailing List
Forums
Chat (IRC)
Twitter
Open Build Service (OBS)
is an
openSUSE project
.