Projects
Staging
x265
Sign Up
Log In
Username
Password
Problem getting expanded diff: bad link: could not apply patch 'project.diff'
×
We truncated the diff of some files because they were too big. If you want to see the full diff for every file,
click here
.
Overview
Repositories
Revisions
Requests
Users
Attributes
Meta
Expand all
Collapse all
Changes of Revision 12
View file
x265.changes
Changed
@@ -1,4 +1,40 @@ ------------------------------------------------------------------- +Mon Jun 1 17:51:22 UTC 2020 - Luigi Baldoni <aloisio@gmx.com> + +- Update to version 3.4 + New features: + * Edge-aware quadtree partitioning to terminate CU depth + recursion based on edge information. --rskip level 2 enables + the feature and --rskip-edge-threshold denotes the minimum + expected edge-density percentage within the CU, below which + the recursion is skipped. Experimental feature. + * Application-level feature --abr-ladder for automating + efficient ABR ladder generation. Shows ~65% savings in the + over-all turn-around time required for the generation of a + typical Apple HLS ladder in Intel(R) Xeon(R) Platinum 8280 + CPU @ 2.70GHz over a sequential ABR-ladder generation + approach that leverages save-load architecture. + Enhancements to existing features: + * Improved efficiency in 2-pass rate-control algorithm. The + savings in the bitrate is ~1.72% with visual improvement in + quality in the initial 1-2 secs. + Encoder enhancements: + * Faster ARM64 encodes enabled by ASM contributions from + Huawei. The speed-up over no-asm version for 1080p encodes @ + medium preset is ~15% in a 16 core H/W. + * Strict VBV conformance in zone encoding. + Bug fixes: + * Multi-pass encode failures with --frame-dup. + * Corrupted bitstreams with --hist-scenecut when input depth + and internal bit-depth differ. + * Incorrect analysis propagation in multi-level save-load + architecture. + * Failure in detecting NUMA packages installed in non-standard + directories. + +- Refreshed arm.patch + +------------------------------------------------------------------- Sat Mar 28 14:28:56 UTC 2020 - Luigi Baldoni <aloisio@gmx.com> - Update to version 3.3
View file
x265.spec
Changed
@@ -17,11 +17,11 @@ # -%define sover 188 +%define sover 192 %define libname lib%{name} %define libsoname %{libname}-%{sover} Name: x265 -Version: 3.3 +Version: 3.4 Release: 0 Summary: A free h265/HEVC encoder - encoder binary License: GPL-2.0-or-later @@ -67,7 +67,6 @@ %patch0 -p1 %patch1 -p1 %patch2 -p1 - sed -i -e "s/0.0/%{sover}.0/g" source/cmake/version.cmake
View file
arm.patch
Changed
@@ -1,8 +1,8 @@ -Index: x265_2.2/source/CMakeLists.txt +Index: x265_3.4/source/CMakeLists.txt =================================================================== ---- x265_2.2.orig/source/CMakeLists.txt -+++ x265_2.2/source/CMakeLists.txt -@@ -65,15 +65,22 @@ elseif(POWERMATCH GREATER "-1") +--- x265_3.4.orig/source/CMakeLists.txt ++++ x265_3.4/source/CMakeLists.txt +@@ -64,26 +64,26 @@ elseif(POWERMATCH GREATER "-1") add_definitions(-DPPC64=1) message(STATUS "Detected POWER PPC64 target processor") endif() @@ -12,41 +12,62 @@ - else() - set(CROSS_COMPILE_ARM 0) - endif() -- message(STATUS "Detected ARM target processor") - set(ARM 1) -- add_definitions(-DX265_ARCH_ARM=1 -DHAVE_ARMV6=1) +- if("${CMAKE_SIZEOF_VOID_P}" MATCHES 8) +- message(STATUS "Detected ARM64 target processor") +- set(ARM64 1) +- add_definitions(-DX265_ARCH_ARM=1 -DX265_ARCH_ARM64=1 -DHAVE_ARMV6=0) +- else() +- message(STATUS "Detected ARM target processor") +- add_definitions(-DX265_ARCH_ARM=1 -DX265_ARCH_ARM64=0 -DHAVE_ARMV6=1) +- endif() +elseif(${SYSPROC} MATCHES "armv5.*") + message(STATUS "Detected ARMV5 system processor") + set(ARMV5 1) -+ add_definitions(-DX265_ARCH_ARM=1 -DHAVE_ARMV6=0 -DHAVE_NEON=0) ++ add_definitions(-DX265_ARCH_ARM=1 -DX265_ARCH_ARM64=0 -DHAVE_ARMV6=0 -DHAVE_NEON=0) +elseif(${SYSPROC} STREQUAL "armv6l") + message(STATUS "Detected ARMV6 system processor") + set(ARMV6 1) -+ add_definitions(-DX265_ARCH_ARM=1 -DHAVE_ARMV6=1 -DHAVE_NEON=0) ++ add_definitions(-DX265_ARCH_ARM=1 -DX265_ARCH_ARM64=0 -DHAVE_ARMV6=1 -DHAVE_NEON=0) +elseif(${SYSPROC} STREQUAL "armv7l") + message(STATUS "Detected ARMV7 system processor") + set(ARMV7 1) -+ add_definitions(-DX265_ARCH_ARM=1 -DHAVE_ARMV6=1 -DHAVE_NEON=0) ++ add_definitions(-DX265_ARCH_ARM=1 -DX265_ARCH_ARM64=0 -DHAVE_ARMV6=1 -DHAVE_NEON=0) +elseif(${SYSPROC} STREQUAL "aarch64") + message(STATUS "Detected AArch64 system processor") + set(ARMV7 1) -+ add_definitions(-DX265_ARCH_ARM=1 -DHAVE_ARMV6=1 -DHAVE_NEON=0) ++ 
add_definitions(-DX265_ARCH_ARM=1 -DX265_ARCH_ARM64=1 -DHAVE_ARMV6=0 -DHAVE_NEON=0) else() message(STATUS "CMAKE_SYSTEM_PROCESSOR value `${CMAKE_SYSTEM_PROCESSOR}` is unknown") message(STATUS "Please add this value near ${CMAKE_CURRENT_LIST_FILE}:${CMAKE_CURRENT_LIST_LINE}") -@@ -208,18 +215,9 @@ if(GCC) + endif() +- + if(UNIX) + list(APPEND PLATFORM_LIBS pthread) + find_library(LIBRT rt) +@@ -238,28 +238,9 @@ if(GCC) endif() endif() endif() - if(ARM AND CROSS_COMPILE_ARM) -- set(ARM_ARGS -march=armv6 -mfloat-abi=soft -mfpu=vfp -marm -fPIC) +- if(ARM64) +- set(ARM_ARGS -fPIC) +- else() +- set(ARM_ARGS -march=armv6 -mfloat-abi=soft -mfpu=vfp -marm -fPIC) +- endif() +- message(STATUS "cross compile arm") - elseif(ARM) -- find_package(Neon) -- if(CPU_HAS_NEON) -- set(ARM_ARGS -mcpu=native -mfloat-abi=hard -mfpu=neon -marm -fPIC) +- if(ARM64) +- set(ARM_ARGS -fPIC) - add_definitions(-DHAVE_NEON) - else() -- set(ARM_ARGS -mcpu=native -mfloat-abi=hard -mfpu=vfp -marm) +- find_package(Neon) +- if(CPU_HAS_NEON) +- set(ARM_ARGS -mcpu=native -mfloat-abi=hard -mfpu=neon -marm -fPIC) +- add_definitions(-DHAVE_NEON) +- else() +- set(ARM_ARGS -mcpu=native -mfloat-abi=hard -mfpu=vfp -marm) +- endif() - endif() + if(ARMV7) + add_definitions(-fPIC) @@ -55,11 +76,11 @@ if(FPROFILE_GENERATE) if(INTEL_CXX) add_definitions(-prof-gen -prof-dir="${CMAKE_CURRENT_BINARY_DIR}") -Index: x265_2.2/source/common/cpu.cpp +Index: x265_3.4/source/common/cpu.cpp =================================================================== ---- x265_2.2.orig/source/common/cpu.cpp -+++ x265_2.2/source/common/cpu.cpp -@@ -37,7 +37,7 @@ +--- x265_3.4.orig/source/common/cpu.cpp ++++ x265_3.4/source/common/cpu.cpp +@@ -39,7 +39,7 @@ #include <machine/cpu.h> #endif @@ -68,7 +89,7 @@ #include <signal.h> #include <setjmp.h> static sigjmp_buf jmpbuf; -@@ -344,7 +344,6 @@ uint32_t cpu_detect(void) +@@ -350,7 +350,6 @@ uint32_t cpu_detect(bool benableavx512) } canjump = 1; @@ -76,7 +97,7 @@ canjump = 0; signal(SIGILL, 
oldsig); #endif // if !HAVE_NEON -@@ -360,7 +359,7 @@ uint32_t cpu_detect(void) +@@ -366,7 +365,7 @@ uint32_t cpu_detect(bool benableavx512) // which may result in incorrect detection and the counters stuck enabled. // right now Apple does not seem to support performance counters for this test #ifndef __MACH__ @@ -84,4 +105,4 @@ + //flags |= PFX(cpu_fast_neon_mrc_test)() ? X265_CPU_FAST_NEON_MRC : 0; #endif // TODO: write dual issue test? currently it's A8 (dual issue) vs. A9 (fast mrc) - #endif // if HAVE_ARMV6 + #elif X265_ARCH_ARM64
View file
baselibs.conf
Changed
@@ -1,1 +1,1 @@ -libx265-179 +libx265-192
View file
x265_3.3.tar.gz/.hg_archival.txt -> x265_3.4.tar.gz/.hg_archival.txt
Changed
@@ -1,5 +1,4 @@ repo: 09fe40627f03a0f9c3e6ac78b22ac93da23f9fdf -node: f94b0d32737d40b2b9a9d74df57fee45e6be5cb0 -branch: Release_3.3 -latesttag: 3.3 -latesttagdistance: 1 +node: 2a65b720985096bcb1664f7cb05c3d04aeb576f5 +branch: Release_3.4 +tag: 3.4
View file
x265_3.3.tar.gz/.hgtags -> x265_3.4.tar.gz/.hgtags
Changed
@@ -40,3 +40,4 @@ 5ee3593ebd82b4d8957909bbc1b68b99b59ba773 3.3_RC1 96a10df63c0b778b480330bdf3be8da7db8a5fb1 3.3_RC2 057215961bc4b51b6260a584ff3d506e6d65cfd6 3.3 +ee92f36782800f145970131e01c79955a3ed5c10 3.4_RC1
View file
x265_3.4.tar.gz/build/aarch64-linux/crosscompile.cmake
Added
@@ -0,0 +1,15 @@ +# CMake toolchain file for cross compiling x265 for aarch64 +# This feature is only supported as experimental. Use with caution. +# Please report bugs on bitbucket +# Run cmake with: cmake -DCMAKE_TOOLCHAIN_FILE=crosscompile.cmake -G "Unix Makefiles" ../../source && ccmake ../../source + +set(CROSS_COMPILE_ARM 1) +set(CMAKE_SYSTEM_NAME Linux) +set(CMAKE_SYSTEM_PROCESSOR aarch64) + +# specify the cross compiler +set(CMAKE_C_COMPILER aarch64-linux-gnu-gcc) +set(CMAKE_CXX_COMPILER aarch64-linux-gnu-g++) + +# specify the target environment +SET(CMAKE_FIND_ROOT_PATH /usr/aarch64-linux-gnu)
View file
x265_3.4.tar.gz/build/aarch64-linux/make-Makefiles.bash
Added
@@ -0,0 +1,4 @@ +#!/bin/bash +# Run this from within a bash shell + +cmake -DCMAKE_TOOLCHAIN_FILE="crosscompile.cmake" -G "Unix Makefiles" ../../source && ccmake ../../source
View file
x265_3.3.tar.gz/doc/reST/cli.rst -> x265_3.4.tar.gz/doc/reST/cli.rst
Changed
@@ -107,6 +107,9 @@ **BufferFillFinal** Buffer bits available after removing the frame out of CPB. + **UnclippedBufferFillFinal** Unclipped buffer bits available after removing the frame + out of CPB only used for csv logging purpose. + **Latency** Latency in terms of number of frames between when the frame was given in and when the frame is given out. @@ -842,15 +845,31 @@ Measure 2Nx2N merge candidates first; if no residual is found, additional modes at that depth are not analysed. Default disabled -.. option:: --rskip, --no-rskip +.. option:: --rskip <0|1|2> + + This option determines early exit from CU depth recursion in modes 1 and 2. When a skip CU is + found, additional heuristics (depending on the RD level and rskip mode) are used to decide whether + to terminate recursion. The following table summarizes the behavior. + + +----------+------------+----------------------------------------------------------------+ + | RD Level | Rskip Mode | Skip Recursion Heuristic | + +==========+============+================================================================+ + | 0 - 4 | 1 | Neighbour costs and CU homogenity. | + +----------+------------+----------------------------------------------------------------+ + | 5 - 6 | 1 | Comparison with inter2Nx2N. | + +----------+------------+----------------------------------------------------------------+ + | 0 - 6 | 2 | CU edge density. | + +----------+------------+----------------------------------------------------------------+ + + Provides minimal quality degradation at good performance gains for non-zero modes. + :option:`--rskip mode 0` means disabled. Default: 1, disabled when :option:`--tune grain` is used. + This is a integer value representing the edge-density percentage within the CU. Internally normalized to a number between 0.0 to 1.0 in x265. + Recommended low thresholds for slow encodes and high for fast encodes. - This option determines early exit from CU depth recursion. 
When a skip CU is - found, additional heuristics (depending on rd-level) are used to decide whether - to terminate recursion. In rdlevels 5 and 6, comparison with inter2Nx2N is used, - while at rdlevels 4 and neighbour costs are used to skip recursion. - Provides minimal quality degradation at good performance gains when enabled. +.. option:: --rskip-edge-threshold <0..100> - Default: enabled, disabled for :option:`--tune grain` + Denotes the minimum expected edge-density percentage within the CU, below which the recursion is skipped. + Default: 5, requires :option:`--rskip mode 2` to be enabled. .. option:: --splitrd-skip, --no-splitrd-skip @@ -2501,6 +2520,28 @@ --recon-y4m-exec "ffplay -i pipe:0 -autoexit" **CLI ONLY** + +ABR-ladder Options +================== + +.. option:: --abr-ladder <filename> + + File containing the encoder configurations to generate ABR ladder. + The format of each line is: + + **<encID:reuse-level:refID> <CLI>** + + where, encID indicates the unique name given to the encode, refID indicates + the name of the encode from which analysis info has to be re-used ( set to 'nil' + if analysis reuse isn't preferred ), and reuse-level indicates the level ( :option:`--analysis-load-reuse-level`) + at which analysis info has to be reused. + + A sample config file is available in `the downloads page <https://bitbucket.org/multicoreware/x265/downloads/Sample_ABR_ladder_config>`_ + + Default: Disabled ( Conventional single encode generation ). Experimental feature. + + **CLI ONLY** + SVT-HEVC Encoder Options ========================
View file
x265_3.3.tar.gz/doc/reST/releasenotes.rst -> x265_3.4.tar.gz/doc/reST/releasenotes.rst
Changed
@@ -2,6 +2,32 @@ Release Notes ************* +Version 3.4 +=========== + +Release date - 29th May, 2020. + +New features +------------ +1. **Edge-aware quadtree partitioning** to terminate CU depth recursion based on edge information. :option:`--rskip` level 2 enables the feature and :option:`--rskip-edge-threshold` denotes the minimum expected edge-density percentage within the CU, below which the recursion is skipped. Experimental feature. +2. Application-level feature :option:`--abr-ladder` for automating efficient ABR ladder generation. Shows ~65% savings in the over-all turn-around time required for the generation of a typical Apple HLS ladder in Intel(R) Xeon(R) Platinum 8280 CPU @ 2.70GHz over a sequential ABR-ladder generation approach that leverages save-load architecture. + +Enhancements to existing features +--------------------------------- +1. Improved efficiency in 2-pass rate-control algorithm. The savings in the bitrate is ~1.72% with visual improvement in quality in the initial 1-2 secs. + +Encoder enhancements +-------------------- +1. Faster ARM64 encodes enabled by ASM contributions from Huawei. The speed-up over no-asm version for 1080p encodes @ medium preset is ~15% in a 16 core H/W. +2. Strict VBV conformance in zone encoding. + +Bug fixes +--------- +1. Multi-pass encode failures with :option:`--frame-dup`. +2. Corrupted bitstreams with :option:`--hist-scenecut` when input depth and internal bit-depth differ. +3. Incorrect analysis propagation in multi-level save-load architecture. +4. Failure in detecting NUMA packages installed in non-standard directories. + Version 3.3 ===========
View file
x265_3.3.tar.gz/source/CMakeLists.txt -> x265_3.4.tar.gz/source/CMakeLists.txt
Changed
@@ -29,7 +29,7 @@ option(STATIC_LINK_CRT "Statically link C runtime for release builds" OFF) mark_as_advanced(FPROFILE_USE FPROFILE_GENERATE NATIVE_BUILD) # X265_BUILD must be incremented each time the public API is changed -set(X265_BUILD 188) +set(X265_BUILD 192) configure_file("${PROJECT_SOURCE_DIR}/x265.def.in" "${PROJECT_BINARY_DIR}/x265.def") configure_file("${PROJECT_SOURCE_DIR}/x265_config.h.in" @@ -40,7 +40,7 @@ # System architecture detection string(TOLOWER "${CMAKE_SYSTEM_PROCESSOR}" SYSPROC) set(X86_ALIASES x86 i386 i686 x86_64 amd64) -set(ARM_ALIASES armv6l armv7l) +set(ARM_ALIASES armv6l armv7l aarch64) list(FIND X86_ALIASES "${SYSPROC}" X86MATCH) list(FIND ARM_ALIASES "${SYSPROC}" ARMMATCH) set(POWER_ALIASES ppc64 ppc64le) @@ -70,9 +70,15 @@ else() set(CROSS_COMPILE_ARM 0) endif() - message(STATUS "Detected ARM target processor") set(ARM 1) - add_definitions(-DX265_ARCH_ARM=1 -DHAVE_ARMV6=1) + if("${CMAKE_SIZEOF_VOID_P}" MATCHES 8) + message(STATUS "Detected ARM64 target processor") + set(ARM64 1) + add_definitions(-DX265_ARCH_ARM=1 -DX265_ARCH_ARM64=1 -DHAVE_ARMV6=0) + else() + message(STATUS "Detected ARM target processor") + add_definitions(-DX265_ARCH_ARM=1 -DX265_ARCH_ARM64=0 -DHAVE_ARMV6=1) + endif() else() message(STATUS "CMAKE_SYSTEM_PROCESSOR value `${CMAKE_SYSTEM_PROCESSOR}` is unknown") message(STATUS "Please add this value near ${CMAKE_CURRENT_LIST_FILE}:${CMAKE_CURRENT_LIST_LINE}") @@ -95,6 +101,8 @@ if(NUMA_FOUND) link_directories(${NUMA_LIBRARY_DIR}) list(APPEND CMAKE_REQUIRED_LIBRARIES numa) + list(APPEND CMAKE_REQUIRED_INCLUDES ${NUMA_INCLUDE_DIR}) + list(APPEND CMAKE_REQUIRED_LINK_OPTIONS "-L${NUMA_LIBRARY_DIR}") check_symbol_exists(numa_node_of_cpu numa.h NUMA_V2) if(NUMA_V2) add_definitions(-DHAVE_LIBNUMA) @@ -231,14 +239,24 @@ endif() endif() if(ARM AND CROSS_COMPILE_ARM) - set(ARM_ARGS -march=armv6 -mfloat-abi=soft -mfpu=vfp -marm -fPIC) + if(ARM64) + set(ARM_ARGS -fPIC) + else() + set(ARM_ARGS -march=armv6 -mfloat-abi=soft 
-mfpu=vfp -marm -fPIC) + endif() + message(STATUS "cross compile arm") elseif(ARM) - find_package(Neon) - if(CPU_HAS_NEON) - set(ARM_ARGS -mcpu=native -mfloat-abi=hard -mfpu=neon -marm -fPIC) + if(ARM64) + set(ARM_ARGS -fPIC) add_definitions(-DHAVE_NEON) else() - set(ARM_ARGS -mcpu=native -mfloat-abi=hard -mfpu=vfp -marm) + find_package(Neon) + if(CPU_HAS_NEON) + set(ARM_ARGS -mcpu=native -mfloat-abi=hard -mfpu=neon -marm -fPIC) + add_definitions(-DHAVE_NEON) + else() + set(ARM_ARGS -mcpu=native -mfloat-abi=hard -mfpu=vfp -marm) + endif() endif() endif() add_definitions(${ARM_ARGS}) @@ -518,7 +536,11 @@ # compile ARM arch asm files here enable_language(ASM) foreach(ASM ${ARM_ASMS}) - set(ASM_SRC ${CMAKE_CURRENT_SOURCE_DIR}/common/arm/${ASM}) + if(ARM64) + set(ASM_SRC ${CMAKE_CURRENT_SOURCE_DIR}/common/aarch64/${ASM}) + else() + set(ASM_SRC ${CMAKE_CURRENT_SOURCE_DIR}/common/arm/${ASM}) + endif() list(APPEND ASM_SRCS ${ASM_SRC}) list(APPEND ASM_OBJS ${ASM}.${SUFFIX}) add_custom_command( @@ -725,16 +747,16 @@ # Xcode seems unable to link the CLI with libs, so link as one targget if(ENABLE_HDR10_PLUS) add_executable(cli ../COPYING ${InputFiles} ${OutputFiles} ${GETOPT} - x265.cpp x265.h x265cli.h + x265.cpp x265.h x265cli.cpp x265cli.h abrEncApp.cpp abrEncApp.h $<TARGET_OBJECTS:encoder> $<TARGET_OBJECTS:common> $<TARGET_OBJECTS:dynamicHDR10> ${ASM_OBJS}) else() add_executable(cli ../COPYING ${InputFiles} ${OutputFiles} ${GETOPT} - x265.cpp x265.h x265cli.h + x265.cpp x265.h x265cli.cpp x265cli.h abrEncApp.cpp abrEncApp.h $<TARGET_OBJECTS:encoder> $<TARGET_OBJECTS:common> ${ASM_OBJS}) endif() else() add_executable(cli ../COPYING ${InputFiles} ${OutputFiles} ${GETOPT} ${X265_RC_FILE} - ${ExportDefs} x265.cpp x265.h x265cli.h) + ${ExportDefs} x265.cpp x265.h x265cli.cpp x265cli.h abrEncApp.cpp abrEncApp.h) if(WIN32 OR NOT ENABLE_SHARED OR INTEL_CXX) # The CLI cannot link to the shared library on Windows, it # requires internal APIs not exported from the DLL
View file
x265_3.4.tar.gz/source/abrEncApp.cpp
Added
@@ -0,0 +1,1108 @@ +/***************************************************************************** +* Copyright (C) 2013-2020 MulticoreWare, Inc +* +* Authors: Pooja Venkatesan <pooja@multicorewareinc.com> +* Aruna Matheswaran <aruna@multicorewareinc.com> +* +* This program is free software; you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation; either version 2 of the License, or +* (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program; if not, write to the Free Software +* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA. +* +* This program is also available under a commercial proprietary license. +* For more information, contact us at license @ x265.com. +*****************************************************************************/ + +#include "abrEncApp.h" +#include "mv.h" +#include "slice.h" +#include "param.h" + +#include <signal.h> +#include <errno.h> + +#include <queue> + +using namespace X265_NS; + +/* Ctrl-C handler */ +static volatile sig_atomic_t b_ctrl_c /* = 0 */; +static void sigint_handler(int) +{ + b_ctrl_c = 1; +} + +namespace X265_NS { + // private namespace +#define X265_INPUT_QUEUE_SIZE 250 + + AbrEncoder::AbrEncoder(CLIOptions cliopt[], uint8_t numEncodes, int &ret) + { + m_numEncodes = numEncodes; + m_numActiveEncodes.set(numEncodes); + m_queueSize = (numEncodes > 1) ? 
X265_INPUT_QUEUE_SIZE : 1; + m_passEnc = X265_MALLOC(PassEncoder*, m_numEncodes); + + for (uint8_t i = 0; i < m_numEncodes; i++) + { + m_passEnc[i] = new PassEncoder(i, cliopt[i], this); + if (!m_passEnc[i]) + { + x265_log(NULL, X265_LOG_ERROR, "Unable to allocate memory for passEncoder\n"); + ret = 4; + } + m_passEnc[i]->init(ret); + } + + if (!allocBuffers()) + { + x265_log(NULL, X265_LOG_ERROR, "Unable to allocate memory for buffers\n"); + ret = 4; + } + + /* start passEncoder worker threads */ + for (uint8_t pass = 0; pass < m_numEncodes; pass++) + m_passEnc[pass]->startThreads(); + } + + bool AbrEncoder::allocBuffers() + { + m_inputPicBuffer = X265_MALLOC(x265_picture**, m_numEncodes); + m_analysisBuffer = X265_MALLOC(x265_analysis_data*, m_numEncodes); + + m_picWriteCnt = new ThreadSafeInteger[m_numEncodes]; + m_picReadCnt = new ThreadSafeInteger[m_numEncodes]; + m_analysisWriteCnt = new ThreadSafeInteger[m_numEncodes]; + m_analysisReadCnt = new ThreadSafeInteger[m_numEncodes]; + + m_picIdxReadCnt = X265_MALLOC(ThreadSafeInteger*, m_numEncodes); + m_analysisWrite = X265_MALLOC(ThreadSafeInteger*, m_numEncodes); + m_analysisRead = X265_MALLOC(ThreadSafeInteger*, m_numEncodes); + m_readFlag = X265_MALLOC(int*, m_numEncodes); + + for (uint8_t pass = 0; pass < m_numEncodes; pass++) + { + m_inputPicBuffer[pass] = X265_MALLOC(x265_picture*, m_queueSize); + for (uint32_t idx = 0; idx < m_queueSize; idx++) + { + m_inputPicBuffer[pass][idx] = x265_picture_alloc(); + x265_picture_init(m_passEnc[pass]->m_param, m_inputPicBuffer[pass][idx]); + } + + m_analysisBuffer[pass] = X265_MALLOC(x265_analysis_data, m_queueSize); + m_picIdxReadCnt[pass] = new ThreadSafeInteger[m_queueSize]; + m_analysisWrite[pass] = new ThreadSafeInteger[m_queueSize]; + m_analysisRead[pass] = new ThreadSafeInteger[m_queueSize]; + m_readFlag[pass] = X265_MALLOC(int, m_queueSize); + } + return true; + } + + void AbrEncoder::destroy() + { + x265_cleanup(); /* Free library singletons */ + for (uint8_t 
pass = 0; pass < m_numEncodes; pass++) + { + for (uint32_t index = 0; index < m_queueSize; index++) + { + X265_FREE(m_inputPicBuffer[pass][index]->planes[0]); + x265_picture_free(m_inputPicBuffer[pass][index]); + } + + X265_FREE(m_inputPicBuffer[pass]); + X265_FREE(m_analysisBuffer[pass]); + X265_FREE(m_readFlag[pass]); + delete[] m_picIdxReadCnt[pass]; + delete[] m_analysisWrite[pass]; + delete[] m_analysisRead[pass]; + m_passEnc[pass]->destroy(); + delete m_passEnc[pass]; + } + X265_FREE(m_inputPicBuffer); + X265_FREE(m_analysisBuffer); + X265_FREE(m_readFlag); + + delete[] m_picWriteCnt; + delete[] m_picReadCnt; + delete[] m_analysisWriteCnt; + delete[] m_analysisReadCnt; + + X265_FREE(m_picIdxReadCnt); + X265_FREE(m_analysisWrite); + X265_FREE(m_analysisRead); + + X265_FREE(m_passEnc); + } + + PassEncoder::PassEncoder(uint32_t id, CLIOptions cliopt, AbrEncoder *parent) + { + m_id = id; + m_cliopt = cliopt; + m_parent = parent; + if(!(m_cliopt.enableScaler && m_id)) + m_input = m_cliopt.input; + m_param = cliopt.param; + m_inputOver = false; + m_lastIdx = -1; + m_encoder = NULL; + m_scaler = NULL; + m_reader = NULL; + m_ret = 0; + } + + int PassEncoder::init(int &result) + { + if (m_parent->m_numEncodes > 1) + setReuseLevel(); + + if (!(m_cliopt.enableScaler && m_id)) + m_reader = new Reader(m_id, this); + else + { + VideoDesc *src = NULL, *dst = NULL; + dst = new VideoDesc(m_param->sourceWidth, m_param->sourceHeight, m_param->internalCsp, m_param->internalBitDepth); + int dstW = m_parent->m_passEnc[m_id - 1]->m_param->sourceWidth; + int dstH = m_parent->m_passEnc[m_id - 1]->m_param->sourceHeight; + src = new VideoDesc(dstW, dstH, m_param->internalCsp, m_param->internalBitDepth); + if (src != NULL && dst != NULL) + { + m_scaler = new Scaler(0, 1, m_id, src, dst, this); + if (!m_scaler) + { + x265_log(m_param, X265_LOG_ERROR, "\n MALLOC failure in Scaler"); + result = 4; + } + } + } + + /* note: we could try to acquire a different libx265 API here based on + * 
the profile found during option parsing, but it must be done before + * opening an encoder */ + + if (m_param) + m_encoder = m_cliopt.api->encoder_open(m_param); + if (!m_encoder) + { + x265_log(NULL, X265_LOG_ERROR, "x265_encoder_open() failed for Enc, \n"); + m_ret = 2; + return -1; + } +
View file
x265_3.4.tar.gz/source/abrEncApp.h
Added
@@ -0,0 +1,153 @@ +/***************************************************************************** +* Copyright (C) 2013-2020 MulticoreWare, Inc +* +* Authors: Pooja Venkatesan <pooja@multicorewareinc.com> +* Aruna Matheswaran <aruna@multicorewareinc.com> +* +* +* This program is free software; you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation; either version 2 of the License, or +* (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program; if not, write to the Free Software +* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA. +* +* This program is also available under a commercial proprietary license. +* For more information, contact us at license @ x265.com. 
+*****************************************************************************/ + +#ifndef ABR_ENCODE_H +#define ABR_ENCODE_H + +#include "x265.h" +#include "scaler.h" +#include "threading.h" +#include "x265cli.h" + +namespace X265_NS { + // private namespace + + class PassEncoder; + class Scaler; + class Reader; + + class AbrEncoder + { + public: + uint8_t m_numEncodes; + PassEncoder **m_passEnc; + uint32_t m_queueSize; + ThreadSafeInteger m_numActiveEncodes; + + x265_picture ***m_inputPicBuffer; //[numEncodes][queueSize] + x265_analysis_data **m_analysisBuffer; //[numEncodes][queueSize] + int **m_readFlag; + + ThreadSafeInteger *m_picWriteCnt; + ThreadSafeInteger *m_picReadCnt; + ThreadSafeInteger **m_picIdxReadCnt; + ThreadSafeInteger *m_analysisWriteCnt; //[numEncodes][queueSize] + ThreadSafeInteger *m_analysisReadCnt; //[numEncodes][queueSize] + ThreadSafeInteger **m_analysisWrite; //[numEncodes][queueSize] + ThreadSafeInteger **m_analysisRead; //[numEncodes][queueSize] + + AbrEncoder(CLIOptions cliopt[], uint8_t numEncodes, int& ret); + bool allocBuffers(); + void destroy(); + + }; + + class PassEncoder : public Thread + { + public: + + uint32_t m_id; + x265_param *m_param; + AbrEncoder *m_parent; + x265_encoder *m_encoder; + Reader *m_reader; + Scaler *m_scaler; + bool m_inputOver; + + int m_threadActive; + int m_lastIdx; + uint32_t m_outputNalsCount; + + x265_picture **m_inputPicBuffer; + x265_analysis_data **m_analysisBuffer; + x265_nal **m_outputNals; + x265_picture **m_outputRecon; + + CLIOptions m_cliopt; + InputFile* m_input; + const char* m_reconPlayCmd; + FILE* m_qpfile; + FILE* m_zoneFile; + FILE* m_dolbyVisionRpu;/* File containing Dolby Vision BL RPU metadata */ + + int m_ret; + + PassEncoder(uint32_t id, CLIOptions cliopt, AbrEncoder *parent); + int init(int &result); + void setReuseLevel(); + + void startThreads(); + void copyInfo(x265_analysis_data *src); + + bool readPicture(x265_picture*); + void destroy(); + + private: + void threadMain(); + 
}; + + class Scaler : public Thread + { + public: + PassEncoder *m_parentEnc; + int m_id; + int m_scalePlanes[3]; + int m_scaleFrameSize; + uint32_t m_threadId; + uint32_t m_threadTotal; + ThreadSafeInteger m_scaledWriteCnt; + VideoDesc* m_srcFormat; + VideoDesc* m_dstFormat; + int m_threadActive; + ScalerFilterManager* m_filterManager; + + Scaler(int threadId, int threadNum, int id, VideoDesc *src, VideoDesc * dst, PassEncoder *parentEnc); + bool scalePic(x265_picture *destination, x265_picture *source); + void threadMain(); + void destroy() + { + if (m_filterManager) + { + delete m_filterManager; + m_filterManager = NULL; + } + } + }; + + class Reader : public Thread + { + public: + PassEncoder *m_parentEnc; + int m_id; + InputFile* m_input; + int m_threadActive; + + Reader(int id, PassEncoder *parentEnc); + void threadMain(); + }; +} + +#endif // ifndef ABR_ENCODE_H +#pragma once
View file
x265_3.3.tar.gz/source/common/CMakeLists.txt -> x265_3.4.tar.gz/source/common/CMakeLists.txt
Changed
@@ -14,7 +14,7 @@ endif(EXTRA_LIB) if(ENABLE_ASSEMBLY) - set_source_files_properties(threading.cpp primitives.cpp PROPERTIES COMPILE_FLAGS -DENABLE_ASSEMBLY=1) + set_source_files_properties(threading.cpp primitives.cpp pixel.cpp PROPERTIES COMPILE_FLAGS -DENABLE_ASSEMBLY=1) list(APPEND VFLAGS "-DENABLE_ASSEMBLY=1") endif(ENABLE_ASSEMBLY) @@ -84,16 +84,33 @@ endif(ENABLE_ASSEMBLY AND X86) if(ENABLE_ASSEMBLY AND (ARM OR CROSS_COMPILE_ARM)) - set(C_SRCS asm-primitives.cpp pixel.h mc.h ipfilter8.h blockcopy8.h dct8.h loopfilter.h) + if(ARM64) + if(GCC AND (CMAKE_CXX_FLAGS_RELEASE MATCHES "-O3")) + message(STATUS "Detected CXX compiler using -O3 optimization level") + add_definitions(-DAUTO_VECTORIZE=1) + endif() + set(C_SRCS asm-primitives.cpp pixel.h ipfilter8.h) - # add ARM assembly/intrinsic files here - set(A_SRCS asm.S cpu-a.S mc-a.S sad-a.S pixel-util.S ssd-a.S blockcopy8.S ipfilter8.S dct-a.S) - set(VEC_PRIMITIVES) + # add ARM assembly/intrinsic files here + set(A_SRCS asm.S mc-a.S sad-a.S pixel-util.S ipfilter8.S) + set(VEC_PRIMITIVES) - set(ARM_ASMS "${A_SRCS}" CACHE INTERNAL "ARM Assembly Sources") - foreach(SRC ${C_SRCS}) - set(ASM_PRIMITIVES ${ASM_PRIMITIVES} arm/${SRC}) - endforeach() + set(ARM_ASMS "${A_SRCS}" CACHE INTERNAL "ARM Assembly Sources") + foreach(SRC ${C_SRCS}) + set(ASM_PRIMITIVES ${ASM_PRIMITIVES} aarch64/${SRC}) + endforeach() + else() + set(C_SRCS asm-primitives.cpp pixel.h mc.h ipfilter8.h blockcopy8.h dct8.h loopfilter.h) + + # add ARM assembly/intrinsic files here + set(A_SRCS asm.S cpu-a.S mc-a.S sad-a.S pixel-util.S ssd-a.S blockcopy8.S ipfilter8.S dct-a.S) + set(VEC_PRIMITIVES) + + set(ARM_ASMS "${A_SRCS}" CACHE INTERNAL "ARM Assembly Sources") + foreach(SRC ${C_SRCS}) + set(ASM_PRIMITIVES ${ASM_PRIMITIVES} arm/${SRC}) + endforeach() + endif() source_group(Assembly FILES ${ASM_PRIMITIVES}) endif(ENABLE_ASSEMBLY AND (ARM OR CROSS_COMPILE_ARM)) @@ -151,4 +168,5 @@ predict.cpp predict.h scalinglist.cpp scalinglist.h quant.cpp quant.h 
contexts.h - deblock.cpp deblock.h) + deblock.cpp deblock.h + scaler.cpp scaler.h)
View file
x265_3.4.tar.gz/source/common/aarch64/asm-primitives.cpp
Added
@@ -0,0 +1,219 @@ +/***************************************************************************** + * Copyright (C) 2020 MulticoreWare, Inc + * + * Authors: Hongbin Liu <liuhongbin1@huawei.com> + * Yimeng Su <yimeng.su@huawei.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA. + * + * This program is also available under a commercial proprietary license. + * For more information, contact us at license @ x265.com. 
+ *****************************************************************************/ + +#include "common.h" +#include "primitives.h" +#include "x265.h" +#include "cpu.h" + + +#if defined(__GNUC__) +#define GCC_VERSION (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) +#endif + +#define GCC_4_9_0 40900 +#define GCC_5_1_0 50100 + +extern "C" { +#include "pixel.h" +#include "pixel-util.h" +#include "ipfilter8.h" +} + +namespace X265_NS { +// private x265 namespace + + +template<int size> +void interp_8tap_hv_pp_cpu(const pixel* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int idxX, int idxY) +{ + ALIGN_VAR_32(int16_t, immed[MAX_CU_SIZE * (MAX_CU_SIZE + NTAPS_LUMA - 1)]); + const int halfFilterSize = NTAPS_LUMA >> 1; + const int immedStride = MAX_CU_SIZE; + + primitives.pu[size].luma_hps(src, srcStride, immed, immedStride, idxX, 1); + primitives.pu[size].luma_vsp(immed + (halfFilterSize - 1) * immedStride, immedStride, dst, dstStride, idxY); +} + + +/* Temporary workaround because luma_vsp assembly primitive has not been completed + * but interp_8tap_hv_pp_cpu uses mixed C primitive and assembly primitive. + * Otherwise, segment fault occurs. 
*/ +void setupAliasCPrimitives(EncoderPrimitives &cp, EncoderPrimitives &asmp, int cpuMask) +{ + if (cpuMask & X265_CPU_NEON) + { + asmp.pu[LUMA_8x4].luma_vsp = cp.pu[LUMA_8x4].luma_vsp; + asmp.pu[LUMA_8x8].luma_vsp = cp.pu[LUMA_8x8].luma_vsp; + asmp.pu[LUMA_8x16].luma_vsp = cp.pu[LUMA_8x16].luma_vsp; + asmp.pu[LUMA_8x32].luma_vsp = cp.pu[LUMA_8x32].luma_vsp; + asmp.pu[LUMA_12x16].luma_vsp = cp.pu[LUMA_12x16].luma_vsp; +#if !AUTO_VECTORIZE || GCC_VERSION < GCC_5_1_0 /* gcc_version < gcc-5.1.0 */ + asmp.pu[LUMA_16x4].luma_vsp = cp.pu[LUMA_16x4].luma_vsp; + asmp.pu[LUMA_16x8].luma_vsp = cp.pu[LUMA_16x8].luma_vsp; + asmp.pu[LUMA_16x12].luma_vsp = cp.pu[LUMA_16x12].luma_vsp; + asmp.pu[LUMA_16x16].luma_vsp = cp.pu[LUMA_16x16].luma_vsp; + asmp.pu[LUMA_16x32].luma_vsp = cp.pu[LUMA_16x32].luma_vsp; + asmp.pu[LUMA_16x64].luma_vsp = cp.pu[LUMA_16x64].luma_vsp; + asmp.pu[LUMA_32x16].luma_vsp = cp.pu[LUMA_32x16].luma_vsp; + asmp.pu[LUMA_32x24].luma_vsp = cp.pu[LUMA_32x24].luma_vsp; + asmp.pu[LUMA_32x32].luma_vsp = cp.pu[LUMA_32x32].luma_vsp; + asmp.pu[LUMA_32x64].luma_vsp = cp.pu[LUMA_32x64].luma_vsp; + asmp.pu[LUMA_48x64].luma_vsp = cp.pu[LUMA_48x64].luma_vsp; + asmp.pu[LUMA_64x16].luma_vsp = cp.pu[LUMA_64x16].luma_vsp; + asmp.pu[LUMA_64x32].luma_vsp = cp.pu[LUMA_64x32].luma_vsp; + asmp.pu[LUMA_64x48].luma_vsp = cp.pu[LUMA_64x48].luma_vsp; + asmp.pu[LUMA_64x64].luma_vsp = cp.pu[LUMA_64x64].luma_vsp; +#if !AUTO_VECTORIZE || GCC_VERSION < GCC_4_9_0 /* gcc_version < gcc-4.9.0 */ + asmp.pu[LUMA_4x4].luma_vsp = cp.pu[LUMA_4x4].luma_vsp; + asmp.pu[LUMA_4x8].luma_vsp = cp.pu[LUMA_4x8].luma_vsp; + asmp.pu[LUMA_4x16].luma_vsp = cp.pu[LUMA_4x16].luma_vsp; + asmp.pu[LUMA_24x32].luma_vsp = cp.pu[LUMA_24x32].luma_vsp; + asmp.pu[LUMA_32x8].luma_vsp = cp.pu[LUMA_32x8].luma_vsp; +#endif +#endif + } +} + + +void setupAssemblyPrimitives(EncoderPrimitives &p, int cpuMask) +{ + if (cpuMask & X265_CPU_NEON) + { + p.pu[LUMA_4x4].satd = PFX(pixel_satd_4x4_neon); + p.pu[LUMA_4x8].satd = 
PFX(pixel_satd_4x8_neon); + p.pu[LUMA_4x16].satd = PFX(pixel_satd_4x16_neon); + p.pu[LUMA_8x4].satd = PFX(pixel_satd_8x4_neon); + p.pu[LUMA_8x8].satd = PFX(pixel_satd_8x8_neon); + p.pu[LUMA_12x16].satd = PFX(pixel_satd_12x16_neon); + + p.chroma[X265_CSP_I420].pu[CHROMA_420_4x4].satd = PFX(pixel_satd_4x4_neon); + p.chroma[X265_CSP_I420].pu[CHROMA_420_4x8].satd = PFX(pixel_satd_4x8_neon); + p.chroma[X265_CSP_I420].pu[CHROMA_420_4x16].satd = PFX(pixel_satd_4x16_neon); + p.chroma[X265_CSP_I420].pu[CHROMA_420_8x4].satd = PFX(pixel_satd_8x4_neon); + p.chroma[X265_CSP_I420].pu[CHROMA_420_8x8].satd = PFX(pixel_satd_8x8_neon); + p.chroma[X265_CSP_I420].pu[CHROMA_420_12x16].satd = PFX(pixel_satd_12x16_neon); + + p.chroma[X265_CSP_I422].pu[CHROMA_422_4x4].satd = PFX(pixel_satd_4x4_neon); + p.chroma[X265_CSP_I422].pu[CHROMA_422_4x8].satd = PFX(pixel_satd_4x8_neon); + p.chroma[X265_CSP_I422].pu[CHROMA_422_4x16].satd = PFX(pixel_satd_4x16_neon); + p.chroma[X265_CSP_I422].pu[CHROMA_422_4x32].satd = PFX(pixel_satd_4x32_neon); + p.chroma[X265_CSP_I422].pu[CHROMA_422_8x4].satd = PFX(pixel_satd_8x4_neon); + p.chroma[X265_CSP_I422].pu[CHROMA_422_8x8].satd = PFX(pixel_satd_8x8_neon); + p.chroma[X265_CSP_I422].pu[CHROMA_422_12x32].satd = PFX(pixel_satd_12x32_neon); + + p.pu[LUMA_4x4].pixelavg_pp[NONALIGNED] = PFX(pixel_avg_pp_4x4_neon); + p.pu[LUMA_4x8].pixelavg_pp[NONALIGNED] = PFX(pixel_avg_pp_4x8_neon); + p.pu[LUMA_4x16].pixelavg_pp[NONALIGNED] = PFX(pixel_avg_pp_4x16_neon); + p.pu[LUMA_8x4].pixelavg_pp[NONALIGNED] = PFX(pixel_avg_pp_8x4_neon); + p.pu[LUMA_8x8].pixelavg_pp[NONALIGNED] = PFX(pixel_avg_pp_8x8_neon); + p.pu[LUMA_8x16].pixelavg_pp[NONALIGNED] = PFX(pixel_avg_pp_8x16_neon); + p.pu[LUMA_8x32].pixelavg_pp[NONALIGNED] = PFX(pixel_avg_pp_8x32_neon); + + p.pu[LUMA_4x4].pixelavg_pp[ALIGNED] = PFX(pixel_avg_pp_4x4_neon); + p.pu[LUMA_4x8].pixelavg_pp[ALIGNED] = PFX(pixel_avg_pp_4x8_neon); + p.pu[LUMA_4x16].pixelavg_pp[ALIGNED] = PFX(pixel_avg_pp_4x16_neon); + 
p.pu[LUMA_8x4].pixelavg_pp[ALIGNED] = PFX(pixel_avg_pp_8x4_neon); + p.pu[LUMA_8x8].pixelavg_pp[ALIGNED] = PFX(pixel_avg_pp_8x8_neon); + p.pu[LUMA_8x16].pixelavg_pp[ALIGNED] = PFX(pixel_avg_pp_8x16_neon); + p.pu[LUMA_8x32].pixelavg_pp[ALIGNED] = PFX(pixel_avg_pp_8x32_neon); + + p.pu[LUMA_8x4].sad_x3 = PFX(sad_x3_8x4_neon); + p.pu[LUMA_8x8].sad_x3 = PFX(sad_x3_8x8_neon); + p.pu[LUMA_8x16].sad_x3 = PFX(sad_x3_8x16_neon); + p.pu[LUMA_8x32].sad_x3 = PFX(sad_x3_8x32_neon); + + p.pu[LUMA_8x4].sad_x4 = PFX(sad_x4_8x4_neon); + p.pu[LUMA_8x8].sad_x4 = PFX(sad_x4_8x8_neon); + p.pu[LUMA_8x16].sad_x4 = PFX(sad_x4_8x16_neon); + p.pu[LUMA_8x32].sad_x4 = PFX(sad_x4_8x32_neon); + + // quant + p.quant = PFX(quant_neon); + // luma_hps + p.pu[LUMA_4x4].luma_hps = PFX(interp_8tap_horiz_ps_4x4_neon); + p.pu[LUMA_4x8].luma_hps = PFX(interp_8tap_horiz_ps_4x8_neon); + p.pu[LUMA_4x16].luma_hps = PFX(interp_8tap_horiz_ps_4x16_neon); + p.pu[LUMA_8x4].luma_hps = PFX(interp_8tap_horiz_ps_8x4_neon); + p.pu[LUMA_8x8].luma_hps = PFX(interp_8tap_horiz_ps_8x8_neon); + p.pu[LUMA_8x16].luma_hps = PFX(interp_8tap_horiz_ps_8x16_neon); + p.pu[LUMA_8x32].luma_hps = PFX(interp_8tap_horiz_ps_8x32_neon); + p.pu[LUMA_12x16].luma_hps = PFX(interp_8tap_horiz_ps_12x16_neon); + p.pu[LUMA_24x32].luma_hps = PFX(interp_8tap_horiz_ps_24x32_neon); +#if !AUTO_VECTORIZE || GCC_VERSION < GCC_5_1_0 /* gcc_version < gcc-5.1.0 */ + p.pu[LUMA_16x4].luma_hps = PFX(interp_8tap_horiz_ps_16x4_neon); + p.pu[LUMA_16x8].luma_hps = PFX(interp_8tap_horiz_ps_16x8_neon); + p.pu[LUMA_16x12].luma_hps = PFX(interp_8tap_horiz_ps_16x12_neon); + p.pu[LUMA_16x16].luma_hps = PFX(interp_8tap_horiz_ps_16x16_neon); + p.pu[LUMA_16x32].luma_hps = PFX(interp_8tap_horiz_ps_16x32_neon); + p.pu[LUMA_16x64].luma_hps = PFX(interp_8tap_horiz_ps_16x64_neon); + p.pu[LUMA_32x8].luma_hps = PFX(interp_8tap_horiz_ps_32x8_neon); + p.pu[LUMA_32x16].luma_hps = PFX(interp_8tap_horiz_ps_32x16_neon); + p.pu[LUMA_32x24].luma_hps = PFX(interp_8tap_horiz_ps_32x24_neon); 
+ p.pu[LUMA_32x32].luma_hps = PFX(interp_8tap_horiz_ps_32x32_neon); + p.pu[LUMA_32x64].luma_hps = PFX(interp_8tap_horiz_ps_32x64_neon); + p.pu[LUMA_48x64].luma_hps = PFX(interp_8tap_horiz_ps_48x64_neon); + p.pu[LUMA_64x16].luma_hps = PFX(interp_8tap_horiz_ps_64x16_neon); + p.pu[LUMA_64x32].luma_hps = PFX(interp_8tap_horiz_ps_64x32_neon); + p.pu[LUMA_64x48].luma_hps = PFX(interp_8tap_horiz_ps_64x48_neon); + p.pu[LUMA_64x64].luma_hps = PFX(interp_8tap_horiz_ps_64x64_neon); +#endif + + p.pu[LUMA_8x4].luma_hvpp = interp_8tap_hv_pp_cpu<LUMA_8x4>; + p.pu[LUMA_8x8].luma_hvpp = interp_8tap_hv_pp_cpu<LUMA_8x8>; + p.pu[LUMA_8x16].luma_hvpp = interp_8tap_hv_pp_cpu<LUMA_8x16>; + p.pu[LUMA_8x32].luma_hvpp = interp_8tap_hv_pp_cpu<LUMA_8x32>; + p.pu[LUMA_12x16].luma_hvpp = interp_8tap_hv_pp_cpu<LUMA_12x16>; +#if !AUTO_VECTORIZE || GCC_VERSION < GCC_5_1_0 /* gcc_version < gcc-5.1.0 */ + p.pu[LUMA_16x4].luma_hvpp = interp_8tap_hv_pp_cpu<LUMA_16x4>; + p.pu[LUMA_16x8].luma_hvpp = interp_8tap_hv_pp_cpu<LUMA_16x8>; + p.pu[LUMA_16x12].luma_hvpp = interp_8tap_hv_pp_cpu<LUMA_16x12>; + p.pu[LUMA_16x16].luma_hvpp = interp_8tap_hv_pp_cpu<LUMA_16x16>; + p.pu[LUMA_16x32].luma_hvpp = interp_8tap_hv_pp_cpu<LUMA_16x32>; + p.pu[LUMA_16x64].luma_hvpp = interp_8tap_hv_pp_cpu<LUMA_16x64>; + p.pu[LUMA_32x16].luma_hvpp = interp_8tap_hv_pp_cpu<LUMA_32x16>; + p.pu[LUMA_32x24].luma_hvpp = interp_8tap_hv_pp_cpu<LUMA_32x24>; + p.pu[LUMA_32x32].luma_hvpp = interp_8tap_hv_pp_cpu<LUMA_32x32>; + p.pu[LUMA_32x64].luma_hvpp = interp_8tap_hv_pp_cpu<LUMA_32x64>; + p.pu[LUMA_48x64].luma_hvpp = interp_8tap_hv_pp_cpu<LUMA_48x64>;
View file
x265_3.4.tar.gz/source/common/aarch64/asm.S
Added
/*****************************************************************************
 * Copyright (C) 2020 MulticoreWare, Inc
 *
 * Authors: Hongbin Liu <liuhongbin1@huawei.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
 *
 * This program is also available under a commercial proprietary license.
 * For more information, contact us at license @ x265.com.
 *****************************************************************************/

/* Shared prologue for the AArch64 assembly sources: symbol-prefix and
 * object-format helpers plus the function/endfunc macro pair. */

.arch armv8-a

/* Prefix pasted in front of exported symbol names: a leading underscore when
 * the build defines PREFIX, nothing otherwise. */
#ifdef PREFIX
#define EXTERN_ASM _
#else
#define EXTERN_ASM
#endif

/* ELF is written in front of directives that are only wanted for ELF objects
 * (.size, .hidden, .type). When __ELF__ is defined it expands to nothing and
 * the directive is assembled; otherwise it expands to "@".
 * NOTE(review): "@" starts a comment in 32-bit ARM GAS syntax. Confirm that
 * the AArch64 assemblers used for non-ELF targets also accept it as one. */
#ifdef __ELF__
#define ELF
#else
#define ELF @
#endif

/* Hard-wired to 1, so FUNC below always expands to nothing and the
 * .func/.endfunc directives are always emitted. */
#define HAVE_AS_FUNC 1

#if HAVE_AS_FUNC
#define FUNC
#else
#define FUNC @
#endif

/* function name, export=1
 *
 * Opens an assembly routine: aligns it, emits the symbol bookkeeping
 * directives and places the entry label. It also defines a one-shot
 * "endfunc" macro that is meant to be invoked after the routine body.
 *
 *   name    symbol name of the routine
 *   export  1 (default): the label is EXTERN_ASM followed by name and is
 *           declared .global; any other value: the label is the bare name
 *           and no .global is emitted
 *
 * In both cases the symbol is marked .hidden and typed as a function when
 * ELF expands to nothing.
 */
.macro function name, export=1
    /* endfunc closes the routine opened above: it records the symbol size
     * (ELF only), emits .endfunc, then removes its own definition with
     * .purgem so that the next "function" can define it again. */
    .macro endfunc
ELF     .size \name, . - \name
FUNC    .endfunc
        .purgem endfunc
    .endm
        /* NOTE(review): on ARM targets GAS reads the .align operand as a
         * power of two, so this is presumably 4-byte alignment; confirm. */
        .align 2
.if \export == 1
        .global EXTERN_ASM\name
ELF     .hidden EXTERN_ASM\name
ELF     .type EXTERN_ASM\name, %function
FUNC    .func EXTERN_ASM\name
EXTERN_ASM\name:
.else
ELF     .hidden \name
ELF     .type \name, %function
FUNC    .func \name
\name:
.endif
.endm


/* Stride constants made available to every source that includes this file. */
#define FENC_STRIDE 64
#define FDEC_STRIDE 32
View file
x265_3.4.tar.gz/source/common/aarch64/ipfilter8.S
Added
@@ -0,0 +1,414 @@ +/***************************************************************************** + * Copyright (C) 2020 MulticoreWare, Inc + * + * Authors: Yimeng Su <yimeng.su@huawei.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA. + * + * This program is also available under a commercial proprietary license. + * For more information, contact us at license @ x265.com. 
+ *****************************************************************************/ + +#include "asm.S" + +.section .rodata + +.align 4 + +.text + + + +.macro qpel_filter_0_32b + movi v24.8h, #64 + uxtl v19.8h, v5.8b + smull v17.4s, v19.4h, v24.4h + smull2 v18.4s, v19.8h, v24.8h +.endm + +.macro qpel_filter_1_32b + movi v16.8h, #58 + uxtl v19.8h, v5.8b + smull v17.4s, v19.4h, v16.4h + smull2 v18.4s, v19.8h, v16.8h + + movi v24.8h, #10 + uxtl v21.8h, v1.8b + smull v19.4s, v21.4h, v24.4h + smull2 v20.4s, v21.8h, v24.8h + + movi v16.8h, #17 + uxtl v23.8h, v2.8b + smull v21.4s, v23.4h, v16.4h + smull2 v22.4s, v23.8h, v16.8h + + movi v24.8h, #5 + uxtl v1.8h, v6.8b + smull v23.4s, v1.4h, v24.4h + smull2 v16.4s, v1.8h, v24.8h + + sub v17.4s, v17.4s, v19.4s + sub v18.4s, v18.4s, v20.4s + + uxtl v1.8h, v4.8b + sshll v19.4s, v1.4h, #2 + sshll2 v20.4s, v1.8h, #2 + + add v17.4s, v17.4s, v21.4s + add v18.4s, v18.4s, v22.4s + + uxtl v1.8h, v0.8b + uxtl v2.8h, v3.8b + ssubl v21.4s, v2.4h, v1.4h + ssubl2 v22.4s, v2.8h, v1.8h + + add v17.4s, v17.4s, v19.4s + add v18.4s, v18.4s, v20.4s + sub v21.4s, v21.4s, v23.4s + sub v22.4s, v22.4s, v16.4s + add v17.4s, v17.4s, v21.4s + add v18.4s, v18.4s, v22.4s +.endm + +.macro qpel_filter_2_32b + movi v16.4s, #11 + uxtl v19.8h, v5.8b + uxtl v20.8h, v2.8b + saddl v17.4s, v19.4h, v20.4h + saddl2 v18.4s, v19.8h, v20.8h + + uxtl v21.8h, v1.8b + uxtl v22.8h, v6.8b + saddl v19.4s, v21.4h, v22.4h + saddl2 v20.4s, v21.8h, v22.8h + + mul v19.4s, v19.4s, v16.4s + mul v20.4s, v20.4s, v16.4s + + movi v16.4s, #40 + mul v17.4s, v17.4s, v16.4s + mul v18.4s, v18.4s, v16.4s + + uxtl v21.8h, v4.8b + uxtl v22.8h, v3.8b + saddl v23.4s, v21.4h, v22.4h + saddl2 v16.4s, v21.8h, v22.8h + + uxtl v1.8h, v0.8b + uxtl v2.8h, v7.8b + saddl v21.4s, v1.4h, v2.4h + saddl2 v22.4s, v1.8h, v2.8h + + shl v23.4s, v23.4s, #2 + shl v16.4s, v16.4s, #2 + + add v19.4s, v19.4s, v21.4s + add v20.4s, v20.4s, v22.4s + add v17.4s, v17.4s, v23.4s + add v18.4s, v18.4s, v16.4s + sub v17.4s, 
v17.4s, v19.4s + sub v18.4s, v18.4s, v20.4s +.endm + +.macro qpel_filter_3_32b + movi v16.8h, #17 + movi v24.8h, #5 + + uxtl v19.8h, v5.8b + smull v17.4s, v19.4h, v16.4h + smull2 v18.4s, v19.8h, v16.8h + + uxtl v21.8h, v1.8b + smull v19.4s, v21.4h, v24.4h + smull2 v20.4s, v21.8h, v24.8h + + movi v16.8h, #58 + uxtl v23.8h, v2.8b + smull v21.4s, v23.4h, v16.4h + smull2 v22.4s, v23.8h, v16.8h + + movi v24.8h, #10 + uxtl v1.8h, v6.8b + smull v23.4s, v1.4h, v24.4h + smull2 v16.4s, v1.8h, v24.8h + + sub v17.4s, v17.4s, v19.4s + sub v18.4s, v18.4s, v20.4s + + uxtl v1.8h, v3.8b + sshll v19.4s, v1.4h, #2 + sshll2 v20.4s, v1.8h, #2 + + add v17.4s, v17.4s, v21.4s + add v18.4s, v18.4s, v22.4s + + uxtl v1.8h, v4.8b + uxtl v2.8h, v7.8b + ssubl v21.4s, v1.4h, v2.4h + ssubl2 v22.4s, v1.8h, v2.8h + + add v17.4s, v17.4s, v19.4s + add v18.4s, v18.4s, v20.4s + sub v21.4s, v21.4s, v23.4s + sub v22.4s, v22.4s, v16.4s + add v17.4s, v17.4s, v21.4s + add v18.4s, v18.4s, v22.4s +.endm + + + + +.macro vextin8 + ld1 {v3.16b}, [x11], #16 + mov v7.d[0], v3.d[1] + ext v0.8b, v3.8b, v7.8b, #1 + ext v4.8b, v3.8b, v7.8b, #2 + ext v1.8b, v3.8b, v7.8b, #3 + ext v5.8b, v3.8b, v7.8b, #4 + ext v2.8b, v3.8b, v7.8b, #5 + ext v6.8b, v3.8b, v7.8b, #6 + ext v3.8b, v3.8b, v7.8b, #7 +.endm + + + +// void interp_horiz_ps_c(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt) +.macro HPS_FILTER a b filterhps + mov w12, #8192 + mov w6, w10 + sub x3, x3, #\a + lsl x3, x3, #1 + mov w9, #\a + cmp w9, #4 + b.eq 14f + cmp w9, #12 + b.eq 15f + b 7f +14:
View file
x265_3.4.tar.gz/source/common/aarch64/ipfilter8.h
Added
@@ -0,0 +1,55 @@ +/***************************************************************************** + * Copyright (C) 2020 MulticoreWare, Inc + * + * Authors: Yimeng Su <yimeng.su@huawei.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA. + * + * This program is also available under a commercial proprietary license. + * For more information, contact us at license @ x265.com. 
+ *****************************************************************************/ + +#ifndef X265_IPFILTER8_AARCH64_H +#define X265_IPFILTER8_AARCH64_H + + +void x265_interp_8tap_horiz_ps_4x4_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt); +void x265_interp_8tap_horiz_ps_4x8_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt); +void x265_interp_8tap_horiz_ps_4x16_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt); +void x265_interp_8tap_horiz_ps_8x4_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt); +void x265_interp_8tap_horiz_ps_8x8_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt); +void x265_interp_8tap_horiz_ps_8x16_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt); +void x265_interp_8tap_horiz_ps_8x32_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt); +void x265_interp_8tap_horiz_ps_12x16_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt); +void x265_interp_8tap_horiz_ps_16x4_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt); +void x265_interp_8tap_horiz_ps_16x8_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt); +void x265_interp_8tap_horiz_ps_16x12_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt); +void x265_interp_8tap_horiz_ps_16x16_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt); +void x265_interp_8tap_horiz_ps_16x32_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, 
int isRowExt); +void x265_interp_8tap_horiz_ps_16x64_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt); +void x265_interp_8tap_horiz_ps_24x32_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt); +void x265_interp_8tap_horiz_ps_32x8_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt); +void x265_interp_8tap_horiz_ps_32x16_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt); +void x265_interp_8tap_horiz_ps_32x24_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt); +void x265_interp_8tap_horiz_ps_32x32_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt); +void x265_interp_8tap_horiz_ps_32x64_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt); +void x265_interp_8tap_horiz_ps_48x64_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt); +void x265_interp_8tap_horiz_ps_64x16_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt); +void x265_interp_8tap_horiz_ps_64x32_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt); +void x265_interp_8tap_horiz_ps_64x48_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt); +void x265_interp_8tap_horiz_ps_64x64_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt); + + +#endif // ifndef X265_IPFILTER8_AARCH64_H
View file
x265_3.4.tar.gz/source/common/aarch64/mc-a.S
Added
/*****************************************************************************
 * Copyright (C) 2020 MulticoreWare, Inc
 *
 * Authors: Hongbin Liu <liuhongbin1@huawei.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
 *
 * This program is also available under a commercial proprietary license.
 * For more information, contact us at license @ x265.com.
 *****************************************************************************/

#include "asm.S"

/* The read-only data section is selected and aligned here, but this file
 * places nothing in it. */
.section .rodata

.align 4

.text

/* pixel_avg_pp_4xN_neon h
 *
 * Emits the routine x265_pixel_avg_pp_4x<h>_neon. Its C prototype is
 *
 *   void f(pixel* dst, intptr_t dstride,
 *          const pixel* src0, intptr_t sstride0,
 *          const pixel* src1, intptr_t sstride1, int);
 *
 * Registers read by the body: x0 = dst, x1 = dstride, x2 = src0,
 * x3 = sstride0, x4 = src1, x5 = sstride1. The trailing int argument is
 * never read.
 *
 * The body is unrolled h times with .rept. Each copy handles one row: it
 * loads 32 bits from each source into lane s[0], applies urhadd (unsigned
 * rounding halving add, i.e. (a + b + 1) >> 1 per byte), stores 32 bits to
 * dst, and post-increments every pointer by its stride.
 * NOTE(review): a 4-wide row being 32 bits implies one byte per pixel;
 * confirm these routines are only installed for 8-bit builds.
 */
.macro pixel_avg_pp_4xN_neon h
function x265_pixel_avg_pp_4x\h\()_neon
.rept \h
    ld1     {v0.s}[0], [x2], x3
    ld1     {v1.s}[0], [x4], x5
    urhadd  v2.8b, v0.8b, v1.8b
    st1     {v2.s}[0], [x0], x1
.endr
    ret
endfunc
.endm

/* Instantiate the 4-wide routines: 4x4, 4x8, 4x16. */
pixel_avg_pp_4xN_neon 4
pixel_avg_pp_4xN_neon 8
pixel_avg_pp_4xN_neon 16

/* pixel_avg_pp_8xN_neon h
 *
 * Same contract and register usage as pixel_avg_pp_4xN_neon, emitting
 * x265_pixel_avg_pp_8x<h>_neon. Each unrolled row loads, averages and
 * stores a full 64-bit vector (8 bytes) instead of a single 32-bit lane.
 */
.macro pixel_avg_pp_8xN_neon h
function x265_pixel_avg_pp_8x\h\()_neon
.rept \h
    ld1     {v0.8b}, [x2], x3
    ld1     {v1.8b}, [x4], x5
    urhadd  v2.8b, v0.8b, v1.8b
    st1     {v2.8b}, [x0], x1
.endr
    ret
endfunc
.endm

/* Instantiate the 8-wide routines: 8x4, 8x8, 8x16, 8x32. */
pixel_avg_pp_8xN_neon 4
pixel_avg_pp_8xN_neon 8
pixel_avg_pp_8xN_neon 16
pixel_avg_pp_8xN_neon 32
View file
x265_3.4.tar.gz/source/common/aarch64/pixel-util.S
Added
@@ -0,0 +1,419 @@ +/***************************************************************************** + * Copyright (C) 2020 MulticoreWare, Inc + * + * Authors: Yimeng Su <yimeng.su@huawei.com> + * Hongbin Liu <liuhongbin1@huawei.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA. + * + * This program is also available under a commercial proprietary license. + * For more information, contact us at license @ x265.com. 
+ *****************************************************************************/ + +#include "asm.S" + +.section .rodata + +.align 4 + +.text + +.macro x265_satd_4x8_8x4_end_neon + add v0.8h, v4.8h, v6.8h + add v1.8h, v5.8h, v7.8h + sub v2.8h, v4.8h, v6.8h + sub v3.8h, v5.8h, v7.8h + + trn1 v16.8h, v0.8h, v1.8h + trn2 v17.8h, v0.8h, v1.8h + add v4.8h, v16.8h, v17.8h + trn1 v18.8h, v2.8h, v3.8h + trn2 v19.8h, v2.8h, v3.8h + sub v5.8h, v16.8h, v17.8h + add v6.8h, v18.8h, v19.8h + sub v7.8h, v18.8h, v19.8h + trn1 v0.4s, v4.4s, v6.4s + trn2 v2.4s, v4.4s, v6.4s + abs v0.8h, v0.8h + trn1 v1.4s, v5.4s, v7.4s + trn2 v3.4s, v5.4s, v7.4s + abs v2.8h, v2.8h + abs v1.8h, v1.8h + abs v3.8h, v3.8h + umax v0.8h, v0.8h, v2.8h + umax v1.8h, v1.8h, v3.8h + add v0.8h, v0.8h, v1.8h + uaddlv s0, v0.8h +.endm + +.macro pixel_satd_4x8_neon + ld1r {v1.2s}, [x2], x3 + ld1r {v0.2s}, [x0], x1 + ld1r {v3.2s}, [x2], x3 + ld1r {v2.2s}, [x0], x1 + ld1r {v5.2s}, [x2], x3 + ld1r {v4.2s}, [x0], x1 + ld1r {v7.2s}, [x2], x3 + ld1r {v6.2s}, [x0], x1 + + ld1 {v1.s}[1], [x2], x3 + ld1 {v0.s}[1], [x0], x1 + usubl v0.8h, v0.8b, v1.8b + ld1 {v3.s}[1], [x2], x3 + ld1 {v2.s}[1], [x0], x1 + usubl v1.8h, v2.8b, v3.8b + ld1 {v5.s}[1], [x2], x3 + ld1 {v4.s}[1], [x0], x1 + usubl v2.8h, v4.8b, v5.8b + ld1 {v7.s}[1], [x2], x3 + add v4.8h, v0.8h, v1.8h + sub v5.8h, v0.8h, v1.8h + ld1 {v6.s}[1], [x0], x1 + usubl v3.8h, v6.8b, v7.8b + add v6.8h, v2.8h, v3.8h + sub v7.8h, v2.8h, v3.8h + x265_satd_4x8_8x4_end_neon +.endm + +// template<int w, int h> +// int satd4(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2) +function x265_pixel_satd_4x8_neon + pixel_satd_4x8_neon + mov w0, v0.s[0] + ret +endfunc + +// template<int w, int h> +// int satd4(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2) +function x265_pixel_satd_4x16_neon + eor w4, w4, w4 + pixel_satd_4x8_neon + mov w5, v0.s[0] + add w4, w4, w5 + pixel_satd_4x8_neon + mov w5, v0.s[0] + add w0, w5, w4 + 
ret +endfunc + +// template<int w, int h> +// int satd4(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2) +function x265_pixel_satd_4x32_neon + eor w4, w4, w4 +.rept 4 + pixel_satd_4x8_neon + mov w5, v0.s[0] + add w4, w4, w5 +.endr + mov w0, w4 + ret +endfunc + +// template<int w, int h> +// int satd4(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2) +function x265_pixel_satd_12x16_neon + mov x4, x0 + mov x5, x2 + eor w7, w7, w7 + pixel_satd_4x8_neon + mov w6, v0.s[0] + add w7, w7, w6 + pixel_satd_4x8_neon + mov w6, v0.s[0] + add w7, w7, w6 + + add x0, x4, #4 + add x2, x5, #4 + pixel_satd_4x8_neon + mov w6, v0.s[0] + add w7, w7, w6 + pixel_satd_4x8_neon + mov w6, v0.s[0] + add w7, w7, w6 + + add x0, x4, #8 + add x2, x5, #8 + pixel_satd_4x8_neon + mov w6, v0.s[0] + add w7, w7, w6 + pixel_satd_4x8_neon + mov w6, v0.s[0] + add w0, w7, w6 + ret +endfunc + +// template<int w, int h> +// int satd4(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2) +function x265_pixel_satd_12x32_neon + mov x4, x0 + mov x5, x2 + eor w7, w7, w7 +.rept 4 + pixel_satd_4x8_neon + mov w6, v0.s[0] + add w7, w7, w6 +.endr + + add x0, x4, #4 + add x2, x5, #4 +.rept 4 + pixel_satd_4x8_neon + mov w6, v0.s[0] + add w7, w7, w6 +.endr + + add x0, x4, #8 + add x2, x5, #8 +.rept 4 + pixel_satd_4x8_neon + mov w6, v0.s[0] + add w7, w7, w6 +.endr + + mov w0, w7 + ret +endfunc + +// template<int w, int h> +// int satd4(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2) +function x265_pixel_satd_8x8_neon + eor w4, w4, w4 + mov x6, x0 + mov x7, x2 + pixel_satd_4x8_neon + mov w5, v0.s[0] + add w4, w4, w5 + add x0, x6, #4 + add x2, x7, #4
View file
x265_3.4.tar.gz/source/common/aarch64/pixel-util.h
Added
@@ -0,0 +1,40 @@ +/***************************************************************************** + * Copyright (C) 2020 MulticoreWare, Inc + * + * Authors: Yimeng Su <yimeng.su@huawei.com> + * Hongbin Liu <liuhongbin1@huawei.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA. + * + * This program is also available under a commercial proprietary license. + * For more information, contact us at license @ x265.com. 
+ *****************************************************************************/ + +#ifndef X265_PIXEL_UTIL_AARCH64_H +#define X265_PIXEL_UTIL_AARCH64_H + +int x265_pixel_satd_4x4_neon(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2); +int x265_pixel_satd_4x8_neon(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2); +int x265_pixel_satd_4x16_neon(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2); +int x265_pixel_satd_4x32_neon(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2); +int x265_pixel_satd_8x4_neon(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2); +int x265_pixel_satd_8x8_neon(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2); +int x265_pixel_satd_12x16_neon(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2); +int x265_pixel_satd_12x32_neon(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2); + +uint32_t x265_quant_neon(const int16_t* coef, const int32_t* quantCoeff, int32_t* deltaU, int16_t* qCoef, int qBits, int add, int numCoeff); +int PFX(psyCost_4x4_neon)(const pixel* source, intptr_t sstride, const pixel* recon, intptr_t rstride); + +#endif // ifndef X265_PIXEL_UTIL_AARCH64_H
View file
x265_3.4.tar.gz/source/common/aarch64/pixel.h
Added
@@ -0,0 +1,105 @@ +/***************************************************************************** + * Copyright (C) 2020 MulticoreWare, Inc + * + * Authors: Hongbin Liu <liuhongbin1@huawei.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA. + * + * This program is also available under a commercial proprietary license. + * For more information, contact us at license @ x265.com. 
+ *****************************************************************************/ + +#ifndef X265_I386_PIXEL_AARCH64_H +#define X265_I386_PIXEL_AARCH64_H + +void x265_pixel_avg_pp_4x4_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int); +void x265_pixel_avg_pp_4x8_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int); +void x265_pixel_avg_pp_4x16_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int); +void x265_pixel_avg_pp_8x4_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int); +void x265_pixel_avg_pp_8x8_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int); +void x265_pixel_avg_pp_8x16_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int); +void x265_pixel_avg_pp_8x32_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int); +void x265_pixel_avg_pp_12x16_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int); +void x265_pixel_avg_pp_16x4_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int); +void x265_pixel_avg_pp_16x8_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int); +void x265_pixel_avg_pp_16x12_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int); +void x265_pixel_avg_pp_16x16_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int); +void x265_pixel_avg_pp_16x32_neon (pixel* dst, intptr_t dstride, const pixel* 
src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int); +void x265_pixel_avg_pp_16x64_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int); +void x265_pixel_avg_pp_24x32_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int); +void x265_pixel_avg_pp_32x8_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int); +void x265_pixel_avg_pp_32x16_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int); +void x265_pixel_avg_pp_32x24_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int); +void x265_pixel_avg_pp_32x32_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int); +void x265_pixel_avg_pp_32x64_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int); +void x265_pixel_avg_pp_48x64_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int); +void x265_pixel_avg_pp_64x16_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int); +void x265_pixel_avg_pp_64x32_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int); +void x265_pixel_avg_pp_64x48_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int); +void x265_pixel_avg_pp_64x64_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int); + +void x265_sad_x3_4x4_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, intptr_t frefstride, int32_t* res); +void 
x265_sad_x3_4x8_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, intptr_t frefstride, int32_t* res); +void x265_sad_x3_4x16_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, intptr_t frefstride, int32_t* res); +void x265_sad_x3_8x4_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, intptr_t frefstride, int32_t* res); +void x265_sad_x3_8x8_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, intptr_t frefstride, int32_t* res); +void x265_sad_x3_8x16_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, intptr_t frefstride, int32_t* res); +void x265_sad_x3_8x32_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, intptr_t frefstride, int32_t* res); +void x265_sad_x3_12x16_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, intptr_t frefstride, int32_t* res); +void x265_sad_x3_16x4_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, intptr_t frefstride, int32_t* res); +void x265_sad_x3_16x8_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, intptr_t frefstride, int32_t* res); +void x265_sad_x3_16x12_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, intptr_t frefstride, int32_t* res); +void x265_sad_x3_16x16_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, intptr_t frefstride, int32_t* res); +void x265_sad_x3_16x32_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, intptr_t frefstride, int32_t* res); +void x265_sad_x3_16x64_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, intptr_t frefstride, int32_t* res); +void x265_sad_x3_24x32_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, intptr_t frefstride, int32_t* res); 
+void x265_sad_x3_32x8_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, intptr_t frefstride, int32_t* res); +void x265_sad_x3_32x16_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, intptr_t frefstride, int32_t* res); +void x265_sad_x3_32x24_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, intptr_t frefstride, int32_t* res); +void x265_sad_x3_32x32_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, intptr_t frefstride, int32_t* res); +void x265_sad_x3_32x64_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, intptr_t frefstride, int32_t* res); +void x265_sad_x3_48x64_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, intptr_t frefstride, int32_t* res); +void x265_sad_x3_64x16_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, intptr_t frefstride, int32_t* res); +void x265_sad_x3_64x32_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, intptr_t frefstride, int32_t* res); +void x265_sad_x3_64x48_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, intptr_t frefstride, int32_t* res); +void x265_sad_x3_64x64_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, intptr_t frefstride, int32_t* res); + +void x265_sad_x4_4x4_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, const pixel* fref3, intptr_t frefstride, int32_t* res); +void x265_sad_x4_4x8_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, const pixel* fref3, intptr_t frefstride, int32_t* res); +void x265_sad_x4_4x16_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, const pixel* fref3, intptr_t frefstride, int32_t* res); +void x265_sad_x4_8x4_neon(const pixel* fenc, const pixel* fref0, const 
pixel* fref1, const pixel* fref2, const pixel* fref3, intptr_t frefstride, int32_t* res); +void x265_sad_x4_8x8_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, const pixel* fref3, intptr_t frefstride, int32_t* res); +void x265_sad_x4_8x16_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, const pixel* fref3, intptr_t frefstride, int32_t* res); +void x265_sad_x4_8x32_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, const pixel* fref3, intptr_t frefstride, int32_t* res); +void x265_sad_x4_12x16_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, const pixel* fref3, intptr_t frefstride, int32_t* res); +void x265_sad_x4_16x4_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, const pixel* fref3, intptr_t frefstride, int32_t* res); +void x265_sad_x4_16x8_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, const pixel* fref3, intptr_t frefstride, int32_t* res); +void x265_sad_x4_16x12_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, const pixel* fref3, intptr_t frefstride, int32_t* res); +void x265_sad_x4_16x16_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, const pixel* fref3, intptr_t frefstride, int32_t* res); +void x265_sad_x4_16x32_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, const pixel* fref3, intptr_t frefstride, int32_t* res); +void x265_sad_x4_16x64_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, const pixel* fref3, intptr_t frefstride, int32_t* res); +void x265_sad_x4_24x32_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, const pixel* fref3, intptr_t frefstride, int32_t* res); +void x265_sad_x4_32x8_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, 
const pixel* fref3, intptr_t frefstride, int32_t* res); +void x265_sad_x4_32x16_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, const pixel* fref3, intptr_t frefstride, int32_t* res); +void x265_sad_x4_32x24_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, const pixel* fref3, intptr_t frefstride, int32_t* res); +void x265_sad_x4_32x32_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, const pixel* fref3, intptr_t frefstride, int32_t* res); +void x265_sad_x4_32x64_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, const pixel* fref3, intptr_t frefstride, int32_t* res); +void x265_sad_x4_48x64_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, const pixel* fref3, intptr_t frefstride, int32_t* res); +void x265_sad_x4_64x16_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, const pixel* fref3, intptr_t frefstride, int32_t* res); +void x265_sad_x4_64x32_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, const pixel* fref3, intptr_t frefstride, int32_t* res); +void x265_sad_x4_64x48_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, const pixel* fref3, intptr_t frefstride, int32_t* res); +void x265_sad_x4_64x64_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, const pixel* fref3, intptr_t frefstride, int32_t* res); + +#endif // ifndef X265_I386_PIXEL_AARCH64_H
View file
x265_3.4.tar.gz/source/common/aarch64/sad-a.S
Added
@@ -0,0 +1,105 @@ +/***************************************************************************** + * Copyright (C) 2020 MulticoreWare, Inc + * + * Authors: Hongbin Liu <liuhongbin1@huawei.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA. + * + * This program is also available under a commercial proprietary license. + * For more information, contact us at license @ x265.com. 
+ *****************************************************************************/ + +#include "asm.S" + +.section .rodata + +.align 4 + +.text + +.macro SAD_X_START_8 x + ld1 {v0.8b}, [x0], x9 +.if \x == 3 + ld1 {v1.8b}, [x1], x4 + ld1 {v2.8b}, [x2], x4 + ld1 {v3.8b}, [x3], x4 +.elseif \x == 4 + ld1 {v1.8b}, [x1], x5 + ld1 {v2.8b}, [x2], x5 + ld1 {v3.8b}, [x3], x5 + ld1 {v4.8b}, [x4], x5 +.endif + uabdl v16.8h, v0.8b, v1.8b + uabdl v17.8h, v0.8b, v2.8b + uabdl v18.8h, v0.8b, v3.8b +.if \x == 4 + uabdl v19.8h, v0.8b, v4.8b +.endif +.endm + +.macro SAD_X_8 x + ld1 {v0.8b}, [x0], x9 +.if \x == 3 + ld1 {v1.8b}, [x1], x4 + ld1 {v2.8b}, [x2], x4 + ld1 {v3.8b}, [x3], x4 +.elseif \x == 4 + ld1 {v1.8b}, [x1], x5 + ld1 {v2.8b}, [x2], x5 + ld1 {v3.8b}, [x3], x5 + ld1 {v4.8b}, [x4], x5 +.endif + uabal v16.8h, v0.8b, v1.8b + uabal v17.8h, v0.8b, v2.8b + uabal v18.8h, v0.8b, v3.8b +.if \x == 4 + uabal v19.8h, v0.8b, v4.8b +.endif +.endm + +.macro SAD_X_8xN x, h +function x265_sad_x\x\()_8x\h\()_neon + mov x9, #FENC_STRIDE + SAD_X_START_8 \x +.rept \h - 1 + SAD_X_8 \x +.endr + uaddlv s0, v16.8h + uaddlv s1, v17.8h + uaddlv s2, v18.8h +.if \x == 4 + uaddlv s3, v19.8h +.endif + +.if \x == 3 + stp s0, s1, [x5] + str s2, [x5, #8] +.elseif \x == 4 + stp s0, s1, [x6] + stp s2, s3, [x6, #8] +.endif + ret +endfunc +.endm + +SAD_X_8xN 3 4 +SAD_X_8xN 3 8 +SAD_X_8xN 3 16 +SAD_X_8xN 3 32 + +SAD_X_8xN 4 4 +SAD_X_8xN 4 8 +SAD_X_8xN 4 16 +SAD_X_8xN 4 32
View file
x265_3.3.tar.gz/source/common/arm/asm-primitives.cpp -> x265_3.4.tar.gz/source/common/arm/asm-primitives.cpp
Changed
@@ -5,6 +5,7 @@ * Praveen Kumar Tiwari <praveen@multicorewareinc.com> * Min Chen <chenm003@163.com> <min.chen@multicorewareinc.com> * Dnyaneshwar Gorade <dnyaneshwar@multicorewareinc.com> + * Hongbin Liu<liuhongbin1@huawei.com> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -48,77 +49,77 @@ p.ssim_4x4x2_core = PFX(ssim_4x4x2_core_neon); // addAvg - p.pu[LUMA_4x4].addAvg = PFX(addAvg_4x4_neon); - p.pu[LUMA_4x8].addAvg = PFX(addAvg_4x8_neon); - p.pu[LUMA_4x16].addAvg = PFX(addAvg_4x16_neon); - p.pu[LUMA_8x4].addAvg = PFX(addAvg_8x4_neon); - p.pu[LUMA_8x8].addAvg = PFX(addAvg_8x8_neon); - p.pu[LUMA_8x16].addAvg = PFX(addAvg_8x16_neon); - p.pu[LUMA_8x32].addAvg = PFX(addAvg_8x32_neon); - p.pu[LUMA_12x16].addAvg = PFX(addAvg_12x16_neon); - p.pu[LUMA_16x4].addAvg = PFX(addAvg_16x4_neon); - p.pu[LUMA_16x8].addAvg = PFX(addAvg_16x8_neon); - p.pu[LUMA_16x12].addAvg = PFX(addAvg_16x12_neon); - p.pu[LUMA_16x16].addAvg = PFX(addAvg_16x16_neon); - p.pu[LUMA_16x32].addAvg = PFX(addAvg_16x32_neon); - p.pu[LUMA_16x64].addAvg = PFX(addAvg_16x64_neon); - p.pu[LUMA_24x32].addAvg = PFX(addAvg_24x32_neon); - p.pu[LUMA_32x8].addAvg = PFX(addAvg_32x8_neon); - p.pu[LUMA_32x16].addAvg = PFX(addAvg_32x16_neon); - p.pu[LUMA_32x24].addAvg = PFX(addAvg_32x24_neon); - p.pu[LUMA_32x32].addAvg = PFX(addAvg_32x32_neon); - p.pu[LUMA_32x64].addAvg = PFX(addAvg_32x64_neon); - p.pu[LUMA_48x64].addAvg = PFX(addAvg_48x64_neon); - p.pu[LUMA_64x16].addAvg = PFX(addAvg_64x16_neon); - p.pu[LUMA_64x32].addAvg = PFX(addAvg_64x32_neon); - p.pu[LUMA_64x48].addAvg = PFX(addAvg_64x48_neon); - p.pu[LUMA_64x64].addAvg = PFX(addAvg_64x64_neon); + p.pu[LUMA_4x4].addAvg[NONALIGNED] = PFX(addAvg_4x4_neon); + p.pu[LUMA_4x8].addAvg[NONALIGNED] = PFX(addAvg_4x8_neon); + p.pu[LUMA_4x16].addAvg[NONALIGNED] = PFX(addAvg_4x16_neon); + p.pu[LUMA_8x4].addAvg[NONALIGNED] = PFX(addAvg_8x4_neon); + p.pu[LUMA_8x8].addAvg[NONALIGNED] 
= PFX(addAvg_8x8_neon); + p.pu[LUMA_8x16].addAvg[NONALIGNED] = PFX(addAvg_8x16_neon); + p.pu[LUMA_8x32].addAvg[NONALIGNED] = PFX(addAvg_8x32_neon); + p.pu[LUMA_12x16].addAvg[NONALIGNED] = PFX(addAvg_12x16_neon); + p.pu[LUMA_16x4].addAvg[NONALIGNED] = PFX(addAvg_16x4_neon); + p.pu[LUMA_16x8].addAvg[NONALIGNED] = PFX(addAvg_16x8_neon); + p.pu[LUMA_16x12].addAvg[NONALIGNED] = PFX(addAvg_16x12_neon); + p.pu[LUMA_16x16].addAvg[NONALIGNED] = PFX(addAvg_16x16_neon); + p.pu[LUMA_16x32].addAvg[NONALIGNED] = PFX(addAvg_16x32_neon); + p.pu[LUMA_16x64].addAvg[NONALIGNED] = PFX(addAvg_16x64_neon); + p.pu[LUMA_24x32].addAvg[NONALIGNED] = PFX(addAvg_24x32_neon); + p.pu[LUMA_32x8].addAvg[NONALIGNED] = PFX(addAvg_32x8_neon); + p.pu[LUMA_32x16].addAvg[NONALIGNED] = PFX(addAvg_32x16_neon); + p.pu[LUMA_32x24].addAvg[NONALIGNED] = PFX(addAvg_32x24_neon); + p.pu[LUMA_32x32].addAvg[NONALIGNED] = PFX(addAvg_32x32_neon); + p.pu[LUMA_32x64].addAvg[NONALIGNED] = PFX(addAvg_32x64_neon); + p.pu[LUMA_48x64].addAvg[NONALIGNED] = PFX(addAvg_48x64_neon); + p.pu[LUMA_64x16].addAvg[NONALIGNED] = PFX(addAvg_64x16_neon); + p.pu[LUMA_64x32].addAvg[NONALIGNED] = PFX(addAvg_64x32_neon); + p.pu[LUMA_64x48].addAvg[NONALIGNED] = PFX(addAvg_64x48_neon); + p.pu[LUMA_64x64].addAvg[NONALIGNED] = PFX(addAvg_64x64_neon); // chroma addAvg - p.chroma[X265_CSP_I420].pu[CHROMA_420_4x2].addAvg = PFX(addAvg_4x2_neon); - p.chroma[X265_CSP_I420].pu[CHROMA_420_4x4].addAvg = PFX(addAvg_4x4_neon); - p.chroma[X265_CSP_I420].pu[CHROMA_420_4x8].addAvg = PFX(addAvg_4x8_neon); - p.chroma[X265_CSP_I420].pu[CHROMA_420_4x16].addAvg = PFX(addAvg_4x16_neon); - p.chroma[X265_CSP_I420].pu[CHROMA_420_6x8].addAvg = PFX(addAvg_6x8_neon); - p.chroma[X265_CSP_I420].pu[CHROMA_420_8x2].addAvg = PFX(addAvg_8x2_neon); - p.chroma[X265_CSP_I420].pu[CHROMA_420_8x4].addAvg = PFX(addAvg_8x4_neon); - p.chroma[X265_CSP_I420].pu[CHROMA_420_8x6].addAvg = PFX(addAvg_8x6_neon); - p.chroma[X265_CSP_I420].pu[CHROMA_420_8x8].addAvg = PFX(addAvg_8x8_neon); - 
p.chroma[X265_CSP_I420].pu[CHROMA_420_8x16].addAvg = PFX(addAvg_8x16_neon); - p.chroma[X265_CSP_I420].pu[CHROMA_420_8x32].addAvg = PFX(addAvg_8x32_neon); - p.chroma[X265_CSP_I420].pu[CHROMA_420_12x16].addAvg = PFX(addAvg_12x16_neon); - p.chroma[X265_CSP_I420].pu[CHROMA_420_16x4].addAvg = PFX(addAvg_16x4_neon); - p.chroma[X265_CSP_I420].pu[CHROMA_420_16x8].addAvg = PFX(addAvg_16x8_neon); - p.chroma[X265_CSP_I420].pu[CHROMA_420_16x12].addAvg = PFX(addAvg_16x12_neon); - p.chroma[X265_CSP_I420].pu[CHROMA_420_16x16].addAvg = PFX(addAvg_16x16_neon); - p.chroma[X265_CSP_I420].pu[CHROMA_420_16x32].addAvg = PFX(addAvg_16x32_neon); - p.chroma[X265_CSP_I420].pu[CHROMA_420_24x32].addAvg = PFX(addAvg_24x32_neon); - p.chroma[X265_CSP_I420].pu[CHROMA_420_32x8].addAvg = PFX(addAvg_32x8_neon); - p.chroma[X265_CSP_I420].pu[CHROMA_420_32x16].addAvg = PFX(addAvg_32x16_neon); - p.chroma[X265_CSP_I420].pu[CHROMA_420_32x24].addAvg = PFX(addAvg_32x24_neon); - p.chroma[X265_CSP_I420].pu[CHROMA_420_32x32].addAvg = PFX(addAvg_32x32_neon); - - p.chroma[X265_CSP_I422].pu[CHROMA_422_4x8].addAvg = PFX(addAvg_4x8_neon); - p.chroma[X265_CSP_I422].pu[CHROMA_422_4x16].addAvg = PFX(addAvg_4x16_neon); - p.chroma[X265_CSP_I422].pu[CHROMA_422_4x32].addAvg = PFX(addAvg_4x32_neon); - p.chroma[X265_CSP_I422].pu[CHROMA_422_6x16].addAvg = PFX(addAvg_6x16_neon); - p.chroma[X265_CSP_I422].pu[CHROMA_422_8x4].addAvg = PFX(addAvg_8x4_neon); - p.chroma[X265_CSP_I422].pu[CHROMA_422_8x8].addAvg = PFX(addAvg_8x8_neon); - p.chroma[X265_CSP_I422].pu[CHROMA_422_8x12].addAvg = PFX(addAvg_8x12_neon); - p.chroma[X265_CSP_I422].pu[CHROMA_422_8x16].addAvg = PFX(addAvg_8x16_neon); - p.chroma[X265_CSP_I422].pu[CHROMA_422_8x32].addAvg = PFX(addAvg_8x32_neon); - p.chroma[X265_CSP_I422].pu[CHROMA_422_8x64].addAvg = PFX(addAvg_8x64_neon); - p.chroma[X265_CSP_I422].pu[CHROMA_422_12x32].addAvg = PFX(addAvg_12x32_neon); - p.chroma[X265_CSP_I422].pu[CHROMA_422_16x8].addAvg = PFX(addAvg_16x8_neon); - 
p.chroma[X265_CSP_I422].pu[CHROMA_422_16x16].addAvg = PFX(addAvg_16x16_neon); - p.chroma[X265_CSP_I422].pu[CHROMA_422_16x24].addAvg = PFX(addAvg_16x24_neon); - p.chroma[X265_CSP_I422].pu[CHROMA_422_16x32].addAvg = PFX(addAvg_16x32_neon); - p.chroma[X265_CSP_I422].pu[CHROMA_422_16x64].addAvg = PFX(addAvg_16x64_neon); - p.chroma[X265_CSP_I422].pu[CHROMA_422_24x64].addAvg = PFX(addAvg_24x64_neon); - p.chroma[X265_CSP_I422].pu[CHROMA_422_32x16].addAvg = PFX(addAvg_32x16_neon); - p.chroma[X265_CSP_I422].pu[CHROMA_422_32x32].addAvg = PFX(addAvg_32x32_neon); - p.chroma[X265_CSP_I422].pu[CHROMA_422_32x48].addAvg = PFX(addAvg_32x48_neon); - p.chroma[X265_CSP_I422].pu[CHROMA_422_32x64].addAvg = PFX(addAvg_32x64_neon); + p.chroma[X265_CSP_I420].pu[CHROMA_420_4x2].addAvg[NONALIGNED] = PFX(addAvg_4x2_neon); + p.chroma[X265_CSP_I420].pu[CHROMA_420_4x4].addAvg[NONALIGNED] = PFX(addAvg_4x4_neon); + p.chroma[X265_CSP_I420].pu[CHROMA_420_4x8].addAvg[NONALIGNED] = PFX(addAvg_4x8_neon); + p.chroma[X265_CSP_I420].pu[CHROMA_420_4x16].addAvg[NONALIGNED] = PFX(addAvg_4x16_neon); + p.chroma[X265_CSP_I420].pu[CHROMA_420_6x8].addAvg[NONALIGNED] = PFX(addAvg_6x8_neon); + p.chroma[X265_CSP_I420].pu[CHROMA_420_8x2].addAvg[NONALIGNED] = PFX(addAvg_8x2_neon); + p.chroma[X265_CSP_I420].pu[CHROMA_420_8x4].addAvg[NONALIGNED] = PFX(addAvg_8x4_neon); + p.chroma[X265_CSP_I420].pu[CHROMA_420_8x6].addAvg[NONALIGNED] = PFX(addAvg_8x6_neon); + p.chroma[X265_CSP_I420].pu[CHROMA_420_8x8].addAvg[NONALIGNED] = PFX(addAvg_8x8_neon); + p.chroma[X265_CSP_I420].pu[CHROMA_420_8x16].addAvg[NONALIGNED] = PFX(addAvg_8x16_neon); + p.chroma[X265_CSP_I420].pu[CHROMA_420_8x32].addAvg[NONALIGNED] = PFX(addAvg_8x32_neon); + p.chroma[X265_CSP_I420].pu[CHROMA_420_12x16].addAvg[NONALIGNED] = PFX(addAvg_12x16_neon); + p.chroma[X265_CSP_I420].pu[CHROMA_420_16x4].addAvg[NONALIGNED] = PFX(addAvg_16x4_neon); + p.chroma[X265_CSP_I420].pu[CHROMA_420_16x8].addAvg[NONALIGNED] = PFX(addAvg_16x8_neon); + 
p.chroma[X265_CSP_I420].pu[CHROMA_420_16x12].addAvg[NONALIGNED] = PFX(addAvg_16x12_neon); + p.chroma[X265_CSP_I420].pu[CHROMA_420_16x16].addAvg[NONALIGNED] = PFX(addAvg_16x16_neon); + p.chroma[X265_CSP_I420].pu[CHROMA_420_16x32].addAvg[NONALIGNED] = PFX(addAvg_16x32_neon); + p.chroma[X265_CSP_I420].pu[CHROMA_420_24x32].addAvg[NONALIGNED] = PFX(addAvg_24x32_neon); + p.chroma[X265_CSP_I420].pu[CHROMA_420_32x8].addAvg[NONALIGNED] = PFX(addAvg_32x8_neon); + p.chroma[X265_CSP_I420].pu[CHROMA_420_32x16].addAvg[NONALIGNED] = PFX(addAvg_32x16_neon); + p.chroma[X265_CSP_I420].pu[CHROMA_420_32x24].addAvg[NONALIGNED] = PFX(addAvg_32x24_neon); + p.chroma[X265_CSP_I420].pu[CHROMA_420_32x32].addAvg[NONALIGNED] = PFX(addAvg_32x32_neon); + + p.chroma[X265_CSP_I422].pu[CHROMA_422_4x8].addAvg[NONALIGNED] = PFX(addAvg_4x8_neon); + p.chroma[X265_CSP_I422].pu[CHROMA_422_4x16].addAvg[NONALIGNED] = PFX(addAvg_4x16_neon); + p.chroma[X265_CSP_I422].pu[CHROMA_422_4x32].addAvg[NONALIGNED] = PFX(addAvg_4x32_neon); + p.chroma[X265_CSP_I422].pu[CHROMA_422_6x16].addAvg[NONALIGNED] = PFX(addAvg_6x16_neon); + p.chroma[X265_CSP_I422].pu[CHROMA_422_8x4].addAvg[NONALIGNED] = PFX(addAvg_8x4_neon); + p.chroma[X265_CSP_I422].pu[CHROMA_422_8x8].addAvg[NONALIGNED] = PFX(addAvg_8x8_neon); + p.chroma[X265_CSP_I422].pu[CHROMA_422_8x12].addAvg[NONALIGNED] = PFX(addAvg_8x12_neon); + p.chroma[X265_CSP_I422].pu[CHROMA_422_8x16].addAvg[NONALIGNED] = PFX(addAvg_8x16_neon); + p.chroma[X265_CSP_I422].pu[CHROMA_422_8x32].addAvg[NONALIGNED] = PFX(addAvg_8x32_neon); + p.chroma[X265_CSP_I422].pu[CHROMA_422_8x64].addAvg[NONALIGNED] = PFX(addAvg_8x64_neon); + p.chroma[X265_CSP_I422].pu[CHROMA_422_12x32].addAvg[NONALIGNED] = PFX(addAvg_12x32_neon); + p.chroma[X265_CSP_I422].pu[CHROMA_422_16x8].addAvg[NONALIGNED] = PFX(addAvg_16x8_neon); + p.chroma[X265_CSP_I422].pu[CHROMA_422_16x16].addAvg[NONALIGNED] = PFX(addAvg_16x16_neon); + p.chroma[X265_CSP_I422].pu[CHROMA_422_16x24].addAvg[NONALIGNED] = PFX(addAvg_16x24_neon); + 
p.chroma[X265_CSP_I422].pu[CHROMA_422_16x32].addAvg[NONALIGNED] = PFX(addAvg_16x32_neon); + p.chroma[X265_CSP_I422].pu[CHROMA_422_16x64].addAvg[NONALIGNED] = PFX(addAvg_16x64_neon); + p.chroma[X265_CSP_I422].pu[CHROMA_422_24x64].addAvg[NONALIGNED] = PFX(addAvg_24x64_neon); + p.chroma[X265_CSP_I422].pu[CHROMA_422_32x16].addAvg[NONALIGNED] = PFX(addAvg_32x16_neon); + p.chroma[X265_CSP_I422].pu[CHROMA_422_32x32].addAvg[NONALIGNED] = PFX(addAvg_32x32_neon); + p.chroma[X265_CSP_I422].pu[CHROMA_422_32x48].addAvg[NONALIGNED] = PFX(addAvg_32x48_neon); + p.chroma[X265_CSP_I422].pu[CHROMA_422_32x64].addAvg[NONALIGNED] = PFX(addAvg_32x64_neon); // quant p.quant = PFX(quant_neon); @@ -402,7 +403,7 @@ p.scale2D_64to32 = PFX(scale2D_64to32_neon); // scale1D_128to64 - p.scale1D_128to64 = PFX(scale1D_128to64_neon); + p.scale1D_128to64[NONALIGNED] = PFX(scale1D_128to64_neon); // copy_count p.cu[BLOCK_4x4].copy_cnt = PFX(copy_cnt_4_neon); @@ -411,37 +412,37 @@ p.cu[BLOCK_32x32].copy_cnt = PFX(copy_cnt_32_neon); // filterPixelToShort - p.pu[LUMA_4x4].convert_p2s = PFX(filterPixelToShort_4x4_neon); - p.pu[LUMA_4x8].convert_p2s = PFX(filterPixelToShort_4x8_neon); - p.pu[LUMA_4x16].convert_p2s = PFX(filterPixelToShort_4x16_neon); - p.pu[LUMA_8x4].convert_p2s = PFX(filterPixelToShort_8x4_neon); - p.pu[LUMA_8x8].convert_p2s = PFX(filterPixelToShort_8x8_neon); - p.pu[LUMA_8x16].convert_p2s = PFX(filterPixelToShort_8x16_neon); - p.pu[LUMA_8x32].convert_p2s = PFX(filterPixelToShort_8x32_neon); - p.pu[LUMA_12x16].convert_p2s = PFX(filterPixelToShort_12x16_neon); - p.pu[LUMA_16x4].convert_p2s = PFX(filterPixelToShort_16x4_neon); - p.pu[LUMA_16x8].convert_p2s = PFX(filterPixelToShort_16x8_neon); - p.pu[LUMA_16x12].convert_p2s = PFX(filterPixelToShort_16x12_neon); - p.pu[LUMA_16x16].convert_p2s = PFX(filterPixelToShort_16x16_neon); - p.pu[LUMA_16x32].convert_p2s = PFX(filterPixelToShort_16x32_neon); - p.pu[LUMA_16x64].convert_p2s = PFX(filterPixelToShort_16x64_neon); - 
p.pu[LUMA_24x32].convert_p2s = PFX(filterPixelToShort_24x32_neon); - p.pu[LUMA_32x8].convert_p2s = PFX(filterPixelToShort_32x8_neon); - p.pu[LUMA_32x16].convert_p2s = PFX(filterPixelToShort_32x16_neon); - p.pu[LUMA_32x24].convert_p2s = PFX(filterPixelToShort_32x24_neon); - p.pu[LUMA_32x32].convert_p2s = PFX(filterPixelToShort_32x32_neon); - p.pu[LUMA_32x64].convert_p2s = PFX(filterPixelToShort_32x64_neon); - p.pu[LUMA_48x64].convert_p2s = PFX(filterPixelToShort_48x64_neon); - p.pu[LUMA_64x16].convert_p2s = PFX(filterPixelToShort_64x16_neon); - p.pu[LUMA_64x32].convert_p2s = PFX(filterPixelToShort_64x32_neon); - p.pu[LUMA_64x48].convert_p2s = PFX(filterPixelToShort_64x48_neon); - p.pu[LUMA_64x64].convert_p2s = PFX(filterPixelToShort_64x64_neon); + p.pu[LUMA_4x4].convert_p2s[NONALIGNED] = PFX(filterPixelToShort_4x4_neon); + p.pu[LUMA_4x8].convert_p2s[NONALIGNED] = PFX(filterPixelToShort_4x8_neon); + p.pu[LUMA_4x16].convert_p2s[NONALIGNED] = PFX(filterPixelToShort_4x16_neon); + p.pu[LUMA_8x4].convert_p2s[NONALIGNED] = PFX(filterPixelToShort_8x4_neon); + p.pu[LUMA_8x8].convert_p2s[NONALIGNED] = PFX(filterPixelToShort_8x8_neon); + p.pu[LUMA_8x16].convert_p2s[NONALIGNED] = PFX(filterPixelToShort_8x16_neon); + p.pu[LUMA_8x32].convert_p2s[NONALIGNED] = PFX(filterPixelToShort_8x32_neon);
View file
x265_3.3.tar.gz/source/common/common.h -> x265_3.4.tar.gz/source/common/common.h
Changed
@@ -129,6 +129,7 @@ typedef uint64_t sum2_t; typedef uint64_t pixel4; typedef int64_t ssum2_t; +#define SHIFT_TO_BITPLANE 9 #define HISTOGRAM_BINS 1024 #else typedef uint8_t pixel; @@ -136,6 +137,7 @@ typedef uint32_t sum2_t; typedef uint32_t pixel4; typedef int32_t ssum2_t; // Signed sum +#define SHIFT_TO_BITPLANE 7 #define HISTOGRAM_BINS 256 #endif // if HIGH_BIT_DEPTH @@ -270,6 +272,9 @@ #define MAX_TR_SIZE (1 << MAX_LOG2_TR_SIZE) #define MAX_TS_SIZE (1 << MAX_LOG2_TS_SIZE) +#define RDCOST_BASED_RSKIP 1 +#define EDGE_BASED_RSKIP 2 + #define COEF_REMAIN_BIN_REDUCTION 3 // indicates the level at which the VLC // transitions from Golomb-Rice to TU+EG(k)
View file
x265_3.3.tar.gz/source/common/cpu.cpp -> x265_3.4.tar.gz/source/common/cpu.cpp
Changed
@@ -5,6 +5,8 @@ * Laurent Aimar <fenrir@via.ecp.fr> * Fiona Glaser <fiona@x264.com> * Steve Borho <steve@borho.org> + * Hongbin Liu <liuhongbin1@huawei.com> + * Yimeng Su <yimeng.su@huawei.com> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -367,6 +369,8 @@ flags |= PFX(cpu_fast_neon_mrc_test)() ? X265_CPU_FAST_NEON_MRC : 0; #endif // TODO: write dual issue test? currently it's A8 (dual issue) vs. A9 (fast mrc) +#elif X265_ARCH_ARM64 + flags |= X265_CPU_NEON; #endif // if HAVE_ARMV6 return flags; }
View file
x265_3.3.tar.gz/source/common/frame.cpp -> x265_3.4.tar.gz/source/common/frame.cpp
Changed
@@ -61,6 +61,8 @@ m_edgePic = NULL; m_gaussianPic = NULL; m_thetaPic = NULL; + m_edgeBitPlane = NULL; + m_edgeBitPic = NULL; } bool Frame::create(x265_param *param, float* quantOffsets) @@ -115,6 +117,19 @@ m_thetaPic = X265_MALLOC(pixel, m_stride * (maxHeight + (m_lumaMarginY * 2))); } + if (param->recursionSkipMode == EDGE_BASED_RSKIP) + { + uint32_t numCuInWidth = (param->sourceWidth + param->maxCUSize - 1) / param->maxCUSize; + uint32_t numCuInHeight = (param->sourceHeight + param->maxCUSize - 1) / param->maxCUSize; + uint32_t lumaMarginX = param->maxCUSize + 32; + uint32_t lumaMarginY = param->maxCUSize + 16; + uint32_t stride = (numCuInWidth * param->maxCUSize) + (lumaMarginX << 1); + uint32_t maxHeight = numCuInHeight * param->maxCUSize; + uint32_t bitPlaneSize = stride * (maxHeight + (lumaMarginY * 2)); + CHECKED_MALLOC_ZERO(m_edgeBitPlane, pixel, bitPlaneSize); + m_edgeBitPic = m_edgeBitPlane + lumaMarginY * stride + lumaMarginX; + } + if (m_fencPic->create(param, !!m_param->bCopyPicToFrame) && m_lowres.create(param, m_fencPic, param->rc.qgSize)) { X265_CHECK((m_reconColCount == NULL), "m_reconColCount was initialized"); @@ -267,4 +282,10 @@ X265_FREE(m_gaussianPic); X265_FREE(m_thetaPic); } + + if (m_param->recursionSkipMode == EDGE_BASED_RSKIP) + { + X265_FREE_ZERO(m_edgeBitPlane); + m_edgeBitPic = NULL; + } }
View file
x265_3.3.tar.gz/source/common/frame.h -> x265_3.4.tar.gz/source/common/frame.h
Changed
@@ -99,7 +99,7 @@ float* m_quantOffsets; // points to quantOffsets in x265_picture x265_sei m_userSEI; uint32_t m_picStruct; // picture structure SEI message - x265_dolby_vision_rpu m_rpu; + x265_dolby_vision_rpu m_rpu; /* Frame Parallelism - notification between FrameEncoders of available motion reference rows */ ThreadSafeInteger* m_reconRowFlag; // flag of CTU rows completely reconstructed and extended for motion reference @@ -137,6 +137,10 @@ pixel* m_gaussianPic; pixel* m_thetaPic; + /* edge bit plane for rskips 2 and 3 */ + pixel* m_edgeBitPlane; + pixel* m_edgeBitPic; + Frame(); bool create(x265_param *param, float* quantOffsets);
View file
x265_3.3.tar.gz/source/common/param.cpp -> x265_3.4.tar.gz/source/common/param.cpp
Changed
@@ -198,7 +198,8 @@ param->bEnableWeightedPred = 1; param->bEnableWeightedBiPred = 0; param->bEnableEarlySkip = 1; - param->bEnableRecursionSkip = 1; + param->recursionSkipMode = 1; + param->edgeVarThreshold = 0.05f; param->bEnableAMP = 0; param->bEnableRectInter = 0; param->rdLevel = 3; @@ -285,6 +286,7 @@ param->rc.bEnableConstVbv = 0; param->bResetZoneConfig = 1; param->reconfigWindowSize = 0; + param->decoderVbvMaxRate = 0; /* Video Usability Information (VUI) */ param->vui.aspectRatioIdc = 0; @@ -546,7 +548,7 @@ param->maxNumMergeCand = 5; param->searchMethod = X265_STAR_SEARCH; param->bEnableTransformSkip = 1; - param->bEnableRecursionSkip = 0; + param->recursionSkipMode = 0; param->maxNumReferences = 5; param->limitReferences = 0; param->lookaheadSlices = 0; // disabled for best quality @@ -598,7 +600,7 @@ param->rc.hevcAq = 0; param->rc.qpStep = 1; param->rc.bEnableGrain = 1; - param->bEnableRecursionSkip = 0; + param->recursionSkipMode = 0; param->psyRd = 4.0; param->psyRdoq = 10.0; param->bEnableSAO = 0; @@ -702,8 +704,9 @@ OPT("ref") p->maxNumReferences = atoi(value); OPT("fast-intra") p->bEnableFastIntra = atobool(value); OPT("early-skip") p->bEnableEarlySkip = atobool(value); - OPT("rskip") p->bEnableRecursionSkip = atobool(value); - OPT("me")p->searchMethod = parseName(value, x265_motion_est_names, bError); + OPT("rskip") p->recursionSkipMode = atoi(value); + OPT("rskip-edge-threshold") p->edgeVarThreshold = atoi(value)/100.0f; + OPT("me") p->searchMethod = parseName(value, x265_motion_est_names, bError); OPT("subme") p->subpelRefine = atoi(value); OPT("merange") p->searchRange = atoi(value); OPT("rect") p->bEnableRectInter = atobool(value); @@ -919,7 +922,7 @@ OPT("max-merge") p->maxNumMergeCand = (uint32_t)atoi(value); OPT("temporal-mvp") p->bEnableTemporalMvp = atobool(value); OPT("early-skip") p->bEnableEarlySkip = atobool(value); - OPT("rskip") p->bEnableRecursionSkip = atobool(value); + OPT("rskip") p->recursionSkipMode = atoi(value); 
OPT("rdpenalty") p->rdPenalty = atoi(value); OPT("tskip") p->bEnableTransformSkip = atobool(value); OPT("no-tskip-fast") p->bEnableTSkipFast = atobool(value); @@ -1221,6 +1224,7 @@ } } OPT("hist-threshold") p->edgeTransitionThreshold = atof(value); + OPT("rskip-edge-threshold") p->edgeVarThreshold = atoi(value)/100.0f; OPT("lookahead-threads") p->lookaheadThreads = atoi(value); OPT("opt-cu-delta-qp") p->bOptCUDeltaQP = atobool(value); OPT("multi-pass-opt-analysis") p->analysisMultiPassRefine = atobool(value); @@ -1596,9 +1600,16 @@ CHECK(param->rdLevel < 1 || param->rdLevel > 6, "RD Level is out of range"); CHECK(param->rdoqLevel < 0 || param->rdoqLevel > 2, - "RDOQ Level is out of range"); + "RDOQ Level is out of range"); CHECK(param->dynamicRd < 0 || param->dynamicRd > x265_ADAPT_RD_STRENGTH, - "Dynamic RD strength must be between 0 and 4"); + "Dynamic RD strength must be between 0 and 4"); + CHECK(param->recursionSkipMode > 2 || param->recursionSkipMode < 0, + "Invalid Recursion skip mode. Valid modes 0,1,2"); + if (param->recursionSkipMode == EDGE_BASED_RSKIP) + { + CHECK(param->edgeVarThreshold < 0.0f || param->edgeVarThreshold > 1.0f, + "Minimum edge density percentage for a CU should be an integer between 0 to 100"); + } CHECK(param->bframes && param->bframes >= param->lookaheadDepth && !param->rc.bStatRead, "Lookahead depth must be greater than the max consecutive bframe count"); CHECK(param->bframes < 0, @@ -1789,6 +1800,7 @@ } CHECK(param->confWinRightOffset < 0, "Conformance Window Right Offset must be 0 or greater"); CHECK(param->confWinBottomOffset < 0, "Conformance Window Bottom Offset must be 0 or greater"); + CHECK(param->decoderVbvMaxRate < 0, "Invalid Decoder Vbv Maxrate. 
Value can not be less than zero"); return check_failed; } @@ -1908,7 +1920,9 @@ TOOLVAL(param->psyRdoq, "psy-rdoq=%.2lf"); TOOLOPT(param->bEnableRdRefine, "rd-refine"); TOOLOPT(param->bEnableEarlySkip, "early-skip"); - TOOLOPT(param->bEnableRecursionSkip, "rskip"); + TOOLVAL(param->recursionSkipMode, "rskip mode=%d"); + if (param->recursionSkipMode == EDGE_BASED_RSKIP) + TOOLVAL(param->edgeVarThreshold, "rskip-edge-threshold=%.2f"); TOOLOPT(param->bEnableSplitRdSkip, "splitrd-skip"); TOOLVAL(param->noiseReductionIntra, "nr-intra=%d"); TOOLVAL(param->noiseReductionInter, "nr-inter=%d"); @@ -2066,7 +2080,10 @@ s += sprintf(s, " rd=%d", p->rdLevel); s += sprintf(s, " selective-sao=%d", p->selectiveSAO); BOOL(p->bEnableEarlySkip, "early-skip"); - BOOL(p->bEnableRecursionSkip, "rskip"); + BOOL(p->recursionSkipMode, "rskip"); + if (p->recursionSkipMode == EDGE_BASED_RSKIP) + s += sprintf(s, " rskip-edge-threshold=%f", p->edgeVarThreshold); + BOOL(p->bEnableFastIntra, "fast-intra"); BOOL(p->bEnableTSkipFast, "tskip-fast"); BOOL(p->bCULossless, "cu-lossless"); @@ -2204,6 +2221,7 @@ if (p->bEnableSceneCutAwareQp) s += sprintf(s, " scenecut-window=%d max-qp-delta=%d", p->scenecutWindow, p->maxQpDelta); s += sprintf(s, "conformance-window-offsets right=%d bottom=%d", p->confWinRightOffset, p->confWinBottomOffset); + s += sprintf(s, " decoder-max-rate=%d", p->decoderVbvMaxRate); #undef BOOL return buf; } @@ -2373,7 +2391,8 @@ dst->bSaoNonDeblocked = src->bSaoNonDeblocked; dst->rdLevel = src->rdLevel; dst->bEnableEarlySkip = src->bEnableEarlySkip; - dst->bEnableRecursionSkip = src->bEnableRecursionSkip; + dst->recursionSkipMode = src->recursionSkipMode; + dst->edgeVarThreshold = src->edgeVarThreshold; dst->bEnableFastIntra = src->bEnableFastIntra; dst->bEnableTSkipFast = src->bEnableTSkipFast; dst->bCULossless = src->bCULossless; @@ -2419,8 +2438,9 @@ dst->rc.zonefileCount = src->rc.zonefileCount; dst->reconfigWindowSize = src->reconfigWindowSize; dst->bResetZoneConfig = 
src->bResetZoneConfig; + dst->decoderVbvMaxRate = src->decoderVbvMaxRate; - if (src->rc.zonefileCount && src->rc.zones) + if (src->rc.zonefileCount && src->rc.zones && src->bResetZoneConfig) { for (int i = 0; i < src->rc.zonefileCount; i++) {
View file
x265_3.3.tar.gz/source/common/pixel.cpp -> x265_3.4.tar.gz/source/common/pixel.cpp
Changed
@@ -5,6 +5,7 @@ * Mandar Gurav <mandar@multicorewareinc.com> * Mahesh Pittala <mahesh@multicorewareinc.com> * Min Chen <min.chen@multicorewareinc.com> + * Hongbin Liu<liuhongbin1@huawei.com> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -265,6 +266,10 @@ { int satd = 0; +#if ENABLE_ASSEMBLY && X265_ARCH_ARM64 + pixelcmp_t satd_4x4 = x265_pixel_satd_4x4_neon; +#endif + for (int row = 0; row < h; row += 4) for (int col = 0; col < w; col += 4) satd += satd_4x4(pix1 + row * stride_pix1 + col, stride_pix1, @@ -279,6 +284,10 @@ { int satd = 0; +#if ENABLE_ASSEMBLY && X265_ARCH_ARM64 + pixelcmp_t satd_8x4 = x265_pixel_satd_8x4_neon; +#endif + for (int row = 0; row < h; row += 4) for (int col = 0; col < w; col += 8) satd += satd_8x4(pix1 + row * stride_pix1 + col, stride_pix1, @@ -876,6 +885,18 @@ } } +static void planecopy_pp_shr_c(const pixel* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int width, int height, int shift) +{ + for (int r = 0; r < height; r++) + { + for (int c = 0; c < width; c++) + dst[c] = (pixel)((src[c] >> shift)); + + dst += dstStride; + src += srcStride; + } +} + static void planecopy_sp_shl_c(const uint16_t* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int width, int height, int shift, uint16_t mask) { for (int r = 0; r < height; r++) @@ -1316,6 +1337,7 @@ p.planecopy_cp = planecopy_cp_c; p.planecopy_sp = planecopy_sp_c; p.planecopy_sp_shl = planecopy_sp_shl_c; + p.planecopy_pp_shr = planecopy_pp_shr_c; #if HIGH_BIT_DEPTH p.planeClipAndMax = planeClipAndMax_c; #endif
View file
x265_3.3.tar.gz/source/common/primitives.h -> x265_3.4.tar.gz/source/common/primitives.h
Changed
@@ -8,6 +8,8 @@ * Rajesh Paulraj <rajesh@multicorewareinc.com> * Praveen Kumar Tiwari <praveen@multicorewareinc.com> * Min Chen <chenm003@163.com> + * Hongbin Liu<liuhongbin1@huawei.com> + * Yimeng Su <yimeng.su@huawei.com> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -204,6 +206,7 @@ typedef void (*sign_t)(int8_t *dst, const pixel *src1, const pixel *src2, const int endX); typedef void (*planecopy_cp_t) (const uint8_t* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int width, int height, int shift); typedef void (*planecopy_sp_t) (const uint16_t* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int width, int height, int shift, uint16_t mask); +typedef void (*planecopy_pp_t) (const pixel* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int width, int height, int shift); typedef pixel (*planeClipAndMax_t)(pixel *src, intptr_t stride, int width, int height, uint64_t *outsum, const pixel minPix, const pixel maxPix); typedef void (*cutree_propagate_cost) (int* dst, const uint16_t* propagateIn, const int32_t* intraCosts, const uint16_t* interCosts, const int32_t* invQscales, const double* fpsFactor, int len); @@ -358,6 +361,7 @@ planecopy_cp_t planecopy_cp; planecopy_sp_t planecopy_sp; planecopy_sp_t planecopy_sp_shl; + planecopy_pp_t planecopy_pp_shr; planeClipAndMax_t planeClipAndMax; weightp_sp_t weight_sp; @@ -465,6 +469,9 @@ void setupInstrinsicPrimitives(EncoderPrimitives &p, int cpuMask); void setupAssemblyPrimitives(EncoderPrimitives &p, int cpuMask); void setupAliasPrimitives(EncoderPrimitives &p); +#if X265_ARCH_ARM64 +void setupAliasCPrimitives(EncoderPrimitives &cp, EncoderPrimitives &asmp, int cpuMask); +#endif #if HAVE_ALTIVEC void setupPixelPrimitives_altivec(EncoderPrimitives &p); void setupDCTPrimitives_altivec(EncoderPrimitives &p); @@ -479,4 +486,10 @@ extern const char* PFX(build_info_str); #endif +#if ENABLE_ASSEMBLY && 
X265_ARCH_ARM64 +extern "C" { +#include "aarch64/pixel-util.h" +} +#endif + #endif // ifndef X265_PRIMITIVES_H
View file
x265_3.4.tar.gz/source/common/scaler.cpp
Added
@@ -0,0 +1,1110 @@ +/***************************************************************************** +* Copyright (C) 2013-2020 MulticoreWare, Inc +* +* Authors: Pooja Venkatesan <pooja@multicorewareinc.com> +* +* This program is free software; you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation; either version 2 of the License, or +* (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program; if not, write to the Free Software +* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA. +* +* This program is also available under a commercial proprietary license. +* For more information, contact us at license @ x265.com. 
+*****************************************************************************/ + +#include "scaler.h" + +#if _MSC_VER +#pragma warning(disable: 4706) // assignment within conditional +#pragma warning(disable: 4244) // '=' : possible loss of data +#endif + +#define SHORT_MIN (-(1 << 15)) +#define SHORT_MAX ((1 << 15) - 1) +#define SHORT_MAX_10 ((1 << 10) - 1) + +namespace X265_NS{ + +ScalerFilterManager::ScalerFilterManager() : + m_bitDepth(0), + m_algorithmFlags(0), + m_srcW(0), + m_srcH(0), + m_dstW(0), + m_dstH(0), + m_crSrcW(0), + m_crSrcH(0), + m_crDstW(0), + m_crDstH(0), + m_crSrcHSubSample(0), + m_crSrcVSubSample(0), + m_crDstHSubSample(0), + m_crDstVSubSample(0) +{ + for (int i = 0; i < m_numSlice; i++) + m_slices[i] = NULL; + for (int i = 0; i < m_numFilter; i++) + m_ScalerFilters[i] = NULL; +} + +inline static void filter_copy_c(int64_t* filter, int64_t* filter2, int size) +{ + for (int i = 0; i < size; i++) + filter2[i] = filter[i]; +} + +#if X265_DEPTH == 8 +static void doScaling_c(int16_t *dst, int dstW, const uint8_t *src, const int16_t *filter, const int32_t *filterPos, int filterSize) +{ + for (int i = 0; i < dstW; i++) + { + int val = 0; + int sourcePos = filterPos[i]; + for (int j = 0; j < filterSize; j++) + val += ((int)src[sourcePos + j]) * filter[filterSize * i + j]; + // the cubic equation does overflow ... 
+ dst[i] = x265_clip3(SHORT_MIN, SHORT_MAX, val >> 7); + } +} +static uint8_t clipUint8(int a) +{ + if (a&(~0xFF)) + return (-a) >> 31; + else + return a; +} + +static void yuv2PlaneX_c(const int16_t *filter, int filterSize, const int16_t **src, uint8_t *dest, int dstW) +{ + for (int i = 0; i < dstW; i++) + { + int val = 64 << 12; + for (int j = 0; j < filterSize; j++) + val += src[j][i] * filter[j]; + dest[i] = clipUint8(val >> 19); + } +} +#else +static void yuv2PlaneX_c_h(const int16_t *filter, int filterSize, const int16_t **src, uint8_t *dest, int dstW) +{ + for (int i = 0; i < dstW; i++) + { + int val = 1 << 16; + uint16_t* dst16bit = (uint16_t *)dest; + for (int j = 0; j < filterSize; j++) + val += src[j][i] * filter[j]; + uint16_t d = x265_clip3(0, SHORT_MAX_10, val >> 17); + ((uint8_t*)(&dst16bit[i]))[0] = (d); + ((uint8_t*)(&dst16bit[i]))[1] = (d) >> 8; + } +} +static void doScaling_c_h(int16_t *dst, int dstW, const uint8_t *src, const int16_t *filter, const int32_t *filterPos, int filterSize) +{ + const uint16_t *srcLocal = (const uint16_t *)src; + for (int i = 0; i < dstW; i++) + { + int val = 0; + int sourcePos = filterPos[i]; + for (int j = 0; j < filterSize; j++) + val += ((int)srcLocal[sourcePos + j]) * filter[filterSize * i + j]; + // the cubic equation does overflow + dst[i] = x265_clip3(SHORT_MIN, SHORT_MAX, val >> 9); + } +} +#endif + +ScalerFilter::ScalerFilter() : + m_filtLen(0), + m_filtPos(NULL), + m_filt(NULL), + m_sourceSlice(NULL), + m_destSlice(NULL) +{ +} + +ScalerFilter::~ScalerFilter() +{ + if (m_filtPos) { + delete[] m_filtPos; m_filtPos = NULL; + } + if (m_filt) { + delete[] m_filt; m_filt = NULL; + } +} + +void ScalerHLumFilter::process(int sliceVer, int sliceHor) +{ + uint8_t ** src = m_sourceSlice->m_plane[0].lineBuf; + uint8_t ** dst = m_destSlice->m_plane[0].lineBuf; + int sourcePos = sliceVer - m_sourceSlice->m_plane[0].sliceVer; + int destPos = sliceVer - m_destSlice->m_plane[0].sliceVer; + int dstW = m_destSlice->m_width; + 
for (int i = 0; i < sliceHor; ++i) + { + m_hFilterScaler->doScaling((int16_t*)dst[destPos + i], dstW, (const uint8_t *)src[sourcePos + i], m_filt, m_filtPos, m_filtLen); + m_destSlice->m_plane[0].sliceHor += 1; + } +} + +void ScalerHCrFilter::process(int sliceVer, int sliceHor) +{ + uint8_t ** src1 = m_sourceSlice->m_plane[1].lineBuf; + uint8_t ** dst1 = m_destSlice->m_plane[1].lineBuf; + uint8_t ** src2 = m_sourceSlice->m_plane[2].lineBuf; + uint8_t ** dst2 = m_destSlice->m_plane[2].lineBuf; + + int sourcePos1 = sliceVer - m_sourceSlice->m_plane[1].sliceVer; + int destPos1 = sliceVer - m_destSlice->m_plane[1].sliceVer; + int sourcePos2 = sliceVer - m_sourceSlice->m_plane[2].sliceVer; + int destPos2 = sliceVer - m_destSlice->m_plane[2].sliceVer; + + int dstW = m_destSlice->m_width >> m_destSlice->m_hCrSubSample; + + for (int i = 0; i < sliceHor; ++i) + { + m_hFilterScaler->doScaling((int16_t*)dst1[destPos1 + i], dstW, src1[sourcePos1 + i], m_filt, m_filtPos, m_filtLen); + m_hFilterScaler->doScaling((int16_t*)dst2[destPos2 + i], dstW, src2[sourcePos2 + i], m_filt, m_filtPos, m_filtLen); + m_destSlice->m_plane[1].sliceHor += 1; + m_destSlice->m_plane[2].sliceHor += 1; + } +} + +void VFilterScaler8Bit::yuv2PlaneX(const int16_t *filter, int filterSize, const int16_t **src, uint8_t *dest, int dstW) +{ + int IdxW = FACTOR_4; + int IdxF = FIL_DEF; + + (dstW % 4 == 0) && (filterSize == 6) && (IdxF = FIL_6) && (IdxW = FACTOR_4); + (dstW % 4 == 0) && (filterSize == 8) && (IdxF = FIL_8) && (IdxW = FACTOR_4); + +#if X265_DEPTH == 8 + yuv2PlaneX_c(filter, filterSize, src, dest, dstW); +#else + yuv2PlaneX_c_h(filter, filterSize, src, dest, dstW); +#endif +} + +void VFilterScaler10Bit::yuv2PlaneX(const int16_t *filter, int filterSize, const int16_t **src, uint8_t *dest, int dstW) +{ + int IdxW = FACTOR_4; + int IdxF = FIL_DEF;
View file
x265_3.4.tar.gz/source/common/scaler.h
Added
@@ -0,0 +1,254 @@ +/***************************************************************************** + * Copyright (C) 2013-2020 MulticoreWare, Inc + * + * Authors: Pooja Venkatesan <pooja@multicorewareinc.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA. + * + * This program is also available under a commercial proprietary license. + * For more information, contact us at license @ x265.com. + *****************************************************************************/ + +#ifndef X265_SCALER_H +#define X265_SCALER_H + +#include "common.h" + +namespace X265_NS { +//x265 private namespace + +class ScalerSlice; +class VideoDesc; + +#define MAX_NUM_LINES_AHEAD 4 +#define SCALER_ALIGN(x, j) (((x)+(j)-1)&~((j)-1)) +#define X265_ABS(j) ((j) >= 0 ? (j) : (-(j))) +#define SCALER_MAX_REDUCE_CUTOFF 0.002 +#define SCALER_BITEXACT 0x80000 +#define ROUNDED_DIVISION(i,j) (((i)>0 ? (i) + ((j)>>1) : (i) - ((j)>>1))/(j)) +#define UH_CEIL_SHIFTR(i,j) (!scale_builtin_constant_p(j) ? 
-((-(i)) >> (j)) \ + : ((i) + (1<<(j)) - 1) >> (j)) + +#if defined(__GNUC__) || defined(__clang__) +# define scale_builtin_constant_p __builtin_constant_p +#else +# define scale_builtin_constant_p(x) 0 +#endif + +enum ResFactor +{ + RES_FACTOR_64, RES_FACTOR_32, RES_FACTOR_16, RES_FACTOR_8, + RES_FACTOR_4, RES_FACTOR_DEF, NUM_RES_FACTOR +}; + +enum ScalerFactor +{ + FACTOR_4, FACTOR_8, NUM_FACTOR +}; + +enum FilterSize +{ + FIL_4, FIL_6, FIL_8, FIL_9, FIL_10, FIL_11, FIL_13, FIL_15, + FIL_16, FIL_17, FIL_19, FIL_22, FIL_24, FIL_DEF, NUM_FIL +}; + +class ScalerFilter { +public: + int m_filtLen; + int32_t* m_filtPos; // Array of horizontal/vertical starting pos for each dst for luma / chroma planes. + int16_t* m_filt; // Array of horizontal/vertical filter coefficients for luma / chroma planes. + ScalerSlice* m_sourceSlice; // Source slice + ScalerSlice* m_destSlice; // Output slice + ScalerFilter(); + virtual ~ScalerFilter(); + virtual void process(int sliceVer, int sliceHor) = 0; + int initCoeff(int flag, int inc, int srcW, int dstW, int filtAlign, int one, int sourcePos, int destPos); + void setSlice(ScalerSlice* source, ScalerSlice* dest) { m_sourceSlice = source; m_destSlice = dest; } +}; + +class VideoDesc { +public: + int m_width; + int m_height; + int m_csp; + int m_inputDepth; + + VideoDesc(int w, int h, int csp, int bitDepth) + { + m_width = w; + m_height = h; + m_csp = csp; + m_inputDepth = bitDepth; + } +}; + +typedef struct ScalerPlane +{ + int availLines; // max number of lines that can be held by this plane + int sliceVer; // index of first line + int sliceHor; // number of lines + uint8_t** lineBuf; // line buffer +} ScalerPlane; + +// Assist horizontal filtering, base class +class HFilterScaler { +public: + int m_bitDepth; +public: + HFilterScaler() :m_bitDepth(0) {}; + virtual ~HFilterScaler() {}; + virtual void doScaling(int16_t *dst, int dstW, const uint8_t *src, const int16_t *filter, const int32_t *filterPos, int filterSize) = 0; +}; + +// 
Assist vertical filtering, base class +class VFilterScaler { +public: + int m_bitDepth; +public: + VFilterScaler() :m_bitDepth(0) {}; + virtual ~VFilterScaler() {}; + virtual void yuv2PlaneX(const int16_t *filter, int filterSize, const int16_t **src, uint8_t *dest, int dstW) = 0; +}; + +// Assist horizontal filtering, process 8 bit case +class HFilterScaler8Bit : public HFilterScaler { +public: + HFilterScaler8Bit() { m_bitDepth = 8; } + virtual void doScaling(int16_t *dst, int dstW, const uint8_t *src, const int16_t *filter, const int32_t *filterPos, int filterSize); +}; + +// Assist horizontal filtering, process 10 bit case +class HFilterScaler10Bit : public HFilterScaler { +public: + HFilterScaler10Bit() { m_bitDepth = 10; } + virtual void doScaling(int16_t *dst, int dstW, const uint8_t *src, const int16_t *filter, const int32_t *filterPos, int filterSize); +}; + +// Assist vertical filtering, process 8 bit case +class VFilterScaler8Bit : public VFilterScaler { +public: + VFilterScaler8Bit() { m_bitDepth = 8; } + virtual void yuv2PlaneX(const int16_t *filter, int filterSize, const int16_t **src, uint8_t *dest, int dstW); +}; + +// Assist vertical filtering, process 10 bit case +class VFilterScaler10Bit : public VFilterScaler { +public: + VFilterScaler10Bit() { m_bitDepth = 10; } + virtual void yuv2PlaneX(const int16_t *filter, int filterSize, const int16_t **src, uint8_t *dest, int dstW); +}; + +// Horizontal filter for luma +class ScalerHLumFilter : public ScalerFilter { +private: + HFilterScaler* m_hFilterScaler; +public: + ScalerHLumFilter(int bitDepth) { bitDepth == 8 ? m_hFilterScaler = new HFilterScaler8Bit : bitDepth == 10 ? 
m_hFilterScaler = new HFilterScaler10Bit : NULL;} + ~ScalerHLumFilter() { if (m_hFilterScaler) X265_FREE(m_hFilterScaler); } + virtual void process(int sliceVer, int sliceHor); +}; + +// Horizontal filter for chroma +class ScalerHCrFilter : public ScalerFilter { +private: + HFilterScaler* m_hFilterScaler; +public: + ScalerHCrFilter(int bitDepth) { bitDepth == 8 ? m_hFilterScaler = new HFilterScaler8Bit : bitDepth == 10 ? m_hFilterScaler = new HFilterScaler10Bit : NULL;} + ~ScalerHCrFilter() { if (m_hFilterScaler) X265_FREE(m_hFilterScaler); } + virtual void process(int sliceVer, int sliceHor); +}; + +// Vertical filter for luma +class ScalerVLumFilter : public ScalerFilter { +private: + VFilterScaler* m_vFilterScaler; +public: + ScalerVLumFilter(int bitDepth) { bitDepth == 8 ? m_vFilterScaler = new VFilterScaler8Bit : bitDepth == 10 ? m_vFilterScaler = new VFilterScaler10Bit : NULL;} + ~ScalerVLumFilter() { if (m_vFilterScaler) X265_FREE(m_vFilterScaler); } + virtual void process(int sliceVer, int sliceHor); +}; + +// Vertical filter for chroma +class ScalerVCrFilter : public ScalerFilter { +private: + VFilterScaler* m_vFilterScaler; +public: + ScalerVCrFilter(int bitDepth) { bitDepth == 8 ? m_vFilterScaler = new VFilterScaler8Bit : bitDepth == 10 ? m_vFilterScaler = new VFilterScaler10Bit : NULL;} + ~ScalerVCrFilter() { if (m_vFilterScaler) X265_FREE(m_vFilterScaler); } + virtual void process(int sliceVer, int sliceHor); +}; + +class ScalerSlice +{ +private: + enum ScalerSlicePlaneNum { m_numSlicePlane = 4 }; +public: + int m_width; // Slice line width + int m_hCrSubSample; // horizontal Chroma subsampling factor
View file
x265_3.3.tar.gz/source/common/threading.h -> x265_3.4.tar.gz/source/common/threading.h
Changed
@@ -238,6 +238,14 @@ LeaveCriticalSection(&m_cs); } + void decr() + { + EnterCriticalSection(&m_cs); + m_val--; + WakeAllConditionVariable(&m_cv); + LeaveCriticalSection(&m_cs); + } + protected: CRITICAL_SECTION m_cs; @@ -436,6 +444,14 @@ pthread_mutex_unlock(&m_mutex); } + void decr() + { + pthread_mutex_lock(&m_mutex); + m_val--; + pthread_cond_broadcast(&m_cond); + pthread_mutex_unlock(&m_mutex); + } + protected: pthread_mutex_t m_mutex;
View file
x265_3.3.tar.gz/source/encoder/analysis.cpp -> x265_3.4.tar.gz/source/encoder/analysis.cpp
Changed
@@ -1272,7 +1272,7 @@ md.pred[PRED_SKIP].cu.initSubCU(parentCTU, cuGeom, qp); checkMerge2Nx2N_rd0_4(md.pred[PRED_SKIP], md.pred[PRED_MERGE], cuGeom); - skipRecursion = !!m_param->bEnableRecursionSkip && md.bestMode; + skipRecursion = !!m_param->recursionSkipMode && md.bestMode; if (m_param->rdLevel) skipModes = m_param->bEnableEarlySkip && md.bestMode; } @@ -1296,7 +1296,7 @@ md.pred[PRED_SKIP].cu.initSubCU(parentCTU, cuGeom, qp); checkMerge2Nx2N_rd0_4(md.pred[PRED_SKIP], md.pred[PRED_MERGE], cuGeom); - skipRecursion = !!m_param->bEnableRecursionSkip && md.bestMode; + skipRecursion = !!m_param->recursionSkipMode && md.bestMode; if (m_param->rdLevel) skipModes = m_param->bEnableEarlySkip && md.bestMode; } @@ -1314,15 +1314,23 @@ skipModes = (m_param->bEnableEarlySkip || m_refineLevel == 2) && md.bestMode && md.bestMode->cu.isSkipped(0); // TODO: sa8d threshold per depth } - if (md.bestMode && m_param->bEnableRecursionSkip && !bCtuInfoCheck && !(m_param->bAnalysisType == AVC_INFO && m_param->analysisLoadReuseLevel == 7 && (m_modeFlag[0] || m_modeFlag[1]))) + if (md.bestMode && m_param->recursionSkipMode && !bCtuInfoCheck && !(m_param->bAnalysisType == AVC_INFO && m_param->analysisLoadReuseLevel == 7 && (m_modeFlag[0] || m_modeFlag[1]))) { skipRecursion = md.bestMode->cu.isSkipped(0); - if (mightSplit && depth >= minDepth && !skipRecursion) + if (mightSplit && !skipRecursion) { - if (depth) - skipRecursion = recursionDepthCheck(parentCTU, cuGeom, *md.bestMode); - if (m_bHD && !skipRecursion && m_param->rdLevel == 2 && md.fencYuv.m_size != MAX_CU_SIZE) + if (depth >= minDepth && m_param->recursionSkipMode == RDCOST_BASED_RSKIP) + { + if (depth) + skipRecursion = recursionDepthCheck(parentCTU, cuGeom, *md.bestMode); + if (m_bHD && !skipRecursion && m_param->rdLevel == 2 && md.fencYuv.m_size != MAX_CU_SIZE) + skipRecursion = complexityCheckCU(*md.bestMode); + } + else if (cuGeom.log2CUSize >= MAX_LOG2_CU_SIZE - 1 && m_param->recursionSkipMode == EDGE_BASED_RSKIP) + { 
skipRecursion = complexityCheckCU(*md.bestMode); + } + } } if (m_param->bAnalysisType == AVC_INFO && md.bestMode && cuGeom.numPartitions <= 16 && m_param->analysisLoadReuseLevel == 7) @@ -1972,7 +1980,7 @@ checkInter_rd5_6(md.pred[PRED_2Nx2N], cuGeom, SIZE_2Nx2N, refMasks); checkBestMode(md.pred[PRED_2Nx2N], cuGeom.depth); - if (m_param->bEnableRecursionSkip && depth && m_modeDepth[depth - 1].bestMode) + if (m_param->recursionSkipMode && depth && m_modeDepth[depth - 1].bestMode) skipRecursion = md.bestMode && !md.bestMode->cu.getQtRootCbf(0); } if (m_param->analysisLoadReuseLevel > 4 && m_reusePartSize[cuGeom.absPartIdx] == SIZE_2Nx2N) @@ -1996,7 +2004,7 @@ checkInter_rd5_6(md.pred[PRED_2Nx2N], cuGeom, SIZE_2Nx2N, refMasks); checkBestMode(md.pred[PRED_2Nx2N], cuGeom.depth); - if (m_param->bEnableRecursionSkip && depth && m_modeDepth[depth - 1].bestMode) + if (m_param->recursionSkipMode && depth && m_modeDepth[depth - 1].bestMode) skipRecursion = md.bestMode && !md.bestMode->cu.getQtRootCbf(0); } } @@ -2015,8 +2023,10 @@ checkInter_rd5_6(md.pred[PRED_2Nx2N], cuGeom, SIZE_2Nx2N, refMasks); checkBestMode(md.pred[PRED_2Nx2N], cuGeom.depth); - if (m_param->bEnableRecursionSkip && depth && m_modeDepth[depth - 1].bestMode) + if (m_param->recursionSkipMode == RDCOST_BASED_RSKIP && depth && m_modeDepth[depth - 1].bestMode) skipRecursion = md.bestMode && !md.bestMode->cu.getQtRootCbf(0); + else if (cuGeom.log2CUSize >= MAX_LOG2_CU_SIZE - 1 && m_param->recursionSkipMode == EDGE_BASED_RSKIP) + skipRecursion = md.bestMode && complexityCheckCU(*md.bestMode); } if (m_param->bAnalysisType == AVC_INFO && md.bestMode && cuGeom.numPartitions <= 16 && m_param->analysisLoadReuseLevel == 7) skipRecursion = true; @@ -3525,27 +3535,47 @@ bool Analysis::complexityCheckCU(const Mode& bestMode) { - uint32_t mean = 0; - uint32_t homo = 0; - uint32_t cuSize = bestMode.fencYuv->m_size; - for (uint32_t y = 0; y < cuSize; y++) { - for (uint32_t x = 0; x < cuSize; x++) { - mean += 
(bestMode.fencYuv->m_buf[0][y * cuSize + x]); + if (m_param->recursionSkipMode == RDCOST_BASED_RSKIP) + { + uint32_t mean = 0; + uint32_t homo = 0; + uint32_t cuSize = bestMode.fencYuv->m_size; + for (uint32_t y = 0; y < cuSize; y++) { + for (uint32_t x = 0; x < cuSize; x++) { + mean += (bestMode.fencYuv->m_buf[0][y * cuSize + x]); + } } - } - mean = mean / (cuSize * cuSize); - for (uint32_t y = 0 ; y < cuSize; y++){ - for (uint32_t x = 0 ; x < cuSize; x++){ - homo += abs(int(bestMode.fencYuv->m_buf[0][y * cuSize + x] - mean)); + mean = mean / (cuSize * cuSize); + for (uint32_t y = 0; y < cuSize; y++) { + for (uint32_t x = 0; x < cuSize; x++) { + homo += abs(int(bestMode.fencYuv->m_buf[0][y * cuSize + x] - mean)); + } } - } - homo = homo / (cuSize * cuSize); + homo = homo / (cuSize * cuSize); - if (homo < (.1 * mean)) - return true; + if (homo < (.1 * mean)) + return true; - return false; -} + return false; + } + else + { + int blockType = bestMode.cu.m_log2CUSize[0] - LOG2_UNIT_SIZE; + int shift = bestMode.cu.m_log2CUSize[0] * LOG2_UNIT_SIZE; + intptr_t stride = m_frame->m_fencPic->m_stride; + intptr_t blockOffsetLuma = bestMode.cu.m_cuPelX + bestMode.cu.m_cuPelY * stride; + uint64_t sum_ss = primitives.cu[blockType].var(m_frame->m_edgeBitPic + blockOffsetLuma, stride); + uint32_t sum = (uint32_t)sum_ss; + uint32_t ss = (uint32_t)(sum_ss >> 32); + uint32_t pixelCount = 1 << shift; + double cuEdgeVariance = (ss - ((double)sum * sum / pixelCount)) / pixelCount; + + if (cuEdgeVariance > (double)m_param->edgeVarThreshold) + return false; + else + return true; + } + } uint32_t Analysis::calculateCUVariance(const CUData& ctu, const CUGeom& cuGeom) { @@ -3570,7 +3600,6 @@ cnt++; } } - return cuVariance / cnt; }
View file
x265_3.3.tar.gz/source/encoder/analysis.h -> x265_3.4.tar.gz/source/encoder/analysis.h
Changed
@@ -52,7 +52,7 @@ splitRefs = 0; mvCost[0] = 0; // L0 mvCost[1] = 0; // L1 - sa8dCost = 0; + sa8dCost = 0; } }; @@ -120,7 +120,6 @@ Mode& compressCTU(CUData& ctu, Frame& frame, const CUGeom& cuGeom, const Entropy& initialContext); int32_t loadTUDepth(CUGeom cuGeom, CUData parentCTU); - protected: /* Analysis data for save/load mode, writes/reads data based on absPartIdx */ x265_analysis_inter_data* m_reuseInterDataCTU;
View file
x265_3.3.tar.gz/source/encoder/api.cpp -> x265_3.4.tar.gz/source/encoder/api.cpp
Changed
@@ -1016,12 +1016,12 @@ void x265_zone_free(x265_param *param) { - if (param && param->rc.zonefileCount) { + if (param && param->rc.zones && (param->rc.zoneCount || param->rc.zonefileCount)) + { for (int i = 0; i < param->rc.zonefileCount; i++) x265_free(param->rc.zones[i].zoneParam); - } - if (param && (param->rc.zoneCount || param->rc.zonefileCount)) x265_free(param->rc.zones); + } } static const x265_api libapi = @@ -1294,6 +1294,8 @@ fprintf(csvfp, "RateFactor, "); if (param->rc.vbvBufferSize) fprintf(csvfp, "BufferFill, BufferFillFinal, "); + if (param->rc.vbvBufferSize && param->csvLogLevel >= 2) + fprintf(csvfp, "UnclippedBufferFillFinal, "); if (param->bEnablePsnr) fprintf(csvfp, "Y PSNR, U PSNR, V PSNR, YUV PSNR, "); if (param->bEnableSsim) @@ -1405,6 +1407,8 @@ fprintf(param->csvfpt, "%.3lf,", frameStats->rateFactor); if (param->rc.vbvBufferSize) fprintf(param->csvfpt, "%.3lf, %.3lf,", frameStats->bufferFill, frameStats->bufferFillFinal); + if (param->rc.vbvBufferSize && param->csvLogLevel >= 2) + fprintf(param->csvfpt, "%.3lf,", frameStats->unclippedBufferFillFinal); if (param->bEnablePsnr) fprintf(param->csvfpt, "%.3lf, %.3lf, %.3lf, %.3lf,", frameStats->psnrY, frameStats->psnrU, frameStats->psnrV, frameStats->psnr); if (param->bEnableSsim)
View file
x265_3.3.tar.gz/source/encoder/encoder.cpp -> x265_3.4.tar.gz/source/encoder/encoder.cpp
Changed
@@ -218,10 +218,7 @@ if (m_param->bHistBasedSceneCut) { - for (int i = 0; i < x265_cli_csps[m_param->internalCsp].planes; i++) - { - m_planeSizes[i] = (m_param->sourceWidth >> x265_cli_csps[p->internalCsp].width[i]) * (m_param->sourceHeight >> x265_cli_csps[m_param->internalCsp].height[i]); - } + m_planeSizes[0] = (m_param->sourceWidth >> x265_cli_csps[p->internalCsp].width[0]) * (m_param->sourceHeight >> x265_cli_csps[m_param->internalCsp].height[0]); uint32_t pixelbytes = m_param->internalBitDepth > 8 ? 2 : 1; m_edgePic = X265_MALLOC(pixel, m_planeSizes[0] * pixelbytes); m_edgeHistThreshold = m_param->edgeTransitionThreshold; @@ -1443,9 +1440,9 @@ int32_t planeCount = x265_cli_csps[m_param->internalCsp].planes; memset(m_edgePic, 0, bufSize); - if (!computeEdge(m_edgePic, src, NULL, pic->width, pic->height, pic->width, false)) + if (!computeEdge(m_edgePic, src, NULL, pic->width, pic->height, pic->width, false, 1)) { - x265_log(m_param, X265_LOG_ERROR, "Failed edge computation!"); + x265_log(m_param, X265_LOG_ERROR, "Failed to compute edge!"); return false; } @@ -1605,6 +1602,14 @@ if (m_param->bHistBasedSceneCut && pic_in) { x265_picture *pic = (x265_picture *) pic_in; + + if (pic->poc == 0) + { + /* for entire encode compute the chroma plane sizes only once */ + for (int i = 1; i < x265_cli_csps[m_param->internalCsp].planes; i++) + m_planeSizes[i] = (pic->width >> x265_cli_csps[m_param->internalCsp].width[i]) * (pic->height >> x265_cli_csps[m_param->internalCsp].height[i]); + } + if (computeHistograms(pic)) { double maxUVSad = 0.0, edgeSad = 0.0; @@ -1752,6 +1757,12 @@ } } } + if (m_param->recursionSkipMode == EDGE_BASED_RSKIP && m_param->bHistBasedSceneCut) + { + pixel* src = m_edgePic; + primitives.planecopy_pp_shr(src, inFrame->m_fencPic->m_picWidth, inFrame->m_edgeBitPic, inFrame->m_fencPic->m_stride, + inFrame->m_fencPic->m_picWidth, inFrame->m_fencPic->m_picHeight, 0); + } } else { @@ -2414,7 +2425,7 @@ encParam->maxNumReferences = param->maxNumReferences; 
// never uses more refs than specified in stream headers encParam->bEnableFastIntra = param->bEnableFastIntra; encParam->bEnableEarlySkip = param->bEnableEarlySkip; - encParam->bEnableRecursionSkip = param->bEnableRecursionSkip; + encParam->recursionSkipMode = param->recursionSkipMode; encParam->searchMethod = param->searchMethod; /* Scratch buffer prevents me_range from being increased for esa/tesa */ if (param->searchRange < encParam->searchRange) @@ -3006,6 +3017,8 @@ frameStats->ipCostRatio = curFrame->m_lowres.ipCostRatio; frameStats->bufferFill = m_rateControl->m_bufferFillActual; frameStats->bufferFillFinal = m_rateControl->m_bufferFillFinal; + if (m_param->csvLogLevel >= 2) + frameStats->unclippedBufferFillFinal = m_rateControl->m_unclippedBufferFillFinal; frameStats->frameLatency = inPoc - poc; if (m_param->rc.rateControlMode == X265_RC_CRF) frameStats->rateFactor = curEncData.m_rateFactor; @@ -3400,7 +3413,7 @@ p->maxNumReferences = zone->maxNumReferences; p->bEnableFastIntra = zone->bEnableFastIntra; p->bEnableEarlySkip = zone->bEnableEarlySkip; - p->bEnableRecursionSkip = zone->bEnableRecursionSkip; + p->recursionSkipMode = zone->recursionSkipMode; p->searchMethod = zone->searchMethod; p->searchRange = zone->searchRange; p->subpelRefine = zone->subpelRefine; @@ -3681,20 +3694,6 @@ if (p->analysisLoad && !p->analysisLoadReuseLevel) p->analysisLoadReuseLevel = 5; - if ((p->bAnalysisType == DEFAULT) && p->rc.cuTree) - { - if (p->analysisSaveReuseLevel && p->analysisSaveReuseLevel < 10) - { - x265_log(p, X265_LOG_WARNING, "cu-tree works only with analysis-save-reuse-level 10, Disabling cu-tree\n"); - p->rc.cuTree = 0; - } - if (p->analysisLoadReuseLevel && p->analysisLoadReuseLevel < 10) - { - x265_log(p, X265_LOG_WARNING, "cu-tree works only with analysis-load-reuse-level 10, Disabling cu-tree\n"); - p->rc.cuTree = 0; - } - } - if ((p->analysisLoad || p->analysisSave) && (p->bDistributeModeAnalysis || p->bDistributeMotionEstimation)) { x265_log(p, 
X265_LOG_WARNING, "Analysis load/save options incompatible with pmode/pme, Disabling pmode/pme\n"); @@ -3867,29 +3866,30 @@ } else { - if (fread(&m_conformanceWindow.rightOffset, sizeof(int), 1, m_analysisFileIn) != 1) + int rightOffset, bottomOffset; + if (fread(&rightOffset, sizeof(int), 1, m_analysisFileIn) != 1) { x265_log(NULL, X265_LOG_ERROR, "Error reading analysis data. Conformance window right offset missing\n"); m_aborted = true; } - else if (m_conformanceWindow.rightOffset && p->analysisLoadReuseLevel > 1) + else if (rightOffset && p->analysisLoadReuseLevel > 1) { int scaleFactor = p->scaleFactor < 2 ? 1 : p->scaleFactor; - padsize = m_conformanceWindow.rightOffset * scaleFactor; + padsize = rightOffset * scaleFactor; p->sourceWidth += padsize; m_conformanceWindow.bEnabled = true; m_conformanceWindow.rightOffset = padsize; } - if (fread(&m_conformanceWindow.bottomOffset, sizeof(int), 1, m_analysisFileIn) != 1) + if (fread(&bottomOffset, sizeof(int), 1, m_analysisFileIn) != 1) { x265_log(NULL, X265_LOG_ERROR, "Error reading analysis data. Conformance window bottom offset missing\n"); m_aborted = true; } - else if (m_conformanceWindow.bottomOffset && p->analysisLoadReuseLevel > 1) + else if (bottomOffset && p->analysisLoadReuseLevel > 1) { int scaleFactor = p->scaleFactor < 2 ? 1 : p->scaleFactor; - padsize = m_conformanceWindow.bottomOffset * scaleFactor; + padsize = bottomOffset * scaleFactor; p->sourceHeight += padsize; m_conformanceWindow.bEnabled = true; m_conformanceWindow.bottomOffset = padsize; @@ -4196,7 +4196,7 @@ x265_log(p, X265_LOG_WARNING, "Radl requires fixed gop-length (keyint == min-keyint). Disabling radl.\n"); } - if ((p->chunkStart || p->chunkEnd) && p->bOpenGOP) + if ((p->chunkStart || p->chunkEnd) && p->bOpenGOP && m_param->bResetZoneConfig) { p->chunkStart = p->chunkEnd = 0; x265_log(p, X265_LOG_WARNING, "Chunking requires closed gop structure. 
Disabling chunking.\n"); @@ -4229,12 +4229,6 @@ x265_log(p, X265_LOG_WARNING, "Turning on repeat - headers for zone encoding\n"); } - if (!m_param->bResetZoneConfig && (p->keyframeMax != p->keyframeMin)) - x265_log(p, X265_LOG_WARNING, "External zone reconfiguration requires a fixed GOP size to enable appropriate signaling of HRD info\n"); - - if (!m_param->bResetZoneConfig && (p->reconfigWindowSize != (uint64_t)p->keyframeMax)) - x265_log(p, X265_LOG_WARNING, "Zone size must be multiple of GOP size to enable appropriate signaling of HRD info\n"); - if (m_param->bEnableHME) { if (m_param->sourceHeight < 540) @@ -4311,18 +4305,27 @@ } } + uint32_t numCUsLoad, numCUsInHeightLoad; + /* Now arrived at the right frame, read the record */ analysis->poc = poc; analysis->frameRecordSize = frameRecordSize; X265_FREAD(&analysis->sliceType, sizeof(int), 1, m_analysisFileIn, &(picData->sliceType)); X265_FREAD(&analysis->bScenecut, sizeof(int), 1, m_analysisFileIn, &(picData->bScenecut)); X265_FREAD(&analysis->satdCost, sizeof(int64_t), 1, m_analysisFileIn, &(picData->satdCost)); - X265_FREAD(&analysis->numCUsInFrame, sizeof(int), 1, m_analysisFileIn, &(picData->numCUsInFrame)); + X265_FREAD(&numCUsLoad, sizeof(int), 1, m_analysisFileIn, &(picData->numCUsInFrame)); X265_FREAD(&analysis->numPartitions, sizeof(int), 1, m_analysisFileIn, &(picData->numPartitions)); + /* Update analysis info to save current settings */ + uint32_t widthInCU = (m_param->sourceWidth + m_param->maxCUSize - 1) >> m_param->maxLog2CUSize; + uint32_t heightInCU = (m_param->sourceHeight + m_param->maxCUSize - 1) >> m_param->maxLog2CUSize; + uint32_t numCUsInFrame = widthInCU * heightInCU; + analysis->numCUsInFrame = numCUsInFrame; + analysis->numCuInHeight = heightInCU; + if (m_param->bDisableLookahead) { - X265_FREAD(&analysis->numCuInHeight, sizeof(uint32_t), 1, m_analysisFileIn, &(picData->numCuInHeight)); + X265_FREAD(&numCUsInHeightLoad, sizeof(uint32_t), 1, m_analysisFileIn, 
&(picData->numCuInHeight)); X265_FREAD(&analysis->lookahead, sizeof(x265_lookahead_data), 1, m_analysisFileIn, &(picData->lookahead)); } int scaledNumPartition = analysis->numPartitions; @@ -4335,16 +4338,16 @@ if (m_param->ctuDistortionRefine == CTU_DISTORTION_INTERNAL) { - X265_FREAD((analysis->distortionData)->ctuDistortion, sizeof(sse_t), analysis->numCUsInFrame, m_analysisFileIn, picDistortion); + X265_FREAD((analysis->distortionData)->ctuDistortion, sizeof(sse_t), numCUsLoad, m_analysisFileIn, picDistortion); computeDistortionOffset(analysis); } if (m_param->bDisableLookahead && m_rateControl->m_isVbv) { size_t vbvCount = m_param->lookaheadDepth + m_param->bframes + 2;
View file
x265_3.3.tar.gz/source/encoder/frameencoder.cpp -> x265_3.4.tar.gz/source/encoder/frameencoder.cpp
Changed
@@ -130,7 +130,7 @@ { rowSum += sliceGroupSizeAccu; m_sliceBaseRow[++sidx] = i; - } + } } X265_CHECK(sidx < m_param->maxSlices, "sliceID check failed!"); m_sliceBaseRow[0] = 0; @@ -448,6 +448,18 @@ m_ssimCnt = 0; memset(&(m_frame->m_encData->m_frameStats), 0, sizeof(m_frame->m_encData->m_frameStats)); + if (!m_param->bHistBasedSceneCut && m_param->rc.aqMode != X265_AQ_EDGE && m_param->recursionSkipMode == EDGE_BASED_RSKIP) + { + int height = m_frame->m_fencPic->m_picHeight; + int width = m_frame->m_fencPic->m_picWidth; + intptr_t stride = m_frame->m_fencPic->m_stride; + + if (!computeEdge(m_frame->m_edgeBitPic, m_frame->m_fencPic->m_picOrg[0], NULL, stride, height, width, false, 1)) + { + x265_log(m_param, X265_LOG_ERROR, " Failed to compute edge !"); + } + } + /* Emit access unit delimiter unless this is the first frame and the user is * not repeating headers (since AUD is supposed to be the first NAL in the access * unit) */
View file
x265_3.3.tar.gz/source/encoder/ratecontrol.cpp -> x265_3.4.tar.gz/source/encoder/ratecontrol.cpp
Changed
@@ -269,7 +269,7 @@ x265_log(m_param, X265_LOG_WARNING, "NAL HRD parameters require VBV parameters, ignored\n"); m_param->bEmitHRDSEI = 0; } - m_isCbr = m_param->rc.rateControlMode == X265_RC_ABR && m_isVbv && !m_2pass && m_param->rc.vbvMaxBitrate <= m_param->rc.bitrate; + m_isCbr = m_param->rc.rateControlMode == X265_RC_ABR && m_isVbv && m_param->rc.vbvMaxBitrate <= m_param->rc.bitrate; if (m_param->rc.bStrictCbr && !m_isCbr) { x265_log(m_param, X265_LOG_WARNING, "strict CBR set without CBR mode, ignored\n"); @@ -335,7 +335,7 @@ int vbvBufferSize = m_param->rc.vbvBufferSize * 1000; int vbvMaxBitrate = m_param->rc.vbvMaxBitrate * 1000; - if (m_param->bEmitHRDSEI) + if (m_param->bEmitHRDSEI && !m_param->decoderVbvMaxRate) { const HRDInfo* hrd = &sps.vuiParameters.hrdParameters; vbvBufferSize = hrd->cpbSizeValue << (hrd->cpbSizeScale + CPB_SHIFT); @@ -509,6 +509,7 @@ CMP_OPT_FIRST_PASS(" keyint", m_param->keyframeMax); CMP_OPT_FIRST_PASS("scenecut", m_param->scenecutThreshold); CMP_OPT_FIRST_PASS("intra-refresh", m_param->bIntraRefresh); + CMP_OPT_FIRST_PASS("frame-dup", m_param->bEnableFrameDuplication); if (m_param->bMultiPassOptRPS) { CMP_OPT_FIRST_PASS("multi-pass-opt-rps", m_param->bMultiPassOptRPS); @@ -546,7 +547,7 @@ x265_log(m_param, X265_LOG_WARNING, "2nd pass has fewer frames than 1st pass (%d vs %d)\n", m_param->totalFrames, m_numEntries); } - if (m_param->totalFrames > m_numEntries) + if (m_param->totalFrames > m_numEntries && !m_param->bEnableFrameDuplication) { x265_log(m_param, X265_LOG_ERROR, "2nd pass has more frames than 1st pass (%d vs %d)\n", m_param->totalFrames, m_numEntries); @@ -781,6 +782,10 @@ // Init HRD HRDInfo* hrd = &sps.vuiParameters.hrdParameters; hrd->cbrFlag = m_isCbr; + if (m_param->reconfigWindowSize) { + hrd->cbrFlag = 0; + vbvMaxBitrate = m_param->decoderVbvMaxRate * 1000; + } // normalize HRD size and rate to the value / scale notation hrd->bitRateScale = x265_clip3(0, 15, calcScale(vbvMaxBitrate) - BR_SHIFT); @@ -829,7 +834,7 
@@ /* weighted average of cplx of future frames */ for (int j = 1; j < cplxBlur * 2 && j < m_numEntries - i; j++) { - int index = m_encOrder[i + j]; + int index = i+j; RateControlEntry *rcj = &m_rce2Pass[index]; weight *= 1 - pow(rcj->iCuCount / m_ncu, 2); if (weight < 0.0001) @@ -842,7 +847,7 @@ weight = 1.0; for (int j = 0; j <= cplxBlur * 2 && j <= i; j++) { - int index = m_encOrder[i - j]; + int index = i-j; RateControlEntry *rcj = &m_rce2Pass[index]; gaussianWeight = weight * exp(-j * j / 200.0); weightSum += gaussianWeight; @@ -851,7 +856,7 @@ if (weight < .0001) break; } - m_rce2Pass[m_encOrder[i]].blurredComplexity = cplxSum / weightSum; + m_rce2Pass[i].blurredComplexity= cplxSum / weightSum; } CHECKED_MALLOC(qScale, double, m_numEntries); if (filterSize > 1) @@ -870,7 +875,7 @@ expectedBits = 1; for (int i = 0; i < m_numEntries; i++) { - RateControlEntry* rce = &m_rce2Pass[m_encOrder[i]]; + RateControlEntry* rce = &m_rce2Pass[i]; double q = getQScale(rce, 1.0); expectedBits += qScale2bits(rce, q); m_lastQScaleFor[rce->sliceType] = q; @@ -893,15 +898,15 @@ /* find qscale */ for (int i = 0; i < m_numEntries; i++) { - RateControlEntry *rce = &m_rce2Pass[m_encOrder[i]]; + RateControlEntry *rce = &m_rce2Pass[i]; qScale[i] = getQScale(rce, rateFactor); m_lastQScaleFor[rce->sliceType] = qScale[i]; } /* fixed I/B qscale relative to P */ - for (int i = m_numEntries - 1; i >= 0; i--) + for (int i = 0; i < m_numEntries; i++) { - qScale[i] = getDiffLimitedQScale(&m_rce2Pass[m_encOrder[i]], qScale[i]); + qScale[i] = getDiffLimitedQScale(&m_rce2Pass[i], qScale[i]); X265_CHECK(qScale[i] >= 0, "qScale became negative\n"); } @@ -912,7 +917,6 @@ for (int i = 0; i < m_numEntries; i++) { double q = 0.0, sum = 0.0; - for (int j = 0; j < filterSize; j++) { int idx = i + j - filterSize / 2; @@ -920,7 +924,7 @@ double coeff = qBlur == 0 ? 
1.0 : exp(-d * d / (qBlur * qBlur)); if (idx < 0 || idx >= m_numEntries) continue; - if (m_rce2Pass[m_encOrder[i]].sliceType != m_rce2Pass[m_encOrder[idx]].sliceType) + if (m_rce2Pass[i].sliceType != m_rce2Pass[idx].sliceType) continue; q += qScale[idx] * coeff; sum += coeff; @@ -932,7 +936,7 @@ /* find expected bits */ for (int i = 0; i < m_numEntries; i++) { - RateControlEntry *rce = &m_rce2Pass[m_encOrder[i]]; + RateControlEntry *rce = &m_rce2Pass[i]; rce->newQScale = clipQscale(NULL, rce, blurredQscale[i]); // check if needed X265_CHECK(rce->newQScale >= 0, "new Qscale is negative\n"); expectedBits += qScale2bits(rce, rce->newQScale); @@ -1279,6 +1283,7 @@ m_param->rc.vbvMaxBitrate = m_param->rc.zones[i].zoneParam->rc.vbvMaxBitrate; memcpy(m_relativeComplexity, m_param->rc.zones[i].relativeComplexity, sizeof(double) * m_param->reconfigWindowSize); reconfigureRC(); + m_isCbr = 1; /* Always vbvmaxrate == bitrate here*/ m_top->zoneReadCount[i].incr(); } } @@ -1951,7 +1956,7 @@ /* Adjust quant based on the difference between * achieved and expected bitrate so far */ double curTime = (double)rce->encodeOrder / m_numEntries; - double w = x265_clip3(0.0, 1.0, curTime * 100); + double w = x265_clip3(0.0, 1.0, curTime); q *= pow((double)m_totalBits / m_expectedBitsSum, w); } if (m_framesDone == 0 && m_param->rc.rateControlMode == X265_RC_ABR && m_isGrainEnabled) @@ -2742,7 +2747,9 @@ x265_log(m_param, X265_LOG_WARNING, "poc:%d, VBV underflow (%.0f bits)\n", rce->poc, m_bufferFillFinal); m_bufferFillFinal = X265_MAX(m_bufferFillFinal, 0); - m_bufferFillFinal += m_bufferRate; + m_bufferFillFinal += rce->bufferRate; + if (m_param->csvLogLevel >= 2) + m_unclippedBufferFillFinal = m_bufferFillFinal; if (m_param->rc.bStrictCbr) { @@ -2752,14 +2759,14 @@ filler += FILLER_OVERHEAD * 8; } m_bufferFillFinal -= filler; - bufferBits = X265_MIN(bits + filler + m_bufferExcess, m_bufferRate); + bufferBits = X265_MIN(bits + filler + m_bufferExcess, rce->bufferRate); m_bufferExcess = 
X265_MAX(m_bufferExcess - bufferBits + bits + filler, 0); m_bufferFillActual += bufferBits - bits - filler; } else { m_bufferFillFinal = X265_MIN(m_bufferFillFinal, m_bufferSize); - bufferBits = X265_MIN(bits + m_bufferExcess, m_bufferRate); + bufferBits = X265_MIN(bits + m_bufferExcess, rce->bufferRate); m_bufferExcess = X265_MAX(m_bufferExcess - bufferBits + bits, 0); m_bufferFillActual += bufferBits - bits; m_bufferFillActual = X265_MIN(m_bufferFillActual, m_bufferSize);
View file
x265_3.3.tar.gz/source/encoder/ratecontrol.h -> x265_3.4.tar.gz/source/encoder/ratecontrol.h
Changed
@@ -157,6 +157,7 @@ double m_rateFactorConstant; double m_bufferSize; double m_bufferFillFinal; /* real buffer as of the last finished frame */ + double m_unclippedBufferFillFinal; /* real unclipped buffer as of the last finished frame used to log in CSV*/ double m_bufferFill; /* planned buffer, if all in-progress frames hit their bit budget */ double m_bufferRate; /* # of bits added to buffer_fill after each frame */ double m_vbvMaxRate; /* in kbps */
View file
x265_3.3.tar.gz/source/encoder/slicetype.cpp -> x265_3.4.tar.gz/source/encoder/slicetype.cpp
Changed
@@ -87,7 +87,7 @@ namespace X265_NS { -bool computeEdge(pixel *edgePic, pixel *refPic, pixel *edgeTheta, intptr_t stride, int height, int width, bool bcalcTheta) +bool computeEdge(pixel* edgePic, pixel* refPic, pixel* edgeTheta, intptr_t stride, int height, int width, bool bcalcTheta, pixel whitePixel) { intptr_t rowOne = 0, rowTwo = 0, rowThree = 0, colOne = 0, colTwo = 0, colThree = 0; intptr_t middle = 0, topLeft = 0, topRight = 0, bottomLeft = 0, bottomRight = 0; @@ -141,7 +141,7 @@ theta = 180 + theta; edgeTheta[middle] = (pixel)theta; } - edgePic[middle] = (pixel)(gradientMagnitude >= edgeThreshold ? edgeThreshold : blackPixel); + edgePic[middle] = (pixel)(gradientMagnitude >= EDGE_THRESHOLD ? whitePixel : blackPixel); } } return true; @@ -519,6 +519,13 @@ if (param->rc.aqMode == X265_AQ_EDGE) edgeFilter(curFrame, param); + if (param->rc.aqMode == X265_AQ_EDGE && !param->bHistBasedSceneCut && param->recursionSkipMode == EDGE_BASED_RSKIP) + { + pixel* src = curFrame->m_edgePic + curFrame->m_fencPic->m_lumaMarginY * curFrame->m_fencPic->m_stride + curFrame->m_fencPic->m_lumaMarginX; + primitives.planecopy_pp_shr(src, curFrame->m_fencPic->m_stride, curFrame->m_edgeBitPic, + curFrame->m_fencPic->m_stride, curFrame->m_fencPic->m_picWidth, curFrame->m_fencPic->m_picHeight, SHIFT_TO_BITPLANE); + } + if (param->rc.aqMode == X265_AQ_AUTO_VARIANCE || param->rc.aqMode == X265_AQ_AUTO_VARIANCE_BIASED || param->rc.aqMode == X265_AQ_EDGE) { double bit_depth_correction = 1.f / (1 << (2 * (X265_DEPTH - 8)));
View file
x265_3.3.tar.gz/source/encoder/slicetype.h -> x265_3.4.tar.gz/source/encoder/slicetype.h
Changed
@@ -44,9 +44,9 @@ #define EDGE_INCLINATION 45 #if HIGH_BIT_DEPTH -#define edgeThreshold 1023.0 +#define EDGE_THRESHOLD 1023.0 #else -#define edgeThreshold 255.0 +#define EDGE_THRESHOLD 255.0 #endif #define PI 3.14159265 @@ -101,7 +101,7 @@ protected: uint32_t acEnergyCu(Frame* curFrame, uint32_t blockX, uint32_t blockY, int csp, uint32_t qgSize); - uint32_t edgeDensityCu(Frame*curFrame, uint32_t &avgAngle, uint32_t blockX, uint32_t blockY, uint32_t qgSize); + uint32_t edgeDensityCu(Frame* curFrame, uint32_t &avgAngle, uint32_t blockX, uint32_t blockY, uint32_t qgSize); uint32_t lumaSumCu(Frame* curFrame, uint32_t blockX, uint32_t blockY, uint32_t qgSize); uint32_t weightCostLuma(Lowres& fenc, Lowres& ref, WeightParam& wp); bool allocWeightedRef(Lowres& fenc); @@ -265,7 +265,6 @@ CostEstimateGroup& operator=(const CostEstimateGroup&); }; -bool computeEdge(pixel *edgePic, pixel *refPic, pixel *edgeTheta, intptr_t stride, int height, int width, bool bcalcTheta); - +bool computeEdge(pixel* edgePic, pixel* refPic, pixel* edgeTheta, intptr_t stride, int height, int width, bool bcalcTheta, pixel whitePixel = EDGE_THRESHOLD); } #endif // ifndef X265_SLICETYPE_H
View file
x265_3.3.tar.gz/source/test/CMakeLists.txt -> x265_3.4.tar.gz/source/test/CMakeLists.txt
Changed
@@ -23,13 +23,15 @@ # add ARM assembly files if(ARM OR CROSS_COMPILE_ARM) - enable_language(ASM) - set(NASM_SRC checkasm-arm.S) - add_custom_command( - OUTPUT checkasm-arm.obj - COMMAND ${CMAKE_CXX_COMPILER} - ARGS ${NASM_FLAGS} ${CMAKE_CURRENT_SOURCE_DIR}/checkasm-arm.S -o checkasm-arm.obj - DEPENDS checkasm-arm.S) + if(NOT ARM64) + enable_language(ASM) + set(NASM_SRC checkasm-arm.S) + add_custom_command( + OUTPUT checkasm-arm.obj + COMMAND ${CMAKE_CXX_COMPILER} + ARGS ${NASM_FLAGS} ${CMAKE_CURRENT_SOURCE_DIR}/checkasm-arm.S -o checkasm-arm.obj + DEPENDS checkasm-arm.S) + endif() endif(ARM OR CROSS_COMPILE_ARM) # add PowerPC assembly files
View file
x265_3.3.tar.gz/source/test/regression-tests.txt -> x265_3.4.tar.gz/source/test/regression-tests.txt
Changed
@@ -75,7 +75,7 @@ News-4k.y4m,--preset superfast --lookahead-slices 6 --aq-mode 0 News-4k.y4m,--preset superfast --slices 4 --aq-mode 0 News-4k.y4m,--preset medium --tune ssim --no-sao --qg-size 16 -News-4k.y4m,--preset veryslow --no-rskip +News-4k.y4m,--preset veryslow --rskip 0 News-4k.y4m,--preset veryslow --pme --crf 40 OldTownCross_1920x1080_50_10bit_422.yuv,--preset superfast --weightp OldTownCross_1920x1080_50_10bit_422.yuv,--preset medium --no-weightp @@ -162,7 +162,11 @@ sintel_trailer_2k_1920x1080_24.yuv, --preset medium --hist-scenecut --hist-threshold 0.02 --frame-dup --dup-threshold 60 --hrd --bitrate 10000 --vbv-bufsize 15000 --vbv-maxrate 12000 sintel_trailer_2k_1920x1080_24.yuv, --preset medium --hist-scenecut --hist-threshold 0.02 sintel_trailer_2k_1920x1080_24.yuv, --preset ultrafast --hist-scenecut --hist-threshold 0.02 - +crowd_run_1920x1080_50.yuv, --preset faster --ctu 32 --rskip 2 --rskip-edge-threshold 5 +crowd_run_1920x1080_50.yuv, --preset fast --ctu 64 --rskip 2 --rskip-edge-threshold 5 --aq-mode 4 +crowd_run_1920x1080_50.yuv, --preset slow --ctu 32 --rskip 2 --rskip-edge-threshold 5 --hist-scenecut --hist-threshold 0.1 +crowd_run_1920x1080_50.yuv, --preset slower --ctu 16 --rskip 2 --rskip-edge-threshold 5 --hist-scenecut --hist-threshold 0.1 --aq-mode 4 + # Main12 intraCost overflow bug test 720p50_parkrun_ter.y4m,--preset medium
View file
x265_3.3.tar.gz/source/test/save-load-tests.txt -> x265_3.4.tar.gz/source/test/save-load-tests.txt
Changed
@@ -18,3 +18,4 @@ RaceHorses_416x240_30.y4m, --preset slow --no-cutree --ctu 16 --analysis-save x265_analysis.dat --analysis-save-reuse-level 10 --scale-factor 2 --crf 22 --vbv-maxrate 1000 --vbv-bufsize 1000::RaceHorses_832x480_30.y4m, --preset slow --no-cutree --ctu 32 --analysis-load x265_analysis.dat --analysis-save x265_analysis_2.dat --analysis-load-reuse-level 10 --analysis-save-reuse-level 10 --scale-factor 2 --crf 16 --vbv-maxrate 4000 --vbv-bufsize 4000 --refine-intra 0 --refine-inter 1::RaceHorses_1664x960_30.y4m, --preset slow --no-cutree --ctu 64 --analysis-load x265_analysis_2.dat --analysis-load-reuse-level 10 --scale-factor 2 --crf 12 --vbv-maxrate 7000 --vbv-bufsize 7000 --refine-intra 2 --refine-inter 2 crowd_run_540p50.y4m, --preset veryslow --no-cutree --analysis-save x265_analysis_540.dat --analysis-save-reuse-level 10 --scale-factor 2 --bitrate 5000 --vbv-bufsize 15000 --vbv-maxrate 9000::crowd_run_1080p50.y4m, --preset veryslow --no-cutree --analysis-save x265_analysis_1080.dat --analysis-save-reuse-level 10 --scale-factor 2 --bitrate 10000 --vbv-bufsize 30000 --vbv-maxrate 17500::crowd_run_1080p50.y4m, --preset veryslow --no-cutree --analysis-save x265_analysis_1080.dat --analysis-load x265_analysis_540.dat --refine-intra 4 --dynamic-refine --analysis-load-reuse-level 10 --analysis-save-reuse-level 10 --scale-factor 2 --bitrate 10000 --vbv-bufsize 30000 --vbv-maxrate 17500::crowd_run_2160p50.y4m, --preset veryslow --no-cutree --analysis-save x265_analysis_2160.dat --analysis-load x265_analysis_1080.dat --refine-intra 3 --dynamic-refine --analysis-load-reuse-level 10 --analysis-save-reuse-level 10 --scale-factor 2 --bitrate 24000 --vbv-bufsize 84000 --vbv-maxrate 49000::crowd_run_2160p50.y4m, --preset veryslow --no-cutree --analysis-load x265_analysis_2160.dat --refine-intra 2 --dynamic-refine --analysis-load-reuse-level 10 --scale-factor 1 --bitrate 24000 --vbv-bufsize 84000 --vbv-maxrate 49000 crowd_run_540p50.y4m, --preset medium 
--no-cutree --analysis-save x265_analysis_540.dat --analysis-save-reuse-level 10 --scale-factor 2 --bitrate 5000 --vbv-bufsize 15000 --vbv-maxrate 9000::crowd_run_1080p50.y4m, --preset medium --no-cutree --analysis-save x265_analysis_1080.dat --analysis-save-reuse-level 10 --scale-factor 2 --bitrate 10000 --vbv-bufsize 30000 --vbv-maxrate 17500::crowd_run_1080p50.y4m, --preset medium --no-cutree --analysis-save x265_analysis_1080.dat --analysis-load x265_analysis_540.dat --refine-intra 4 --dynamic-refine --analysis-load-reuse-level 10 --analysis-save-reuse-level 10 --scale-factor 2 --bitrate 10000 --vbv-bufsize 30000 --vbv-maxrate 17500::crowd_run_2160p50.y4m, --preset medium --no-cutree --analysis-save x265_analysis_2160.dat --analysis-load x265_analysis_1080.dat --refine-intra 3 --dynamic-refine --analysis-load-reuse-level 10 --analysis-save-reuse-level 10 --scale-factor 2 --bitrate 24000 --vbv-bufsize 84000 --vbv-maxrate 49000::crowd_run_2160p50.y4m, --preset medium --no-cutree --analysis-load x265_analysis_2160.dat --refine-intra 2 --dynamic-refine --analysis-load-reuse-level 10 --scale-factor 1 --bitrate 24000 --vbv-bufsize 84000 --vbv-maxrate 49000 +News-4k.y4m, --preset medium --analysis-save x265_analysis_fdup.dat --frame-dup --hrd --bitrate 10000 --vbv-bufsize 15000 --vbv-maxrate 12000::News-4k.y4m, --analysis-load x265_analysis_fdup.dat --frame-dup --hrd --bitrate 10000 --vbv-bufsize 15000 --vbv-maxrate 12000
View file
x265_3.3.tar.gz/source/test/testbench.cpp -> x265_3.4.tar.gz/source/test/testbench.cpp
Changed
@@ -5,6 +5,7 @@ * Mandar Gurav <mandar@multicorewareinc.com> * Mahesh Pittala <mahesh@multicorewareinc.com> * Min Chen <chenm003@163.com> + * Yimeng Su <yimeng.su@huawei.com> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -208,6 +209,14 @@ EncoderPrimitives asmprim; memset(&asmprim, 0, sizeof(asmprim)); setupAssemblyPrimitives(asmprim, test_arch[i].flag); + +#if X265_ARCH_ARM64 + /* Temporary workaround because luma_vsp assembly primitive has not been completed + * but interp_8tap_hv_pp_cpu uses mixed C primitive and assembly primitive. + * Otherwise, segment fault occurs. */ + setupAliasCPrimitives(cprim, asmprim, test_arch[i].flag); +#endif + setupAliasPrimitives(asmprim); memcpy(&primitives, &asmprim, sizeof(EncoderPrimitives)); for (size_t h = 0; h < sizeof(harness) / sizeof(TestHarness*); h++) @@ -232,6 +241,13 @@ #endif setupAssemblyPrimitives(optprim, cpuid); +#if X265_ARCH_ARM64 + /* Temporary workaround because luma_vsp assembly primitive has not been completed + * but interp_8tap_hv_pp_cpu uses mixed C primitive and assembly primitive. + * Otherwise, segment fault occurs. */ + setupAliasCPrimitives(cprim, optprim, cpuid); +#endif + /* Note that we do not setup aliases for performance tests, that would be * redundant. The testbench only verifies they are correctly aliased */
View file
x265_3.3.tar.gz/source/test/testharness.h -> x265_3.4.tar.gz/source/test/testharness.h
Changed
@@ -3,6 +3,7 @@ * * Authors: Steve Borho <steve@borho.org> * Min Chen <chenm003@163.com> + * Yimeng Su <yimeng.su@huawei.com> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -81,12 +82,16 @@ #if X265_ARCH_X86 asm volatile("rdtsc" : "=a" (a) ::"edx"); #elif X265_ARCH_ARM +#if X265_ARCH_ARM64 + asm volatile("mrs %0, cntvct_el0" : "=r"(a)); +#else // TOD-DO: verify following inline asm to get cpu Timestamp Counter for ARM arch // asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r"(a)); // TO-DO: replace clock() function with appropriate ARM cpu instructions a = clock(); #endif +#endif return a; } #endif // ifdef _MSC_VER
View file
x265_3.3.tar.gz/source/x265.cpp -> x265_3.4.tar.gz/source/x265.cpp
Changed
@@ -27,11 +27,7 @@ #include "x265.h" #include "x265cli.h" - -#include "input/input.h" -#include "output/output.h" -#include "output/reconplay.h" -#include "svt.h" +#include "abrEncApp.h" #if HAVE_VLD /* Visual Leak Detector */ @@ -47,191 +43,59 @@ #include <fstream> #include <queue> -#define CONSOLE_TITLE_SIZE 200 -#ifdef _WIN32 -#include <windows.h> -#define SetThreadExecutionState(es) -static char orgConsoleTitle[CONSOLE_TITLE_SIZE] = ""; -#else -#define GetConsoleTitle(t, n) -#define SetConsoleTitle(t) -#define SetThreadExecutionState(es) -#endif - using namespace X265_NS; -/* Ctrl-C handler */ -static volatile sig_atomic_t b_ctrl_c /* = 0 */; -static void sigint_handler(int) -{ - b_ctrl_c = 1; -} -#define START_CODE 0x00000001 -#define START_CODE_BYTES 4 - -struct CLIOptions -{ - InputFile* input; - ReconFile* recon; - OutputFile* output; - FILE* qpfile; - FILE* zoneFile; - FILE* dolbyVisionRpu; /* File containing Dolby Vision BL RPU metadata */ - const char* reconPlayCmd; - const x265_api* api; - x265_param* param; - x265_vmaf_data* vmafData; - bool bProgress; - bool bForceY4m; - bool bDither; - uint32_t seek; // number of frames to skip from the beginning - uint32_t framesToBeEncoded; // number of frames to encode - uint64_t totalbytes; - int64_t startTime; - int64_t prevUpdateTime; - - /* in microseconds */ - static const int UPDATE_INTERVAL = 250000; - - CLIOptions() - { - input = NULL; - recon = NULL; - output = NULL; - qpfile = NULL; - zoneFile = NULL; - dolbyVisionRpu = NULL; - reconPlayCmd = NULL; - api = NULL; - param = NULL; - vmafData = NULL; - framesToBeEncoded = seek = 0; - totalbytes = 0; - bProgress = true; - bForceY4m = false; - startTime = x265_mdate(); - prevUpdateTime = 0; - bDither = false; - } +#define X265_HEAD_ENTRIES 3 - void destroy(); - void printStatus(uint32_t frameNum); - bool parse(int argc, char **argv); - bool parseZoneParam(int argc, char **argv, x265_param* globalParam, int zonefileCount); - bool parseQPFile(x265_picture 
&pic_org); - bool parseZoneFile(); -}; - -void CLIOptions::destroy() -{ - if (input) - input->release(); - input = NULL; - if (recon) - recon->release(); - recon = NULL; - if (qpfile) - fclose(qpfile); - qpfile = NULL; - if (zoneFile) - fclose(zoneFile); - zoneFile = NULL; - if (dolbyVisionRpu) - fclose(dolbyVisionRpu); - dolbyVisionRpu = NULL; - if (output) - output->release(); - output = NULL; -} - -void CLIOptions::printStatus(uint32_t frameNum) -{ - char buf[200]; - int64_t time = x265_mdate(); - - if (!bProgress || !frameNum || (prevUpdateTime && time - prevUpdateTime < UPDATE_INTERVAL)) - return; - - int64_t elapsed = time - startTime; - double fps = elapsed > 0 ? frameNum * 1000000. / elapsed : 0; - float bitrate = 0.008f * totalbytes * (param->fpsNum / param->fpsDenom) / ((float)frameNum); - if (framesToBeEncoded) - { - int eta = (int)(elapsed * (framesToBeEncoded - frameNum) / ((int64_t)frameNum * 1000000)); - sprintf(buf, "x265 [%.1f%%] %d/%d frames, %.2f fps, %.2f kb/s, eta %d:%02d:%02d", - 100. * frameNum / (param->chunkEnd ? param->chunkEnd : param->totalFrames), frameNum, (param->chunkEnd ? param->chunkEnd : param->totalFrames), fps, bitrate, - eta / 3600, (eta / 60) % 60, eta % 60); - } - else - sprintf(buf, "x265 %d frames: %.2f fps, %.2f kb/s", frameNum, fps, bitrate); - - fprintf(stderr, "%s \r", buf + 5); - SetConsoleTitle(buf); - fflush(stderr); // needed in windows - prevUpdateTime = time; -} +#ifdef _WIN32 +#define strdup _strdup +#endif -bool CLIOptions::parseZoneParam(int argc, char **argv, x265_param* globalParam, int zonefileCount) +#ifdef _WIN32 +/* Copy of x264 code, which allows for Unicode characters in the command line. + * Retrieve command line arguments as UTF-8. */ +static int get_argv_utf8(int *argc_ptr, char ***argv_ptr) { - bool bError = false; - int bShowHelp = false; - int outputBitDepth = 0; - const char *profile = NULL; - - /* Presets are applied before all other options. 
*/ - for (optind = 0;;) - { - int c = getopt_long(argc, argv, short_options, long_options, NULL); - if (c == -1) - break; - else if (c == 'D') - outputBitDepth = atoi(optarg); - else if (c == 'P') - profile = optarg; - else if (c == '?') - bShowHelp = true; - } - - if (!outputBitDepth && profile) - { - /* try to derive the output bit depth from the requested profile */ - if (strstr(profile, "10")) - outputBitDepth = 10; - else if (strstr(profile, "12")) - outputBitDepth = 12; - else - outputBitDepth = 8; - } - - api = x265_api_get(outputBitDepth); - if (!api) + int ret = 0; + wchar_t **argv_utf16 = CommandLineToArgvW(GetCommandLineW(), argc_ptr); + if (argv_utf16) { - x265_log(NULL, X265_LOG_WARNING, "falling back to default bit-depth\n"); - api = x265_api_get(0); - } + int argc = *argc_ptr; + int offset = (argc + 1) * sizeof(char*); + int size = offset; - if (bShowHelp) - { - printVersion(globalParam, api); - showHelp(globalParam); - } + for (int i = 0; i < argc; i++)
View file
x265_3.3.tar.gz/source/x265.h -> x265_3.4.tar.gz/source/x265.h
Changed
@@ -134,6 +134,7 @@ int ctuDistortionRefine; int rightOffset; int bottomOffset; + int frameDuplication; }x265_analysis_validate; /* Stores intra analysis data for a single frame. This struct needs better packing */ @@ -304,6 +305,7 @@ double totalFrameTime; double vmafFrameScore; double bufferFillFinal; + double unclippedBufferFillFinal; } x265_frame_stats; typedef struct x265_ctu_info_t @@ -1255,9 +1257,9 @@ * skip blocks. Default is disabled */ int bEnableEarlySkip; - /* Enable early CU size decisions to avoid recursing to higher depths. + /* Enable early CU size decisions to avoid recursing to higher depths. * Default is enabled */ - int bEnableRecursionSkip; + int recursionSkipMode; /* Use a faster search method to find the best intra mode. Default is 0 */ int bEnableFastIntra; @@ -1857,7 +1859,7 @@ double edgeTransitionThreshold; /* Enables histogram based scenecut detection algorithm to detect scenecuts. Default disabled */ - int bHistBasedSceneCut; + int bHistBasedSceneCut; /* Enable HME search ranges for L0, L1 and L2 respectively. */ int hmeRange[3]; @@ -1874,7 +1876,7 @@ * analysis information stored in analysis-save. Higher the refine level higher * the information stored. Default is 5 */ int analysisSaveReuseLevel; - + /* A value between 1 and 10 (both inclusive) determines the level of * analysis information reused in analysis-load. Higher the refine level higher * the information reused. Default is 5 */ @@ -1901,6 +1903,12 @@ * info is available from the corresponding analysis-save. */ int confWinBottomOffset; + + /* Edge variance threshold for quad tree establishment. */ + float edgeVarThreshold; + + /* Maxrate that could be signaled to the decoder. Default 0. API only. */ + int decoderVbvMaxRate; } x265_param; /* x265_param_alloc:
View file
x265_3.4.tar.gz/source/x265cli.cpp
Added
@@ -0,0 +1,1062 @@ +/***************************************************************************** + * Copyright (C) 2013-2020 MulticoreWare, Inc + * + * Authors: Steve Borho <steve@borho.org> + * Min Chen <chenm003@163.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA. + * + * This program is also available under a commercial proprietary license. + * For more information, contact us at license @ x265.com. + *****************************************************************************/ +#if _MSC_VER +#pragma warning(disable: 4127) // conditional expression is constant, yes I know +#endif + +#include "x265cli.h" +#include "svt.h" + +#define START_CODE 0x00000001 +#define START_CODE_BYTES 4 + +#ifdef __cplusplus +namespace X265_NS { +#endif + + static void printVersion(x265_param *param, const x265_api* api) + { + x265_log(param, X265_LOG_INFO, "HEVC encoder version %s\n", api->version_str); + x265_log(param, X265_LOG_INFO, "build info %s\n", api->build_info_str); + } + + static void showHelp(x265_param *param) + { + int level = param->logLevel; + +#define OPT(value) (value ? 
"enabled" : "disabled") +#define H0 printf +#define H1 if (level >= X265_LOG_DEBUG) printf + + H0("\nSyntax: x265 [options] infile [-o] outfile\n"); + H0(" infile can be YUV or Y4M\n"); + H0(" outfile is raw HEVC bitstream\n"); + H0("\nExecutable Options:\n"); + H0("-h/--help Show this help text and exit\n"); + H0(" --fullhelp Show all options and exit\n"); + H0("-V/--version Show version info and exit\n"); + H0("\nOutput Options:\n"); + H0("-o/--output <filename> Bitstream output file name\n"); + H0("-D/--output-depth 8|10|12 Output bit depth (also internal bit depth). Default %d\n", param->internalBitDepth); + H0(" --log-level <string> Logging level: none error warning info debug full. Default %s\n", X265_NS::logLevelNames[param->logLevel + 1]); + H0(" --no-progress Disable CLI progress reports\n"); + H0(" --csv <filename> Comma separated log file, if csv-log-level > 0 frame level statistics, else one line per run\n"); + H0(" --csv-log-level <integer> Level of csv logging, if csv-log-level > 0 frame level statistics, else one line per run: 0-2\n"); + H0("\nInput Options:\n"); + H0(" --input <filename> Raw YUV or Y4M input file name. `-` for stdin\n"); + H1(" --y4m Force parsing of input stream as YUV4MPEG2 regardless of file extension\n"); + H0(" --fps <float|rational> Source frame rate (float or num/denom), auto-detected if Y4M\n"); + H0(" --input-res WxH Source picture size [w x h], auto-detected if Y4M\n"); + H1(" --input-depth <integer> Bit-depth of input file. Default 8\n"); + H1(" --input-csp <string> Chroma subsampling, auto-detected if Y4M\n"); + H1(" 0 - i400 (4:0:0 monochrome)\n"); + H1(" 1 - i420 (4:2:0 default)\n"); + H1(" 2 - i422 (4:2:2)\n"); + H1(" 3 - i444 (4:4:4)\n"); +#if ENABLE_HDR10_PLUS + H0(" --dhdr10-info <filename> JSON file containing the Creative Intent Metadata to be encoded as Dynamic Tone Mapping\n"); + H0(" --[no-]dhdr10-opt Insert tone mapping SEI only for IDR frames and when the tone mapping information changes. 
Default disabled\n"); +#endif + H0(" --dolby-vision-profile <float|integer> Specifies Dolby Vision profile ID. Currently only profile 5, profile 8.1 and profile 8.2 enabled. Specified as '5' or '50'. Default 0 (disabled).\n"); + H0(" --dolby-vision-rpu <filename> File containing Dolby Vision RPU metadata.\n" + " If given, x265's Dolby Vision metadata parser will fill the RPU field of input pictures with the metadata read from the file. Default NULL(disabled).\n"); + H0(" --nalu-file <filename> Text file containing SEI messages in the following format : <POC><space><PREFIX><space><NAL UNIT TYPE>/<SEI TYPE><space><SEI Payload>\n"); + H0("-f/--frames <integer> Maximum number of frames to encode. Default all\n"); + H0(" --seek <integer> First frame to encode\n"); + H1(" --[no-]interlace <bff|tff> Indicate input pictures are interlace fields in temporal order. Default progressive\n"); + H0(" --[no-]field Enable or disable field coding. Default %s\n", OPT(param->bField)); + H1(" --dither Enable dither if downscaling to 8 bit pixels. Default disabled\n"); + H0(" --[no-]copy-pic Copy buffers of input picture in frame. Default %s\n", OPT(param->bCopyPicToFrame)); + H0("\nQuality reporting metrics:\n"); + H0(" --[no-]ssim Enable reporting SSIM metric scores. Default %s\n", OPT(param->bEnableSsim)); + H0(" --[no-]psnr Enable reporting PSNR metric scores. Default %s\n", OPT(param->bEnablePsnr)); + H0("\nProfile, Level, Tier:\n"); + H0("-P/--profile <string> Enforce an encode profile: main, main10, mainstillpicture\n"); + H0(" --level-idc <integer|float> Force a minimum required decoder level (as '5.0' or '50')\n"); + H0(" --[no-]high-tier If a decoder level is specified, this modifier selects High tier of that level\n"); + H0(" --uhd-bd Enable UHD Bluray compatibility support\n"); + H0(" --[no-]allow-non-conformance Allow the encoder to generate profile NONE bitstreams. 
Default %s\n", OPT(param->bAllowNonConformance)); + H0("\nThreading, performance:\n"); + H0(" --pools <integer,...> Comma separated thread count per thread pool (pool per NUMA node)\n"); + H0(" '-' implies no threads on node, '+' implies one thread per core on node\n"); + H0("-F/--frame-threads <integer> Number of concurrently encoded frames. 0: auto-determined by core count\n"); + H0(" --[no-]wpp Enable Wavefront Parallel Processing. Default %s\n", OPT(param->bEnableWavefront)); + H0(" --[no-]slices <integer> Enable Multiple Slices feature. Default %d\n", param->maxSlices); + H0(" --[no-]pmode Parallel mode analysis. Default %s\n", OPT(param->bDistributeModeAnalysis)); + H0(" --[no-]pme Parallel motion estimation. Default %s\n", OPT(param->bDistributeMotionEstimation)); + H0(" --[no-]asm <bool|int|string> Override CPU detection. Default: auto\n"); + H0("\nPresets:\n"); + H0("-p/--preset <string> Trade off performance for compression efficiency. Default medium\n"); + H0(" ultrafast, superfast, veryfast, faster, fast, medium, slow, slower, veryslow, or placebo\n"); + H0("-t/--tune <string> Tune the settings for a particular type of source or situation:\n"); + H0(" psnr, ssim, grain, zerolatency, fastdecode\n"); + H0("\nQuad-Tree size and depth:\n"); + H0("-s/--ctu <64|32|16> Maximum CU size (WxH). Default %d\n", param->maxCUSize); + H0(" --min-cu-size <64|32|16|8> Minimum CU size (WxH). Default %d\n", param->minCUSize); + H0(" --max-tu-size <32|16|8|4> Maximum TU size (WxH). Default %d\n", param->maxTUSize); + H0(" --tu-intra-depth <integer> Max TU recursive depth for intra CUs. Default %d\n", param->tuQTMaxIntraDepth); + H0(" --tu-inter-depth <integer> Max TU recursive depth for inter CUs. Default %d\n", param->tuQTMaxInterDepth); + H0(" --limit-tu <0..4> Enable early exit from TU recursion for inter coded blocks. Default %d\n", param->limitTU); + H0("\nAnalysis:\n"); + H0(" --rd <1..6> Level of RDO in mode decision 1:least....6:full RDO. 
Default %d\n", param->rdLevel); + H0(" --[no-]psy-rd <0..5.0> Strength of psycho-visual rate distortion optimization, 0 to disable. Default %.1f\n", param->psyRd); + H0(" --[no-]rdoq-level <0|1|2> Level of RDO in quantization 0:none, 1:levels, 2:levels & coding groups. Default %d\n", param->rdoqLevel); + H0(" --[no-]psy-rdoq <0..50.0> Strength of psycho-visual optimization in RDO quantization, 0 to disable. Default %.1f\n", param->psyRdoq); + H0(" --dynamic-rd <0..4.0> Strength of dynamic RD, 0 to disable. Default %.2f\n", param->dynamicRd); + H0(" --[no-]ssim-rd Enable ssim rate distortion optimization, 0 to disable. Default %s\n", OPT(param->bSsimRd)); + H0(" --[no-]rd-refine Enable QP based RD refinement for rd levels 5 and 6. Default %s\n", OPT(param->bEnableRdRefine)); + H0(" --[no-]early-skip Enable early SKIP detection. Default %s\n", OPT(param->bEnableEarlySkip)); + H0(" --rskip <mode> Set mode for early exit from recursion. Mode 1: exit using rdcost & CU homogenity. Mode 2: exit using CU edge density.\n" + " Mode 0: disabled. Default %d\n", param->recursionSkipMode); + H1(" --rskip-edge-threshold Threshold in terms of percentage (integer of range [0,100]) for minimum edge density in CUs used to prun the recursion depth. Applicable only for rskip mode 2. Value is preset dependent. Default: %.f\n", param->edgeVarThreshold*100.0f); + H1(" --[no-]tskip-fast Enable fast intra transform skipping. Default %s\n", OPT(param->bEnableTSkipFast)); + H1(" --[no-]splitrd-skip Enable skipping split RD analysis when sum of split CU rdCost larger than one split CU rdCost for Intra CU. Default %s\n", OPT(param->bEnableSplitRdSkip)); + H1(" --nr-intra <integer> An integer value in range of 0 to 2000, which denotes strength of noise reduction in intra CUs. Default 0\n"); + H1(" --nr-inter <integer> An integer value in range of 0 to 2000, which denotes strength of noise reduction in inter CUs. 
Default 0\n"); + H0(" --ctu-info <integer> Enable receiving ctu information asynchronously and determine reaction to the CTU information (0, 1, 2, 4, 6) Default 0\n" + " - 1: force the partitions if CTU information is present\n" + " - 2: functionality of (1) and reduce qp if CTU information has changed\n" + " - 4: functionality of (1) and force Inter modes when CTU Information has changed, merge/skip otherwise\n" + " Enable this option only when planning to invoke the API function x265_encoder_ctu_info to copy ctu-info asynchronously\n"); + H0("\nCoding tools:\n"); + H0("-w/--[no-]weightp Enable weighted prediction in P slices. Default %s\n", OPT(param->bEnableWeightedPred)); + H0(" --[no-]weightb Enable weighted prediction in B slices. Default %s\n", OPT(param->bEnableWeightedBiPred)); + H0(" --[no-]cu-lossless Consider lossless mode in CU RDO decisions. Default %s\n", OPT(param->bCULossless)); + H0(" --[no-]signhide Hide sign bit of one coeff per TU (rdo). Default %s\n", OPT(param->bEnableSignHiding)); + H1(" --[no-]tskip Enable intra 4x4 transform skipping. Default %s\n", OPT(param->bEnableTransformSkip)); + H0("\nTemporal / motion search options:\n"); + H0(" --max-merge <1..5> Maximum number of merge candidates. Default %d\n", param->maxNumMergeCand); + H0(" --ref <integer> max number of L0 references to be allowed (1 .. 16) Default %d\n", param->maxNumReferences); + H0(" --limit-refs <0|1|2|3> Limit references per depth (1) or CU (2) or both (3). Default %d\n", param->limitReferences); + H0(" --me <string> Motion search method dia hex umh star full. Default %d\n", param->searchMethod); + H0("-m/--subme <integer> Amount of subpel refinement to perform (0:least .. 7:most). Default %d \n", param->subpelRefine); + H0(" --merange <integer> Motion search range. Default %d\n", param->searchRange); + H0(" --[no-]rect Enable rectangular motion partitions Nx2N and 2NxN. 
Default %s\n", OPT(param->bEnableRectInter)); + H0(" --[no-]amp Enable asymmetric motion partitions, requires --rect. Default %s\n", OPT(param->bEnableAMP)); + H0(" --[no-]limit-modes Limit rectangular and asymmetric motion predictions. Default %d\n", param->limitModes); + H1(" --[no-]temporal-mvp Enable temporal MV predictors. Default %s\n", OPT(param->bEnableTemporalMvp)); + H1(" --[no-]hme Enable Hierarchical Motion Estimation. Default %s\n", OPT(param->bEnableHME)); + H1(" --hme-search <string> Motion search-method for HME L0,L1 and L2. Default(L0,L1,L2) is %d,%d,%d\n", param->hmeSearchMethod[0], param->hmeSearchMethod[1], param->hmeSearchMethod[2]); + H1(" --hme-range <int>,<int>,<int> Motion search-range for HME L0,L1 and L2. Default(L0,L1,L2) is %d,%d,%d\n", param->hmeRange[0], param->hmeRange[1], param->hmeRange[2]); + H0("\nSpatial / intra options:\n"); + H0(" --[no-]strong-intra-smoothing Enable strong intra smoothing for 32x32 blocks. Default %s\n", OPT(param->bEnableStrongIntraSmoothing)); + H0(" --[no-]constrained-intra Constrained intra prediction (use only intra coded reference pixels) Default %s\n", OPT(param->bEnableConstrainedIntra)); + H0(" --[no-]b-intra Enable intra in B frames in veryslow presets. Default %s\n", OPT(param->bIntraInBFrames)); + H0(" --[no-]fast-intra Enable faster search method for angular intra predictions. Default %s\n", OPT(param->bEnableFastIntra)); + H0(" --rdpenalty <0..2> penalty for 32x32 intra TU in non-I slices. 0:disabled 1:RD-penalty 2:maximum. Default %d\n", param->rdPenalty); + H0("\nSlice decision options:\n"); + H0(" --[no-]open-gop Enable open-GOP, allows I slices to be non-IDR. Default %s\n", OPT(param->bOpenGOP)); + H0("-I/--keyint <integer> Max IDR period in frames. -1 for infinite-gop. Default %d\n", param->keyframeMax); + H0("-i/--min-keyint <integer> Scenecuts closer together than this are coded as I, not IDR. 
Default: auto\n"); + H0(" --gop-lookahead <integer> Extends gop boundary if a scenecut is found within this from keyint boundary. Default 0\n"); + H0(" --no-scenecut Disable adaptive I-frame decision\n"); + H0(" --scenecut <integer> How aggressively to insert extra I-frames. Default %d\n", param->scenecutThreshold); + H1(" --scenecut-bias <0..100.0> Bias for scenecut detection. Default %.2f\n", param->scenecutBias); + H0(" --hist-scenecut Enables histogram based scene-cut detection using histogram based algorithm.\n"); + H0(" --no-hist-scenecut Disables histogram based scene-cut detection using histogram based algorithm.\n"); + H1(" --hist-threshold <0.0..2.0> Luma Edge histogram's Normalized SAD threshold for histogram based scenecut detection Default %.2f\n", param->edgeTransitionThreshold); + H0(" --[no-]fades Enable detection and handling of fade-in regions. Default %s\n", OPT(param->bEnableFades)); + H1(" --[no-]scenecut-aware-qp Enable increasing QP for frames inside the scenecut window after scenecut. Default %s\n", OPT(param->bEnableSceneCutAwareQp)); + H1(" --scenecut-window <0..1000> QP incremental duration(in milliseconds) when scenecut-aware-qp is enabled. Default %d\n", param->scenecutWindow); + H1(" --max-qp-delta <0..10> QP offset to increment with base QP for inter-frames. Default %d\n", param->maxQpDelta); + H0(" --radl <integer> Number of RADL pictures allowed in front of IDR. Default %d\n", param->radl); + H0(" --intra-refresh Use Periodic Intra Refresh instead of IDR frames\n"); + H0(" --rc-lookahead <integer> Number of frames for frame-type lookahead (determines encoder latency) Default %d\n", param->lookaheadDepth); + H1(" --lookahead-slices <0..16> Number of slices to use per lookahead cost estimate. Default %d\n", param->lookaheadSlices); + H0(" --lookahead-threads <integer> Number of threads to be dedicated to perform lookahead only. Default %d\n", param->lookaheadThreads); + H0("-b/--bframes <0..16> Maximum number of consecutive b-frames. 
Default %d\n", param->bframes); + H1(" --bframe-bias <integer> Bias towards B frame decisions. Default %d\n", param->bFrameBias); + H0(" --b-adapt <0..2> 0 - none, 1 - fast, 2 - full (trellis) adaptive B frame scheduling. Default %d\n", param->bFrameAdaptive); + H0(" --[no-]b-pyramid Use B-frames as references. Default %s\n", OPT(param->bBPyramid)); + H1(" --qpfile <string> Force frametypes and QPs for some or all frames\n"); + H1(" Format of each line: framenumber frametype QP\n"); + H1(" QP is optional (none lets x265 choose). Frametypes: I,i,K,P,B,b.\n"); + H1(" QPs are restricted by qpmin/qpmax.\n"); + H1(" --force-flush <integer> Force the encoder to flush frames. Default %d\n", param->forceFlush); + H1(" 0 - flush the encoder only when all the input pictures are over.\n"); + H1(" 1 - flush all the frames even when the input is not over. Slicetype decision may change with this option.\n"); + H1(" 2 - flush the slicetype decided frames only.\n");
View file
x265_3.3.tar.gz/source/x265cli.h -> x265_3.4.tar.gz/source/x265cli.h
Changed
@@ -27,9 +27,23 @@ #include "common.h" #include "param.h" +#include "input/input.h" +#include "output/output.h" +#include "output/reconplay.h" #include <getopt.h> +#define CONSOLE_TITLE_SIZE 200 +#ifdef _WIN32 +#include <windows.h> +#define SetThreadExecutionState(es) +static char orgConsoleTitle[CONSOLE_TITLE_SIZE] = ""; +#else +#define GetConsoleTitle(t, n) +#define SetConsoleTitle(t) +#define SetThreadExecutionState(es) +#endif + #ifdef __cplusplus namespace X265_NS { #endif @@ -105,8 +119,8 @@ { "amp", no_argument, NULL, 0 }, { "no-early-skip", no_argument, NULL, 0 }, { "early-skip", no_argument, NULL, 0 }, - { "no-rskip", no_argument, NULL, 0 }, - { "rskip", no_argument, NULL, 0 }, + { "rskip", required_argument, NULL, 0 }, + { "rskip-edge-threshold", required_argument, NULL, 0 }, { "no-fast-cbf", no_argument, NULL, 0 }, { "fast-cbf", no_argument, NULL, 0 }, { "no-tskip", no_argument, NULL, 0 }, @@ -358,6 +372,7 @@ { "cll", no_argument, NULL, 0 }, { "no-cll", no_argument, NULL, 0 }, { "hme-range", required_argument, NULL, 0 }, + { "abr-ladder", required_argument, NULL, 0 }, { 0, 0, 0, 0 }, { 0, 0, 0, 0 }, { 0, 0, 0, 0 }, @@ -365,336 +380,82 @@ { 0, 0, 0, 0 } }; -static void printVersion(x265_param *param, const x265_api* api) -{ - x265_log(param, X265_LOG_INFO, "HEVC encoder version %s\n", api->version_str); - x265_log(param, X265_LOG_INFO, "build info %s\n", api->build_info_str); -} + struct CLIOptions + { + InputFile* input; + ReconFile* recon; + OutputFile* output; + FILE* qpfile; + FILE* zoneFile; + FILE* dolbyVisionRpu; /* File containing Dolby Vision BL RPU metadata */ + const char* reconPlayCmd; + const x265_api* api; + x265_param* param; + x265_vmaf_data* vmafData; + bool bProgress; + bool bForceY4m; + bool bDither; + uint32_t seek; // number of frames to skip from the beginning + uint32_t framesToBeEncoded; // number of frames to encode + uint64_t totalbytes; + int64_t startTime; + int64_t prevUpdateTime; -static void showHelp(x265_param *param) -{ - 
int level = param->logLevel; + int argCnt; + char** argString; -#define OPT(value) (value ? "enabled" : "disabled") -#define H0 printf -#define H1 if (level >= X265_LOG_DEBUG) printf + /* ABR ladder settings */ + bool isAbrLadderConfig; + bool enableScaler; + char* encName; + char* reuseName; + uint32_t encId; + int refId; + uint32_t loadLevel; + uint32_t saveLevel; + uint32_t numRefs; - H0("\nSyntax: x265 [options] infile [-o] outfile\n"); - H0(" infile can be YUV or Y4M\n"); - H0(" outfile is raw HEVC bitstream\n"); - H0("\nExecutable Options:\n"); - H0("-h/--help Show this help text and exit\n"); - H0(" --fullhelp Show all options and exit\n"); - H0("-V/--version Show version info and exit\n"); - H0("\nOutput Options:\n"); - H0("-o/--output <filename> Bitstream output file name\n"); - H0("-D/--output-depth 8|10|12 Output bit depth (also internal bit depth). Default %d\n", param->internalBitDepth); - H0(" --log-level <string> Logging level: none error warning info debug full. Default %s\n", X265_NS::logLevelNames[param->logLevel + 1]); - H0(" --no-progress Disable CLI progress reports\n"); - H0(" --csv <filename> Comma separated log file, if csv-log-level > 0 frame level statistics, else one line per run\n"); - H0(" --csv-log-level <integer> Level of csv logging, if csv-log-level > 0 frame level statistics, else one line per run: 0-2\n"); - H0("\nInput Options:\n"); - H0(" --input <filename> Raw YUV or Y4M input file name. `-` for stdin\n"); - H1(" --y4m Force parsing of input stream as YUV4MPEG2 regardless of file extension\n"); - H0(" --fps <float|rational> Source frame rate (float or num/denom), auto-detected if Y4M\n"); - H0(" --input-res WxH Source picture size [w x h], auto-detected if Y4M\n"); - H1(" --input-depth <integer> Bit-depth of input file. 
Default 8\n"); - H1(" --input-csp <string> Chroma subsampling, auto-detected if Y4M\n"); - H1(" 0 - i400 (4:0:0 monochrome)\n"); - H1(" 1 - i420 (4:2:0 default)\n"); - H1(" 2 - i422 (4:2:2)\n"); - H1(" 3 - i444 (4:4:4)\n"); -#if ENABLE_HDR10_PLUS - H0(" --dhdr10-info <filename> JSON file containing the Creative Intent Metadata to be encoded as Dynamic Tone Mapping\n"); - H0(" --[no-]dhdr10-opt Insert tone mapping SEI only for IDR frames and when the tone mapping information changes. Default disabled\n"); -#endif - H0(" --dolby-vision-profile <float|integer> Specifies Dolby Vision profile ID. Currently only profile 5, profile 8.1 and profile 8.2 enabled. Specified as '5' or '50'. Default 0 (disabled).\n"); - H0(" --dolby-vision-rpu <filename> File containing Dolby Vision RPU metadata.\n" - " If given, x265's Dolby Vision metadata parser will fill the RPU field of input pictures with the metadata read from the file. Default NULL(disabled).\n"); - H0(" --nalu-file <filename> Text file containing SEI messages in the following format : <POC><space><PREFIX><space><NAL UNIT TYPE>/<SEI TYPE><space><SEI Payload>\n"); - H0("-f/--frames <integer> Maximum number of frames to encode. Default all\n"); - H0(" --seek <integer> First frame to encode\n"); - H1(" --[no-]interlace <bff|tff> Indicate input pictures are interlace fields in temporal order. Default progressive\n"); - H0(" --[no-]field Enable or disable field coding. Default %s\n", OPT( param->bField)); - H1(" --dither Enable dither if downscaling to 8 bit pixels. Default disabled\n"); - H0(" --[no-]copy-pic Copy buffers of input picture in frame. Default %s\n", OPT(param->bCopyPicToFrame)); - H0("\nQuality reporting metrics:\n"); - H0(" --[no-]ssim Enable reporting SSIM metric scores. Default %s\n", OPT(param->bEnableSsim)); - H0(" --[no-]psnr Enable reporting PSNR metric scores. 
Default %s\n", OPT(param->bEnablePsnr)); - H0("\nProfile, Level, Tier:\n"); - H0("-P/--profile <string> Enforce an encode profile: main, main10, mainstillpicture\n"); - H0(" --level-idc <integer|float> Force a minimum required decoder level (as '5.0' or '50')\n"); - H0(" --[no-]high-tier If a decoder level is specified, this modifier selects High tier of that level\n"); - H0(" --uhd-bd Enable UHD Bluray compatibility support\n"); - H0(" --[no-]allow-non-conformance Allow the encoder to generate profile NONE bitstreams. Default %s\n", OPT(param->bAllowNonConformance)); - H0("\nThreading, performance:\n"); - H0(" --pools <integer,...> Comma separated thread count per thread pool (pool per NUMA node)\n"); - H0(" '-' implies no threads on node, '+' implies one thread per core on node\n"); - H0("-F/--frame-threads <integer> Number of concurrently encoded frames. 0: auto-determined by core count\n"); - H0(" --[no-]wpp Enable Wavefront Parallel Processing. Default %s\n", OPT(param->bEnableWavefront)); - H0(" --[no-]slices <integer> Enable Multiple Slices feature. Default %d\n", param->maxSlices); - H0(" --[no-]pmode Parallel mode analysis. Default %s\n", OPT(param->bDistributeModeAnalysis)); - H0(" --[no-]pme Parallel motion estimation. Default %s\n", OPT(param->bDistributeMotionEstimation)); - H0(" --[no-]asm <bool|int|string> Override CPU detection. Default: auto\n"); - H0("\nPresets:\n"); - H0("-p/--preset <string> Trade off performance for compression efficiency. Default medium\n"); - H0(" ultrafast, superfast, veryfast, faster, fast, medium, slow, slower, veryslow, or placebo\n"); - H0("-t/--tune <string> Tune the settings for a particular type of source or situation:\n"); - H0(" psnr, ssim, grain, zerolatency, fastdecode\n"); - H0("\nQuad-Tree size and depth:\n"); - H0("-s/--ctu <64|32|16> Maximum CU size (WxH). Default %d\n", param->maxCUSize); - H0(" --min-cu-size <64|32|16|8> Minimum CU size (WxH). 
Default %d\n", param->minCUSize); - H0(" --max-tu-size <32|16|8|4> Maximum TU size (WxH). Default %d\n", param->maxTUSize); - H0(" --tu-intra-depth <integer> Max TU recursive depth for intra CUs. Default %d\n", param->tuQTMaxIntraDepth); - H0(" --tu-inter-depth <integer> Max TU recursive depth for inter CUs. Default %d\n", param->tuQTMaxInterDepth); - H0(" --limit-tu <0..4> Enable early exit from TU recursion for inter coded blocks. Default %d\n", param->limitTU); - H0("\nAnalysis:\n"); - H0(" --rd <1..6> Level of RDO in mode decision 1:least....6:full RDO. Default %d\n", param->rdLevel); - H0(" --[no-]psy-rd <0..5.0> Strength of psycho-visual rate distortion optimization, 0 to disable. Default %.1f\n", param->psyRd); - H0(" --[no-]rdoq-level <0|1|2> Level of RDO in quantization 0:none, 1:levels, 2:levels & coding groups. Default %d\n", param->rdoqLevel); - H0(" --[no-]psy-rdoq <0..50.0> Strength of psycho-visual optimization in RDO quantization, 0 to disable. Default %.1f\n", param->psyRdoq); - H0(" --dynamic-rd <0..4.0> Strength of dynamic RD, 0 to disable. Default %.2f\n", param->dynamicRd); - H0(" --[no-]ssim-rd Enable ssim rate distortion optimization, 0 to disable. Default %s\n", OPT(param->bSsimRd)); - H0(" --[no-]rd-refine Enable QP based RD refinement for rd levels 5 and 6. Default %s\n", OPT(param->bEnableRdRefine)); - H0(" --[no-]early-skip Enable early SKIP detection. Default %s\n", OPT(param->bEnableEarlySkip)); - H0(" --[no-]rskip Enable early exit from recursion. Default %s\n", OPT(param->bEnableRecursionSkip)); - H1(" --[no-]tskip-fast Enable fast intra transform skipping. Default %s\n", OPT(param->bEnableTSkipFast)); - H1(" --[no-]splitrd-skip Enable skipping split RD analysis when sum of split CU rdCost larger than one split CU rdCost for Intra CU. Default %s\n", OPT(param->bEnableSplitRdSkip)); - H1(" --nr-intra <integer> An integer value in range of 0 to 2000, which denotes strength of noise reduction in intra CUs. 
Default 0\n"); - H1(" --nr-inter <integer> An integer value in range of 0 to 2000, which denotes strength of noise reduction in inter CUs. Default 0\n"); - H0(" --ctu-info <integer> Enable receiving ctu information asynchronously and determine reaction to the CTU information (0, 1, 2, 4, 6) Default 0\n" - " - 1: force the partitions if CTU information is present\n" - " - 2: functionality of (1) and reduce qp if CTU information has changed\n" - " - 4: functionality of (1) and force Inter modes when CTU Information has changed, merge/skip otherwise\n" - " Enable this option only when planning to invoke the API function x265_encoder_ctu_info to copy ctu-info asynchronously\n"); - H0("\nCoding tools:\n"); - H0("-w/--[no-]weightp Enable weighted prediction in P slices. Default %s\n", OPT(param->bEnableWeightedPred)); - H0(" --[no-]weightb Enable weighted prediction in B slices. Default %s\n", OPT(param->bEnableWeightedBiPred)); - H0(" --[no-]cu-lossless Consider lossless mode in CU RDO decisions. Default %s\n", OPT(param->bCULossless)); - H0(" --[no-]signhide Hide sign bit of one coeff per TU (rdo). Default %s\n", OPT(param->bEnableSignHiding)); - H1(" --[no-]tskip Enable intra 4x4 transform skipping. Default %s\n", OPT(param->bEnableTransformSkip)); - H0("\nTemporal / motion search options:\n"); - H0(" --max-merge <1..5> Maximum number of merge candidates. Default %d\n", param->maxNumMergeCand); - H0(" --ref <integer> max number of L0 references to be allowed (1 .. 16) Default %d\n", param->maxNumReferences); - H0(" --limit-refs <0|1|2|3> Limit references per depth (1) or CU (2) or both (3). Default %d\n", param->limitReferences); - H0(" --me <string> Motion search method dia hex umh star full. Default %d\n", param->searchMethod); - H0("-m/--subme <integer> Amount of subpel refinement to perform (0:least .. 7:most). Default %d \n", param->subpelRefine); - H0(" --merange <integer> Motion search range. 
Default %d\n", param->searchRange); - H0(" --[no-]rect Enable rectangular motion partitions Nx2N and 2NxN. Default %s\n", OPT(param->bEnableRectInter)); - H0(" --[no-]amp Enable asymmetric motion partitions, requires --rect. Default %s\n", OPT(param->bEnableAMP)); - H0(" --[no-]limit-modes Limit rectangular and asymmetric motion predictions. Default %d\n", param->limitModes); - H1(" --[no-]temporal-mvp Enable temporal MV predictors. Default %s\n", OPT(param->bEnableTemporalMvp)); - H1(" --[no-]hme Enable Hierarchical Motion Estimation. Default %s\n", OPT(param->bEnableHME)); - H1(" --hme-search <string> Motion search-method for HME L0,L1 and L2. Default(L0,L1,L2) is %d,%d,%d\n", param->hmeSearchMethod[0], param->hmeSearchMethod[1], param->hmeSearchMethod[2]);
Locations
Projects
Search
Status Monitor
Help
Open Build Service
OBS Manuals
API Documentation
OBS Portal
Reporting a Bug
Contact
Mailing List
Forums
Chat (IRC)
Twitter
Open Build Service (OBS)
is an
openSUSE project
.