We truncated the diff of some files because they were too big.
If you want to see the full diff for every file, click here.
Changes of Revision 39
x265.changes
Changed
x
1
2
-------------------------------------------------------------------
3
+Mon Jun 1 17:51:22 UTC 2020 - Luigi Baldoni <aloisio@gmx.com>
4
+
5
+- Update to version 3.4
6
+ New features:
7
+ * Edge-aware quadtree partitioning to terminate CU depth
8
+ recursion based on edge information. --rskip level 2 enables
9
+ the feature and --rskip-edge-threshold denotes the minimum
10
+ expected edge-density percentage within the CU, below which
11
+ the recursion is skipped. Experimental feature.
12
+ * Application-level feature --abr-ladder for automating
13
+ efficient ABR ladder generation. Shows ~65% savings in the
14
+ over-all turn-around time required for the generation of a
15
+ typical Apple HLS ladder in Intel(R) Xeon(R) Platinum 8280
16
+ CPU @ 2.70GHz over a sequential ABR-ladder generation
17
+ approach that leverages save-load architecture.
18
+ Enhancements to existing features:
19
+ * Improved efficiency in 2-pass rate-control algorithm. The
20
+ savings in the bitrate are ~1.72% with visual improvement in
21
+ quality in the initial 1-2 secs.
22
+ Encoder enhancements:
23
+ * Faster ARM64 encodes enabled by ASM contributions from
24
+ Huawei. The speed-up over no-asm version for 1080p encodes @
25
+ medium preset is ~15% in a 16 core H/W.
26
+ * Strict VBV conformance in zone encoding.
27
+ Bug fixes:
28
+ * Multi-pass encode failures with --frame-dup.
29
+ * Corrupted bitstreams with --hist-scenecut when input depth
30
+ and internal bit-depth differ.
31
+ * Incorrect analysis propagation in multi-level save-load
32
+ architecture.
33
+ * Failure in detecting NUMA packages installed in non-standard
34
+ directories.
35
+
36
+- Refreshed arm.patch
37
+
38
+-------------------------------------------------------------------
39
Sat Mar 28 14:28:56 UTC 2020 - Luigi Baldoni <aloisio@gmx.com>
40
41
- Update to version 3.3
42
x265.spec
Changed
23
1
2
#
3
4
5
-%define sover 188
6
+%define sover 192
7
%define libname lib%{name}
8
%define libsoname %{libname}-%{sover}
9
Name: x265
10
-Version: 3.3
11
+Version: 3.4
12
Release: 0
13
Summary: A free h265/HEVC encoder - encoder binary
14
License: GPL-2.0-or-later
15
16
%patch0 -p1
17
%patch1 -p1
18
%patch2 -p1
19
-
20
sed -i -e "s/0.0/%{sover}.0/g" source/cmake/version.cmake
21
22
23
arm.patch
Changed
129
1
2
-Index: x265_2.2/source/CMakeLists.txt
3
+Index: x265_3.4/source/CMakeLists.txt
4
===================================================================
5
---- x265_2.2.orig/source/CMakeLists.txt
6
-+++ x265_2.2/source/CMakeLists.txt
7
-@@ -65,15 +65,22 @@ elseif(POWERMATCH GREATER "-1")
8
+--- x265_3.4.orig/source/CMakeLists.txt
9
++++ x265_3.4/source/CMakeLists.txt
10
+@@ -64,26 +64,26 @@ elseif(POWERMATCH GREATER "-1")
11
add_definitions(-DPPC64=1)
12
message(STATUS "Detected POWER PPC64 target processor")
13
endif()
14
15
- else()
16
- set(CROSS_COMPILE_ARM 0)
17
- endif()
18
-- message(STATUS "Detected ARM target processor")
19
- set(ARM 1)
20
-- add_definitions(-DX265_ARCH_ARM=1 -DHAVE_ARMV6=1)
21
+- if("${CMAKE_SIZEOF_VOID_P}" MATCHES 8)
22
+- message(STATUS "Detected ARM64 target processor")
23
+- set(ARM64 1)
24
+- add_definitions(-DX265_ARCH_ARM=1 -DX265_ARCH_ARM64=1 -DHAVE_ARMV6=0)
25
+- else()
26
+- message(STATUS "Detected ARM target processor")
27
+- add_definitions(-DX265_ARCH_ARM=1 -DX265_ARCH_ARM64=0 -DHAVE_ARMV6=1)
28
+- endif()
29
+elseif(${SYSPROC} MATCHES "armv5.*")
30
+ message(STATUS "Detected ARMV5 system processor")
31
+ set(ARMV5 1)
32
-+ add_definitions(-DX265_ARCH_ARM=1 -DHAVE_ARMV6=0 -DHAVE_NEON=0)
33
++ add_definitions(-DX265_ARCH_ARM=1 -DX265_ARCH_ARM64=0 -DHAVE_ARMV6=0 -DHAVE_NEON=0)
34
+elseif(${SYSPROC} STREQUAL "armv6l")
35
+ message(STATUS "Detected ARMV6 system processor")
36
+ set(ARMV6 1)
37
-+ add_definitions(-DX265_ARCH_ARM=1 -DHAVE_ARMV6=1 -DHAVE_NEON=0)
38
++ add_definitions(-DX265_ARCH_ARM=1 -DX265_ARCH_ARM64=0 -DHAVE_ARMV6=1 -DHAVE_NEON=0)
39
+elseif(${SYSPROC} STREQUAL "armv7l")
40
+ message(STATUS "Detected ARMV7 system processor")
41
+ set(ARMV7 1)
42
-+ add_definitions(-DX265_ARCH_ARM=1 -DHAVE_ARMV6=1 -DHAVE_NEON=0)
43
++ add_definitions(-DX265_ARCH_ARM=1 -DX265_ARCH_ARM64=0 -DHAVE_ARMV6=1 -DHAVE_NEON=0)
44
+elseif(${SYSPROC} STREQUAL "aarch64")
45
+ message(STATUS "Detected AArch64 system processor")
46
+ set(ARMV7 1)
47
-+ add_definitions(-DX265_ARCH_ARM=1 -DHAVE_ARMV6=1 -DHAVE_NEON=0)
48
++ add_definitions(-DX265_ARCH_ARM=1 -DX265_ARCH_ARM64=1 -DHAVE_ARMV6=0 -DHAVE_NEON=0)
49
else()
50
message(STATUS "CMAKE_SYSTEM_PROCESSOR value `${CMAKE_SYSTEM_PROCESSOR}` is unknown")
51
message(STATUS "Please add this value near ${CMAKE_CURRENT_LIST_FILE}:${CMAKE_CURRENT_LIST_LINE}")
52
-@@ -208,18 +215,9 @@ if(GCC)
53
+ endif()
54
+-
55
+ if(UNIX)
56
+ list(APPEND PLATFORM_LIBS pthread)
57
+ find_library(LIBRT rt)
58
+@@ -238,28 +238,9 @@ if(GCC)
59
endif()
60
endif()
61
endif()
62
- if(ARM AND CROSS_COMPILE_ARM)
63
-- set(ARM_ARGS -march=armv6 -mfloat-abi=soft -mfpu=vfp -marm -fPIC)
64
+- if(ARM64)
65
+- set(ARM_ARGS -fPIC)
66
+- else()
67
+- set(ARM_ARGS -march=armv6 -mfloat-abi=soft -mfpu=vfp -marm -fPIC)
68
+- endif()
69
+- message(STATUS "cross compile arm")
70
- elseif(ARM)
71
-- find_package(Neon)
72
-- if(CPU_HAS_NEON)
73
-- set(ARM_ARGS -mcpu=native -mfloat-abi=hard -mfpu=neon -marm -fPIC)
74
+- if(ARM64)
75
+- set(ARM_ARGS -fPIC)
76
- add_definitions(-DHAVE_NEON)
77
- else()
78
-- set(ARM_ARGS -mcpu=native -mfloat-abi=hard -mfpu=vfp -marm)
79
+- find_package(Neon)
80
+- if(CPU_HAS_NEON)
81
+- set(ARM_ARGS -mcpu=native -mfloat-abi=hard -mfpu=neon -marm -fPIC)
82
+- add_definitions(-DHAVE_NEON)
83
+- else()
84
+- set(ARM_ARGS -mcpu=native -mfloat-abi=hard -mfpu=vfp -marm)
85
+- endif()
86
- endif()
87
+ if(ARMV7)
88
+ add_definitions(-fPIC)
89
90
if(FPROFILE_GENERATE)
91
if(INTEL_CXX)
92
add_definitions(-prof-gen -prof-dir="${CMAKE_CURRENT_BINARY_DIR}")
93
-Index: x265_2.2/source/common/cpu.cpp
94
+Index: x265_3.4/source/common/cpu.cpp
95
===================================================================
96
---- x265_2.2.orig/source/common/cpu.cpp
97
-+++ x265_2.2/source/common/cpu.cpp
98
-@@ -37,7 +37,7 @@
99
+--- x265_3.4.orig/source/common/cpu.cpp
100
++++ x265_3.4/source/common/cpu.cpp
101
+@@ -39,7 +39,7 @@
102
#include <machine/cpu.h>
103
#endif
104
105
106
#include <signal.h>
107
#include <setjmp.h>
108
static sigjmp_buf jmpbuf;
109
-@@ -344,7 +344,6 @@ uint32_t cpu_detect(void)
110
+@@ -350,7 +350,6 @@ uint32_t cpu_detect(bool benableavx512)
111
}
112
113
canjump = 1;
114
115
canjump = 0;
116
signal(SIGILL, oldsig);
117
#endif // if !HAVE_NEON
118
-@@ -360,7 +359,7 @@ uint32_t cpu_detect(void)
119
+@@ -366,7 +365,7 @@ uint32_t cpu_detect(bool benableavx512)
120
// which may result in incorrect detection and the counters stuck enabled.
121
// right now Apple does not seem to support performance counters for this test
122
#ifndef __MACH__
123
124
+ //flags |= PFX(cpu_fast_neon_mrc_test)() ? X265_CPU_FAST_NEON_MRC : 0;
125
#endif
126
// TODO: write dual issue test? currently it's A8 (dual issue) vs. A9 (fast mrc)
127
- #endif // if HAVE_ARMV6
128
+ #elif X265_ARCH_ARM64
129
baselibs.conf
Changed
4
1
2
-libx265-179
3
+libx265-192
4
x265_3.3.tar.gz/.hg_archival.txt -> x265_3.4.tar.gz/.hg_archival.txt
Changed
10
1
2
repo: 09fe40627f03a0f9c3e6ac78b22ac93da23f9fdf
3
-node: f94b0d32737d40b2b9a9d74df57fee45e6be5cb0
4
-branch: Release_3.3
5
-latesttag: 3.3
6
-latesttagdistance: 1
7
+node: 2a65b720985096bcb1664f7cb05c3d04aeb576f5
8
+branch: Release_3.4
9
+tag: 3.4
10
x265_3.3.tar.gz/.hgtags -> x265_3.4.tar.gz/.hgtags
Changed
6
1
2
5ee3593ebd82b4d8957909bbc1b68b99b59ba773 3.3_RC1
3
96a10df63c0b778b480330bdf3be8da7db8a5fb1 3.3_RC2
4
057215961bc4b51b6260a584ff3d506e6d65cfd6 3.3
5
+ee92f36782800f145970131e01c79955a3ed5c10 3.4_RC1
6
x265_3.4.tar.gz/build/aarch64-linux/crosscompile.cmake
Added
17
1
2
+# CMake toolchain file for cross compiling x265 for aarch64
3
+# This feature is only supported as experimental. Use with caution.
4
+# Please report bugs on bitbucket
5
+# Run cmake with: cmake -DCMAKE_TOOLCHAIN_FILE=crosscompile.cmake -G "Unix Makefiles" ../../source && ccmake ../../source
6
+
7
+set(CROSS_COMPILE_ARM 1)
8
+set(CMAKE_SYSTEM_NAME Linux)
9
+set(CMAKE_SYSTEM_PROCESSOR aarch64)
10
+
11
+# specify the cross compiler
12
+set(CMAKE_C_COMPILER aarch64-linux-gnu-gcc)
13
+set(CMAKE_CXX_COMPILER aarch64-linux-gnu-g++)
14
+
15
+# specify the target environment
16
+SET(CMAKE_FIND_ROOT_PATH /usr/aarch64-linux-gnu)
17
x265_3.4.tar.gz/build/aarch64-linux/make-Makefiles.bash
Added
6
1
2
+#!/bin/bash
3
+# Run this from within a bash shell
4
+
5
+cmake -DCMAKE_TOOLCHAIN_FILE="crosscompile.cmake" -G "Unix Makefiles" ../../source && ccmake ../../source
6
x265_3.3.tar.gz/doc/reST/cli.rst -> x265_3.4.tar.gz/doc/reST/cli.rst
Changed
79
1
2
3
**BufferFillFinal** Buffer bits available after removing the frame out of CPB.
4
5
+ **UnclippedBufferFillFinal** Unclipped buffer bits available after removing the frame
6
+ out of CPB only used for csv logging purpose.
7
+
8
**Latency** Latency in terms of number of frames between when the frame
9
was given in and when the frame is given out.
10
11
12
Measure 2Nx2N merge candidates first; if no residual is found,
13
additional modes at that depth are not analysed. Default disabled
14
15
-.. option:: --rskip, --no-rskip
16
+.. option:: --rskip <0|1|2>
17
+
18
+ This option determines early exit from CU depth recursion in modes 1 and 2. When a skip CU is
19
+ found, additional heuristics (depending on the RD level and rskip mode) are used to decide whether
20
+ to terminate recursion. The following table summarizes the behavior.
21
+
22
+ +----------+------------+----------------------------------------------------------------+
23
+ | RD Level | Rskip Mode | Skip Recursion Heuristic |
24
+ +==========+============+================================================================+
25
+ | 0 - 4 | 1 | Neighbour costs and CU homogeneity. |
26
+ +----------+------------+----------------------------------------------------------------+
27
+ | 5 - 6 | 1 | Comparison with inter2Nx2N. |
28
+ +----------+------------+----------------------------------------------------------------+
29
+ | 0 - 6 | 2 | CU edge density. |
30
+ +----------+------------+----------------------------------------------------------------+
31
+
32
+ Provides minimal quality degradation at good performance gains for non-zero modes.
33
+ :option:`--rskip mode 0` means disabled. Default: 1, disabled when :option:`--tune grain` is used.
34
+ This is an integer value representing the edge-density percentage within the CU. Internally normalized to a number between 0.0 and 1.0 in x265.
35
+ Recommended low thresholds for slow encodes and high for fast encodes.
36
37
- This option determines early exit from CU depth recursion. When a skip CU is
38
- found, additional heuristics (depending on rd-level) are used to decide whether
39
- to terminate recursion. In rdlevels 5 and 6, comparison with inter2Nx2N is used,
40
- while at rdlevels 4 and neighbour costs are used to skip recursion.
41
- Provides minimal quality degradation at good performance gains when enabled.
42
+.. option:: --rskip-edge-threshold <0..100>
43
44
- Default: enabled, disabled for :option:`--tune grain`
45
+ Denotes the minimum expected edge-density percentage within the CU, below which the recursion is skipped.
46
+ Default: 5, requires :option:`--rskip mode 2` to be enabled.
47
48
.. option:: --splitrd-skip, --no-splitrd-skip
49
50
51
--recon-y4m-exec "ffplay -i pipe:0 -autoexit"
52
53
**CLI ONLY**
54
+
55
+ABR-ladder Options
56
+==================
57
+
58
+.. option:: --abr-ladder <filename>
59
+
60
+ File containing the encoder configurations to generate ABR ladder.
61
+ The format of each line is:
62
+
63
+ **<encID:reuse-level:refID> <CLI>**
64
+
65
+ where, encID indicates the unique name given to the encode, refID indicates
66
+ the name of the encode from which analysis info has to be re-used ( set to 'nil'
67
+ if analysis reuse isn't preferred ), and reuse-level indicates the level ( :option:`--analysis-load-reuse-level`)
68
+ at which analysis info has to be reused.
69
+
70
+ A sample config file is available in `the downloads page <https://bitbucket.org/multicoreware/x265/downloads/Sample_ABR_ladder_config>`_
71
+
72
+ Default: Disabled ( Conventional single encode generation ). Experimental feature.
73
+
74
+ **CLI ONLY**
75
+
76
77
SVT-HEVC Encoder Options
78
========================
79
x265_3.3.tar.gz/doc/reST/releasenotes.rst -> x265_3.4.tar.gz/doc/reST/releasenotes.rst
Changed
34
1
2
Release Notes
3
*************
4
5
+Version 3.4
6
+===========
7
+
8
+Release date - 29th May, 2020.
9
+
10
+New features
11
+------------
12
+1. **Edge-aware quadtree partitioning** to terminate CU depth recursion based on edge information. :option:`--rskip` level 2 enables the feature and :option:`--rskip-edge-threshold` denotes the minimum expected edge-density percentage within the CU, below which the recursion is skipped. Experimental feature.
13
+2. Application-level feature :option:`--abr-ladder` for automating efficient ABR ladder generation. Shows ~65% savings in the over-all turn-around time required for the generation of a typical Apple HLS ladder in Intel(R) Xeon(R) Platinum 8280 CPU @ 2.70GHz over a sequential ABR-ladder generation approach that leverages save-load architecture.
14
+
15
+Enhancements to existing features
16
+---------------------------------
17
+1. Improved efficiency in 2-pass rate-control algorithm. The savings in the bitrate are ~1.72% with visual improvement in quality in the initial 1-2 secs.
18
+
19
+Encoder enhancements
20
+--------------------
21
+1. Faster ARM64 encodes enabled by ASM contributions from Huawei. The speed-up over no-asm version for 1080p encodes @ medium preset is ~15% in a 16 core H/W.
22
+2. Strict VBV conformance in zone encoding.
23
+
24
+Bug fixes
25
+---------
26
+1. Multi-pass encode failures with :option:`--frame-dup`.
27
+2. Corrupted bitstreams with :option:`--hist-scenecut` when input depth and internal bit-depth differ.
28
+3. Incorrect analysis propagation in multi-level save-load architecture.
29
+4. Failure in detecting NUMA packages installed in non-standard directories.
30
+
31
Version 3.3
32
===========
33
34
x265_3.3.tar.gz/source/CMakeLists.txt -> x265_3.4.tar.gz/source/CMakeLists.txt
Changed
109
1
2
option(STATIC_LINK_CRT "Statically link C runtime for release builds" OFF)
3
mark_as_advanced(FPROFILE_USE FPROFILE_GENERATE NATIVE_BUILD)
4
# X265_BUILD must be incremented each time the public API is changed
5
-set(X265_BUILD 188)
6
+set(X265_BUILD 192)
7
configure_file("${PROJECT_SOURCE_DIR}/x265.def.in"
8
"${PROJECT_BINARY_DIR}/x265.def")
9
configure_file("${PROJECT_SOURCE_DIR}/x265_config.h.in"
10
11
# System architecture detection
12
string(TOLOWER "${CMAKE_SYSTEM_PROCESSOR}" SYSPROC)
13
set(X86_ALIASES x86 i386 i686 x86_64 amd64)
14
-set(ARM_ALIASES armv6l armv7l)
15
+set(ARM_ALIASES armv6l armv7l aarch64)
16
list(FIND X86_ALIASES "${SYSPROC}" X86MATCH)
17
list(FIND ARM_ALIASES "${SYSPROC}" ARMMATCH)
18
set(POWER_ALIASES ppc64 ppc64le)
19
20
else()
21
set(CROSS_COMPILE_ARM 0)
22
endif()
23
- message(STATUS "Detected ARM target processor")
24
set(ARM 1)
25
- add_definitions(-DX265_ARCH_ARM=1 -DHAVE_ARMV6=1)
26
+ if("${CMAKE_SIZEOF_VOID_P}" MATCHES 8)
27
+ message(STATUS "Detected ARM64 target processor")
28
+ set(ARM64 1)
29
+ add_definitions(-DX265_ARCH_ARM=1 -DX265_ARCH_ARM64=1 -DHAVE_ARMV6=0)
30
+ else()
31
+ message(STATUS "Detected ARM target processor")
32
+ add_definitions(-DX265_ARCH_ARM=1 -DX265_ARCH_ARM64=0 -DHAVE_ARMV6=1)
33
+ endif()
34
else()
35
message(STATUS "CMAKE_SYSTEM_PROCESSOR value `${CMAKE_SYSTEM_PROCESSOR}` is unknown")
36
message(STATUS "Please add this value near ${CMAKE_CURRENT_LIST_FILE}:${CMAKE_CURRENT_LIST_LINE}")
37
38
if(NUMA_FOUND)
39
link_directories(${NUMA_LIBRARY_DIR})
40
list(APPEND CMAKE_REQUIRED_LIBRARIES numa)
41
+ list(APPEND CMAKE_REQUIRED_INCLUDES ${NUMA_INCLUDE_DIR})
42
+ list(APPEND CMAKE_REQUIRED_LINK_OPTIONS "-L${NUMA_LIBRARY_DIR}")
43
check_symbol_exists(numa_node_of_cpu numa.h NUMA_V2)
44
if(NUMA_V2)
45
add_definitions(-DHAVE_LIBNUMA)
46
47
endif()
48
endif()
49
if(ARM AND CROSS_COMPILE_ARM)
50
- set(ARM_ARGS -march=armv6 -mfloat-abi=soft -mfpu=vfp -marm -fPIC)
51
+ if(ARM64)
52
+ set(ARM_ARGS -fPIC)
53
+ else()
54
+ set(ARM_ARGS -march=armv6 -mfloat-abi=soft -mfpu=vfp -marm -fPIC)
55
+ endif()
56
+ message(STATUS "cross compile arm")
57
elseif(ARM)
58
- find_package(Neon)
59
- if(CPU_HAS_NEON)
60
- set(ARM_ARGS -mcpu=native -mfloat-abi=hard -mfpu=neon -marm -fPIC)
61
+ if(ARM64)
62
+ set(ARM_ARGS -fPIC)
63
add_definitions(-DHAVE_NEON)
64
else()
65
- set(ARM_ARGS -mcpu=native -mfloat-abi=hard -mfpu=vfp -marm)
66
+ find_package(Neon)
67
+ if(CPU_HAS_NEON)
68
+ set(ARM_ARGS -mcpu=native -mfloat-abi=hard -mfpu=neon -marm -fPIC)
69
+ add_definitions(-DHAVE_NEON)
70
+ else()
71
+ set(ARM_ARGS -mcpu=native -mfloat-abi=hard -mfpu=vfp -marm)
72
+ endif()
73
endif()
74
endif()
75
add_definitions(${ARM_ARGS})
76
77
# compile ARM arch asm files here
78
enable_language(ASM)
79
foreach(ASM ${ARM_ASMS})
80
- set(ASM_SRC ${CMAKE_CURRENT_SOURCE_DIR}/common/arm/${ASM})
81
+ if(ARM64)
82
+ set(ASM_SRC ${CMAKE_CURRENT_SOURCE_DIR}/common/aarch64/${ASM})
83
+ else()
84
+ set(ASM_SRC ${CMAKE_CURRENT_SOURCE_DIR}/common/arm/${ASM})
85
+ endif()
86
list(APPEND ASM_SRCS ${ASM_SRC})
87
list(APPEND ASM_OBJS ${ASM}.${SUFFIX})
88
add_custom_command(
89
90
# Xcode seems unable to link the CLI with libs, so link as one target
91
if(ENABLE_HDR10_PLUS)
92
add_executable(cli ../COPYING ${InputFiles} ${OutputFiles} ${GETOPT}
93
- x265.cpp x265.h x265cli.h
94
+ x265.cpp x265.h x265cli.cpp x265cli.h abrEncApp.cpp abrEncApp.h
95
$<TARGET_OBJECTS:encoder> $<TARGET_OBJECTS:common> $<TARGET_OBJECTS:dynamicHDR10> ${ASM_OBJS})
96
else()
97
add_executable(cli ../COPYING ${InputFiles} ${OutputFiles} ${GETOPT}
98
- x265.cpp x265.h x265cli.h
99
+ x265.cpp x265.h x265cli.cpp x265cli.h abrEncApp.cpp abrEncApp.h
100
$<TARGET_OBJECTS:encoder> $<TARGET_OBJECTS:common> ${ASM_OBJS})
101
endif()
102
else()
103
add_executable(cli ../COPYING ${InputFiles} ${OutputFiles} ${GETOPT} ${X265_RC_FILE}
104
- ${ExportDefs} x265.cpp x265.h x265cli.h)
105
+ ${ExportDefs} x265.cpp x265.h x265cli.cpp x265cli.h abrEncApp.cpp abrEncApp.h)
106
if(WIN32 OR NOT ENABLE_SHARED OR INTEL_CXX)
107
# The CLI cannot link to the shared library on Windows, it
108
# requires internal APIs not exported from the DLL
109
x265_3.4.tar.gz/source/abrEncApp.cpp
Added
201
1
2
+/*****************************************************************************
3
+* Copyright (C) 2013-2020 MulticoreWare, Inc
4
+*
5
+* Authors: Pooja Venkatesan <pooja@multicorewareinc.com>
6
+* Aruna Matheswaran <aruna@multicorewareinc.com>
7
+*
8
+* This program is free software; you can redistribute it and/or modify
9
+* it under the terms of the GNU General Public License as published by
10
+* the Free Software Foundation; either version 2 of the License, or
11
+* (at your option) any later version.
12
+*
13
+* This program is distributed in the hope that it will be useful,
14
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
15
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16
+* GNU General Public License for more details.
17
+*
18
+* You should have received a copy of the GNU General Public License
19
+* along with this program; if not, write to the Free Software
20
+* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
21
+*
22
+* This program is also available under a commercial proprietary license.
23
+* For more information, contact us at license @ x265.com.
24
+*****************************************************************************/
25
+
26
+#include "abrEncApp.h"
27
+#include "mv.h"
28
+#include "slice.h"
29
+#include "param.h"
30
+
31
+#include <signal.h>
32
+#include <errno.h>
33
+
34
+#include <queue>
35
+
36
+using namespace X265_NS;
37
+
38
+/* Ctrl-C handler */
39
+static volatile sig_atomic_t b_ctrl_c /* = 0 */;
40
+static void sigint_handler(int)
41
+{
42
+ b_ctrl_c = 1;
43
+}
44
+
45
+namespace X265_NS {
46
+ // private namespace
47
+#define X265_INPUT_QUEUE_SIZE 250
48
+
49
+ AbrEncoder::AbrEncoder(CLIOptions cliopt[], uint8_t numEncodes, int &ret)
50
+ {
51
+ m_numEncodes = numEncodes;
52
+ m_numActiveEncodes.set(numEncodes);
53
+ m_queueSize = (numEncodes > 1) ? X265_INPUT_QUEUE_SIZE : 1;
54
+ m_passEnc = X265_MALLOC(PassEncoder*, m_numEncodes);
55
+
56
+ for (uint8_t i = 0; i < m_numEncodes; i++)
57
+ {
58
+ m_passEnc[i] = new PassEncoder(i, cliopt[i], this);
59
+ if (!m_passEnc[i])
60
+ {
61
+ x265_log(NULL, X265_LOG_ERROR, "Unable to allocate memory for passEncoder\n");
62
+ ret = 4;
63
+ }
64
+ m_passEnc[i]->init(ret);
65
+ }
66
+
67
+ if (!allocBuffers())
68
+ {
69
+ x265_log(NULL, X265_LOG_ERROR, "Unable to allocate memory for buffers\n");
70
+ ret = 4;
71
+ }
72
+
73
+ /* start passEncoder worker threads */
74
+ for (uint8_t pass = 0; pass < m_numEncodes; pass++)
75
+ m_passEnc[pass]->startThreads();
76
+ }
77
+
78
+ bool AbrEncoder::allocBuffers()
79
+ {
80
+ m_inputPicBuffer = X265_MALLOC(x265_picture**, m_numEncodes);
81
+ m_analysisBuffer = X265_MALLOC(x265_analysis_data*, m_numEncodes);
82
+
83
+ m_picWriteCnt = new ThreadSafeInteger[m_numEncodes];
84
+ m_picReadCnt = new ThreadSafeInteger[m_numEncodes];
85
+ m_analysisWriteCnt = new ThreadSafeInteger[m_numEncodes];
86
+ m_analysisReadCnt = new ThreadSafeInteger[m_numEncodes];
87
+
88
+ m_picIdxReadCnt = X265_MALLOC(ThreadSafeInteger*, m_numEncodes);
89
+ m_analysisWrite = X265_MALLOC(ThreadSafeInteger*, m_numEncodes);
90
+ m_analysisRead = X265_MALLOC(ThreadSafeInteger*, m_numEncodes);
91
+ m_readFlag = X265_MALLOC(int*, m_numEncodes);
92
+
93
+ for (uint8_t pass = 0; pass < m_numEncodes; pass++)
94
+ {
95
+ m_inputPicBuffer[pass] = X265_MALLOC(x265_picture*, m_queueSize);
96
+ for (uint32_t idx = 0; idx < m_queueSize; idx++)
97
+ {
98
+ m_inputPicBuffer[pass][idx] = x265_picture_alloc();
99
+ x265_picture_init(m_passEnc[pass]->m_param, m_inputPicBuffer[pass][idx]);
100
+ }
101
+
102
+ m_analysisBuffer[pass] = X265_MALLOC(x265_analysis_data, m_queueSize);
103
+ m_picIdxReadCnt[pass] = new ThreadSafeInteger[m_queueSize];
104
+ m_analysisWrite[pass] = new ThreadSafeInteger[m_queueSize];
105
+ m_analysisRead[pass] = new ThreadSafeInteger[m_queueSize];
106
+ m_readFlag[pass] = X265_MALLOC(int, m_queueSize);
107
+ }
108
+ return true;
109
+ }
110
+
111
+ void AbrEncoder::destroy()
112
+ {
113
+ x265_cleanup(); /* Free library singletons */
114
+ for (uint8_t pass = 0; pass < m_numEncodes; pass++)
115
+ {
116
+ for (uint32_t index = 0; index < m_queueSize; index++)
117
+ {
118
+ X265_FREE(m_inputPicBuffer[pass][index]->planes[0]);
119
+ x265_picture_free(m_inputPicBuffer[pass][index]);
120
+ }
121
+
122
+ X265_FREE(m_inputPicBuffer[pass]);
123
+ X265_FREE(m_analysisBuffer[pass]);
124
+ X265_FREE(m_readFlag[pass]);
125
+ delete[] m_picIdxReadCnt[pass];
126
+ delete[] m_analysisWrite[pass];
127
+ delete[] m_analysisRead[pass];
128
+ m_passEnc[pass]->destroy();
129
+ delete m_passEnc[pass];
130
+ }
131
+ X265_FREE(m_inputPicBuffer);
132
+ X265_FREE(m_analysisBuffer);
133
+ X265_FREE(m_readFlag);
134
+
135
+ delete[] m_picWriteCnt;
136
+ delete[] m_picReadCnt;
137
+ delete[] m_analysisWriteCnt;
138
+ delete[] m_analysisReadCnt;
139
+
140
+ X265_FREE(m_picIdxReadCnt);
141
+ X265_FREE(m_analysisWrite);
142
+ X265_FREE(m_analysisRead);
143
+
144
+ X265_FREE(m_passEnc);
145
+ }
146
+
147
+ PassEncoder::PassEncoder(uint32_t id, CLIOptions cliopt, AbrEncoder *parent)
148
+ {
149
+ m_id = id;
150
+ m_cliopt = cliopt;
151
+ m_parent = parent;
152
+ if(!(m_cliopt.enableScaler && m_id))
153
+ m_input = m_cliopt.input;
154
+ m_param = cliopt.param;
155
+ m_inputOver = false;
156
+ m_lastIdx = -1;
157
+ m_encoder = NULL;
158
+ m_scaler = NULL;
159
+ m_reader = NULL;
160
+ m_ret = 0;
161
+ }
162
+
163
+ int PassEncoder::init(int &result)
164
+ {
165
+ if (m_parent->m_numEncodes > 1)
166
+ setReuseLevel();
167
+
168
+ if (!(m_cliopt.enableScaler && m_id))
169
+ m_reader = new Reader(m_id, this);
170
+ else
171
+ {
172
+ VideoDesc *src = NULL, *dst = NULL;
173
+ dst = new VideoDesc(m_param->sourceWidth, m_param->sourceHeight, m_param->internalCsp, m_param->internalBitDepth);
174
+ int dstW = m_parent->m_passEnc[m_id - 1]->m_param->sourceWidth;
175
+ int dstH = m_parent->m_passEnc[m_id - 1]->m_param->sourceHeight;
176
+ src = new VideoDesc(dstW, dstH, m_param->internalCsp, m_param->internalBitDepth);
177
+ if (src != NULL && dst != NULL)
178
+ {
179
+ m_scaler = new Scaler(0, 1, m_id, src, dst, this);
180
+ if (!m_scaler)
181
+ {
182
+ x265_log(m_param, X265_LOG_ERROR, "\n MALLOC failure in Scaler");
183
+ result = 4;
184
+ }
185
+ }
186
+ }
187
+
188
+ /* note: we could try to acquire a different libx265 API here based on
189
+ * the profile found during option parsing, but it must be done before
190
+ * opening an encoder */
191
+
192
+ if (m_param)
193
+ m_encoder = m_cliopt.api->encoder_open(m_param);
194
+ if (!m_encoder)
195
+ {
196
+ x265_log(NULL, X265_LOG_ERROR, "x265_encoder_open() failed for Enc, \n");
197
+ m_ret = 2;
198
+ return -1;
199
+ }
200
+
201
x265_3.4.tar.gz/source/abrEncApp.h
Added
155
1
2
+/*****************************************************************************
3
+* Copyright (C) 2013-2020 MulticoreWare, Inc
4
+*
5
+* Authors: Pooja Venkatesan <pooja@multicorewareinc.com>
6
+* Aruna Matheswaran <aruna@multicorewareinc.com>
7
+*
8
+*
9
+* This program is free software; you can redistribute it and/or modify
10
+* it under the terms of the GNU General Public License as published by
11
+* the Free Software Foundation; either version 2 of the License, or
12
+* (at your option) any later version.
13
+*
14
+* This program is distributed in the hope that it will be useful,
15
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
16
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17
+* GNU General Public License for more details.
18
+*
19
+* You should have received a copy of the GNU General Public License
20
+* along with this program; if not, write to the Free Software
21
+* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
22
+*
23
+* This program is also available under a commercial proprietary license.
24
+* For more information, contact us at license @ x265.com.
25
+*****************************************************************************/
26
+
27
+#ifndef ABR_ENCODE_H
28
+#define ABR_ENCODE_H
29
+
30
+#include "x265.h"
31
+#include "scaler.h"
32
+#include "threading.h"
33
+#include "x265cli.h"
34
+
35
+namespace X265_NS {
36
+ // private namespace
37
+
38
+ class PassEncoder;
39
+ class Scaler;
40
+ class Reader;
41
+
42
+ class AbrEncoder
43
+ {
44
+ public:
45
+ uint8_t m_numEncodes;
46
+ PassEncoder **m_passEnc;
47
+ uint32_t m_queueSize;
48
+ ThreadSafeInteger m_numActiveEncodes;
49
+
50
+ x265_picture ***m_inputPicBuffer; //[numEncodes][queueSize]
51
+ x265_analysis_data **m_analysisBuffer; //[numEncodes][queueSize]
52
+ int **m_readFlag;
53
+
54
+ ThreadSafeInteger *m_picWriteCnt;
55
+ ThreadSafeInteger *m_picReadCnt;
56
+ ThreadSafeInteger **m_picIdxReadCnt;
57
+ ThreadSafeInteger *m_analysisWriteCnt; //[numEncodes][queueSize]
58
+ ThreadSafeInteger *m_analysisReadCnt; //[numEncodes][queueSize]
59
+ ThreadSafeInteger **m_analysisWrite; //[numEncodes][queueSize]
60
+ ThreadSafeInteger **m_analysisRead; //[numEncodes][queueSize]
61
+
62
+ AbrEncoder(CLIOptions cliopt[], uint8_t numEncodes, int& ret);
63
+ bool allocBuffers();
64
+ void destroy();
65
+
66
+ };
67
+
68
+ class PassEncoder : public Thread
69
+ {
70
+ public:
71
+
72
+ uint32_t m_id;
73
+ x265_param *m_param;
74
+ AbrEncoder *m_parent;
75
+ x265_encoder *m_encoder;
76
+ Reader *m_reader;
77
+ Scaler *m_scaler;
78
+ bool m_inputOver;
79
+
80
+ int m_threadActive;
81
+ int m_lastIdx;
82
+ uint32_t m_outputNalsCount;
83
+
84
+ x265_picture **m_inputPicBuffer;
85
+ x265_analysis_data **m_analysisBuffer;
86
+ x265_nal **m_outputNals;
87
+ x265_picture **m_outputRecon;
88
+
89
+ CLIOptions m_cliopt;
90
+ InputFile* m_input;
91
+ const char* m_reconPlayCmd;
92
+ FILE* m_qpfile;
93
+ FILE* m_zoneFile;
94
+ FILE* m_dolbyVisionRpu;/* File containing Dolby Vision BL RPU metadata */
95
+
96
+ int m_ret;
97
+
98
+ PassEncoder(uint32_t id, CLIOptions cliopt, AbrEncoder *parent);
99
+ int init(int &result);
100
+ void setReuseLevel();
101
+
102
+ void startThreads();
103
+ void copyInfo(x265_analysis_data *src);
104
+
105
+ bool readPicture(x265_picture*);
106
+ void destroy();
107
+
108
+ private:
109
+ void threadMain();
110
+ };
111
+
112
+ class Scaler : public Thread
113
+ {
114
+ public:
115
+ PassEncoder *m_parentEnc;
116
+ int m_id;
117
+ int m_scalePlanes[3];
118
+ int m_scaleFrameSize;
119
+ uint32_t m_threadId;
120
+ uint32_t m_threadTotal;
121
+ ThreadSafeInteger m_scaledWriteCnt;
122
+ VideoDesc* m_srcFormat;
123
+ VideoDesc* m_dstFormat;
124
+ int m_threadActive;
125
+ ScalerFilterManager* m_filterManager;
126
+
127
+ Scaler(int threadId, int threadNum, int id, VideoDesc *src, VideoDesc * dst, PassEncoder *parentEnc);
128
+ bool scalePic(x265_picture *destination, x265_picture *source);
129
+ void threadMain();
130
+ void destroy()
131
+ {
132
+ if (m_filterManager)
133
+ {
134
+ delete m_filterManager;
135
+ m_filterManager = NULL;
136
+ }
137
+ }
138
+ };
139
+
140
+ class Reader : public Thread
141
+ {
142
+ public:
143
+ PassEncoder *m_parentEnc;
144
+ int m_id;
145
+ InputFile* m_input;
146
+ int m_threadActive;
147
+
148
+ Reader(int id, PassEncoder *parentEnc);
149
+ void threadMain();
150
+ };
151
+}
152
+
153
+#endif // ifndef ABR_ENCODE_H
154
+#pragma once
155
x265_3.3.tar.gz/source/common/CMakeLists.txt -> x265_3.4.tar.gz/source/common/CMakeLists.txt
Changed
59
1
2
endif(EXTRA_LIB)
3
4
if(ENABLE_ASSEMBLY)
5
- set_source_files_properties(threading.cpp primitives.cpp PROPERTIES COMPILE_FLAGS -DENABLE_ASSEMBLY=1)
6
+ set_source_files_properties(threading.cpp primitives.cpp pixel.cpp PROPERTIES COMPILE_FLAGS -DENABLE_ASSEMBLY=1)
7
list(APPEND VFLAGS "-DENABLE_ASSEMBLY=1")
8
endif(ENABLE_ASSEMBLY)
9
10
11
endif(ENABLE_ASSEMBLY AND X86)
12
13
if(ENABLE_ASSEMBLY AND (ARM OR CROSS_COMPILE_ARM))
14
- set(C_SRCS asm-primitives.cpp pixel.h mc.h ipfilter8.h blockcopy8.h dct8.h loopfilter.h)
15
+ if(ARM64)
16
+ if(GCC AND (CMAKE_CXX_FLAGS_RELEASE MATCHES "-O3"))
17
+ message(STATUS "Detected CXX compiler using -O3 optimization level")
18
+ add_definitions(-DAUTO_VECTORIZE=1)
19
+ endif()
20
+ set(C_SRCS asm-primitives.cpp pixel.h ipfilter8.h)
21
22
- # add ARM assembly/intrinsic files here
23
- set(A_SRCS asm.S cpu-a.S mc-a.S sad-a.S pixel-util.S ssd-a.S blockcopy8.S ipfilter8.S dct-a.S)
24
- set(VEC_PRIMITIVES)
25
+ # add ARM assembly/intrinsic files here
26
+ set(A_SRCS asm.S mc-a.S sad-a.S pixel-util.S ipfilter8.S)
27
+ set(VEC_PRIMITIVES)
28
29
- set(ARM_ASMS "${A_SRCS}" CACHE INTERNAL "ARM Assembly Sources")
30
- foreach(SRC ${C_SRCS})
31
- set(ASM_PRIMITIVES ${ASM_PRIMITIVES} arm/${SRC})
32
- endforeach()
33
+ set(ARM_ASMS "${A_SRCS}" CACHE INTERNAL "ARM Assembly Sources")
34
+ foreach(SRC ${C_SRCS})
35
+ set(ASM_PRIMITIVES ${ASM_PRIMITIVES} aarch64/${SRC})
36
+ endforeach()
37
+ else()
38
+ set(C_SRCS asm-primitives.cpp pixel.h mc.h ipfilter8.h blockcopy8.h dct8.h loopfilter.h)
39
+
40
+ # add ARM assembly/intrinsic files here
41
+ set(A_SRCS asm.S cpu-a.S mc-a.S sad-a.S pixel-util.S ssd-a.S blockcopy8.S ipfilter8.S dct-a.S)
42
+ set(VEC_PRIMITIVES)
43
+
44
+ set(ARM_ASMS "${A_SRCS}" CACHE INTERNAL "ARM Assembly Sources")
45
+ foreach(SRC ${C_SRCS})
46
+ set(ASM_PRIMITIVES ${ASM_PRIMITIVES} arm/${SRC})
47
+ endforeach()
48
+ endif()
49
source_group(Assembly FILES ${ASM_PRIMITIVES})
50
endif(ENABLE_ASSEMBLY AND (ARM OR CROSS_COMPILE_ARM))
51
52
53
predict.cpp predict.h
54
scalinglist.cpp scalinglist.h
55
quant.cpp quant.h contexts.h
56
- deblock.cpp deblock.h)
57
+ deblock.cpp deblock.h
58
+ scaler.cpp scaler.h)
59
x265_3.4.tar.gz/source/common/aarch64/asm-primitives.cpp
Added
201
1
2
+/*****************************************************************************
3
+ * Copyright (C) 2020 MulticoreWare, Inc
4
+ *
5
+ * Authors: Hongbin Liu <liuhongbin1@huawei.com>
6
+ * Yimeng Su <yimeng.su@huawei.com>
7
+ *
8
+ * This program is free software; you can redistribute it and/or modify
9
+ * it under the terms of the GNU General Public License as published by
10
+ * the Free Software Foundation; either version 2 of the License, or
11
+ * (at your option) any later version.
12
+ *
13
+ * This program is distributed in the hope that it will be useful,
14
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
15
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16
+ * GNU General Public License for more details.
17
+ *
18
+ * You should have received a copy of the GNU General Public License
19
+ * along with this program; if not, write to the Free Software
20
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
21
+ *
22
+ * This program is also available under a commercial proprietary license.
23
+ * For more information, contact us at license @ x265.com.
24
+ *****************************************************************************/
25
+
26
+#include "common.h"
27
+#include "primitives.h"
28
+#include "x265.h"
29
+#include "cpu.h"
30
+
31
+
32
+#if defined(__GNUC__)
33
+#define GCC_VERSION (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__)
34
+#endif
35
+
36
+#define GCC_4_9_0 40900
37
+#define GCC_5_1_0 50100
38
+
39
+extern "C" {
40
+#include "pixel.h"
41
+#include "pixel-util.h"
42
+#include "ipfilter8.h"
43
+}
44
+
45
+namespace X265_NS {
46
+// private x265 namespace
47
+
48
+// Two-pass 8-tap luma interpolation: luma_hps filters src horizontally into the int16_t buffer immed, then luma_vsp filters immed into dst.
49
+template<int size>
50
+void interp_8tap_hv_pp_cpu(const pixel* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int idxX, int idxY)
51
+{
52
+ ALIGN_VAR_32(int16_t, immed[MAX_CU_SIZE * (MAX_CU_SIZE + NTAPS_LUMA - 1)]); // MAX_CU_SIZE + NTAPS_LUMA - 1 rows of MAX_CU_SIZE samples
53
+ const int halfFilterSize = NTAPS_LUMA >> 1;
54
+ const int immedStride = MAX_CU_SIZE;
55
+
56
+ primitives.pu[size].luma_hps(src, srcStride, immed, immedStride, idxX, 1); // last argument is isRowExt = 1
57
+ primitives.pu[size].luma_vsp(immed + (halfFilterSize - 1) * immedStride, immedStride, dst, dstStride, idxY); // second pass starts halfFilterSize - 1 rows into immed (presumably skipping the extra rows the row-extended first pass writes above the block -- confirm)
58
+}
59
+
60
+
61
+/* Temporary workaround: the luma_vsp assembly primitives have not been completed yet,
62
+ * but interp_8tap_hv_pp_cpu calls an assembly luma_hps followed by luma_vsp. Copy the C luma_vsp
63
+ * entries from cp into asmp for the sizes below; otherwise a segmentation fault occurs. */
64
+void setupAliasCPrimitives(EncoderPrimitives &cp, EncoderPrimitives &asmp, int cpuMask)
65
+{
66
+ if (cpuMask & X265_CPU_NEON)
67
+ {
68
+ asmp.pu[LUMA_8x4].luma_vsp = cp.pu[LUMA_8x4].luma_vsp;
69
+ asmp.pu[LUMA_8x8].luma_vsp = cp.pu[LUMA_8x8].luma_vsp;
70
+ asmp.pu[LUMA_8x16].luma_vsp = cp.pu[LUMA_8x16].luma_vsp;
71
+ asmp.pu[LUMA_8x32].luma_vsp = cp.pu[LUMA_8x32].luma_vsp;
72
+ asmp.pu[LUMA_12x16].luma_vsp = cp.pu[LUMA_12x16].luma_vsp;
73
+#if !AUTO_VECTORIZE || GCC_VERSION < GCC_5_1_0 /* gcc_version < gcc-5.1.0 */
74
+ asmp.pu[LUMA_16x4].luma_vsp = cp.pu[LUMA_16x4].luma_vsp;
75
+ asmp.pu[LUMA_16x8].luma_vsp = cp.pu[LUMA_16x8].luma_vsp;
76
+ asmp.pu[LUMA_16x12].luma_vsp = cp.pu[LUMA_16x12].luma_vsp;
77
+ asmp.pu[LUMA_16x16].luma_vsp = cp.pu[LUMA_16x16].luma_vsp;
78
+ asmp.pu[LUMA_16x32].luma_vsp = cp.pu[LUMA_16x32].luma_vsp;
79
+ asmp.pu[LUMA_16x64].luma_vsp = cp.pu[LUMA_16x64].luma_vsp;
80
+ asmp.pu[LUMA_32x16].luma_vsp = cp.pu[LUMA_32x16].luma_vsp;
81
+ asmp.pu[LUMA_32x24].luma_vsp = cp.pu[LUMA_32x24].luma_vsp;
82
+ asmp.pu[LUMA_32x32].luma_vsp = cp.pu[LUMA_32x32].luma_vsp;
83
+ asmp.pu[LUMA_32x64].luma_vsp = cp.pu[LUMA_32x64].luma_vsp;
84
+ asmp.pu[LUMA_48x64].luma_vsp = cp.pu[LUMA_48x64].luma_vsp;
85
+ asmp.pu[LUMA_64x16].luma_vsp = cp.pu[LUMA_64x16].luma_vsp;
86
+ asmp.pu[LUMA_64x32].luma_vsp = cp.pu[LUMA_64x32].luma_vsp;
87
+ asmp.pu[LUMA_64x48].luma_vsp = cp.pu[LUMA_64x48].luma_vsp;
88
+ asmp.pu[LUMA_64x64].luma_vsp = cp.pu[LUMA_64x64].luma_vsp;
89
+#if !AUTO_VECTORIZE || GCC_VERSION < GCC_4_9_0 /* gcc_version < gcc-4.9.0 */
90
+ asmp.pu[LUMA_4x4].luma_vsp = cp.pu[LUMA_4x4].luma_vsp;
91
+ asmp.pu[LUMA_4x8].luma_vsp = cp.pu[LUMA_4x8].luma_vsp;
92
+ asmp.pu[LUMA_4x16].luma_vsp = cp.pu[LUMA_4x16].luma_vsp;
93
+ asmp.pu[LUMA_24x32].luma_vsp = cp.pu[LUMA_24x32].luma_vsp;
94
+ asmp.pu[LUMA_32x8].luma_vsp = cp.pu[LUMA_32x8].luma_vsp;
95
+#endif // inner block: !AUTO_VECTORIZE || GCC_VERSION < GCC_4_9_0 (only reached inside the outer block)
96
+#endif // outer block: !AUTO_VECTORIZE || GCC_VERSION < GCC_5_1_0
97
+ }
98
+}
99
+
100
+
101
+void setupAssemblyPrimitives(EncoderPrimitives &p, int cpuMask)
102
+{
103
+ if (cpuMask & X265_CPU_NEON)
104
+ {
105
+ p.pu[LUMA_4x4].satd = PFX(pixel_satd_4x4_neon);
106
+ p.pu[LUMA_4x8].satd = PFX(pixel_satd_4x8_neon);
107
+ p.pu[LUMA_4x16].satd = PFX(pixel_satd_4x16_neon);
108
+ p.pu[LUMA_8x4].satd = PFX(pixel_satd_8x4_neon);
109
+ p.pu[LUMA_8x8].satd = PFX(pixel_satd_8x8_neon);
110
+ p.pu[LUMA_12x16].satd = PFX(pixel_satd_12x16_neon);
111
+
112
+ p.chroma[X265_CSP_I420].pu[CHROMA_420_4x4].satd = PFX(pixel_satd_4x4_neon);
113
+ p.chroma[X265_CSP_I420].pu[CHROMA_420_4x8].satd = PFX(pixel_satd_4x8_neon);
114
+ p.chroma[X265_CSP_I420].pu[CHROMA_420_4x16].satd = PFX(pixel_satd_4x16_neon);
115
+ p.chroma[X265_CSP_I420].pu[CHROMA_420_8x4].satd = PFX(pixel_satd_8x4_neon);
116
+ p.chroma[X265_CSP_I420].pu[CHROMA_420_8x8].satd = PFX(pixel_satd_8x8_neon);
117
+ p.chroma[X265_CSP_I420].pu[CHROMA_420_12x16].satd = PFX(pixel_satd_12x16_neon);
118
+
119
+ p.chroma[X265_CSP_I422].pu[CHROMA_422_4x4].satd = PFX(pixel_satd_4x4_neon);
120
+ p.chroma[X265_CSP_I422].pu[CHROMA_422_4x8].satd = PFX(pixel_satd_4x8_neon);
121
+ p.chroma[X265_CSP_I422].pu[CHROMA_422_4x16].satd = PFX(pixel_satd_4x16_neon);
122
+ p.chroma[X265_CSP_I422].pu[CHROMA_422_4x32].satd = PFX(pixel_satd_4x32_neon);
123
+ p.chroma[X265_CSP_I422].pu[CHROMA_422_8x4].satd = PFX(pixel_satd_8x4_neon);
124
+ p.chroma[X265_CSP_I422].pu[CHROMA_422_8x8].satd = PFX(pixel_satd_8x8_neon);
125
+ p.chroma[X265_CSP_I422].pu[CHROMA_422_12x32].satd = PFX(pixel_satd_12x32_neon);
126
+
127
+ p.pu[LUMA_4x4].pixelavg_pp[NONALIGNED] = PFX(pixel_avg_pp_4x4_neon);
128
+ p.pu[LUMA_4x8].pixelavg_pp[NONALIGNED] = PFX(pixel_avg_pp_4x8_neon);
129
+ p.pu[LUMA_4x16].pixelavg_pp[NONALIGNED] = PFX(pixel_avg_pp_4x16_neon);
130
+ p.pu[LUMA_8x4].pixelavg_pp[NONALIGNED] = PFX(pixel_avg_pp_8x4_neon);
131
+ p.pu[LUMA_8x8].pixelavg_pp[NONALIGNED] = PFX(pixel_avg_pp_8x8_neon);
132
+ p.pu[LUMA_8x16].pixelavg_pp[NONALIGNED] = PFX(pixel_avg_pp_8x16_neon);
133
+ p.pu[LUMA_8x32].pixelavg_pp[NONALIGNED] = PFX(pixel_avg_pp_8x32_neon);
134
+
135
+ p.pu[LUMA_4x4].pixelavg_pp[ALIGNED] = PFX(pixel_avg_pp_4x4_neon);
136
+ p.pu[LUMA_4x8].pixelavg_pp[ALIGNED] = PFX(pixel_avg_pp_4x8_neon);
137
+ p.pu[LUMA_4x16].pixelavg_pp[ALIGNED] = PFX(pixel_avg_pp_4x16_neon);
138
+ p.pu[LUMA_8x4].pixelavg_pp[ALIGNED] = PFX(pixel_avg_pp_8x4_neon);
139
+ p.pu[LUMA_8x8].pixelavg_pp[ALIGNED] = PFX(pixel_avg_pp_8x8_neon);
140
+ p.pu[LUMA_8x16].pixelavg_pp[ALIGNED] = PFX(pixel_avg_pp_8x16_neon);
141
+ p.pu[LUMA_8x32].pixelavg_pp[ALIGNED] = PFX(pixel_avg_pp_8x32_neon);
142
+
143
+ p.pu[LUMA_8x4].sad_x3 = PFX(sad_x3_8x4_neon);
144
+ p.pu[LUMA_8x8].sad_x3 = PFX(sad_x3_8x8_neon);
145
+ p.pu[LUMA_8x16].sad_x3 = PFX(sad_x3_8x16_neon);
146
+ p.pu[LUMA_8x32].sad_x3 = PFX(sad_x3_8x32_neon);
147
+
148
+ p.pu[LUMA_8x4].sad_x4 = PFX(sad_x4_8x4_neon);
149
+ p.pu[LUMA_8x8].sad_x4 = PFX(sad_x4_8x8_neon);
150
+ p.pu[LUMA_8x16].sad_x4 = PFX(sad_x4_8x16_neon);
151
+ p.pu[LUMA_8x32].sad_x4 = PFX(sad_x4_8x32_neon);
152
+
153
+ // quant
154
+ p.quant = PFX(quant_neon);
155
+ // luma_hps
156
+ p.pu[LUMA_4x4].luma_hps = PFX(interp_8tap_horiz_ps_4x4_neon);
157
+ p.pu[LUMA_4x8].luma_hps = PFX(interp_8tap_horiz_ps_4x8_neon);
158
+ p.pu[LUMA_4x16].luma_hps = PFX(interp_8tap_horiz_ps_4x16_neon);
159
+ p.pu[LUMA_8x4].luma_hps = PFX(interp_8tap_horiz_ps_8x4_neon);
160
+ p.pu[LUMA_8x8].luma_hps = PFX(interp_8tap_horiz_ps_8x8_neon);
161
+ p.pu[LUMA_8x16].luma_hps = PFX(interp_8tap_horiz_ps_8x16_neon);
162
+ p.pu[LUMA_8x32].luma_hps = PFX(interp_8tap_horiz_ps_8x32_neon);
163
+ p.pu[LUMA_12x16].luma_hps = PFX(interp_8tap_horiz_ps_12x16_neon);
164
+ p.pu[LUMA_24x32].luma_hps = PFX(interp_8tap_horiz_ps_24x32_neon);
165
+#if !AUTO_VECTORIZE || GCC_VERSION < GCC_5_1_0 /* gcc_version < gcc-5.1.0 */
166
+ p.pu[LUMA_16x4].luma_hps = PFX(interp_8tap_horiz_ps_16x4_neon);
167
+ p.pu[LUMA_16x8].luma_hps = PFX(interp_8tap_horiz_ps_16x8_neon);
168
+ p.pu[LUMA_16x12].luma_hps = PFX(interp_8tap_horiz_ps_16x12_neon);
169
+ p.pu[LUMA_16x16].luma_hps = PFX(interp_8tap_horiz_ps_16x16_neon);
170
+ p.pu[LUMA_16x32].luma_hps = PFX(interp_8tap_horiz_ps_16x32_neon);
171
+ p.pu[LUMA_16x64].luma_hps = PFX(interp_8tap_horiz_ps_16x64_neon);
172
+ p.pu[LUMA_32x8].luma_hps = PFX(interp_8tap_horiz_ps_32x8_neon);
173
+ p.pu[LUMA_32x16].luma_hps = PFX(interp_8tap_horiz_ps_32x16_neon);
174
+ p.pu[LUMA_32x24].luma_hps = PFX(interp_8tap_horiz_ps_32x24_neon);
175
+ p.pu[LUMA_32x32].luma_hps = PFX(interp_8tap_horiz_ps_32x32_neon);
176
+ p.pu[LUMA_32x64].luma_hps = PFX(interp_8tap_horiz_ps_32x64_neon);
177
+ p.pu[LUMA_48x64].luma_hps = PFX(interp_8tap_horiz_ps_48x64_neon);
178
+ p.pu[LUMA_64x16].luma_hps = PFX(interp_8tap_horiz_ps_64x16_neon);
179
+ p.pu[LUMA_64x32].luma_hps = PFX(interp_8tap_horiz_ps_64x32_neon);
180
+ p.pu[LUMA_64x48].luma_hps = PFX(interp_8tap_horiz_ps_64x48_neon);
181
+ p.pu[LUMA_64x64].luma_hps = PFX(interp_8tap_horiz_ps_64x64_neon);
182
+#endif
183
+
184
+ p.pu[LUMA_8x4].luma_hvpp = interp_8tap_hv_pp_cpu<LUMA_8x4>;
185
+ p.pu[LUMA_8x8].luma_hvpp = interp_8tap_hv_pp_cpu<LUMA_8x8>;
186
+ p.pu[LUMA_8x16].luma_hvpp = interp_8tap_hv_pp_cpu<LUMA_8x16>;
187
+ p.pu[LUMA_8x32].luma_hvpp = interp_8tap_hv_pp_cpu<LUMA_8x32>;
188
+ p.pu[LUMA_12x16].luma_hvpp = interp_8tap_hv_pp_cpu<LUMA_12x16>;
189
+#if !AUTO_VECTORIZE || GCC_VERSION < GCC_5_1_0 /* gcc_version < gcc-5.1.0 */
190
+ p.pu[LUMA_16x4].luma_hvpp = interp_8tap_hv_pp_cpu<LUMA_16x4>;
191
+ p.pu[LUMA_16x8].luma_hvpp = interp_8tap_hv_pp_cpu<LUMA_16x8>;
192
+ p.pu[LUMA_16x12].luma_hvpp = interp_8tap_hv_pp_cpu<LUMA_16x12>;
193
+ p.pu[LUMA_16x16].luma_hvpp = interp_8tap_hv_pp_cpu<LUMA_16x16>;
194
+ p.pu[LUMA_16x32].luma_hvpp = interp_8tap_hv_pp_cpu<LUMA_16x32>;
195
+ p.pu[LUMA_16x64].luma_hvpp = interp_8tap_hv_pp_cpu<LUMA_16x64>;
196
+ p.pu[LUMA_32x16].luma_hvpp = interp_8tap_hv_pp_cpu<LUMA_32x16>;
197
+ p.pu[LUMA_32x24].luma_hvpp = interp_8tap_hv_pp_cpu<LUMA_32x24>;
198
+ p.pu[LUMA_32x32].luma_hvpp = interp_8tap_hv_pp_cpu<LUMA_32x32>;
199
+ p.pu[LUMA_32x64].luma_hvpp = interp_8tap_hv_pp_cpu<LUMA_32x64>;
200
+ p.pu[LUMA_48x64].luma_hvpp = interp_8tap_hv_pp_cpu<LUMA_48x64>;
201
x265_3.4.tar.gz/source/common/aarch64/asm.S
Added
71
1
2
+/*****************************************************************************
3
+ * Copyright (C) 2020 MulticoreWare, Inc
4
+ *
5
+ * Authors: Hongbin Liu <liuhongbin1@huawei.com>
6
+ *
7
+ * This program is free software; you can redistribute it and/or modify
8
+ * it under the terms of the GNU General Public License as published by
9
+ * the Free Software Foundation; either version 2 of the License, or
10
+ * (at your option) any later version.
11
+ *
12
+ * This program is distributed in the hope that it will be useful,
13
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
+ * GNU General Public License for more details.
16
+ *
17
+ * You should have received a copy of the GNU General Public License
18
+ * along with this program; if not, write to the Free Software
19
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
20
+ *
21
+ * This program is also available under a commercial proprietary license.
22
+ * For more information, contact us at license @ x265.com.
23
+ *****************************************************************************/
24
+
25
+.arch armv8-a
26
+// EXTERN_ASM is written in front of exported symbol names by the function macro: "_" when PREFIX is defined, empty otherwise.
27
+#ifdef PREFIX
28
+#define EXTERN_ASM _
29
+#else
30
+#define EXTERN_ASM
31
+#endif
32
+// ELF and FUNC prefix directives that are only valid in some configurations. When enabled they expand to nothing; when disabled they must turn the whole line into a comment. On AArch64 GAS that is '#' as the first character of the line ('@' is a comment character only on 32-bit ARM).
33
+#ifdef __ELF__
34
+#define ELF
35
+#else
36
+#define ELF #
37
+#endif
38
+
39
+#define HAVE_AS_FUNC 1
40
+
41
+#if HAVE_AS_FUNC
42
+#define FUNC
43
+#else
44
+#define FUNC #
45
+#endif
46
+// function name, export=1: aligns and opens the label for name (global and EXTERN_ASM-prefixed when export is 1, unprefixed otherwise; hidden on ELF either way) and defines a one-shot endfunc macro that emits .size/.endfunc and then purges itself.
47
+.macro function name, export=1
48
+ .macro endfunc
49
+ELF .size \name, . - \name
50
+FUNC .endfunc
51
+ .purgem endfunc
52
+ .endm
53
+ .align 2
54
+.if \export == 1
55
+ .global EXTERN_ASM\name
56
+ELF .hidden EXTERN_ASM\name
57
+ELF .type EXTERN_ASM\name, %function
58
+FUNC .func EXTERN_ASM\name
59
+EXTERN_ASM\name:
60
+.else
61
+ELF .hidden \name
62
+ELF .type \name, %function
63
+FUNC .func \name
64
+\name:
65
+.endif
66
+.endm
67
+
68
+
69
+#define FENC_STRIDE 64
70
+#define FDEC_STRIDE 32
71
x265_3.4.tar.gz/source/common/aarch64/ipfilter8.S
Added
201
1
2
+/*****************************************************************************
3
+ * Copyright (C) 2020 MulticoreWare, Inc
4
+ *
5
+ * Authors: Yimeng Su <yimeng.su@huawei.com>
6
+ *
7
+ * This program is free software; you can redistribute it and/or modify
8
+ * it under the terms of the GNU General Public License as published by
9
+ * the Free Software Foundation; either version 2 of the License, or
10
+ * (at your option) any later version.
11
+ *
12
+ * This program is distributed in the hope that it will be useful,
13
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
+ * GNU General Public License for more details.
16
+ *
17
+ * You should have received a copy of the GNU General Public License
18
+ * along with this program; if not, write to the Free Software
19
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
20
+ *
21
+ * This program is also available under a commercial proprietary license.
22
+ * For more information, contact us at license @ x265.com.
23
+ *****************************************************************************/
24
+
25
+#include "asm.S"
26
+
27
+.section .rodata
28
+
29
+.align 4
30
+
31
+.text
32
+
33
+
34
+// qpel_filter_0_32b: v17/v18 (4 x 32-bit lanes each) = 64 * the zero-extended bytes of v5.8b; uses v19 and v24 as scratch.
35
+.macro qpel_filter_0_32b
36
+ movi v24.8h, #64
37
+ uxtl v19.8h, v5.8b
38
+ smull v17.4s, v19.4h, v24.4h
39
+ smull2 v18.4s, v19.8h, v24.8h
40
+.endm
41
+// qpel_filter_1_32b: v17/v18 = -v0 + 4*v4 - 10*v1 + 58*v5 + 17*v2 - 5*v6 + v3, computed in 32-bit lanes on the zero-extended low 8 bytes of each register; clobbers v1, v2 and v16-v24.
42
+.macro qpel_filter_1_32b
43
+ movi v16.8h, #58 // v17/v18 = 58 * v5
44
+ uxtl v19.8h, v5.8b
45
+ smull v17.4s, v19.4h, v16.4h
46
+ smull2 v18.4s, v19.8h, v16.8h
47
+
48
+ movi v24.8h, #10 // v19/v20 = 10 * v1
49
+ uxtl v21.8h, v1.8b
50
+ smull v19.4s, v21.4h, v24.4h
51
+ smull2 v20.4s, v21.8h, v24.8h
52
+
53
+ movi v16.8h, #17 // v21/v22 = 17 * v2
54
+ uxtl v23.8h, v2.8b
55
+ smull v21.4s, v23.4h, v16.4h
56
+ smull2 v22.4s, v23.8h, v16.8h
57
+
58
+ movi v24.8h, #5 // v23/v16 = 5 * v6 (v1 becomes scratch from here on)
59
+ uxtl v1.8h, v6.8b
60
+ smull v23.4s, v1.4h, v24.4h
61
+ smull2 v16.4s, v1.8h, v24.8h
62
+
63
+ sub v17.4s, v17.4s, v19.4s
64
+ sub v18.4s, v18.4s, v20.4s
65
+
66
+ uxtl v1.8h, v4.8b // v19/v20 = 4 * v4 (widening shift left by 2)
67
+ sshll v19.4s, v1.4h, #2
68
+ sshll2 v20.4s, v1.8h, #2
69
+
70
+ add v17.4s, v17.4s, v21.4s
71
+ add v18.4s, v18.4s, v22.4s
72
+
73
+ uxtl v1.8h, v0.8b // v21/v22 = v3 - v0
74
+ uxtl v2.8h, v3.8b
75
+ ssubl v21.4s, v2.4h, v1.4h
76
+ ssubl2 v22.4s, v2.8h, v1.8h
77
+
78
+ add v17.4s, v17.4s, v19.4s
79
+ add v18.4s, v18.4s, v20.4s
80
+ sub v21.4s, v21.4s, v23.4s
81
+ sub v22.4s, v22.4s, v16.4s
82
+ add v17.4s, v17.4s, v21.4s
83
+ add v18.4s, v18.4s, v22.4s
84
+.endm
85
+// qpel_filter_2_32b: v17/v18 = 40*(v5 + v2) + 4*(v4 + v3) - 11*(v1 + v6) - (v0 + v7), computed in 32-bit lanes on the zero-extended low 8 bytes of each register; clobbers v1, v2 and v16-v23.
86
+.macro qpel_filter_2_32b
87
+ movi v16.4s, #11
88
+ uxtl v19.8h, v5.8b // v17/v18 = v5 + v2
89
+ uxtl v20.8h, v2.8b
90
+ saddl v17.4s, v19.4h, v20.4h
91
+ saddl2 v18.4s, v19.8h, v20.8h
92
+
93
+ uxtl v21.8h, v1.8b // v19/v20 = v1 + v6
94
+ uxtl v22.8h, v6.8b
95
+ saddl v19.4s, v21.4h, v22.4h
96
+ saddl2 v20.4s, v21.8h, v22.8h
97
+
98
+ mul v19.4s, v19.4s, v16.4s
99
+ mul v20.4s, v20.4s, v16.4s
100
+
101
+ movi v16.4s, #40
102
+ mul v17.4s, v17.4s, v16.4s
103
+ mul v18.4s, v18.4s, v16.4s
104
+
105
+ uxtl v21.8h, v4.8b // v23/v16 = v4 + v3
106
+ uxtl v22.8h, v3.8b
107
+ saddl v23.4s, v21.4h, v22.4h
108
+ saddl2 v16.4s, v21.8h, v22.8h
109
+
110
+ uxtl v1.8h, v0.8b // v21/v22 = v0 + v7
111
+ uxtl v2.8h, v7.8b
112
+ saddl v21.4s, v1.4h, v2.4h
113
+ saddl2 v22.4s, v1.8h, v2.8h
114
+
115
+ shl v23.4s, v23.4s, #2
116
+ shl v16.4s, v16.4s, #2
117
+
118
+ add v19.4s, v19.4s, v21.4s
119
+ add v20.4s, v20.4s, v22.4s
120
+ add v17.4s, v17.4s, v23.4s
121
+ add v18.4s, v18.4s, v16.4s
122
+ sub v17.4s, v17.4s, v19.4s
123
+ sub v18.4s, v18.4s, v20.4s
124
+.endm
125
+// qpel_filter_3_32b: v17/v18 = v4 - 5*v1 + 17*v5 + 58*v2 - 10*v6 + 4*v3 - v7, computed in 32-bit lanes on the zero-extended low 8 bytes of each register; clobbers v1, v2 and v16-v24.
126
+.macro qpel_filter_3_32b
127
+ movi v16.8h, #17
128
+ movi v24.8h, #5
129
+
130
+ uxtl v19.8h, v5.8b // v17/v18 = 17 * v5
131
+ smull v17.4s, v19.4h, v16.4h
132
+ smull2 v18.4s, v19.8h, v16.8h
133
+
134
+ uxtl v21.8h, v1.8b // v19/v20 = 5 * v1
135
+ smull v19.4s, v21.4h, v24.4h
136
+ smull2 v20.4s, v21.8h, v24.8h
137
+
138
+ movi v16.8h, #58 // v21/v22 = 58 * v2
139
+ uxtl v23.8h, v2.8b
140
+ smull v21.4s, v23.4h, v16.4h
141
+ smull2 v22.4s, v23.8h, v16.8h
142
+
143
+ movi v24.8h, #10 // v23/v16 = 10 * v6 (v1 becomes scratch from here on)
144
+ uxtl v1.8h, v6.8b
145
+ smull v23.4s, v1.4h, v24.4h
146
+ smull2 v16.4s, v1.8h, v24.8h
147
+
148
+ sub v17.4s, v17.4s, v19.4s
149
+ sub v18.4s, v18.4s, v20.4s
150
+
151
+ uxtl v1.8h, v3.8b // v19/v20 = 4 * v3 (widening shift left by 2)
152
+ sshll v19.4s, v1.4h, #2
153
+ sshll2 v20.4s, v1.8h, #2
154
+
155
+ add v17.4s, v17.4s, v21.4s
156
+ add v18.4s, v18.4s, v22.4s
157
+
158
+ uxtl v1.8h, v4.8b // v21/v22 = v4 - v7
159
+ uxtl v2.8h, v7.8b
160
+ ssubl v21.4s, v1.4h, v2.4h
161
+ ssubl2 v22.4s, v1.8h, v2.8h
162
+
163
+ add v17.4s, v17.4s, v19.4s
164
+ add v18.4s, v18.4s, v20.4s
165
+ sub v21.4s, v21.4s, v23.4s
166
+ sub v22.4s, v22.4s, v16.4s
167
+ add v17.4s, v17.4s, v21.4s
168
+ add v18.4s, v18.4s, v22.4s
169
+.endm
170
+
171
+
172
+
173
+// vextin8: loads 16 bytes from [x11] (post-incrementing x11 by 16) and leaves 8-byte windows starting at byte offsets 1..7 of that load in v0, v4, v1, v5, v2, v6, v3 (in that order); v7.d[0] holds bytes 8-15.
174
+.macro vextin8
175
+ ld1 {v3.16b}, [x11], #16
176
+ mov v7.d[0], v3.d[1]
177
+ ext v0.8b, v3.8b, v7.8b, #1
178
+ ext v4.8b, v3.8b, v7.8b, #2
179
+ ext v1.8b, v3.8b, v7.8b, #3
180
+ ext v5.8b, v3.8b, v7.8b, #4
181
+ ext v2.8b, v3.8b, v7.8b, #5
182
+ ext v6.8b, v3.8b, v7.8b, #6
183
+ ext v3.8b, v3.8b, v7.8b, #7 // last, because it overwrites the source bytes held in v3
184
+.endm
185
+
186
+
187
+
188
+// void interp_horiz_ps_c(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt)
189
+.macro HPS_FILTER a b filterhps
190
+ mov w12, #8192
191
+ mov w6, w10
192
+ sub x3, x3, #\a
193
+ lsl x3, x3, #1
194
+ mov w9, #\a
195
+ cmp w9, #4
196
+ b.eq 14f
197
+ cmp w9, #12
198
+ b.eq 15f
199
+ b 7f
200
+14:
201
x265_3.4.tar.gz/source/common/aarch64/ipfilter8.h
Added
57
1
2
+/*****************************************************************************
3
+ * Copyright (C) 2020 MulticoreWare, Inc
4
+ *
5
+ * Authors: Yimeng Su <yimeng.su@huawei.com>
6
+ *
7
+ * This program is free software; you can redistribute it and/or modify
8
+ * it under the terms of the GNU General Public License as published by
9
+ * the Free Software Foundation; either version 2 of the License, or
10
+ * (at your option) any later version.
11
+ *
12
+ * This program is distributed in the hope that it will be useful,
13
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
+ * GNU General Public License for more details.
16
+ *
17
+ * You should have received a copy of the GNU General Public License
18
+ * along with this program; if not, write to the Free Software
19
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
20
+ *
21
+ * This program is also available under a commercial proprietary license.
22
+ * For more information, contact us at license @ x265.com.
23
+ *****************************************************************************/
24
+
25
+#ifndef X265_IPFILTER8_AARCH64_H
26
+#define X265_IPFILTER8_AARCH64_H
27
+
28
+
29
+void x265_interp_8tap_horiz_ps_4x4_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
30
+void x265_interp_8tap_horiz_ps_4x8_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
31
+void x265_interp_8tap_horiz_ps_4x16_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
32
+void x265_interp_8tap_horiz_ps_8x4_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
33
+void x265_interp_8tap_horiz_ps_8x8_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
34
+void x265_interp_8tap_horiz_ps_8x16_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
35
+void x265_interp_8tap_horiz_ps_8x32_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
36
+void x265_interp_8tap_horiz_ps_12x16_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
37
+void x265_interp_8tap_horiz_ps_16x4_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
38
+void x265_interp_8tap_horiz_ps_16x8_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
39
+void x265_interp_8tap_horiz_ps_16x12_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
40
+void x265_interp_8tap_horiz_ps_16x16_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
41
+void x265_interp_8tap_horiz_ps_16x32_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
42
+void x265_interp_8tap_horiz_ps_16x64_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
43
+void x265_interp_8tap_horiz_ps_24x32_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
44
+void x265_interp_8tap_horiz_ps_32x8_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
45
+void x265_interp_8tap_horiz_ps_32x16_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
46
+void x265_interp_8tap_horiz_ps_32x24_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
47
+void x265_interp_8tap_horiz_ps_32x32_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
48
+void x265_interp_8tap_horiz_ps_32x64_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
49
+void x265_interp_8tap_horiz_ps_48x64_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
50
+void x265_interp_8tap_horiz_ps_64x16_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
51
+void x265_interp_8tap_horiz_ps_64x32_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
52
+void x265_interp_8tap_horiz_ps_64x48_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
53
+void x265_interp_8tap_horiz_ps_64x64_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
54
+
55
+
56
+#endif // ifndef X265_IPFILTER8_AARCH64_H
57
x265_3.4.tar.gz/source/common/aarch64/mc-a.S
Added
65
1
2
+/*****************************************************************************
3
+ * Copyright (C) 2020 MulticoreWare, Inc
4
+ *
5
+ * Authors: Hongbin Liu <liuhongbin1@huawei.com>
6
+ *
7
+ * This program is free software; you can redistribute it and/or modify
8
+ * it under the terms of the GNU General Public License as published by
9
+ * the Free Software Foundation; either version 2 of the License, or
10
+ * (at your option) any later version.
11
+ *
12
+ * This program is distributed in the hope that it will be useful,
13
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
+ * GNU General Public License for more details.
16
+ *
17
+ * You should have received a copy of the GNU General Public License
18
+ * along with this program; if not, write to the Free Software
19
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
20
+ *
21
+ * This program is also available under a commercial proprietary license.
22
+ * For more information, contact us at license @ x265.com.
23
+ *****************************************************************************/
24
+
25
+#include "asm.S"
26
+
27
+.section .rodata
28
+
29
+.align 4
30
+
31
+.text
32
+// pixel_avg_pp_4xN_neon h: emits x265_pixel_avg_pp_4x<h>_neon. The body is unrolled h times: load 4 bytes from [x2] and [x4] (post-incremented by x3 / x5), take their rounding average with urhadd, store 4 bytes to [x0] (post-incremented by x1).
33
+.macro pixel_avg_pp_4xN_neon h
34
+function x265_pixel_avg_pp_4x\h\()_neon
35
+.rept \h
36
+ ld1 {v0.s}[0], [x2], x3
37
+ ld1 {v1.s}[0], [x4], x5
38
+ urhadd v2.8b, v0.8b, v1.8b
39
+ st1 {v2.s}[0], [x0], x1
40
+.endr
41
+ ret
42
+endfunc
43
+.endm
44
+
45
+pixel_avg_pp_4xN_neon 4
46
+pixel_avg_pp_4xN_neon 8
47
+pixel_avg_pp_4xN_neon 16
48
+// pixel_avg_pp_8xN_neon h: emits x265_pixel_avg_pp_8x<h>_neon. The body is unrolled h times: load 8 bytes from [x2] and [x4] (post-incremented by x3 / x5), take their rounding average with urhadd, store 8 bytes to [x0] (post-incremented by x1).
49
+.macro pixel_avg_pp_8xN_neon h
50
+function x265_pixel_avg_pp_8x\h\()_neon
51
+.rept \h
52
+ ld1 {v0.8b}, [x2], x3
53
+ ld1 {v1.8b}, [x4], x5
54
+ urhadd v2.8b, v0.8b, v1.8b
55
+ st1 {v2.8b}, [x0], x1
56
+.endr
57
+ ret
58
+endfunc
59
+.endm
60
+
61
+pixel_avg_pp_8xN_neon 4
62
+pixel_avg_pp_8xN_neon 8
63
+pixel_avg_pp_8xN_neon 16
64
+pixel_avg_pp_8xN_neon 32
65
x265_3.4.tar.gz/source/common/aarch64/pixel-util.S
Added
201
1
2
+/*****************************************************************************
3
+ * Copyright (C) 2020 MulticoreWare, Inc
4
+ *
5
+ * Authors: Yimeng Su <yimeng.su@huawei.com>
6
+ * Hongbin Liu <liuhongbin1@huawei.com>
7
+ *
8
+ * This program is free software; you can redistribute it and/or modify
9
+ * it under the terms of the GNU General Public License as published by
10
+ * the Free Software Foundation; either version 2 of the License, or
11
+ * (at your option) any later version.
12
+ *
13
+ * This program is distributed in the hope that it will be useful,
14
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
15
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16
+ * GNU General Public License for more details.
17
+ *
18
+ * You should have received a copy of the GNU General Public License
19
+ * along with this program; if not, write to the Free Software
20
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
21
+ *
22
+ * This program is also available under a commercial proprietary license.
23
+ * For more information, contact us at license @ x265.com.
24
+ *****************************************************************************/
25
+
26
+#include "asm.S"
27
+
28
+.section .rodata
29
+
30
+.align 4
31
+
32
+.text
33
+// x265_satd_4x8_8x4_end_neon: final SATD stage on the 16-bit lanes of v4-v7: add/sub butterflies interleaved with trn1/trn2 transposes, absolute values, pairwise umax, then a widening sum across lanes into s0. Clobbers v0-v7 and v16-v19.
34
+.macro x265_satd_4x8_8x4_end_neon
35
+ add v0.8h, v4.8h, v6.8h
36
+ add v1.8h, v5.8h, v7.8h
37
+ sub v2.8h, v4.8h, v6.8h
38
+ sub v3.8h, v5.8h, v7.8h
39
+
40
+ trn1 v16.8h, v0.8h, v1.8h
41
+ trn2 v17.8h, v0.8h, v1.8h
42
+ add v4.8h, v16.8h, v17.8h
43
+ trn1 v18.8h, v2.8h, v3.8h
44
+ trn2 v19.8h, v2.8h, v3.8h
45
+ sub v5.8h, v16.8h, v17.8h
46
+ add v6.8h, v18.8h, v19.8h
47
+ sub v7.8h, v18.8h, v19.8h
48
+ trn1 v0.4s, v4.4s, v6.4s
49
+ trn2 v2.4s, v4.4s, v6.4s
50
+ abs v0.8h, v0.8h
51
+ trn1 v1.4s, v5.4s, v7.4s
52
+ trn2 v3.4s, v5.4s, v7.4s
53
+ abs v2.8h, v2.8h
54
+ abs v1.8h, v1.8h
55
+ abs v3.8h, v3.8h
56
+ umax v0.8h, v0.8h, v2.8h
57
+ umax v1.8h, v1.8h, v3.8h
58
+ add v0.8h, v0.8h, v1.8h
59
+ uaddlv s0, v0.8h // 32-bit total of the eight 16-bit lanes
60
+.endm
61
+// pixel_satd_4x8_neon: reads eight 4-byte rows from [x0] (stride x1) and from [x2] (stride x3), packing rows k and k+4 into one 64-bit register, forms their 16-bit differences and the first add/sub stage, then finishes with x265_satd_4x8_8x4_end_neon. Result in s0; x0 and x2 end up advanced by eight rows.
62
+.macro pixel_satd_4x8_neon
63
+ ld1r {v1.2s}, [x2], x3
64
+ ld1r {v0.2s}, [x0], x1
65
+ ld1r {v3.2s}, [x2], x3
66
+ ld1r {v2.2s}, [x0], x1
67
+ ld1r {v5.2s}, [x2], x3
68
+ ld1r {v4.2s}, [x0], x1
69
+ ld1r {v7.2s}, [x2], x3
70
+ ld1r {v6.2s}, [x0], x1
71
+
72
+ ld1 {v1.s}[1], [x2], x3
73
+ ld1 {v0.s}[1], [x0], x1
74
+ usubl v0.8h, v0.8b, v1.8b
75
+ ld1 {v3.s}[1], [x2], x3
76
+ ld1 {v2.s}[1], [x0], x1
77
+ usubl v1.8h, v2.8b, v3.8b
78
+ ld1 {v5.s}[1], [x2], x3
79
+ ld1 {v4.s}[1], [x0], x1
80
+ usubl v2.8h, v4.8b, v5.8b
81
+ ld1 {v7.s}[1], [x2], x3
82
+ add v4.8h, v0.8h, v1.8h
83
+ sub v5.8h, v0.8h, v1.8h
84
+ ld1 {v6.s}[1], [x0], x1
85
+ usubl v3.8h, v6.8b, v7.8b
86
+ add v6.8h, v2.8h, v3.8h
87
+ sub v7.8h, v2.8h, v3.8h
88
+ x265_satd_4x8_8x4_end_neon
89
+.endm
90
+// 4x8 SATD: a single pixel_satd_4x8_neon pass; the 32-bit sum in v0.s[0] is returned in w0.
91
+// template<int w, int h>
92
+// int satd4(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2)
93
+function x265_pixel_satd_4x8_neon
94
+ pixel_satd_4x8_neon
95
+ mov w0, v0.s[0]
96
+ ret
97
+endfunc
98
+// 4x16 SATD: two consecutive pixel_satd_4x8_neon passes (the macro leaves x0/x2 eight rows further down); w4 accumulates, the total is returned in w0.
99
+// template<int w, int h>
100
+// int satd4(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2)
101
+function x265_pixel_satd_4x16_neon
102
+ eor w4, w4, w4
103
+ pixel_satd_4x8_neon
104
+ mov w5, v0.s[0]
105
+ add w4, w4, w5
106
+ pixel_satd_4x8_neon
107
+ mov w5, v0.s[0]
108
+ add w0, w5, w4
109
+ ret
110
+endfunc
111
+// 4x32 SATD: four consecutive pixel_satd_4x8_neon passes summed in w4 and returned in w0.
112
+// template<int w, int h>
113
+// int satd4(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2)
114
+function x265_pixel_satd_4x32_neon
115
+ eor w4, w4, w4
116
+.rept 4
117
+ pixel_satd_4x8_neon
118
+ mov w5, v0.s[0]
119
+ add w4, w4, w5
120
+.endr
121
+ mov w0, w4
122
+ ret
123
+endfunc
124
+// 12x16 SATD: three 4-byte-wide column strips (byte offsets 0, 4 and 8 from the saved base pointers x4/x5), each covered by two pixel_satd_4x8_neon passes; w7 accumulates, the total is returned in w0.
125
+// template<int w, int h>
126
+// int satd4(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2)
127
+function x265_pixel_satd_12x16_neon
128
+ mov x4, x0
129
+ mov x5, x2
130
+ eor w7, w7, w7
131
+ pixel_satd_4x8_neon
132
+ mov w6, v0.s[0]
133
+ add w7, w7, w6
134
+ pixel_satd_4x8_neon
135
+ mov w6, v0.s[0]
136
+ add w7, w7, w6
137
+
138
+ add x0, x4, #4
139
+ add x2, x5, #4
140
+ pixel_satd_4x8_neon
141
+ mov w6, v0.s[0]
142
+ add w7, w7, w6
143
+ pixel_satd_4x8_neon
144
+ mov w6, v0.s[0]
145
+ add w7, w7, w6
146
+
147
+ add x0, x4, #8
148
+ add x2, x5, #8
149
+ pixel_satd_4x8_neon
150
+ mov w6, v0.s[0]
151
+ add w7, w7, w6
152
+ pixel_satd_4x8_neon
153
+ mov w6, v0.s[0]
154
+ add w0, w7, w6
155
+ ret
156
+endfunc
157
+// 12x32 SATD: three 4-byte-wide column strips (byte offsets 0, 4 and 8 from the saved base pointers x4/x5), each covered by four pixel_satd_4x8_neon passes; w7 accumulates, the total is returned in w0.
158
+// template<int w, int h>
159
+// int satd4(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2)
160
+function x265_pixel_satd_12x32_neon
161
+ mov x4, x0
162
+ mov x5, x2
163
+ eor w7, w7, w7
164
+.rept 4
165
+ pixel_satd_4x8_neon
166
+ mov w6, v0.s[0]
167
+ add w7, w7, w6
168
+.endr
169
+
170
+ add x0, x4, #4
171
+ add x2, x5, #4
172
+.rept 4
173
+ pixel_satd_4x8_neon
174
+ mov w6, v0.s[0]
175
+ add w7, w7, w6
176
+.endr
177
+
178
+ add x0, x4, #8
179
+ add x2, x5, #8
180
+.rept 4
181
+ pixel_satd_4x8_neon
182
+ mov w6, v0.s[0]
183
+ add w7, w7, w6
184
+.endr
185
+
186
+ mov w0, w7
187
+ ret
188
+endfunc
189
+
190
+// template<int w, int h>
191
+// int satd4(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2)
192
+function x265_pixel_satd_8x8_neon
193
+ eor w4, w4, w4
194
+ mov x6, x0
195
+ mov x7, x2
196
+ pixel_satd_4x8_neon
197
+ mov w5, v0.s[0]
198
+ add w4, w4, w5
199
+ add x0, x6, #4
200
+ add x2, x7, #4
201
x265_3.4.tar.gz/source/common/aarch64/pixel-util.h
Added
42
1
2
+/*****************************************************************************
3
+ * Copyright (C) 2020 MulticoreWare, Inc
4
+ *
5
+ * Authors: Yimeng Su <yimeng.su@huawei.com>
6
+ * Hongbin Liu <liuhongbin1@huawei.com>
7
+ *
8
+ * This program is free software; you can redistribute it and/or modify
9
+ * it under the terms of the GNU General Public License as published by
10
+ * the Free Software Foundation; either version 2 of the License, or
11
+ * (at your option) any later version.
12
+ *
13
+ * This program is distributed in the hope that it will be useful,
14
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
15
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16
+ * GNU General Public License for more details.
17
+ *
18
+ * You should have received a copy of the GNU General Public License
19
+ * along with this program; if not, write to the Free Software
20
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
21
+ *
22
+ * This program is also available under a commercial proprietary license.
23
+ * For more information, contact us at license @ x265.com.
24
+ *****************************************************************************/
25
+
26
+#ifndef X265_PIXEL_UTIL_AARCH64_H
27
+#define X265_PIXEL_UTIL_AARCH64_H
28
+
29
+int x265_pixel_satd_4x4_neon(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2);
30
+int x265_pixel_satd_4x8_neon(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2);
31
+int x265_pixel_satd_4x16_neon(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2);
32
+int x265_pixel_satd_4x32_neon(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2);
33
+int x265_pixel_satd_8x4_neon(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2);
34
+int x265_pixel_satd_8x8_neon(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2);
35
+int x265_pixel_satd_12x16_neon(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2);
36
+int x265_pixel_satd_12x32_neon(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2);
37
+
38
+uint32_t x265_quant_neon(const int16_t* coef, const int32_t* quantCoeff, int32_t* deltaU, int16_t* qCoef, int qBits, int add, int numCoeff);
39
+int PFX(psyCost_4x4_neon)(const pixel* source, intptr_t sstride, const pixel* recon, intptr_t rstride);
40
+
41
+#endif // ifndef X265_PIXEL_UTIL_AARCH64_H
42
x265_3.4.tar.gz/source/common/aarch64/pixel.h
Added
107
1
2
+/*****************************************************************************
3
+ * Copyright (C) 2020 MulticoreWare, Inc
4
+ *
5
+ * Authors: Hongbin Liu <liuhongbin1@huawei.com>
6
+ *
7
+ * This program is free software; you can redistribute it and/or modify
8
+ * it under the terms of the GNU General Public License as published by
9
+ * the Free Software Foundation; either version 2 of the License, or
10
+ * (at your option) any later version.
11
+ *
12
+ * This program is distributed in the hope that it will be useful,
13
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
+ * GNU General Public License for more details.
16
+ *
17
+ * You should have received a copy of the GNU General Public License
18
+ * along with this program; if not, write to the Free Software
19
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
20
+ *
21
+ * This program is also available under a commercial proprietary license.
22
+ * For more information, contact us at license @ x265.com.
23
+ *****************************************************************************/
24
+
25
+#ifndef X265_I386_PIXEL_AARCH64_H
26
+#define X265_I386_PIXEL_AARCH64_H
27
+
28
+void x265_pixel_avg_pp_4x4_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int);
29
+void x265_pixel_avg_pp_4x8_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int);
30
+void x265_pixel_avg_pp_4x16_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int);
31
+void x265_pixel_avg_pp_8x4_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int);
32
+void x265_pixel_avg_pp_8x8_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int);
33
+void x265_pixel_avg_pp_8x16_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int);
34
+void x265_pixel_avg_pp_8x32_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int);
35
+void x265_pixel_avg_pp_12x16_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int);
36
+void x265_pixel_avg_pp_16x4_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int);
37
+void x265_pixel_avg_pp_16x8_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int);
38
+void x265_pixel_avg_pp_16x12_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int);
39
+void x265_pixel_avg_pp_16x16_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int);
40
+void x265_pixel_avg_pp_16x32_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int);
41
+void x265_pixel_avg_pp_16x64_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int);
42
+void x265_pixel_avg_pp_24x32_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int);
43
+void x265_pixel_avg_pp_32x8_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int);
44
+void x265_pixel_avg_pp_32x16_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int);
45
+void x265_pixel_avg_pp_32x24_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int);
46
+void x265_pixel_avg_pp_32x32_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int);
47
+void x265_pixel_avg_pp_32x64_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int);
48
+void x265_pixel_avg_pp_48x64_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int);
49
+void x265_pixel_avg_pp_64x16_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int);
50
+void x265_pixel_avg_pp_64x32_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int);
51
+void x265_pixel_avg_pp_64x48_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int);
52
+void x265_pixel_avg_pp_64x64_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int);
53
+
54
+void x265_sad_x3_4x4_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, intptr_t frefstride, int32_t* res);
55
+void x265_sad_x3_4x8_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, intptr_t frefstride, int32_t* res);
56
+void x265_sad_x3_4x16_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, intptr_t frefstride, int32_t* res);
57
+void x265_sad_x3_8x4_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, intptr_t frefstride, int32_t* res);
58
+void x265_sad_x3_8x8_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, intptr_t frefstride, int32_t* res);
59
+void x265_sad_x3_8x16_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, intptr_t frefstride, int32_t* res);
60
+void x265_sad_x3_8x32_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, intptr_t frefstride, int32_t* res);
61
+void x265_sad_x3_12x16_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, intptr_t frefstride, int32_t* res);
62
+void x265_sad_x3_16x4_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, intptr_t frefstride, int32_t* res);
63
+void x265_sad_x3_16x8_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, intptr_t frefstride, int32_t* res);
64
+void x265_sad_x3_16x12_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, intptr_t frefstride, int32_t* res);
65
+void x265_sad_x3_16x16_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, intptr_t frefstride, int32_t* res);
66
+void x265_sad_x3_16x32_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, intptr_t frefstride, int32_t* res);
67
+void x265_sad_x3_16x64_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, intptr_t frefstride, int32_t* res);
68
+void x265_sad_x3_24x32_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, intptr_t frefstride, int32_t* res);
69
+void x265_sad_x3_32x8_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, intptr_t frefstride, int32_t* res);
70
+void x265_sad_x3_32x16_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, intptr_t frefstride, int32_t* res);
71
+void x265_sad_x3_32x24_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, intptr_t frefstride, int32_t* res);
72
+void x265_sad_x3_32x32_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, intptr_t frefstride, int32_t* res);
73
+void x265_sad_x3_32x64_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, intptr_t frefstride, int32_t* res);
74
+void x265_sad_x3_48x64_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, intptr_t frefstride, int32_t* res);
75
+void x265_sad_x3_64x16_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, intptr_t frefstride, int32_t* res);
76
+void x265_sad_x3_64x32_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, intptr_t frefstride, int32_t* res);
77
+void x265_sad_x3_64x48_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, intptr_t frefstride, int32_t* res);
78
+void x265_sad_x3_64x64_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, intptr_t frefstride, int32_t* res);
79
+
80
+void x265_sad_x4_4x4_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, const pixel* fref3, intptr_t frefstride, int32_t* res);
81
+void x265_sad_x4_4x8_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, const pixel* fref3, intptr_t frefstride, int32_t* res);
82
+void x265_sad_x4_4x16_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, const pixel* fref3, intptr_t frefstride, int32_t* res);
83
+void x265_sad_x4_8x4_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, const pixel* fref3, intptr_t frefstride, int32_t* res);
84
+void x265_sad_x4_8x8_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, const pixel* fref3, intptr_t frefstride, int32_t* res);
85
+void x265_sad_x4_8x16_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, const pixel* fref3, intptr_t frefstride, int32_t* res);
86
+void x265_sad_x4_8x32_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, const pixel* fref3, intptr_t frefstride, int32_t* res);
87
+void x265_sad_x4_12x16_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, const pixel* fref3, intptr_t frefstride, int32_t* res);
88
+void x265_sad_x4_16x4_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, const pixel* fref3, intptr_t frefstride, int32_t* res);
89
+void x265_sad_x4_16x8_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, const pixel* fref3, intptr_t frefstride, int32_t* res);
90
+void x265_sad_x4_16x12_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, const pixel* fref3, intptr_t frefstride, int32_t* res);
91
+void x265_sad_x4_16x16_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, const pixel* fref3, intptr_t frefstride, int32_t* res);
92
+void x265_sad_x4_16x32_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, const pixel* fref3, intptr_t frefstride, int32_t* res);
93
+void x265_sad_x4_16x64_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, const pixel* fref3, intptr_t frefstride, int32_t* res);
94
+void x265_sad_x4_24x32_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, const pixel* fref3, intptr_t frefstride, int32_t* res);
95
+void x265_sad_x4_32x8_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, const pixel* fref3, intptr_t frefstride, int32_t* res);
96
+void x265_sad_x4_32x16_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, const pixel* fref3, intptr_t frefstride, int32_t* res);
97
+void x265_sad_x4_32x24_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, const pixel* fref3, intptr_t frefstride, int32_t* res);
98
+void x265_sad_x4_32x32_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, const pixel* fref3, intptr_t frefstride, int32_t* res);
99
+void x265_sad_x4_32x64_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, const pixel* fref3, intptr_t frefstride, int32_t* res);
100
+void x265_sad_x4_48x64_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, const pixel* fref3, intptr_t frefstride, int32_t* res);
101
+void x265_sad_x4_64x16_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, const pixel* fref3, intptr_t frefstride, int32_t* res);
102
+void x265_sad_x4_64x32_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, const pixel* fref3, intptr_t frefstride, int32_t* res);
103
+void x265_sad_x4_64x48_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, const pixel* fref3, intptr_t frefstride, int32_t* res);
104
+void x265_sad_x4_64x64_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, const pixel* fref3, intptr_t frefstride, int32_t* res);
105
+
106
+#endif // ifndef X265_I386_PIXEL_AARCH64_H
107
x265_3.4.tar.gz/source/common/aarch64/sad-a.S
Added
107
1
2
+/*****************************************************************************
3
+ * Copyright (C) 2020 MulticoreWare, Inc
4
+ *
5
+ * Authors: Hongbin Liu <liuhongbin1@huawei.com>
6
+ *
7
+ * This program is free software; you can redistribute it and/or modify
8
+ * it under the terms of the GNU General Public License as published by
9
+ * the Free Software Foundation; either version 2 of the License, or
10
+ * (at your option) any later version.
11
+ *
12
+ * This program is distributed in the hope that it will be useful,
13
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
+ * GNU General Public License for more details.
16
+ *
17
+ * You should have received a copy of the GNU General Public License
18
+ * along with this program; if not, write to the Free Software
19
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
20
+ *
21
+ * This program is also available under a commercial proprietary license.
22
+ * For more information, contact us at license @ x265.com.
23
+ *****************************************************************************/
24
+
25
+#include "asm.S"
26
+
27
+.section .rodata
28
+
29
+.align 4
30
+
31
+.text
32
+
33
+.macro SAD_X_START_8 x
34
+ ld1 {v0.8b}, [x0], x9
35
+.if \x == 3
36
+ ld1 {v1.8b}, [x1], x4
37
+ ld1 {v2.8b}, [x2], x4
38
+ ld1 {v3.8b}, [x3], x4
39
+.elseif \x == 4
40
+ ld1 {v1.8b}, [x1], x5
41
+ ld1 {v2.8b}, [x2], x5
42
+ ld1 {v3.8b}, [x3], x5
43
+ ld1 {v4.8b}, [x4], x5
44
+.endif
45
+ uabdl v16.8h, v0.8b, v1.8b
46
+ uabdl v17.8h, v0.8b, v2.8b
47
+ uabdl v18.8h, v0.8b, v3.8b
48
+.if \x == 4
49
+ uabdl v19.8h, v0.8b, v4.8b
50
+.endif
51
+.endm
52
+
53
+.macro SAD_X_8 x
54
+ ld1 {v0.8b}, [x0], x9
55
+.if \x == 3
56
+ ld1 {v1.8b}, [x1], x4
57
+ ld1 {v2.8b}, [x2], x4
58
+ ld1 {v3.8b}, [x3], x4
59
+.elseif \x == 4
60
+ ld1 {v1.8b}, [x1], x5
61
+ ld1 {v2.8b}, [x2], x5
62
+ ld1 {v3.8b}, [x3], x5
63
+ ld1 {v4.8b}, [x4], x5
64
+.endif
65
+ uabal v16.8h, v0.8b, v1.8b
66
+ uabal v17.8h, v0.8b, v2.8b
67
+ uabal v18.8h, v0.8b, v3.8b
68
+.if \x == 4
69
+ uabal v19.8h, v0.8b, v4.8b
70
+.endif
71
+.endm
72
+
73
+.macro SAD_X_8xN x, h
74
+function x265_sad_x\x\()_8x\h\()_neon
75
+ mov x9, #FENC_STRIDE
76
+ SAD_X_START_8 \x
77
+.rept \h - 1
78
+ SAD_X_8 \x
79
+.endr
80
+ uaddlv s0, v16.8h
81
+ uaddlv s1, v17.8h
82
+ uaddlv s2, v18.8h
83
+.if \x == 4
84
+ uaddlv s3, v19.8h
85
+.endif
86
+
87
+.if \x == 3
88
+ stp s0, s1, [x5]
89
+ str s2, [x5, #8]
90
+.elseif \x == 4
91
+ stp s0, s1, [x6]
92
+ stp s2, s3, [x6, #8]
93
+.endif
94
+ ret
95
+endfunc
96
+.endm
97
+
98
+SAD_X_8xN 3 4
99
+SAD_X_8xN 3 8
100
+SAD_X_8xN 3 16
101
+SAD_X_8xN 3 32
102
+
103
+SAD_X_8xN 4 4
104
+SAD_X_8xN 4 8
105
+SAD_X_8xN 4 16
106
+SAD_X_8xN 4 32
107
x265_3.3.tar.gz/source/common/arm/asm-primitives.cpp -> x265_3.4.tar.gz/source/common/arm/asm-primitives.cpp
Changed
201
1
2
* Praveen Kumar Tiwari <praveen@multicorewareinc.com>
3
* Min Chen <chenm003@163.com> <min.chen@multicorewareinc.com>
4
* Dnyaneshwar Gorade <dnyaneshwar@multicorewareinc.com>
5
+ * Hongbin Liu<liuhongbin1@huawei.com>
6
*
7
* This program is free software; you can redistribute it and/or modify
8
* it under the terms of the GNU General Public License as published by
9
10
p.ssim_4x4x2_core = PFX(ssim_4x4x2_core_neon);
11
12
// addAvg
13
- p.pu[LUMA_4x4].addAvg = PFX(addAvg_4x4_neon);
14
- p.pu[LUMA_4x8].addAvg = PFX(addAvg_4x8_neon);
15
- p.pu[LUMA_4x16].addAvg = PFX(addAvg_4x16_neon);
16
- p.pu[LUMA_8x4].addAvg = PFX(addAvg_8x4_neon);
17
- p.pu[LUMA_8x8].addAvg = PFX(addAvg_8x8_neon);
18
- p.pu[LUMA_8x16].addAvg = PFX(addAvg_8x16_neon);
19
- p.pu[LUMA_8x32].addAvg = PFX(addAvg_8x32_neon);
20
- p.pu[LUMA_12x16].addAvg = PFX(addAvg_12x16_neon);
21
- p.pu[LUMA_16x4].addAvg = PFX(addAvg_16x4_neon);
22
- p.pu[LUMA_16x8].addAvg = PFX(addAvg_16x8_neon);
23
- p.pu[LUMA_16x12].addAvg = PFX(addAvg_16x12_neon);
24
- p.pu[LUMA_16x16].addAvg = PFX(addAvg_16x16_neon);
25
- p.pu[LUMA_16x32].addAvg = PFX(addAvg_16x32_neon);
26
- p.pu[LUMA_16x64].addAvg = PFX(addAvg_16x64_neon);
27
- p.pu[LUMA_24x32].addAvg = PFX(addAvg_24x32_neon);
28
- p.pu[LUMA_32x8].addAvg = PFX(addAvg_32x8_neon);
29
- p.pu[LUMA_32x16].addAvg = PFX(addAvg_32x16_neon);
30
- p.pu[LUMA_32x24].addAvg = PFX(addAvg_32x24_neon);
31
- p.pu[LUMA_32x32].addAvg = PFX(addAvg_32x32_neon);
32
- p.pu[LUMA_32x64].addAvg = PFX(addAvg_32x64_neon);
33
- p.pu[LUMA_48x64].addAvg = PFX(addAvg_48x64_neon);
34
- p.pu[LUMA_64x16].addAvg = PFX(addAvg_64x16_neon);
35
- p.pu[LUMA_64x32].addAvg = PFX(addAvg_64x32_neon);
36
- p.pu[LUMA_64x48].addAvg = PFX(addAvg_64x48_neon);
37
- p.pu[LUMA_64x64].addAvg = PFX(addAvg_64x64_neon);
38
+ p.pu[LUMA_4x4].addAvg[NONALIGNED] = PFX(addAvg_4x4_neon);
39
+ p.pu[LUMA_4x8].addAvg[NONALIGNED] = PFX(addAvg_4x8_neon);
40
+ p.pu[LUMA_4x16].addAvg[NONALIGNED] = PFX(addAvg_4x16_neon);
41
+ p.pu[LUMA_8x4].addAvg[NONALIGNED] = PFX(addAvg_8x4_neon);
42
+ p.pu[LUMA_8x8].addAvg[NONALIGNED] = PFX(addAvg_8x8_neon);
43
+ p.pu[LUMA_8x16].addAvg[NONALIGNED] = PFX(addAvg_8x16_neon);
44
+ p.pu[LUMA_8x32].addAvg[NONALIGNED] = PFX(addAvg_8x32_neon);
45
+ p.pu[LUMA_12x16].addAvg[NONALIGNED] = PFX(addAvg_12x16_neon);
46
+ p.pu[LUMA_16x4].addAvg[NONALIGNED] = PFX(addAvg_16x4_neon);
47
+ p.pu[LUMA_16x8].addAvg[NONALIGNED] = PFX(addAvg_16x8_neon);
48
+ p.pu[LUMA_16x12].addAvg[NONALIGNED] = PFX(addAvg_16x12_neon);
49
+ p.pu[LUMA_16x16].addAvg[NONALIGNED] = PFX(addAvg_16x16_neon);
50
+ p.pu[LUMA_16x32].addAvg[NONALIGNED] = PFX(addAvg_16x32_neon);
51
+ p.pu[LUMA_16x64].addAvg[NONALIGNED] = PFX(addAvg_16x64_neon);
52
+ p.pu[LUMA_24x32].addAvg[NONALIGNED] = PFX(addAvg_24x32_neon);
53
+ p.pu[LUMA_32x8].addAvg[NONALIGNED] = PFX(addAvg_32x8_neon);
54
+ p.pu[LUMA_32x16].addAvg[NONALIGNED] = PFX(addAvg_32x16_neon);
55
+ p.pu[LUMA_32x24].addAvg[NONALIGNED] = PFX(addAvg_32x24_neon);
56
+ p.pu[LUMA_32x32].addAvg[NONALIGNED] = PFX(addAvg_32x32_neon);
57
+ p.pu[LUMA_32x64].addAvg[NONALIGNED] = PFX(addAvg_32x64_neon);
58
+ p.pu[LUMA_48x64].addAvg[NONALIGNED] = PFX(addAvg_48x64_neon);
59
+ p.pu[LUMA_64x16].addAvg[NONALIGNED] = PFX(addAvg_64x16_neon);
60
+ p.pu[LUMA_64x32].addAvg[NONALIGNED] = PFX(addAvg_64x32_neon);
61
+ p.pu[LUMA_64x48].addAvg[NONALIGNED] = PFX(addAvg_64x48_neon);
62
+ p.pu[LUMA_64x64].addAvg[NONALIGNED] = PFX(addAvg_64x64_neon);
63
64
// chroma addAvg
65
- p.chroma[X265_CSP_I420].pu[CHROMA_420_4x2].addAvg = PFX(addAvg_4x2_neon);
66
- p.chroma[X265_CSP_I420].pu[CHROMA_420_4x4].addAvg = PFX(addAvg_4x4_neon);
67
- p.chroma[X265_CSP_I420].pu[CHROMA_420_4x8].addAvg = PFX(addAvg_4x8_neon);
68
- p.chroma[X265_CSP_I420].pu[CHROMA_420_4x16].addAvg = PFX(addAvg_4x16_neon);
69
- p.chroma[X265_CSP_I420].pu[CHROMA_420_6x8].addAvg = PFX(addAvg_6x8_neon);
70
- p.chroma[X265_CSP_I420].pu[CHROMA_420_8x2].addAvg = PFX(addAvg_8x2_neon);
71
- p.chroma[X265_CSP_I420].pu[CHROMA_420_8x4].addAvg = PFX(addAvg_8x4_neon);
72
- p.chroma[X265_CSP_I420].pu[CHROMA_420_8x6].addAvg = PFX(addAvg_8x6_neon);
73
- p.chroma[X265_CSP_I420].pu[CHROMA_420_8x8].addAvg = PFX(addAvg_8x8_neon);
74
- p.chroma[X265_CSP_I420].pu[CHROMA_420_8x16].addAvg = PFX(addAvg_8x16_neon);
75
- p.chroma[X265_CSP_I420].pu[CHROMA_420_8x32].addAvg = PFX(addAvg_8x32_neon);
76
- p.chroma[X265_CSP_I420].pu[CHROMA_420_12x16].addAvg = PFX(addAvg_12x16_neon);
77
- p.chroma[X265_CSP_I420].pu[CHROMA_420_16x4].addAvg = PFX(addAvg_16x4_neon);
78
- p.chroma[X265_CSP_I420].pu[CHROMA_420_16x8].addAvg = PFX(addAvg_16x8_neon);
79
- p.chroma[X265_CSP_I420].pu[CHROMA_420_16x12].addAvg = PFX(addAvg_16x12_neon);
80
- p.chroma[X265_CSP_I420].pu[CHROMA_420_16x16].addAvg = PFX(addAvg_16x16_neon);
81
- p.chroma[X265_CSP_I420].pu[CHROMA_420_16x32].addAvg = PFX(addAvg_16x32_neon);
82
- p.chroma[X265_CSP_I420].pu[CHROMA_420_24x32].addAvg = PFX(addAvg_24x32_neon);
83
- p.chroma[X265_CSP_I420].pu[CHROMA_420_32x8].addAvg = PFX(addAvg_32x8_neon);
84
- p.chroma[X265_CSP_I420].pu[CHROMA_420_32x16].addAvg = PFX(addAvg_32x16_neon);
85
- p.chroma[X265_CSP_I420].pu[CHROMA_420_32x24].addAvg = PFX(addAvg_32x24_neon);
86
- p.chroma[X265_CSP_I420].pu[CHROMA_420_32x32].addAvg = PFX(addAvg_32x32_neon);
87
-
88
- p.chroma[X265_CSP_I422].pu[CHROMA_422_4x8].addAvg = PFX(addAvg_4x8_neon);
89
- p.chroma[X265_CSP_I422].pu[CHROMA_422_4x16].addAvg = PFX(addAvg_4x16_neon);
90
- p.chroma[X265_CSP_I422].pu[CHROMA_422_4x32].addAvg = PFX(addAvg_4x32_neon);
91
- p.chroma[X265_CSP_I422].pu[CHROMA_422_6x16].addAvg = PFX(addAvg_6x16_neon);
92
- p.chroma[X265_CSP_I422].pu[CHROMA_422_8x4].addAvg = PFX(addAvg_8x4_neon);
93
- p.chroma[X265_CSP_I422].pu[CHROMA_422_8x8].addAvg = PFX(addAvg_8x8_neon);
94
- p.chroma[X265_CSP_I422].pu[CHROMA_422_8x12].addAvg = PFX(addAvg_8x12_neon);
95
- p.chroma[X265_CSP_I422].pu[CHROMA_422_8x16].addAvg = PFX(addAvg_8x16_neon);
96
- p.chroma[X265_CSP_I422].pu[CHROMA_422_8x32].addAvg = PFX(addAvg_8x32_neon);
97
- p.chroma[X265_CSP_I422].pu[CHROMA_422_8x64].addAvg = PFX(addAvg_8x64_neon);
98
- p.chroma[X265_CSP_I422].pu[CHROMA_422_12x32].addAvg = PFX(addAvg_12x32_neon);
99
- p.chroma[X265_CSP_I422].pu[CHROMA_422_16x8].addAvg = PFX(addAvg_16x8_neon);
100
- p.chroma[X265_CSP_I422].pu[CHROMA_422_16x16].addAvg = PFX(addAvg_16x16_neon);
101
- p.chroma[X265_CSP_I422].pu[CHROMA_422_16x24].addAvg = PFX(addAvg_16x24_neon);
102
- p.chroma[X265_CSP_I422].pu[CHROMA_422_16x32].addAvg = PFX(addAvg_16x32_neon);
103
- p.chroma[X265_CSP_I422].pu[CHROMA_422_16x64].addAvg = PFX(addAvg_16x64_neon);
104
- p.chroma[X265_CSP_I422].pu[CHROMA_422_24x64].addAvg = PFX(addAvg_24x64_neon);
105
- p.chroma[X265_CSP_I422].pu[CHROMA_422_32x16].addAvg = PFX(addAvg_32x16_neon);
106
- p.chroma[X265_CSP_I422].pu[CHROMA_422_32x32].addAvg = PFX(addAvg_32x32_neon);
107
- p.chroma[X265_CSP_I422].pu[CHROMA_422_32x48].addAvg = PFX(addAvg_32x48_neon);
108
- p.chroma[X265_CSP_I422].pu[CHROMA_422_32x64].addAvg = PFX(addAvg_32x64_neon);
109
+ p.chroma[X265_CSP_I420].pu[CHROMA_420_4x2].addAvg[NONALIGNED] = PFX(addAvg_4x2_neon);
110
+ p.chroma[X265_CSP_I420].pu[CHROMA_420_4x4].addAvg[NONALIGNED] = PFX(addAvg_4x4_neon);
111
+ p.chroma[X265_CSP_I420].pu[CHROMA_420_4x8].addAvg[NONALIGNED] = PFX(addAvg_4x8_neon);
112
+ p.chroma[X265_CSP_I420].pu[CHROMA_420_4x16].addAvg[NONALIGNED] = PFX(addAvg_4x16_neon);
113
+ p.chroma[X265_CSP_I420].pu[CHROMA_420_6x8].addAvg[NONALIGNED] = PFX(addAvg_6x8_neon);
114
+ p.chroma[X265_CSP_I420].pu[CHROMA_420_8x2].addAvg[NONALIGNED] = PFX(addAvg_8x2_neon);
115
+ p.chroma[X265_CSP_I420].pu[CHROMA_420_8x4].addAvg[NONALIGNED] = PFX(addAvg_8x4_neon);
116
+ p.chroma[X265_CSP_I420].pu[CHROMA_420_8x6].addAvg[NONALIGNED] = PFX(addAvg_8x6_neon);
117
+ p.chroma[X265_CSP_I420].pu[CHROMA_420_8x8].addAvg[NONALIGNED] = PFX(addAvg_8x8_neon);
118
+ p.chroma[X265_CSP_I420].pu[CHROMA_420_8x16].addAvg[NONALIGNED] = PFX(addAvg_8x16_neon);
119
+ p.chroma[X265_CSP_I420].pu[CHROMA_420_8x32].addAvg[NONALIGNED] = PFX(addAvg_8x32_neon);
120
+ p.chroma[X265_CSP_I420].pu[CHROMA_420_12x16].addAvg[NONALIGNED] = PFX(addAvg_12x16_neon);
121
+ p.chroma[X265_CSP_I420].pu[CHROMA_420_16x4].addAvg[NONALIGNED] = PFX(addAvg_16x4_neon);
122
+ p.chroma[X265_CSP_I420].pu[CHROMA_420_16x8].addAvg[NONALIGNED] = PFX(addAvg_16x8_neon);
123
+ p.chroma[X265_CSP_I420].pu[CHROMA_420_16x12].addAvg[NONALIGNED] = PFX(addAvg_16x12_neon);
124
+ p.chroma[X265_CSP_I420].pu[CHROMA_420_16x16].addAvg[NONALIGNED] = PFX(addAvg_16x16_neon);
125
+ p.chroma[X265_CSP_I420].pu[CHROMA_420_16x32].addAvg[NONALIGNED] = PFX(addAvg_16x32_neon);
126
+ p.chroma[X265_CSP_I420].pu[CHROMA_420_24x32].addAvg[NONALIGNED] = PFX(addAvg_24x32_neon);
127
+ p.chroma[X265_CSP_I420].pu[CHROMA_420_32x8].addAvg[NONALIGNED] = PFX(addAvg_32x8_neon);
128
+ p.chroma[X265_CSP_I420].pu[CHROMA_420_32x16].addAvg[NONALIGNED] = PFX(addAvg_32x16_neon);
129
+ p.chroma[X265_CSP_I420].pu[CHROMA_420_32x24].addAvg[NONALIGNED] = PFX(addAvg_32x24_neon);
130
+ p.chroma[X265_CSP_I420].pu[CHROMA_420_32x32].addAvg[NONALIGNED] = PFX(addAvg_32x32_neon);
131
+
132
+ p.chroma[X265_CSP_I422].pu[CHROMA_422_4x8].addAvg[NONALIGNED] = PFX(addAvg_4x8_neon);
133
+ p.chroma[X265_CSP_I422].pu[CHROMA_422_4x16].addAvg[NONALIGNED] = PFX(addAvg_4x16_neon);
134
+ p.chroma[X265_CSP_I422].pu[CHROMA_422_4x32].addAvg[NONALIGNED] = PFX(addAvg_4x32_neon);
135
+ p.chroma[X265_CSP_I422].pu[CHROMA_422_6x16].addAvg[NONALIGNED] = PFX(addAvg_6x16_neon);
136
+ p.chroma[X265_CSP_I422].pu[CHROMA_422_8x4].addAvg[NONALIGNED] = PFX(addAvg_8x4_neon);
137
+ p.chroma[X265_CSP_I422].pu[CHROMA_422_8x8].addAvg[NONALIGNED] = PFX(addAvg_8x8_neon);
138
+ p.chroma[X265_CSP_I422].pu[CHROMA_422_8x12].addAvg[NONALIGNED] = PFX(addAvg_8x12_neon);
139
+ p.chroma[X265_CSP_I422].pu[CHROMA_422_8x16].addAvg[NONALIGNED] = PFX(addAvg_8x16_neon);
140
+ p.chroma[X265_CSP_I422].pu[CHROMA_422_8x32].addAvg[NONALIGNED] = PFX(addAvg_8x32_neon);
141
+ p.chroma[X265_CSP_I422].pu[CHROMA_422_8x64].addAvg[NONALIGNED] = PFX(addAvg_8x64_neon);
142
+ p.chroma[X265_CSP_I422].pu[CHROMA_422_12x32].addAvg[NONALIGNED] = PFX(addAvg_12x32_neon);
143
+ p.chroma[X265_CSP_I422].pu[CHROMA_422_16x8].addAvg[NONALIGNED] = PFX(addAvg_16x8_neon);
144
+ p.chroma[X265_CSP_I422].pu[CHROMA_422_16x16].addAvg[NONALIGNED] = PFX(addAvg_16x16_neon);
145
+ p.chroma[X265_CSP_I422].pu[CHROMA_422_16x24].addAvg[NONALIGNED] = PFX(addAvg_16x24_neon);
146
+ p.chroma[X265_CSP_I422].pu[CHROMA_422_16x32].addAvg[NONALIGNED] = PFX(addAvg_16x32_neon);
147
+ p.chroma[X265_CSP_I422].pu[CHROMA_422_16x64].addAvg[NONALIGNED] = PFX(addAvg_16x64_neon);
148
+ p.chroma[X265_CSP_I422].pu[CHROMA_422_24x64].addAvg[NONALIGNED] = PFX(addAvg_24x64_neon);
149
+ p.chroma[X265_CSP_I422].pu[CHROMA_422_32x16].addAvg[NONALIGNED] = PFX(addAvg_32x16_neon);
150
+ p.chroma[X265_CSP_I422].pu[CHROMA_422_32x32].addAvg[NONALIGNED] = PFX(addAvg_32x32_neon);
151
+ p.chroma[X265_CSP_I422].pu[CHROMA_422_32x48].addAvg[NONALIGNED] = PFX(addAvg_32x48_neon);
152
+ p.chroma[X265_CSP_I422].pu[CHROMA_422_32x64].addAvg[NONALIGNED] = PFX(addAvg_32x64_neon);
153
154
// quant
155
p.quant = PFX(quant_neon);
156
157
p.scale2D_64to32 = PFX(scale2D_64to32_neon);
158
159
// scale1D_128to64
160
- p.scale1D_128to64 = PFX(scale1D_128to64_neon);
161
+ p.scale1D_128to64[NONALIGNED] = PFX(scale1D_128to64_neon);
162
163
// copy_count
164
p.cu[BLOCK_4x4].copy_cnt = PFX(copy_cnt_4_neon);
165
166
p.cu[BLOCK_32x32].copy_cnt = PFX(copy_cnt_32_neon);
167
168
// filterPixelToShort
169
- p.pu[LUMA_4x4].convert_p2s = PFX(filterPixelToShort_4x4_neon);
170
- p.pu[LUMA_4x8].convert_p2s = PFX(filterPixelToShort_4x8_neon);
171
- p.pu[LUMA_4x16].convert_p2s = PFX(filterPixelToShort_4x16_neon);
172
- p.pu[LUMA_8x4].convert_p2s = PFX(filterPixelToShort_8x4_neon);
173
- p.pu[LUMA_8x8].convert_p2s = PFX(filterPixelToShort_8x8_neon);
174
- p.pu[LUMA_8x16].convert_p2s = PFX(filterPixelToShort_8x16_neon);
175
- p.pu[LUMA_8x32].convert_p2s = PFX(filterPixelToShort_8x32_neon);
176
- p.pu[LUMA_12x16].convert_p2s = PFX(filterPixelToShort_12x16_neon);
177
- p.pu[LUMA_16x4].convert_p2s = PFX(filterPixelToShort_16x4_neon);
178
- p.pu[LUMA_16x8].convert_p2s = PFX(filterPixelToShort_16x8_neon);
179
- p.pu[LUMA_16x12].convert_p2s = PFX(filterPixelToShort_16x12_neon);
180
- p.pu[LUMA_16x16].convert_p2s = PFX(filterPixelToShort_16x16_neon);
181
- p.pu[LUMA_16x32].convert_p2s = PFX(filterPixelToShort_16x32_neon);
182
- p.pu[LUMA_16x64].convert_p2s = PFX(filterPixelToShort_16x64_neon);
183
- p.pu[LUMA_24x32].convert_p2s = PFX(filterPixelToShort_24x32_neon);
184
- p.pu[LUMA_32x8].convert_p2s = PFX(filterPixelToShort_32x8_neon);
185
- p.pu[LUMA_32x16].convert_p2s = PFX(filterPixelToShort_32x16_neon);
186
- p.pu[LUMA_32x24].convert_p2s = PFX(filterPixelToShort_32x24_neon);
187
- p.pu[LUMA_32x32].convert_p2s = PFX(filterPixelToShort_32x32_neon);
188
- p.pu[LUMA_32x64].convert_p2s = PFX(filterPixelToShort_32x64_neon);
189
- p.pu[LUMA_48x64].convert_p2s = PFX(filterPixelToShort_48x64_neon);
190
- p.pu[LUMA_64x16].convert_p2s = PFX(filterPixelToShort_64x16_neon);
191
- p.pu[LUMA_64x32].convert_p2s = PFX(filterPixelToShort_64x32_neon);
192
- p.pu[LUMA_64x48].convert_p2s = PFX(filterPixelToShort_64x48_neon);
193
- p.pu[LUMA_64x64].convert_p2s = PFX(filterPixelToShort_64x64_neon);
194
+ p.pu[LUMA_4x4].convert_p2s[NONALIGNED] = PFX(filterPixelToShort_4x4_neon);
195
+ p.pu[LUMA_4x8].convert_p2s[NONALIGNED] = PFX(filterPixelToShort_4x8_neon);
196
+ p.pu[LUMA_4x16].convert_p2s[NONALIGNED] = PFX(filterPixelToShort_4x16_neon);
197
+ p.pu[LUMA_8x4].convert_p2s[NONALIGNED] = PFX(filterPixelToShort_8x4_neon);
198
+ p.pu[LUMA_8x8].convert_p2s[NONALIGNED] = PFX(filterPixelToShort_8x8_neon);
199
+ p.pu[LUMA_8x16].convert_p2s[NONALIGNED] = PFX(filterPixelToShort_8x16_neon);
200
+ p.pu[LUMA_8x32].convert_p2s[NONALIGNED] = PFX(filterPixelToShort_8x32_neon);
201
x265_3.3.tar.gz/source/common/common.h -> x265_3.4.tar.gz/source/common/common.h
Changed
27
1
2
typedef uint64_t sum2_t;
3
typedef uint64_t pixel4;
4
typedef int64_t ssum2_t;
5
+#define SHIFT_TO_BITPLANE 9
6
#define HISTOGRAM_BINS 1024
7
#else
8
typedef uint8_t pixel;
9
10
typedef uint32_t sum2_t;
11
typedef uint32_t pixel4;
12
typedef int32_t ssum2_t; // Signed sum
13
+#define SHIFT_TO_BITPLANE 7
14
#define HISTOGRAM_BINS 256
15
#endif // if HIGH_BIT_DEPTH
16
17
18
#define MAX_TR_SIZE (1 << MAX_LOG2_TR_SIZE)
19
#define MAX_TS_SIZE (1 << MAX_LOG2_TS_SIZE)
20
21
+#define RDCOST_BASED_RSKIP 1
22
+#define EDGE_BASED_RSKIP 2
23
+
24
#define COEF_REMAIN_BIN_REDUCTION 3 // indicates the level at which the VLC
25
// transitions from Golomb-Rice to TU+EG(k)
26
27
x265_3.3.tar.gz/source/common/cpu.cpp -> x265_3.4.tar.gz/source/common/cpu.cpp
Changed
19
1
2
* Laurent Aimar <fenrir@via.ecp.fr>
3
* Fiona Glaser <fiona@x264.com>
4
* Steve Borho <steve@borho.org>
5
+ * Hongbin Liu <liuhongbin1@huawei.com>
6
+ * Yimeng Su <yimeng.su@huawei.com>
7
*
8
* This program is free software; you can redistribute it and/or modify
9
* it under the terms of the GNU General Public License as published by
10
11
flags |= PFX(cpu_fast_neon_mrc_test)() ? X265_CPU_FAST_NEON_MRC : 0;
12
#endif
13
// TODO: write dual issue test? currently it's A8 (dual issue) vs. A9 (fast mrc)
14
+#elif X265_ARCH_ARM64
15
+ flags |= X265_CPU_NEON;
16
#endif // if HAVE_ARMV6
17
return flags;
18
}
19
x265_3.3.tar.gz/source/common/frame.cpp -> x265_3.4.tar.gz/source/common/frame.cpp
Changed
41
1
2
m_edgePic = NULL;
3
m_gaussianPic = NULL;
4
m_thetaPic = NULL;
5
+ m_edgeBitPlane = NULL;
6
+ m_edgeBitPic = NULL;
7
}
8
9
bool Frame::create(x265_param *param, float* quantOffsets)
10
11
m_thetaPic = X265_MALLOC(pixel, m_stride * (maxHeight + (m_lumaMarginY * 2)));
12
}
13
14
+ if (param->recursionSkipMode == EDGE_BASED_RSKIP)
15
+ {
16
+ uint32_t numCuInWidth = (param->sourceWidth + param->maxCUSize - 1) / param->maxCUSize;
17
+ uint32_t numCuInHeight = (param->sourceHeight + param->maxCUSize - 1) / param->maxCUSize;
18
+ uint32_t lumaMarginX = param->maxCUSize + 32;
19
+ uint32_t lumaMarginY = param->maxCUSize + 16;
20
+ uint32_t stride = (numCuInWidth * param->maxCUSize) + (lumaMarginX << 1);
21
+ uint32_t maxHeight = numCuInHeight * param->maxCUSize;
22
+ uint32_t bitPlaneSize = stride * (maxHeight + (lumaMarginY * 2));
23
+ CHECKED_MALLOC_ZERO(m_edgeBitPlane, pixel, bitPlaneSize);
24
+ m_edgeBitPic = m_edgeBitPlane + lumaMarginY * stride + lumaMarginX;
25
+ }
26
+
27
if (m_fencPic->create(param, !!m_param->bCopyPicToFrame) && m_lowres.create(param, m_fencPic, param->rc.qgSize))
28
{
29
X265_CHECK((m_reconColCount == NULL), "m_reconColCount was initialized");
30
31
X265_FREE(m_gaussianPic);
32
X265_FREE(m_thetaPic);
33
}
34
+
35
+ if (m_param->recursionSkipMode == EDGE_BASED_RSKIP)
36
+ {
37
+ X265_FREE_ZERO(m_edgeBitPlane);
38
+ m_edgeBitPic = NULL;
39
+ }
40
}
41
x265_3.3.tar.gz/source/common/frame.h -> x265_3.4.tar.gz/source/common/frame.h
Changed
21
1
2
float* m_quantOffsets; // points to quantOffsets in x265_picture
3
x265_sei m_userSEI;
4
uint32_t m_picStruct; // picture structure SEI message
5
- x265_dolby_vision_rpu m_rpu;
6
+ x265_dolby_vision_rpu m_rpu;
7
8
/* Frame Parallelism - notification between FrameEncoders of available motion reference rows */
9
ThreadSafeInteger* m_reconRowFlag; // flag of CTU rows completely reconstructed and extended for motion reference
10
11
pixel* m_gaussianPic;
12
pixel* m_thetaPic;
13
14
+ /* edge bit plane for rskips 2 and 3 */
15
+ pixel* m_edgeBitPlane;
16
+ pixel* m_edgeBitPic;
17
+
18
Frame();
19
20
bool create(x265_param *param, float* quantOffsets);
21
x265_3.3.tar.gz/source/common/param.cpp -> x265_3.4.tar.gz/source/common/param.cpp
Changed
145
1
2
param->bEnableWeightedPred = 1;
3
param->bEnableWeightedBiPred = 0;
4
param->bEnableEarlySkip = 1;
5
- param->bEnableRecursionSkip = 1;
6
+ param->recursionSkipMode = 1;
7
+ param->edgeVarThreshold = 0.05f;
8
param->bEnableAMP = 0;
9
param->bEnableRectInter = 0;
10
param->rdLevel = 3;
11
12
param->rc.bEnableConstVbv = 0;
13
param->bResetZoneConfig = 1;
14
param->reconfigWindowSize = 0;
15
+ param->decoderVbvMaxRate = 0;
16
17
/* Video Usability Information (VUI) */
18
param->vui.aspectRatioIdc = 0;
19
20
param->maxNumMergeCand = 5;
21
param->searchMethod = X265_STAR_SEARCH;
22
param->bEnableTransformSkip = 1;
23
- param->bEnableRecursionSkip = 0;
24
+ param->recursionSkipMode = 0;
25
param->maxNumReferences = 5;
26
param->limitReferences = 0;
27
param->lookaheadSlices = 0; // disabled for best quality
28
29
param->rc.hevcAq = 0;
30
param->rc.qpStep = 1;
31
param->rc.bEnableGrain = 1;
32
- param->bEnableRecursionSkip = 0;
33
+ param->recursionSkipMode = 0;
34
param->psyRd = 4.0;
35
param->psyRdoq = 10.0;
36
param->bEnableSAO = 0;
37
38
OPT("ref") p->maxNumReferences = atoi(value);
39
OPT("fast-intra") p->bEnableFastIntra = atobool(value);
40
OPT("early-skip") p->bEnableEarlySkip = atobool(value);
41
- OPT("rskip") p->bEnableRecursionSkip = atobool(value);
42
- OPT("me")p->searchMethod = parseName(value, x265_motion_est_names, bError);
43
+ OPT("rskip") p->recursionSkipMode = atoi(value);
44
+ OPT("rskip-edge-threshold") p->edgeVarThreshold = atoi(value)/100.0f;
45
+ OPT("me") p->searchMethod = parseName(value, x265_motion_est_names, bError);
46
OPT("subme") p->subpelRefine = atoi(value);
47
OPT("merange") p->searchRange = atoi(value);
48
OPT("rect") p->bEnableRectInter = atobool(value);
49
50
OPT("max-merge") p->maxNumMergeCand = (uint32_t)atoi(value);
51
OPT("temporal-mvp") p->bEnableTemporalMvp = atobool(value);
52
OPT("early-skip") p->bEnableEarlySkip = atobool(value);
53
- OPT("rskip") p->bEnableRecursionSkip = atobool(value);
54
+ OPT("rskip") p->recursionSkipMode = atoi(value);
55
OPT("rdpenalty") p->rdPenalty = atoi(value);
56
OPT("tskip") p->bEnableTransformSkip = atobool(value);
57
OPT("no-tskip-fast") p->bEnableTSkipFast = atobool(value);
58
59
}
60
}
61
OPT("hist-threshold") p->edgeTransitionThreshold = atof(value);
62
+ OPT("rskip-edge-threshold") p->edgeVarThreshold = atoi(value)/100.0f;
63
OPT("lookahead-threads") p->lookaheadThreads = atoi(value);
64
OPT("opt-cu-delta-qp") p->bOptCUDeltaQP = atobool(value);
65
OPT("multi-pass-opt-analysis") p->analysisMultiPassRefine = atobool(value);
66
67
CHECK(param->rdLevel < 1 || param->rdLevel > 6,
68
"RD Level is out of range");
69
CHECK(param->rdoqLevel < 0 || param->rdoqLevel > 2,
70
- "RDOQ Level is out of range");
71
+ "RDOQ Level is out of range");
72
CHECK(param->dynamicRd < 0 || param->dynamicRd > x265_ADAPT_RD_STRENGTH,
73
- "Dynamic RD strength must be between 0 and 4");
74
+ "Dynamic RD strength must be between 0 and 4");
75
+ CHECK(param->recursionSkipMode > 2 || param->recursionSkipMode < 0,
76
+ "Invalid Recursion skip mode. Valid modes 0,1,2");
77
+ if (param->recursionSkipMode == EDGE_BASED_RSKIP)
78
+ {
79
+ CHECK(param->edgeVarThreshold < 0.0f || param->edgeVarThreshold > 1.0f,
80
+ "Minimum edge density percentage for a CU should be an integer between 0 to 100");
81
+ }
82
CHECK(param->bframes && param->bframes >= param->lookaheadDepth && !param->rc.bStatRead,
83
"Lookahead depth must be greater than the max consecutive bframe count");
84
CHECK(param->bframes < 0,
85
86
}
87
CHECK(param->confWinRightOffset < 0, "Conformance Window Right Offset must be 0 or greater");
88
CHECK(param->confWinBottomOffset < 0, "Conformance Window Bottom Offset must be 0 or greater");
89
+ CHECK(param->decoderVbvMaxRate < 0, "Invalid Decoder Vbv Maxrate. Value can not be less than zero");
90
return check_failed;
91
}
92
93
94
TOOLVAL(param->psyRdoq, "psy-rdoq=%.2lf");
95
TOOLOPT(param->bEnableRdRefine, "rd-refine");
96
TOOLOPT(param->bEnableEarlySkip, "early-skip");
97
- TOOLOPT(param->bEnableRecursionSkip, "rskip");
98
+ TOOLVAL(param->recursionSkipMode, "rskip mode=%d");
99
+ if (param->recursionSkipMode == EDGE_BASED_RSKIP)
100
+ TOOLVAL(param->edgeVarThreshold, "rskip-edge-threshold=%.2f");
101
TOOLOPT(param->bEnableSplitRdSkip, "splitrd-skip");
102
TOOLVAL(param->noiseReductionIntra, "nr-intra=%d");
103
TOOLVAL(param->noiseReductionInter, "nr-inter=%d");
104
105
s += sprintf(s, " rd=%d", p->rdLevel);
106
s += sprintf(s, " selective-sao=%d", p->selectiveSAO);
107
BOOL(p->bEnableEarlySkip, "early-skip");
108
- BOOL(p->bEnableRecursionSkip, "rskip");
109
+ BOOL(p->recursionSkipMode, "rskip");
110
+ if (p->recursionSkipMode == EDGE_BASED_RSKIP)
111
+ s += sprintf(s, " rskip-edge-threshold=%f", p->edgeVarThreshold);
112
+
113
BOOL(p->bEnableFastIntra, "fast-intra");
114
BOOL(p->bEnableTSkipFast, "tskip-fast");
115
BOOL(p->bCULossless, "cu-lossless");
116
117
if (p->bEnableSceneCutAwareQp)
118
s += sprintf(s, " scenecut-window=%d max-qp-delta=%d", p->scenecutWindow, p->maxQpDelta);
119
s += sprintf(s, "conformance-window-offsets right=%d bottom=%d", p->confWinRightOffset, p->confWinBottomOffset);
120
+ s += sprintf(s, " decoder-max-rate=%d", p->decoderVbvMaxRate);
121
#undef BOOL
122
return buf;
123
}
124
125
dst->bSaoNonDeblocked = src->bSaoNonDeblocked;
126
dst->rdLevel = src->rdLevel;
127
dst->bEnableEarlySkip = src->bEnableEarlySkip;
128
- dst->bEnableRecursionSkip = src->bEnableRecursionSkip;
129
+ dst->recursionSkipMode = src->recursionSkipMode;
130
+ dst->edgeVarThreshold = src->edgeVarThreshold;
131
dst->bEnableFastIntra = src->bEnableFastIntra;
132
dst->bEnableTSkipFast = src->bEnableTSkipFast;
133
dst->bCULossless = src->bCULossless;
134
135
dst->rc.zonefileCount = src->rc.zonefileCount;
136
dst->reconfigWindowSize = src->reconfigWindowSize;
137
dst->bResetZoneConfig = src->bResetZoneConfig;
138
+ dst->decoderVbvMaxRate = src->decoderVbvMaxRate;
139
140
- if (src->rc.zonefileCount && src->rc.zones)
141
+ if (src->rc.zonefileCount && src->rc.zones && src->bResetZoneConfig)
142
{
143
for (int i = 0; i < src->rc.zonefileCount; i++)
144
{
145
x265_3.3.tar.gz/source/common/pixel.cpp -> x265_3.4.tar.gz/source/common/pixel.cpp
Changed
58
1
2
* Mandar Gurav <mandar@multicorewareinc.com>
3
* Mahesh Pittala <mahesh@multicorewareinc.com>
4
* Min Chen <min.chen@multicorewareinc.com>
5
+ * Hongbin Liu<liuhongbin1@huawei.com>
6
*
7
* This program is free software; you can redistribute it and/or modify
8
* it under the terms of the GNU General Public License as published by
9
10
{
11
int satd = 0;
12
13
+#if ENABLE_ASSEMBLY && X265_ARCH_ARM64
14
+ pixelcmp_t satd_4x4 = x265_pixel_satd_4x4_neon;
15
+#endif
16
+
17
for (int row = 0; row < h; row += 4)
18
for (int col = 0; col < w; col += 4)
19
satd += satd_4x4(pix1 + row * stride_pix1 + col, stride_pix1,
20
21
{
22
int satd = 0;
23
24
+#if ENABLE_ASSEMBLY && X265_ARCH_ARM64
25
+ pixelcmp_t satd_8x4 = x265_pixel_satd_8x4_neon;
26
+#endif
27
+
28
for (int row = 0; row < h; row += 4)
29
for (int col = 0; col < w; col += 8)
30
satd += satd_8x4(pix1 + row * stride_pix1 + col, stride_pix1,
31
32
}
33
}
34
35
+static void planecopy_pp_shr_c(const pixel* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int width, int height, int shift)
36
+{
37
+ for (int r = 0; r < height; r++)
38
+ {
39
+ for (int c = 0; c < width; c++)
40
+ dst[c] = (pixel)((src[c] >> shift));
41
+
42
+ dst += dstStride;
43
+ src += srcStride;
44
+ }
45
+}
46
+
47
static void planecopy_sp_shl_c(const uint16_t* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int width, int height, int shift, uint16_t mask)
48
{
49
for (int r = 0; r < height; r++)
50
51
p.planecopy_cp = planecopy_cp_c;
52
p.planecopy_sp = planecopy_sp_c;
53
p.planecopy_sp_shl = planecopy_sp_shl_c;
54
+ p.planecopy_pp_shr = planecopy_pp_shr_c;
55
#if HIGH_BIT_DEPTH
56
p.planeClipAndMax = planeClipAndMax_c;
57
#endif
58
x265_3.3.tar.gz/source/common/primitives.h -> x265_3.4.tar.gz/source/common/primitives.h
Changed
47
1
2
* Rajesh Paulraj <rajesh@multicorewareinc.com>
3
* Praveen Kumar Tiwari <praveen@multicorewareinc.com>
4
* Min Chen <chenm003@163.com>
5
+ * Hongbin Liu<liuhongbin1@huawei.com>
6
+ * Yimeng Su <yimeng.su@huawei.com>
7
*
8
* This program is free software; you can redistribute it and/or modify
9
* it under the terms of the GNU General Public License as published by
10
11
typedef void (*sign_t)(int8_t *dst, const pixel *src1, const pixel *src2, const int endX);
12
typedef void (*planecopy_cp_t) (const uint8_t* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int width, int height, int shift);
13
typedef void (*planecopy_sp_t) (const uint16_t* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int width, int height, int shift, uint16_t mask);
14
+typedef void (*planecopy_pp_t) (const pixel* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int width, int height, int shift);
15
typedef pixel (*planeClipAndMax_t)(pixel *src, intptr_t stride, int width, int height, uint64_t *outsum, const pixel minPix, const pixel maxPix);
16
17
typedef void (*cutree_propagate_cost) (int* dst, const uint16_t* propagateIn, const int32_t* intraCosts, const uint16_t* interCosts, const int32_t* invQscales, const double* fpsFactor, int len);
18
19
planecopy_cp_t planecopy_cp;
20
planecopy_sp_t planecopy_sp;
21
planecopy_sp_t planecopy_sp_shl;
22
+ planecopy_pp_t planecopy_pp_shr;
23
planeClipAndMax_t planeClipAndMax;
24
25
weightp_sp_t weight_sp;
26
27
void setupInstrinsicPrimitives(EncoderPrimitives &p, int cpuMask);
28
void setupAssemblyPrimitives(EncoderPrimitives &p, int cpuMask);
29
void setupAliasPrimitives(EncoderPrimitives &p);
30
+#if X265_ARCH_ARM64
31
+void setupAliasCPrimitives(EncoderPrimitives &cp, EncoderPrimitives &asmp, int cpuMask);
32
+#endif
33
#if HAVE_ALTIVEC
34
void setupPixelPrimitives_altivec(EncoderPrimitives &p);
35
void setupDCTPrimitives_altivec(EncoderPrimitives &p);
36
37
extern const char* PFX(build_info_str);
38
#endif
39
40
+#if ENABLE_ASSEMBLY && X265_ARCH_ARM64
41
+extern "C" {
42
+#include "aarch64/pixel-util.h"
43
+}
44
+#endif
45
+
46
#endif // ifndef X265_PRIMITIVES_H
47
x265_3.4.tar.gz/source/common/scaler.cpp
Added
201
1
2
+/*****************************************************************************
3
+* Copyright (C) 2013-2020 MulticoreWare, Inc
4
+*
5
+* Authors: Pooja Venkatesan <pooja@multicorewareinc.com>
6
+*
7
+* This program is free software; you can redistribute it and/or modify
8
+* it under the terms of the GNU General Public License as published by
9
+* the Free Software Foundation; either version 2 of the License, or
10
+* (at your option) any later version.
11
+*
12
+* This program is distributed in the hope that it will be useful,
13
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
+* GNU General Public License for more details.
16
+*
17
+* You should have received a copy of the GNU General Public License
18
+* along with this program; if not, write to the Free Software
19
+* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
20
+*
21
+* This program is also available under a commercial proprietary license.
22
+* For more information, contact us at license @ x265.com.
23
+*****************************************************************************/
24
+
25
+#include "scaler.h"
26
+
27
+#if _MSC_VER
28
+#pragma warning(disable: 4706) // assignment within conditional
29
+#pragma warning(disable: 4244) // '=' : possible loss of data
30
+#endif
31
+
32
+#define SHORT_MIN (-(1 << 15))
33
+#define SHORT_MAX ((1 << 15) - 1)
34
+#define SHORT_MAX_10 ((1 << 10) - 1)
35
+
36
+namespace X265_NS{
37
+
38
+ScalerFilterManager::ScalerFilterManager() :
39
+ m_bitDepth(0),
40
+ m_algorithmFlags(0),
41
+ m_srcW(0),
42
+ m_srcH(0),
43
+ m_dstW(0),
44
+ m_dstH(0),
45
+ m_crSrcW(0),
46
+ m_crSrcH(0),
47
+ m_crDstW(0),
48
+ m_crDstH(0),
49
+ m_crSrcHSubSample(0),
50
+ m_crSrcVSubSample(0),
51
+ m_crDstHSubSample(0),
52
+ m_crDstVSubSample(0)
53
+{
54
+ for (int i = 0; i < m_numSlice; i++)
55
+ m_slices[i] = NULL;
56
+ for (int i = 0; i < m_numFilter; i++)
57
+ m_ScalerFilters[i] = NULL;
58
+}
59
+
60
+inline static void filter_copy_c(int64_t* filter, int64_t* filter2, int size)
61
+{
62
+ for (int i = 0; i < size; i++)
63
+ filter2[i] = filter[i];
64
+}
65
+
66
+#if X265_DEPTH == 8
67
+static void doScaling_c(int16_t *dst, int dstW, const uint8_t *src, const int16_t *filter, const int32_t *filterPos, int filterSize)
68
+{
69
+ for (int i = 0; i < dstW; i++)
70
+ {
71
+ int val = 0;
72
+ int sourcePos = filterPos[i];
73
+ for (int j = 0; j < filterSize; j++)
74
+ val += ((int)src[sourcePos + j]) * filter[filterSize * i + j];
75
+ // the cubic equation does overflow ...
76
+ dst[i] = x265_clip3(SHORT_MIN, SHORT_MAX, val >> 7);
77
+ }
78
+}
79
+static uint8_t clipUint8(int a)
80
+{
81
+ if (a&(~0xFF))
82
+ return (-a) >> 31;
83
+ else
84
+ return a;
85
+}
86
+
87
+static void yuv2PlaneX_c(const int16_t *filter, int filterSize, const int16_t **src, uint8_t *dest, int dstW)
88
+{
89
+ for (int i = 0; i < dstW; i++)
90
+ {
91
+ int val = 64 << 12;
92
+ for (int j = 0; j < filterSize; j++)
93
+ val += src[j][i] * filter[j];
94
+ dest[i] = clipUint8(val >> 19);
95
+ }
96
+}
97
+#else
98
+static void yuv2PlaneX_c_h(const int16_t *filter, int filterSize, const int16_t **src, uint8_t *dest, int dstW)
99
+{
100
+ for (int i = 0; i < dstW; i++)
101
+ {
102
+ int val = 1 << 16;
103
+ uint16_t* dst16bit = (uint16_t *)dest;
104
+ for (int j = 0; j < filterSize; j++)
105
+ val += src[j][i] * filter[j];
106
+ uint16_t d = x265_clip3(0, SHORT_MAX_10, val >> 17);
107
+ ((uint8_t*)(&dst16bit[i]))[0] = (d);
108
+ ((uint8_t*)(&dst16bit[i]))[1] = (d) >> 8;
109
+ }
110
+}
111
+static void doScaling_c_h(int16_t *dst, int dstW, const uint8_t *src, const int16_t *filter, const int32_t *filterPos, int filterSize)
112
+{
113
+ const uint16_t *srcLocal = (const uint16_t *)src;
114
+ for (int i = 0; i < dstW; i++)
115
+ {
116
+ int val = 0;
117
+ int sourcePos = filterPos[i];
118
+ for (int j = 0; j < filterSize; j++)
119
+ val += ((int)srcLocal[sourcePos + j]) * filter[filterSize * i + j];
120
+ // the cubic equation does overflow
121
+ dst[i] = x265_clip3(SHORT_MIN, SHORT_MAX, val >> 9);
122
+ }
123
+}
124
+#endif
125
+
126
+ScalerFilter::ScalerFilter() :
127
+ m_filtLen(0),
128
+ m_filtPos(NULL),
129
+ m_filt(NULL),
130
+ m_sourceSlice(NULL),
131
+ m_destSlice(NULL)
132
+{
133
+}
134
+
135
+ScalerFilter::~ScalerFilter()
136
+{
137
+ if (m_filtPos) {
138
+ delete[] m_filtPos; m_filtPos = NULL;
139
+ }
140
+ if (m_filt) {
141
+ delete[] m_filt; m_filt = NULL;
142
+ }
143
+}
144
+
145
+void ScalerHLumFilter::process(int sliceVer, int sliceHor)
146
+{
147
+ uint8_t ** src = m_sourceSlice->m_plane[0].lineBuf;
148
+ uint8_t ** dst = m_destSlice->m_plane[0].lineBuf;
149
+ int sourcePos = sliceVer - m_sourceSlice->m_plane[0].sliceVer;
150
+ int destPos = sliceVer - m_destSlice->m_plane[0].sliceVer;
151
+ int dstW = m_destSlice->m_width;
152
+ for (int i = 0; i < sliceHor; ++i)
153
+ {
154
+ m_hFilterScaler->doScaling((int16_t*)dst[destPos + i], dstW, (const uint8_t *)src[sourcePos + i], m_filt, m_filtPos, m_filtLen);
155
+ m_destSlice->m_plane[0].sliceHor += 1;
156
+ }
157
+}
158
+
159
+void ScalerHCrFilter::process(int sliceVer, int sliceHor)
160
+{
161
+ uint8_t ** src1 = m_sourceSlice->m_plane[1].lineBuf;
162
+ uint8_t ** dst1 = m_destSlice->m_plane[1].lineBuf;
163
+ uint8_t ** src2 = m_sourceSlice->m_plane[2].lineBuf;
164
+ uint8_t ** dst2 = m_destSlice->m_plane[2].lineBuf;
165
+
166
+ int sourcePos1 = sliceVer - m_sourceSlice->m_plane[1].sliceVer;
167
+ int destPos1 = sliceVer - m_destSlice->m_plane[1].sliceVer;
168
+ int sourcePos2 = sliceVer - m_sourceSlice->m_plane[2].sliceVer;
169
+ int destPos2 = sliceVer - m_destSlice->m_plane[2].sliceVer;
170
+
171
+ int dstW = m_destSlice->m_width >> m_destSlice->m_hCrSubSample;
172
+
173
+ for (int i = 0; i < sliceHor; ++i)
174
+ {
175
+ m_hFilterScaler->doScaling((int16_t*)dst1[destPos1 + i], dstW, src1[sourcePos1 + i], m_filt, m_filtPos, m_filtLen);
176
+ m_hFilterScaler->doScaling((int16_t*)dst2[destPos2 + i], dstW, src2[sourcePos2 + i], m_filt, m_filtPos, m_filtLen);
177
+ m_destSlice->m_plane[1].sliceHor += 1;
178
+ m_destSlice->m_plane[2].sliceHor += 1;
179
+ }
180
+}
181
+
182
+void VFilterScaler8Bit::yuv2PlaneX(const int16_t *filter, int filterSize, const int16_t **src, uint8_t *dest, int dstW)
183
+{
184
+ int IdxW = FACTOR_4;
185
+ int IdxF = FIL_DEF;
186
+
187
+ (dstW % 4 == 0) && (filterSize == 6) && (IdxF = FIL_6) && (IdxW = FACTOR_4);
188
+ (dstW % 4 == 0) && (filterSize == 8) && (IdxF = FIL_8) && (IdxW = FACTOR_4);
189
+
190
+#if X265_DEPTH == 8
191
+ yuv2PlaneX_c(filter, filterSize, src, dest, dstW);
192
+#else
193
+ yuv2PlaneX_c_h(filter, filterSize, src, dest, dstW);
194
+#endif
195
+}
196
+
197
+void VFilterScaler10Bit::yuv2PlaneX(const int16_t *filter, int filterSize, const int16_t **src, uint8_t *dest, int dstW)
198
+{
199
+ int IdxW = FACTOR_4;
200
+ int IdxF = FIL_DEF;
201
x265_3.4.tar.gz/source/common/scaler.h
Added
201
1
2
+/*****************************************************************************
3
+ * Copyright (C) 2013-2020 MulticoreWare, Inc
4
+ *
5
+ * Authors: Pooja Venkatesan <pooja@multicorewareinc.com>
6
+ *
7
+ * This program is free software; you can redistribute it and/or modify
8
+ * it under the terms of the GNU General Public License as published by
9
+ * the Free Software Foundation; either version 2 of the License, or
10
+ * (at your option) any later version.
11
+ *
12
+ * This program is distributed in the hope that it will be useful,
13
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
+ * GNU General Public License for more details.
16
+ *
17
+ * You should have received a copy of the GNU General Public License
18
+ * along with this program; if not, write to the Free Software
19
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
20
+ *
21
+ * This program is also available under a commercial proprietary license.
22
+ * For more information, contact us at license @ x265.com.
23
+ *****************************************************************************/
24
+
25
+#ifndef X265_SCALER_H
26
+#define X265_SCALER_H
27
+
28
+#include "common.h"
29
+
30
+namespace X265_NS {
31
+//x265 private namespace
32
+
33
+class ScalerSlice;
34
+class VideoDesc;
35
+
36
+#define MAX_NUM_LINES_AHEAD 4
37
+#define SCALER_ALIGN(x, j) (((x)+(j)-1)&~((j)-1))
38
+#define X265_ABS(j) ((j) >= 0 ? (j) : (-(j)))
39
+#define SCALER_MAX_REDUCE_CUTOFF 0.002
40
+#define SCALER_BITEXACT 0x80000
41
+#define ROUNDED_DIVISION(i,j) (((i)>0 ? (i) + ((j)>>1) : (i) - ((j)>>1))/(j))
42
+#define UH_CEIL_SHIFTR(i,j) (!scale_builtin_constant_p(j) ? -((-(i)) >> (j)) \
43
+ : ((i) + (1<<(j)) - 1) >> (j))
44
+
45
+#if defined(__GNUC__) || defined(__clang__)
46
+# define scale_builtin_constant_p __builtin_constant_p
47
+#else
48
+# define scale_builtin_constant_p(x) 0
49
+#endif
50
+
51
+enum ResFactor
52
+{
53
+ RES_FACTOR_64, RES_FACTOR_32, RES_FACTOR_16, RES_FACTOR_8,
54
+ RES_FACTOR_4, RES_FACTOR_DEF, NUM_RES_FACTOR
55
+};
56
+
57
+enum ScalerFactor
58
+{
59
+ FACTOR_4, FACTOR_8, NUM_FACTOR
60
+};
61
+
62
+enum FilterSize
63
+{
64
+ FIL_4, FIL_6, FIL_8, FIL_9, FIL_10, FIL_11, FIL_13, FIL_15,
65
+ FIL_16, FIL_17, FIL_19, FIL_22, FIL_24, FIL_DEF, NUM_FIL
66
+};
67
+
68
+class ScalerFilter {
69
+public:
70
+ int m_filtLen;
71
+ int32_t* m_filtPos; // Array of horizontal/vertical starting pos for each dst for luma / chroma planes.
72
+ int16_t* m_filt; // Array of horizontal/vertical filter coefficients for luma / chroma planes.
73
+ ScalerSlice* m_sourceSlice; // Source slice
74
+ ScalerSlice* m_destSlice; // Output slice
75
+ ScalerFilter();
76
+ virtual ~ScalerFilter();
77
+ virtual void process(int sliceVer, int sliceHor) = 0;
78
+ int initCoeff(int flag, int inc, int srcW, int dstW, int filtAlign, int one, int sourcePos, int destPos);
79
+ void setSlice(ScalerSlice* source, ScalerSlice* dest) { m_sourceSlice = source; m_destSlice = dest; }
80
+};
81
+
82
+class VideoDesc {
83
+public:
84
+ int m_width;
85
+ int m_height;
86
+ int m_csp;
87
+ int m_inputDepth;
88
+
89
+ VideoDesc(int w, int h, int csp, int bitDepth)
90
+ {
91
+ m_width = w;
92
+ m_height = h;
93
+ m_csp = csp;
94
+ m_inputDepth = bitDepth;
95
+ }
96
+};
97
+
98
+typedef struct ScalerPlane
99
+{
100
+ int availLines; // max number of lines that can be held by this plane
101
+ int sliceVer; // index of first line
102
+ int sliceHor; // number of lines
103
+ uint8_t** lineBuf; // line buffer
104
+} ScalerPlane;
105
+
106
+// Assist horizontal filtering, base class
107
+class HFilterScaler {
108
+public:
109
+ int m_bitDepth;
110
+public:
111
+ HFilterScaler() :m_bitDepth(0) {};
112
+ virtual ~HFilterScaler() {};
113
+ virtual void doScaling(int16_t *dst, int dstW, const uint8_t *src, const int16_t *filter, const int32_t *filterPos, int filterSize) = 0;
114
+};
115
+
116
+// Assist vertical filtering, base class
117
+class VFilterScaler {
118
+public:
119
+ int m_bitDepth;
120
+public:
121
+ VFilterScaler() :m_bitDepth(0) {};
122
+ virtual ~VFilterScaler() {};
123
+ virtual void yuv2PlaneX(const int16_t *filter, int filterSize, const int16_t **src, uint8_t *dest, int dstW) = 0;
124
+};
125
+
126
+// Assist horizontal filtering, process 8 bit case
127
+class HFilterScaler8Bit : public HFilterScaler {
128
+public:
129
+ HFilterScaler8Bit() { m_bitDepth = 8; }
130
+ virtual void doScaling(int16_t *dst, int dstW, const uint8_t *src, const int16_t *filter, const int32_t *filterPos, int filterSize);
131
+};
132
+
133
+// Assist horizontal filtering, process 10 bit case
134
+class HFilterScaler10Bit : public HFilterScaler {
135
+public:
136
+ HFilterScaler10Bit() { m_bitDepth = 10; }
137
+ virtual void doScaling(int16_t *dst, int dstW, const uint8_t *src, const int16_t *filter, const int32_t *filterPos, int filterSize);
138
+};
139
+
140
+// Assist vertical filtering, process 8 bit case
141
+class VFilterScaler8Bit : public VFilterScaler {
142
+public:
143
+ VFilterScaler8Bit() { m_bitDepth = 8; }
144
+ virtual void yuv2PlaneX(const int16_t *filter, int filterSize, const int16_t **src, uint8_t *dest, int dstW);
145
+};
146
+
147
+// Assist vertical filtering, process 10 bit case
148
+class VFilterScaler10Bit : public VFilterScaler {
149
+public:
150
+ VFilterScaler10Bit() { m_bitDepth = 10; }
151
+ virtual void yuv2PlaneX(const int16_t *filter, int filterSize, const int16_t **src, uint8_t *dest, int dstW);
152
+};
153
+
154
+// Horizontal filter for luma
155
+class ScalerHLumFilter : public ScalerFilter {
156
+private:
157
+ HFilterScaler* m_hFilterScaler;
158
+public:
159
+ ScalerHLumFilter(int bitDepth) { bitDepth == 8 ? m_hFilterScaler = new HFilterScaler8Bit : bitDepth == 10 ? m_hFilterScaler = new HFilterScaler10Bit : NULL;}
160
+ ~ScalerHLumFilter() { if (m_hFilterScaler) X265_FREE(m_hFilterScaler); }
161
+ virtual void process(int sliceVer, int sliceHor);
162
+};
163
+
164
+// Horizontal filter for chroma
165
+class ScalerHCrFilter : public ScalerFilter {
166
+private:
167
+ HFilterScaler* m_hFilterScaler;
168
+public:
169
+ ScalerHCrFilter(int bitDepth) { bitDepth == 8 ? m_hFilterScaler = new HFilterScaler8Bit : bitDepth == 10 ? m_hFilterScaler = new HFilterScaler10Bit : NULL;}
170
+ ~ScalerHCrFilter() { if (m_hFilterScaler) X265_FREE(m_hFilterScaler); }
171
+ virtual void process(int sliceVer, int sliceHor);
172
+};
173
+
174
+// Vertical filter for luma
175
+class ScalerVLumFilter : public ScalerFilter {
176
+private:
177
+ VFilterScaler* m_vFilterScaler;
178
+public:
179
+ ScalerVLumFilter(int bitDepth) { bitDepth == 8 ? m_vFilterScaler = new VFilterScaler8Bit : bitDepth == 10 ? m_vFilterScaler = new VFilterScaler10Bit : NULL;}
180
+ ~ScalerVLumFilter() { if (m_vFilterScaler) X265_FREE(m_vFilterScaler); }
181
+ virtual void process(int sliceVer, int sliceHor);
182
+};
183
+
184
+// Vertical filter for chroma
185
+class ScalerVCrFilter : public ScalerFilter {
186
+private:
187
+ VFilterScaler* m_vFilterScaler;
188
+public:
189
+ ScalerVCrFilter(int bitDepth) { bitDepth == 8 ? m_vFilterScaler = new VFilterScaler8Bit : bitDepth == 10 ? m_vFilterScaler = new VFilterScaler10Bit : NULL;}
190
+ ~ScalerVCrFilter() { if (m_vFilterScaler) X265_FREE(m_vFilterScaler); }
191
+ virtual void process(int sliceVer, int sliceHor);
192
+};
193
+
194
+class ScalerSlice
195
+{
196
+private:
197
+ enum ScalerSlicePlaneNum { m_numSlicePlane = 4 };
198
+public:
199
+ int m_width; // Slice line width
200
+ int m_hCrSubSample; // horizontal Chroma subsampling factor
201
x265_3.3.tar.gz/source/common/threading.h -> x265_3.4.tar.gz/source/common/threading.h
Changed
31
1
2
LeaveCriticalSection(&m_cs);
3
}
4
5
+ void decr()
6
+ {
7
+ EnterCriticalSection(&m_cs);
8
+ m_val--;
9
+ WakeAllConditionVariable(&m_cv);
10
+ LeaveCriticalSection(&m_cs);
11
+ }
12
+
13
protected:
14
15
CRITICAL_SECTION m_cs;
16
17
pthread_mutex_unlock(&m_mutex);
18
}
19
20
+ void decr()
21
+ {
22
+ pthread_mutex_lock(&m_mutex);
23
+ m_val--;
24
+ pthread_cond_broadcast(&m_cond);
25
+ pthread_mutex_unlock(&m_mutex);
26
+ }
27
+
28
protected:
29
30
pthread_mutex_t m_mutex;
31
x265_3.3.tar.gz/source/encoder/analysis.cpp -> x265_3.4.tar.gz/source/encoder/analysis.cpp
Changed
151
1
2
md.pred[PRED_SKIP].cu.initSubCU(parentCTU, cuGeom, qp);
3
checkMerge2Nx2N_rd0_4(md.pred[PRED_SKIP], md.pred[PRED_MERGE], cuGeom);
4
5
- skipRecursion = !!m_param->bEnableRecursionSkip && md.bestMode;
6
+ skipRecursion = !!m_param->recursionSkipMode && md.bestMode;
7
if (m_param->rdLevel)
8
skipModes = m_param->bEnableEarlySkip && md.bestMode;
9
}
10
11
md.pred[PRED_SKIP].cu.initSubCU(parentCTU, cuGeom, qp);
12
checkMerge2Nx2N_rd0_4(md.pred[PRED_SKIP], md.pred[PRED_MERGE], cuGeom);
13
14
- skipRecursion = !!m_param->bEnableRecursionSkip && md.bestMode;
15
+ skipRecursion = !!m_param->recursionSkipMode && md.bestMode;
16
if (m_param->rdLevel)
17
skipModes = m_param->bEnableEarlySkip && md.bestMode;
18
}
19
20
skipModes = (m_param->bEnableEarlySkip || m_refineLevel == 2)
21
&& md.bestMode && md.bestMode->cu.isSkipped(0); // TODO: sa8d threshold per depth
22
}
23
- if (md.bestMode && m_param->bEnableRecursionSkip && !bCtuInfoCheck && !(m_param->bAnalysisType == AVC_INFO && m_param->analysisLoadReuseLevel == 7 && (m_modeFlag[0] || m_modeFlag[1])))
24
+ if (md.bestMode && m_param->recursionSkipMode && !bCtuInfoCheck && !(m_param->bAnalysisType == AVC_INFO && m_param->analysisLoadReuseLevel == 7 && (m_modeFlag[0] || m_modeFlag[1])))
25
{
26
skipRecursion = md.bestMode->cu.isSkipped(0);
27
- if (mightSplit && depth >= minDepth && !skipRecursion)
28
+ if (mightSplit && !skipRecursion)
29
{
30
- if (depth)
31
- skipRecursion = recursionDepthCheck(parentCTU, cuGeom, *md.bestMode);
32
- if (m_bHD && !skipRecursion && m_param->rdLevel == 2 && md.fencYuv.m_size != MAX_CU_SIZE)
33
+ if (depth >= minDepth && m_param->recursionSkipMode == RDCOST_BASED_RSKIP)
34
+ {
35
+ if (depth)
36
+ skipRecursion = recursionDepthCheck(parentCTU, cuGeom, *md.bestMode);
37
+ if (m_bHD && !skipRecursion && m_param->rdLevel == 2 && md.fencYuv.m_size != MAX_CU_SIZE)
38
+ skipRecursion = complexityCheckCU(*md.bestMode);
39
+ }
40
+ else if (cuGeom.log2CUSize >= MAX_LOG2_CU_SIZE - 1 && m_param->recursionSkipMode == EDGE_BASED_RSKIP)
41
+ {
42
skipRecursion = complexityCheckCU(*md.bestMode);
43
+ }
44
+
45
}
46
}
47
if (m_param->bAnalysisType == AVC_INFO && md.bestMode && cuGeom.numPartitions <= 16 && m_param->analysisLoadReuseLevel == 7)
48
49
checkInter_rd5_6(md.pred[PRED_2Nx2N], cuGeom, SIZE_2Nx2N, refMasks);
50
checkBestMode(md.pred[PRED_2Nx2N], cuGeom.depth);
51
52
- if (m_param->bEnableRecursionSkip && depth && m_modeDepth[depth - 1].bestMode)
53
+ if (m_param->recursionSkipMode && depth && m_modeDepth[depth - 1].bestMode)
54
skipRecursion = md.bestMode && !md.bestMode->cu.getQtRootCbf(0);
55
}
56
if (m_param->analysisLoadReuseLevel > 4 && m_reusePartSize[cuGeom.absPartIdx] == SIZE_2Nx2N)
57
58
checkInter_rd5_6(md.pred[PRED_2Nx2N], cuGeom, SIZE_2Nx2N, refMasks);
59
checkBestMode(md.pred[PRED_2Nx2N], cuGeom.depth);
60
61
- if (m_param->bEnableRecursionSkip && depth && m_modeDepth[depth - 1].bestMode)
62
+ if (m_param->recursionSkipMode && depth && m_modeDepth[depth - 1].bestMode)
63
skipRecursion = md.bestMode && !md.bestMode->cu.getQtRootCbf(0);
64
}
65
}
66
67
checkInter_rd5_6(md.pred[PRED_2Nx2N], cuGeom, SIZE_2Nx2N, refMasks);
68
checkBestMode(md.pred[PRED_2Nx2N], cuGeom.depth);
69
70
- if (m_param->bEnableRecursionSkip && depth && m_modeDepth[depth - 1].bestMode)
71
+ if (m_param->recursionSkipMode == RDCOST_BASED_RSKIP && depth && m_modeDepth[depth - 1].bestMode)
72
skipRecursion = md.bestMode && !md.bestMode->cu.getQtRootCbf(0);
73
+ else if (cuGeom.log2CUSize >= MAX_LOG2_CU_SIZE - 1 && m_param->recursionSkipMode == EDGE_BASED_RSKIP)
74
+ skipRecursion = md.bestMode && complexityCheckCU(*md.bestMode);
75
}
76
if (m_param->bAnalysisType == AVC_INFO && md.bestMode && cuGeom.numPartitions <= 16 && m_param->analysisLoadReuseLevel == 7)
77
skipRecursion = true;
78
79
80
bool Analysis::complexityCheckCU(const Mode& bestMode)
81
{
82
- uint32_t mean = 0;
83
- uint32_t homo = 0;
84
- uint32_t cuSize = bestMode.fencYuv->m_size;
85
- for (uint32_t y = 0; y < cuSize; y++) {
86
- for (uint32_t x = 0; x < cuSize; x++) {
87
- mean += (bestMode.fencYuv->m_buf[0][y * cuSize + x]);
88
+ if (m_param->recursionSkipMode == RDCOST_BASED_RSKIP)
89
+ {
90
+ uint32_t mean = 0;
91
+ uint32_t homo = 0;
92
+ uint32_t cuSize = bestMode.fencYuv->m_size;
93
+ for (uint32_t y = 0; y < cuSize; y++) {
94
+ for (uint32_t x = 0; x < cuSize; x++) {
95
+ mean += (bestMode.fencYuv->m_buf[0][y * cuSize + x]);
96
+ }
97
}
98
- }
99
- mean = mean / (cuSize * cuSize);
100
- for (uint32_t y = 0 ; y < cuSize; y++){
101
- for (uint32_t x = 0 ; x < cuSize; x++){
102
- homo += abs(int(bestMode.fencYuv->m_buf[0][y * cuSize + x] - mean));
103
+ mean = mean / (cuSize * cuSize);
104
+ for (uint32_t y = 0; y < cuSize; y++) {
105
+ for (uint32_t x = 0; x < cuSize; x++) {
106
+ homo += abs(int(bestMode.fencYuv->m_buf[0][y * cuSize + x] - mean));
107
+ }
108
}
109
- }
110
- homo = homo / (cuSize * cuSize);
111
+ homo = homo / (cuSize * cuSize);
112
113
- if (homo < (.1 * mean))
114
- return true;
115
+ if (homo < (.1 * mean))
116
+ return true;
117
118
- return false;
119
-}
120
+ return false;
121
+ }
122
+ else
123
+ {
124
+ int blockType = bestMode.cu.m_log2CUSize[0] - LOG2_UNIT_SIZE;
125
+ int shift = bestMode.cu.m_log2CUSize[0] * LOG2_UNIT_SIZE;
126
+ intptr_t stride = m_frame->m_fencPic->m_stride;
127
+ intptr_t blockOffsetLuma = bestMode.cu.m_cuPelX + bestMode.cu.m_cuPelY * stride;
128
+ uint64_t sum_ss = primitives.cu[blockType].var(m_frame->m_edgeBitPic + blockOffsetLuma, stride);
129
+ uint32_t sum = (uint32_t)sum_ss;
130
+ uint32_t ss = (uint32_t)(sum_ss >> 32);
131
+ uint32_t pixelCount = 1 << shift;
132
+ double cuEdgeVariance = (ss - ((double)sum * sum / pixelCount)) / pixelCount;
133
+
134
+ if (cuEdgeVariance > (double)m_param->edgeVarThreshold)
135
+ return false;
136
+ else
137
+ return true;
138
+ }
139
+ }
140
141
uint32_t Analysis::calculateCUVariance(const CUData& ctu, const CUGeom& cuGeom)
142
{
143
144
cnt++;
145
}
146
}
147
-
148
return cuVariance / cnt;
149
}
150
151
x265_3.3.tar.gz/source/encoder/analysis.h -> x265_3.4.tar.gz/source/encoder/analysis.h
Changed
18
1
2
splitRefs = 0;
3
mvCost[0] = 0; // L0
4
mvCost[1] = 0; // L1
5
- sa8dCost = 0;
6
+ sa8dCost = 0;
7
}
8
};
9
10
11
12
Mode& compressCTU(CUData& ctu, Frame& frame, const CUGeom& cuGeom, const Entropy& initialContext);
13
int32_t loadTUDepth(CUGeom cuGeom, CUData parentCTU);
14
-
15
protected:
16
/* Analysis data for save/load mode, writes/reads data based on absPartIdx */
17
x265_analysis_inter_data* m_reuseInterDataCTU;
18
x265_3.3.tar.gz/source/encoder/api.cpp -> x265_3.4.tar.gz/source/encoder/api.cpp
Changed
35
1
2
3
void x265_zone_free(x265_param *param)
4
{
5
- if (param && param->rc.zonefileCount) {
6
+ if (param && param->rc.zones && (param->rc.zoneCount || param->rc.zonefileCount))
7
+ {
8
for (int i = 0; i < param->rc.zonefileCount; i++)
9
x265_free(param->rc.zones[i].zoneParam);
10
- }
11
- if (param && (param->rc.zoneCount || param->rc.zonefileCount))
12
x265_free(param->rc.zones);
13
+ }
14
}
15
16
static const x265_api libapi =
17
18
fprintf(csvfp, "RateFactor, ");
19
if (param->rc.vbvBufferSize)
20
fprintf(csvfp, "BufferFill, BufferFillFinal, ");
21
+ if (param->rc.vbvBufferSize && param->csvLogLevel >= 2)
22
+ fprintf(csvfp, "UnclippedBufferFillFinal, ");
23
if (param->bEnablePsnr)
24
fprintf(csvfp, "Y PSNR, U PSNR, V PSNR, YUV PSNR, ");
25
if (param->bEnableSsim)
26
27
fprintf(param->csvfpt, "%.3lf,", frameStats->rateFactor);
28
if (param->rc.vbvBufferSize)
29
fprintf(param->csvfpt, "%.3lf, %.3lf,", frameStats->bufferFill, frameStats->bufferFillFinal);
30
+ if (param->rc.vbvBufferSize && param->csvLogLevel >= 2)
31
+ fprintf(param->csvfpt, "%.3lf,", frameStats->unclippedBufferFillFinal);
32
if (param->bEnablePsnr)
33
fprintf(param->csvfpt, "%.3lf, %.3lf, %.3lf, %.3lf,", frameStats->psnrY, frameStats->psnrU, frameStats->psnrV, frameStats->psnr);
34
if (param->bEnableSsim)
35
x265_3.3.tar.gz/source/encoder/encoder.cpp -> x265_3.4.tar.gz/source/encoder/encoder.cpp
Changed
201
1
2
3
if (m_param->bHistBasedSceneCut)
4
{
5
- for (int i = 0; i < x265_cli_csps[m_param->internalCsp].planes; i++)
6
- {
7
- m_planeSizes[i] = (m_param->sourceWidth >> x265_cli_csps[p->internalCsp].width[i]) * (m_param->sourceHeight >> x265_cli_csps[m_param->internalCsp].height[i]);
8
- }
9
+ m_planeSizes[0] = (m_param->sourceWidth >> x265_cli_csps[p->internalCsp].width[0]) * (m_param->sourceHeight >> x265_cli_csps[m_param->internalCsp].height[0]);
10
uint32_t pixelbytes = m_param->internalBitDepth > 8 ? 2 : 1;
11
m_edgePic = X265_MALLOC(pixel, m_planeSizes[0] * pixelbytes);
12
m_edgeHistThreshold = m_param->edgeTransitionThreshold;
13
14
int32_t planeCount = x265_cli_csps[m_param->internalCsp].planes;
15
memset(m_edgePic, 0, bufSize);
16
17
- if (!computeEdge(m_edgePic, src, NULL, pic->width, pic->height, pic->width, false))
18
+ if (!computeEdge(m_edgePic, src, NULL, pic->width, pic->height, pic->width, false, 1))
19
{
20
- x265_log(m_param, X265_LOG_ERROR, "Failed edge computation!");
21
+ x265_log(m_param, X265_LOG_ERROR, "Failed to compute edge!");
22
return false;
23
}
24
25
26
if (m_param->bHistBasedSceneCut && pic_in)
27
{
28
x265_picture *pic = (x265_picture *) pic_in;
29
+
30
+ if (pic->poc == 0)
31
+ {
32
+ /* for entire encode compute the chroma plane sizes only once */
33
+ for (int i = 1; i < x265_cli_csps[m_param->internalCsp].planes; i++)
34
+ m_planeSizes[i] = (pic->width >> x265_cli_csps[m_param->internalCsp].width[i]) * (pic->height >> x265_cli_csps[m_param->internalCsp].height[i]);
35
+ }
36
+
37
if (computeHistograms(pic))
38
{
39
double maxUVSad = 0.0, edgeSad = 0.0;
40
41
}
42
}
43
}
44
+ if (m_param->recursionSkipMode == EDGE_BASED_RSKIP && m_param->bHistBasedSceneCut)
45
+ {
46
+ pixel* src = m_edgePic;
47
+ primitives.planecopy_pp_shr(src, inFrame->m_fencPic->m_picWidth, inFrame->m_edgeBitPic, inFrame->m_fencPic->m_stride,
48
+ inFrame->m_fencPic->m_picWidth, inFrame->m_fencPic->m_picHeight, 0);
49
+ }
50
}
51
else
52
{
53
54
encParam->maxNumReferences = param->maxNumReferences; // never uses more refs than specified in stream headers
55
encParam->bEnableFastIntra = param->bEnableFastIntra;
56
encParam->bEnableEarlySkip = param->bEnableEarlySkip;
57
- encParam->bEnableRecursionSkip = param->bEnableRecursionSkip;
58
+ encParam->recursionSkipMode = param->recursionSkipMode;
59
encParam->searchMethod = param->searchMethod;
60
/* Scratch buffer prevents me_range from being increased for esa/tesa */
61
if (param->searchRange < encParam->searchRange)
62
63
frameStats->ipCostRatio = curFrame->m_lowres.ipCostRatio;
64
frameStats->bufferFill = m_rateControl->m_bufferFillActual;
65
frameStats->bufferFillFinal = m_rateControl->m_bufferFillFinal;
66
+ if (m_param->csvLogLevel >= 2)
67
+ frameStats->unclippedBufferFillFinal = m_rateControl->m_unclippedBufferFillFinal;
68
frameStats->frameLatency = inPoc - poc;
69
if (m_param->rc.rateControlMode == X265_RC_CRF)
70
frameStats->rateFactor = curEncData.m_rateFactor;
71
72
p->maxNumReferences = zone->maxNumReferences;
73
p->bEnableFastIntra = zone->bEnableFastIntra;
74
p->bEnableEarlySkip = zone->bEnableEarlySkip;
75
- p->bEnableRecursionSkip = zone->bEnableRecursionSkip;
76
+ p->recursionSkipMode = zone->recursionSkipMode;
77
p->searchMethod = zone->searchMethod;
78
p->searchRange = zone->searchRange;
79
p->subpelRefine = zone->subpelRefine;
80
81
if (p->analysisLoad && !p->analysisLoadReuseLevel)
82
p->analysisLoadReuseLevel = 5;
83
84
- if ((p->bAnalysisType == DEFAULT) && p->rc.cuTree)
85
- {
86
- if (p->analysisSaveReuseLevel && p->analysisSaveReuseLevel < 10)
87
- {
88
- x265_log(p, X265_LOG_WARNING, "cu-tree works only with analysis-save-reuse-level 10, Disabling cu-tree\n");
89
- p->rc.cuTree = 0;
90
- }
91
- if (p->analysisLoadReuseLevel && p->analysisLoadReuseLevel < 10)
92
- {
93
- x265_log(p, X265_LOG_WARNING, "cu-tree works only with analysis-load-reuse-level 10, Disabling cu-tree\n");
94
- p->rc.cuTree = 0;
95
- }
96
- }
97
-
98
if ((p->analysisLoad || p->analysisSave) && (p->bDistributeModeAnalysis || p->bDistributeMotionEstimation))
99
{
100
x265_log(p, X265_LOG_WARNING, "Analysis load/save options incompatible with pmode/pme, Disabling pmode/pme\n");
101
102
}
103
else
104
{
105
- if (fread(&m_conformanceWindow.rightOffset, sizeof(int), 1, m_analysisFileIn) != 1)
106
+ int rightOffset, bottomOffset;
107
+ if (fread(&rightOffset, sizeof(int), 1, m_analysisFileIn) != 1)
108
{
109
x265_log(NULL, X265_LOG_ERROR, "Error reading analysis data. Conformance window right offset missing\n");
110
m_aborted = true;
111
}
112
- else if (m_conformanceWindow.rightOffset && p->analysisLoadReuseLevel > 1)
113
+ else if (rightOffset && p->analysisLoadReuseLevel > 1)
114
{
115
int scaleFactor = p->scaleFactor < 2 ? 1 : p->scaleFactor;
116
- padsize = m_conformanceWindow.rightOffset * scaleFactor;
117
+ padsize = rightOffset * scaleFactor;
118
p->sourceWidth += padsize;
119
m_conformanceWindow.bEnabled = true;
120
m_conformanceWindow.rightOffset = padsize;
121
}
122
123
- if (fread(&m_conformanceWindow.bottomOffset, sizeof(int), 1, m_analysisFileIn) != 1)
124
+ if (fread(&bottomOffset, sizeof(int), 1, m_analysisFileIn) != 1)
125
{
126
x265_log(NULL, X265_LOG_ERROR, "Error reading analysis data. Conformance window bottom offset missing\n");
127
m_aborted = true;
128
}
129
- else if (m_conformanceWindow.bottomOffset && p->analysisLoadReuseLevel > 1)
130
+ else if (bottomOffset && p->analysisLoadReuseLevel > 1)
131
{
132
int scaleFactor = p->scaleFactor < 2 ? 1 : p->scaleFactor;
133
- padsize = m_conformanceWindow.bottomOffset * scaleFactor;
134
+ padsize = bottomOffset * scaleFactor;
135
p->sourceHeight += padsize;
136
m_conformanceWindow.bEnabled = true;
137
m_conformanceWindow.bottomOffset = padsize;
138
139
x265_log(p, X265_LOG_WARNING, "Radl requires fixed gop-length (keyint == min-keyint). Disabling radl.\n");
140
}
141
142
- if ((p->chunkStart || p->chunkEnd) && p->bOpenGOP)
143
+ if ((p->chunkStart || p->chunkEnd) && p->bOpenGOP && m_param->bResetZoneConfig)
144
{
145
p->chunkStart = p->chunkEnd = 0;
146
x265_log(p, X265_LOG_WARNING, "Chunking requires closed gop structure. Disabling chunking.\n");
147
148
x265_log(p, X265_LOG_WARNING, "Turning on repeat - headers for zone encoding\n");
149
}
150
151
- if (!m_param->bResetZoneConfig && (p->keyframeMax != p->keyframeMin))
152
- x265_log(p, X265_LOG_WARNING, "External zone reconfiguration requires a fixed GOP size to enable appropriate signaling of HRD info\n");
153
-
154
- if (!m_param->bResetZoneConfig && (p->reconfigWindowSize != (uint64_t)p->keyframeMax))
155
- x265_log(p, X265_LOG_WARNING, "Zone size must be multiple of GOP size to enable appropriate signaling of HRD info\n");
156
-
157
if (m_param->bEnableHME)
158
{
159
if (m_param->sourceHeight < 540)
160
161
}
162
}
163
164
+ uint32_t numCUsLoad, numCUsInHeightLoad;
165
+
166
/* Now arrived at the right frame, read the record */
167
analysis->poc = poc;
168
analysis->frameRecordSize = frameRecordSize;
169
X265_FREAD(&analysis->sliceType, sizeof(int), 1, m_analysisFileIn, &(picData->sliceType));
170
X265_FREAD(&analysis->bScenecut, sizeof(int), 1, m_analysisFileIn, &(picData->bScenecut));
171
X265_FREAD(&analysis->satdCost, sizeof(int64_t), 1, m_analysisFileIn, &(picData->satdCost));
172
- X265_FREAD(&analysis->numCUsInFrame, sizeof(int), 1, m_analysisFileIn, &(picData->numCUsInFrame));
173
+ X265_FREAD(&numCUsLoad, sizeof(int), 1, m_analysisFileIn, &(picData->numCUsInFrame));
174
X265_FREAD(&analysis->numPartitions, sizeof(int), 1, m_analysisFileIn, &(picData->numPartitions));
175
176
+ /* Update analysis info to save current settings */
177
+ uint32_t widthInCU = (m_param->sourceWidth + m_param->maxCUSize - 1) >> m_param->maxLog2CUSize;
178
+ uint32_t heightInCU = (m_param->sourceHeight + m_param->maxCUSize - 1) >> m_param->maxLog2CUSize;
179
+ uint32_t numCUsInFrame = widthInCU * heightInCU;
180
+ analysis->numCUsInFrame = numCUsInFrame;
181
+ analysis->numCuInHeight = heightInCU;
182
+
183
if (m_param->bDisableLookahead)
184
{
185
- X265_FREAD(&analysis->numCuInHeight, sizeof(uint32_t), 1, m_analysisFileIn, &(picData->numCuInHeight));
186
+ X265_FREAD(&numCUsInHeightLoad, sizeof(uint32_t), 1, m_analysisFileIn, &(picData->numCuInHeight));
187
X265_FREAD(&analysis->lookahead, sizeof(x265_lookahead_data), 1, m_analysisFileIn, &(picData->lookahead));
188
}
189
int scaledNumPartition = analysis->numPartitions;
190
191
192
if (m_param->ctuDistortionRefine == CTU_DISTORTION_INTERNAL)
193
{
194
- X265_FREAD((analysis->distortionData)->ctuDistortion, sizeof(sse_t), analysis->numCUsInFrame, m_analysisFileIn, picDistortion);
195
+ X265_FREAD((analysis->distortionData)->ctuDistortion, sizeof(sse_t), numCUsLoad, m_analysisFileIn, picDistortion);
196
computeDistortionOffset(analysis);
197
}
198
if (m_param->bDisableLookahead && m_rateControl->m_isVbv)
199
{
200
size_t vbvCount = m_param->lookaheadDepth + m_param->bframes + 2;
201
x265_3.3.tar.gz/source/encoder/frameencoder.cpp -> x265_3.4.tar.gz/source/encoder/frameencoder.cpp
Changed
29
1
2
{
3
rowSum += sliceGroupSizeAccu;
4
m_sliceBaseRow[++sidx] = i;
5
- }
6
+ }
7
}
8
X265_CHECK(sidx < m_param->maxSlices, "sliceID check failed!");
9
m_sliceBaseRow[0] = 0;
10
11
m_ssimCnt = 0;
12
memset(&(m_frame->m_encData->m_frameStats), 0, sizeof(m_frame->m_encData->m_frameStats));
13
14
+ if (!m_param->bHistBasedSceneCut && m_param->rc.aqMode != X265_AQ_EDGE && m_param->recursionSkipMode == EDGE_BASED_RSKIP)
15
+ {
16
+ int height = m_frame->m_fencPic->m_picHeight;
17
+ int width = m_frame->m_fencPic->m_picWidth;
18
+ intptr_t stride = m_frame->m_fencPic->m_stride;
19
+
20
+ if (!computeEdge(m_frame->m_edgeBitPic, m_frame->m_fencPic->m_picOrg[0], NULL, stride, height, width, false, 1))
21
+ {
22
+ x265_log(m_param, X265_LOG_ERROR, " Failed to compute edge !");
23
+ }
24
+ }
25
+
26
/* Emit access unit delimiter unless this is the first frame and the user is
27
* not repeating headers (since AUD is supposed to be the first NAL in the access
28
* unit) */
29
x265_3.3.tar.gz/source/encoder/ratecontrol.cpp -> x265_3.4.tar.gz/source/encoder/ratecontrol.cpp
Changed
173
1
2
x265_log(m_param, X265_LOG_WARNING, "NAL HRD parameters require VBV parameters, ignored\n");
3
m_param->bEmitHRDSEI = 0;
4
}
5
- m_isCbr = m_param->rc.rateControlMode == X265_RC_ABR && m_isVbv && !m_2pass && m_param->rc.vbvMaxBitrate <= m_param->rc.bitrate;
6
+ m_isCbr = m_param->rc.rateControlMode == X265_RC_ABR && m_isVbv && m_param->rc.vbvMaxBitrate <= m_param->rc.bitrate;
7
if (m_param->rc.bStrictCbr && !m_isCbr)
8
{
9
x265_log(m_param, X265_LOG_WARNING, "strict CBR set without CBR mode, ignored\n");
10
11
int vbvBufferSize = m_param->rc.vbvBufferSize * 1000;
12
int vbvMaxBitrate = m_param->rc.vbvMaxBitrate * 1000;
13
14
- if (m_param->bEmitHRDSEI)
15
+ if (m_param->bEmitHRDSEI && !m_param->decoderVbvMaxRate)
16
{
17
const HRDInfo* hrd = &sps.vuiParameters.hrdParameters;
18
vbvBufferSize = hrd->cpbSizeValue << (hrd->cpbSizeScale + CPB_SHIFT);
19
20
CMP_OPT_FIRST_PASS(" keyint", m_param->keyframeMax);
21
CMP_OPT_FIRST_PASS("scenecut", m_param->scenecutThreshold);
22
CMP_OPT_FIRST_PASS("intra-refresh", m_param->bIntraRefresh);
23
+ CMP_OPT_FIRST_PASS("frame-dup", m_param->bEnableFrameDuplication);
24
if (m_param->bMultiPassOptRPS)
25
{
26
CMP_OPT_FIRST_PASS("multi-pass-opt-rps", m_param->bMultiPassOptRPS);
27
28
x265_log(m_param, X265_LOG_WARNING, "2nd pass has fewer frames than 1st pass (%d vs %d)\n",
29
m_param->totalFrames, m_numEntries);
30
}
31
- if (m_param->totalFrames > m_numEntries)
32
+ if (m_param->totalFrames > m_numEntries && !m_param->bEnableFrameDuplication)
33
{
34
x265_log(m_param, X265_LOG_ERROR, "2nd pass has more frames than 1st pass (%d vs %d)\n",
35
m_param->totalFrames, m_numEntries);
36
37
// Init HRD
38
HRDInfo* hrd = &sps.vuiParameters.hrdParameters;
39
hrd->cbrFlag = m_isCbr;
40
+ if (m_param->reconfigWindowSize) {
41
+ hrd->cbrFlag = 0;
42
+ vbvMaxBitrate = m_param->decoderVbvMaxRate * 1000;
43
+ }
44
45
// normalize HRD size and rate to the value / scale notation
46
hrd->bitRateScale = x265_clip3(0, 15, calcScale(vbvMaxBitrate) - BR_SHIFT);
47
48
/* weighted average of cplx of future frames */
49
for (int j = 1; j < cplxBlur * 2 && j < m_numEntries - i; j++)
50
{
51
- int index = m_encOrder[i + j];
52
+ int index = i+j;
53
RateControlEntry *rcj = &m_rce2Pass[index];
54
weight *= 1 - pow(rcj->iCuCount / m_ncu, 2);
55
if (weight < 0.0001)
56
57
weight = 1.0;
58
for (int j = 0; j <= cplxBlur * 2 && j <= i; j++)
59
{
60
- int index = m_encOrder[i - j];
61
+ int index = i-j;
62
RateControlEntry *rcj = &m_rce2Pass[index];
63
gaussianWeight = weight * exp(-j * j / 200.0);
64
weightSum += gaussianWeight;
65
66
if (weight < .0001)
67
break;
68
}
69
- m_rce2Pass[m_encOrder[i]].blurredComplexity = cplxSum / weightSum;
70
+ m_rce2Pass[i].blurredComplexity= cplxSum / weightSum;
71
}
72
CHECKED_MALLOC(qScale, double, m_numEntries);
73
if (filterSize > 1)
74
75
expectedBits = 1;
76
for (int i = 0; i < m_numEntries; i++)
77
{
78
- RateControlEntry* rce = &m_rce2Pass[m_encOrder[i]];
79
+ RateControlEntry* rce = &m_rce2Pass[i];
80
double q = getQScale(rce, 1.0);
81
expectedBits += qScale2bits(rce, q);
82
m_lastQScaleFor[rce->sliceType] = q;
83
84
/* find qscale */
85
for (int i = 0; i < m_numEntries; i++)
86
{
87
- RateControlEntry *rce = &m_rce2Pass[m_encOrder[i]];
88
+ RateControlEntry *rce = &m_rce2Pass[i];
89
qScale[i] = getQScale(rce, rateFactor);
90
m_lastQScaleFor[rce->sliceType] = qScale[i];
91
}
92
93
/* fixed I/B qscale relative to P */
94
- for (int i = m_numEntries - 1; i >= 0; i--)
95
+ for (int i = 0; i < m_numEntries; i++)
96
{
97
- qScale[i] = getDiffLimitedQScale(&m_rce2Pass[m_encOrder[i]], qScale[i]);
98
+ qScale[i] = getDiffLimitedQScale(&m_rce2Pass[i], qScale[i]);
99
X265_CHECK(qScale[i] >= 0, "qScale became negative\n");
100
}
101
102
103
for (int i = 0; i < m_numEntries; i++)
104
{
105
double q = 0.0, sum = 0.0;
106
-
107
for (int j = 0; j < filterSize; j++)
108
{
109
int idx = i + j - filterSize / 2;
110
111
double coeff = qBlur == 0 ? 1.0 : exp(-d * d / (qBlur * qBlur));
112
if (idx < 0 || idx >= m_numEntries)
113
continue;
114
- if (m_rce2Pass[m_encOrder[i]].sliceType != m_rce2Pass[m_encOrder[idx]].sliceType)
115
+ if (m_rce2Pass[i].sliceType != m_rce2Pass[idx].sliceType)
116
continue;
117
q += qScale[idx] * coeff;
118
sum += coeff;
119
120
/* find expected bits */
121
for (int i = 0; i < m_numEntries; i++)
122
{
123
- RateControlEntry *rce = &m_rce2Pass[m_encOrder[i]];
124
+ RateControlEntry *rce = &m_rce2Pass[i];
125
rce->newQScale = clipQscale(NULL, rce, blurredQscale[i]); // check if needed
126
X265_CHECK(rce->newQScale >= 0, "new Qscale is negative\n");
127
expectedBits += qScale2bits(rce, rce->newQScale);
128
129
m_param->rc.vbvMaxBitrate = m_param->rc.zones[i].zoneParam->rc.vbvMaxBitrate;
130
memcpy(m_relativeComplexity, m_param->rc.zones[i].relativeComplexity, sizeof(double) * m_param->reconfigWindowSize);
131
reconfigureRC();
132
+ m_isCbr = 1; /* Always vbvmaxrate == bitrate here*/
133
m_top->zoneReadCount[i].incr();
134
}
135
}
136
137
/* Adjust quant based on the difference between
138
* achieved and expected bitrate so far */
139
double curTime = (double)rce->encodeOrder / m_numEntries;
140
- double w = x265_clip3(0.0, 1.0, curTime * 100);
141
+ double w = x265_clip3(0.0, 1.0, curTime);
142
q *= pow((double)m_totalBits / m_expectedBitsSum, w);
143
}
144
if (m_framesDone == 0 && m_param->rc.rateControlMode == X265_RC_ABR && m_isGrainEnabled)
145
146
x265_log(m_param, X265_LOG_WARNING, "poc:%d, VBV underflow (%.0f bits)\n", rce->poc, m_bufferFillFinal);
147
148
m_bufferFillFinal = X265_MAX(m_bufferFillFinal, 0);
149
- m_bufferFillFinal += m_bufferRate;
150
+ m_bufferFillFinal += rce->bufferRate;
151
+ if (m_param->csvLogLevel >= 2)
152
+ m_unclippedBufferFillFinal = m_bufferFillFinal;
153
154
if (m_param->rc.bStrictCbr)
155
{
156
157
filler += FILLER_OVERHEAD * 8;
158
}
159
m_bufferFillFinal -= filler;
160
- bufferBits = X265_MIN(bits + filler + m_bufferExcess, m_bufferRate);
161
+ bufferBits = X265_MIN(bits + filler + m_bufferExcess, rce->bufferRate);
162
m_bufferExcess = X265_MAX(m_bufferExcess - bufferBits + bits + filler, 0);
163
m_bufferFillActual += bufferBits - bits - filler;
164
}
165
else
166
{
167
m_bufferFillFinal = X265_MIN(m_bufferFillFinal, m_bufferSize);
168
- bufferBits = X265_MIN(bits + m_bufferExcess, m_bufferRate);
169
+ bufferBits = X265_MIN(bits + m_bufferExcess, rce->bufferRate);
170
m_bufferExcess = X265_MAX(m_bufferExcess - bufferBits + bits, 0);
171
m_bufferFillActual += bufferBits - bits;
172
m_bufferFillActual = X265_MIN(m_bufferFillActual, m_bufferSize);
173
x265_3.3.tar.gz/source/encoder/ratecontrol.h -> x265_3.4.tar.gz/source/encoder/ratecontrol.h
Changed
9
1
2
double m_rateFactorConstant;
3
double m_bufferSize;
4
double m_bufferFillFinal; /* real buffer as of the last finished frame */
5
+ double m_unclippedBufferFillFinal; /* real unclipped buffer as of the last finished frame used to log in CSV*/
6
double m_bufferFill; /* planned buffer, if all in-progress frames hit their bit budget */
7
double m_bufferRate; /* # of bits added to buffer_fill after each frame */
8
double m_vbvMaxRate; /* in kbps */
9
x265_3.3.tar.gz/source/encoder/slicetype.cpp -> x265_3.4.tar.gz/source/encoder/slicetype.cpp
Changed
33
1
2
3
namespace X265_NS {
4
5
-bool computeEdge(pixel *edgePic, pixel *refPic, pixel *edgeTheta, intptr_t stride, int height, int width, bool bcalcTheta)
6
+bool computeEdge(pixel* edgePic, pixel* refPic, pixel* edgeTheta, intptr_t stride, int height, int width, bool bcalcTheta, pixel whitePixel)
7
{
8
intptr_t rowOne = 0, rowTwo = 0, rowThree = 0, colOne = 0, colTwo = 0, colThree = 0;
9
intptr_t middle = 0, topLeft = 0, topRight = 0, bottomLeft = 0, bottomRight = 0;
10
11
theta = 180 + theta;
12
edgeTheta[middle] = (pixel)theta;
13
}
14
- edgePic[middle] = (pixel)(gradientMagnitude >= edgeThreshold ? edgeThreshold : blackPixel);
15
+ edgePic[middle] = (pixel)(gradientMagnitude >= EDGE_THRESHOLD ? whitePixel : blackPixel);
16
}
17
}
18
return true;
19
20
if (param->rc.aqMode == X265_AQ_EDGE)
21
edgeFilter(curFrame, param);
22
23
+ if (param->rc.aqMode == X265_AQ_EDGE && !param->bHistBasedSceneCut && param->recursionSkipMode == EDGE_BASED_RSKIP)
24
+ {
25
+ pixel* src = curFrame->m_edgePic + curFrame->m_fencPic->m_lumaMarginY * curFrame->m_fencPic->m_stride + curFrame->m_fencPic->m_lumaMarginX;
26
+ primitives.planecopy_pp_shr(src, curFrame->m_fencPic->m_stride, curFrame->m_edgeBitPic,
27
+ curFrame->m_fencPic->m_stride, curFrame->m_fencPic->m_picWidth, curFrame->m_fencPic->m_picHeight, SHIFT_TO_BITPLANE);
28
+ }
29
+
30
if (param->rc.aqMode == X265_AQ_AUTO_VARIANCE || param->rc.aqMode == X265_AQ_AUTO_VARIANCE_BIASED || param->rc.aqMode == X265_AQ_EDGE)
31
{
32
double bit_depth_correction = 1.f / (1 << (2 * (X265_DEPTH - 8)));
33
x265_3.3.tar.gz/source/encoder/slicetype.h -> x265_3.4.tar.gz/source/encoder/slicetype.h
Changed
31
1
2
#define EDGE_INCLINATION 45
3
4
#if HIGH_BIT_DEPTH
5
-#define edgeThreshold 1023.0
6
+#define EDGE_THRESHOLD 1023.0
7
#else
8
-#define edgeThreshold 255.0
9
+#define EDGE_THRESHOLD 255.0
10
#endif
11
#define PI 3.14159265
12
13
14
protected:
15
16
uint32_t acEnergyCu(Frame* curFrame, uint32_t blockX, uint32_t blockY, int csp, uint32_t qgSize);
17
- uint32_t edgeDensityCu(Frame*curFrame, uint32_t &avgAngle, uint32_t blockX, uint32_t blockY, uint32_t qgSize);
18
+ uint32_t edgeDensityCu(Frame* curFrame, uint32_t &avgAngle, uint32_t blockX, uint32_t blockY, uint32_t qgSize);
19
uint32_t lumaSumCu(Frame* curFrame, uint32_t blockX, uint32_t blockY, uint32_t qgSize);
20
uint32_t weightCostLuma(Lowres& fenc, Lowres& ref, WeightParam& wp);
21
bool allocWeightedRef(Lowres& fenc);
22
23
CostEstimateGroup& operator=(const CostEstimateGroup&);
24
};
25
26
-bool computeEdge(pixel *edgePic, pixel *refPic, pixel *edgeTheta, intptr_t stride, int height, int width, bool bcalcTheta);
27
-
28
+bool computeEdge(pixel* edgePic, pixel* refPic, pixel* edgeTheta, intptr_t stride, int height, int width, bool bcalcTheta, pixel whitePixel = EDGE_THRESHOLD);
29
}
30
#endif // ifndef X265_SLICETYPE_H
31
x265_3.3.tar.gz/source/test/CMakeLists.txt -> x265_3.4.tar.gz/source/test/CMakeLists.txt
Changed
24
1
2
3
# add ARM assembly files
4
if(ARM OR CROSS_COMPILE_ARM)
5
- enable_language(ASM)
6
- set(NASM_SRC checkasm-arm.S)
7
- add_custom_command(
8
- OUTPUT checkasm-arm.obj
9
- COMMAND ${CMAKE_CXX_COMPILER}
10
- ARGS ${NASM_FLAGS} ${CMAKE_CURRENT_SOURCE_DIR}/checkasm-arm.S -o checkasm-arm.obj
11
- DEPENDS checkasm-arm.S)
12
+ if(NOT ARM64)
13
+ enable_language(ASM)
14
+ set(NASM_SRC checkasm-arm.S)
15
+ add_custom_command(
16
+ OUTPUT checkasm-arm.obj
17
+ COMMAND ${CMAKE_CXX_COMPILER}
18
+ ARGS ${NASM_FLAGS} ${CMAKE_CURRENT_SOURCE_DIR}/checkasm-arm.S -o checkasm-arm.obj
19
+ DEPENDS checkasm-arm.S)
20
+ endif()
21
endif(ARM OR CROSS_COMPILE_ARM)
22
23
# add PowerPC assembly files
24
x265_3.3.tar.gz/source/test/regression-tests.txt -> x265_3.4.tar.gz/source/test/regression-tests.txt
Changed
23
1
2
News-4k.y4m,--preset superfast --lookahead-slices 6 --aq-mode 0
3
News-4k.y4m,--preset superfast --slices 4 --aq-mode 0
4
News-4k.y4m,--preset medium --tune ssim --no-sao --qg-size 16
5
-News-4k.y4m,--preset veryslow --no-rskip
6
+News-4k.y4m,--preset veryslow --rskip 0
7
News-4k.y4m,--preset veryslow --pme --crf 40
8
OldTownCross_1920x1080_50_10bit_422.yuv,--preset superfast --weightp
9
OldTownCross_1920x1080_50_10bit_422.yuv,--preset medium --no-weightp
10
11
sintel_trailer_2k_1920x1080_24.yuv, --preset medium --hist-scenecut --hist-threshold 0.02 --frame-dup --dup-threshold 60 --hrd --bitrate 10000 --vbv-bufsize 15000 --vbv-maxrate 12000
12
sintel_trailer_2k_1920x1080_24.yuv, --preset medium --hist-scenecut --hist-threshold 0.02
13
sintel_trailer_2k_1920x1080_24.yuv, --preset ultrafast --hist-scenecut --hist-threshold 0.02
14
-
15
+crowd_run_1920x1080_50.yuv, --preset faster --ctu 32 --rskip 2 --rskip-edge-threshold 5
16
+crowd_run_1920x1080_50.yuv, --preset fast --ctu 64 --rskip 2 --rskip-edge-threshold 5 --aq-mode 4
17
+crowd_run_1920x1080_50.yuv, --preset slow --ctu 32 --rskip 2 --rskip-edge-threshold 5 --hist-scenecut --hist-threshold 0.1
18
+crowd_run_1920x1080_50.yuv, --preset slower --ctu 16 --rskip 2 --rskip-edge-threshold 5 --hist-scenecut --hist-threshold 0.1 --aq-mode 4
19
+
20
# Main12 intraCost overflow bug test
21
720p50_parkrun_ter.y4m,--preset medium
22
23
x265_3.3.tar.gz/source/test/save-load-tests.txt -> x265_3.4.tar.gz/source/test/save-load-tests.txt
Changed
6
1
2
RaceHorses_416x240_30.y4m, --preset slow --no-cutree --ctu 16 --analysis-save x265_analysis.dat --analysis-save-reuse-level 10 --scale-factor 2 --crf 22 --vbv-maxrate 1000 --vbv-bufsize 1000::RaceHorses_832x480_30.y4m, --preset slow --no-cutree --ctu 32 --analysis-load x265_analysis.dat --analysis-save x265_analysis_2.dat --analysis-load-reuse-level 10 --analysis-save-reuse-level 10 --scale-factor 2 --crf 16 --vbv-maxrate 4000 --vbv-bufsize 4000 --refine-intra 0 --refine-inter 1::RaceHorses_1664x960_30.y4m, --preset slow --no-cutree --ctu 64 --analysis-load x265_analysis_2.dat --analysis-load-reuse-level 10 --scale-factor 2 --crf 12 --vbv-maxrate 7000 --vbv-bufsize 7000 --refine-intra 2 --refine-inter 2
3
crowd_run_540p50.y4m, --preset veryslow --no-cutree --analysis-save x265_analysis_540.dat --analysis-save-reuse-level 10 --scale-factor 2 --bitrate 5000 --vbv-bufsize 15000 --vbv-maxrate 9000::crowd_run_1080p50.y4m, --preset veryslow --no-cutree --analysis-save x265_analysis_1080.dat --analysis-save-reuse-level 10 --scale-factor 2 --bitrate 10000 --vbv-bufsize 30000 --vbv-maxrate 17500::crowd_run_1080p50.y4m, --preset veryslow --no-cutree --analysis-save x265_analysis_1080.dat --analysis-load x265_analysis_540.dat --refine-intra 4 --dynamic-refine --analysis-load-reuse-level 10 --analysis-save-reuse-level 10 --scale-factor 2 --bitrate 10000 --vbv-bufsize 30000 --vbv-maxrate 17500::crowd_run_2160p50.y4m, --preset veryslow --no-cutree --analysis-save x265_analysis_2160.dat --analysis-load x265_analysis_1080.dat --refine-intra 3 --dynamic-refine --analysis-load-reuse-level 10 --analysis-save-reuse-level 10 --scale-factor 2 --bitrate 24000 --vbv-bufsize 84000 --vbv-maxrate 49000::crowd_run_2160p50.y4m, --preset veryslow --no-cutree --analysis-load x265_analysis_2160.dat --refine-intra 2 --dynamic-refine --analysis-load-reuse-level 10 --scale-factor 1 --bitrate 24000 --vbv-bufsize 84000 --vbv-maxrate 49000
4
crowd_run_540p50.y4m, --preset medium --no-cutree --analysis-save x265_analysis_540.dat --analysis-save-reuse-level 10 --scale-factor 2 --bitrate 5000 --vbv-bufsize 15000 --vbv-maxrate 9000::crowd_run_1080p50.y4m, --preset medium --no-cutree --analysis-save x265_analysis_1080.dat --analysis-save-reuse-level 10 --scale-factor 2 --bitrate 10000 --vbv-bufsize 30000 --vbv-maxrate 17500::crowd_run_1080p50.y4m, --preset medium --no-cutree --analysis-save x265_analysis_1080.dat --analysis-load x265_analysis_540.dat --refine-intra 4 --dynamic-refine --analysis-load-reuse-level 10 --analysis-save-reuse-level 10 --scale-factor 2 --bitrate 10000 --vbv-bufsize 30000 --vbv-maxrate 17500::crowd_run_2160p50.y4m, --preset medium --no-cutree --analysis-save x265_analysis_2160.dat --analysis-load x265_analysis_1080.dat --refine-intra 3 --dynamic-refine --analysis-load-reuse-level 10 --analysis-save-reuse-level 10 --scale-factor 2 --bitrate 24000 --vbv-bufsize 84000 --vbv-maxrate 49000::crowd_run_2160p50.y4m, --preset medium --no-cutree --analysis-load x265_analysis_2160.dat --refine-intra 2 --dynamic-refine --analysis-load-reuse-level 10 --scale-factor 1 --bitrate 24000 --vbv-bufsize 84000 --vbv-maxrate 49000
5
+News-4k.y4m, --preset medium --analysis-save x265_analysis_fdup.dat --frame-dup --hrd --bitrate 10000 --vbv-bufsize 15000 --vbv-maxrate 12000::News-4k.y4m, --analysis-load x265_analysis_fdup.dat --frame-dup --hrd --bitrate 10000 --vbv-bufsize 15000 --vbv-maxrate 12000
6
x265_3.3.tar.gz/source/test/testbench.cpp -> x265_3.4.tar.gz/source/test/testbench.cpp
Changed
38
1
2
* Mandar Gurav <mandar@multicorewareinc.com>
3
* Mahesh Pittala <mahesh@multicorewareinc.com>
4
* Min Chen <chenm003@163.com>
5
+ * Yimeng Su <yimeng.su@huawei.com>
6
*
7
* This program is free software; you can redistribute it and/or modify
8
* it under the terms of the GNU General Public License as published by
9
10
EncoderPrimitives asmprim;
11
memset(&asmprim, 0, sizeof(asmprim));
12
setupAssemblyPrimitives(asmprim, test_arch[i].flag);
13
+
14
+#if X265_ARCH_ARM64
15
+ /* Temporary workaround because luma_vsp assembly primitive has not been completed
16
+ * but interp_8tap_hv_pp_cpu uses mixed C primitive and assembly primitive.
17
+ * Otherwise, segment fault occurs. */
18
+ setupAliasCPrimitives(cprim, asmprim, test_arch[i].flag);
19
+#endif
20
+
21
setupAliasPrimitives(asmprim);
22
memcpy(&primitives, &asmprim, sizeof(EncoderPrimitives));
23
for (size_t h = 0; h < sizeof(harness) / sizeof(TestHarness*); h++)
24
25
#endif
26
setupAssemblyPrimitives(optprim, cpuid);
27
28
+#if X265_ARCH_ARM64
29
+ /* Temporary workaround because luma_vsp assembly primitive has not been completed
30
+ * but interp_8tap_hv_pp_cpu uses mixed C primitive and assembly primitive.
31
+ * Otherwise, segment fault occurs. */
32
+ setupAliasCPrimitives(cprim, optprim, cpuid);
33
+#endif
34
+
35
/* Note that we do not setup aliases for performance tests, that would be
36
* redundant. The testbench only verifies they are correctly aliased */
37
38
x265_3.3.tar.gz/source/test/testharness.h -> x265_3.4.tar.gz/source/test/testharness.h
Changed
26
1
2
*
3
* Authors: Steve Borho <steve@borho.org>
4
* Min Chen <chenm003@163.com>
5
+ * Yimeng Su <yimeng.su@huawei.com>
6
*
7
* This program is free software; you can redistribute it and/or modify
8
* it under the terms of the GNU General Public License as published by
9
10
#if X265_ARCH_X86
11
asm volatile("rdtsc" : "=a" (a) ::"edx");
12
#elif X265_ARCH_ARM
13
+#if X265_ARCH_ARM64
14
+ asm volatile("mrs %0, cntvct_el0" : "=r"(a));
15
+#else
16
// TO-DO: verify following inline asm to get cpu Timestamp Counter for ARM arch
17
// asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r"(a));
18
19
// TO-DO: replace clock() function with appropriate ARM cpu instructions
20
a = clock();
21
#endif
22
+#endif
23
return a;
24
}
25
#endif // ifdef _MSC_VER
26
x265_3.3.tar.gz/source/x265.cpp -> x265_3.4.tar.gz/source/x265.cpp
Changed
201
1
2
3
#include "x265.h"
4
#include "x265cli.h"
5
-
6
-#include "input/input.h"
7
-#include "output/output.h"
8
-#include "output/reconplay.h"
9
-#include "svt.h"
10
+#include "abrEncApp.h"
11
12
#if HAVE_VLD
13
/* Visual Leak Detector */
14
15
#include <fstream>
16
#include <queue>
17
18
-#define CONSOLE_TITLE_SIZE 200
19
-#ifdef _WIN32
20
-#include <windows.h>
21
-#define SetThreadExecutionState(es)
22
-static char orgConsoleTitle[CONSOLE_TITLE_SIZE] = "";
23
-#else
24
-#define GetConsoleTitle(t, n)
25
-#define SetConsoleTitle(t)
26
-#define SetThreadExecutionState(es)
27
-#endif
28
-
29
using namespace X265_NS;
30
31
-/* Ctrl-C handler */
32
-static volatile sig_atomic_t b_ctrl_c /* = 0 */;
33
-static void sigint_handler(int)
34
-{
35
- b_ctrl_c = 1;
36
-}
37
-#define START_CODE 0x00000001
38
-#define START_CODE_BYTES 4
39
-
40
-struct CLIOptions
41
-{
42
- InputFile* input;
43
- ReconFile* recon;
44
- OutputFile* output;
45
- FILE* qpfile;
46
- FILE* zoneFile;
47
- FILE* dolbyVisionRpu; /* File containing Dolby Vision BL RPU metadata */
48
- const char* reconPlayCmd;
49
- const x265_api* api;
50
- x265_param* param;
51
- x265_vmaf_data* vmafData;
52
- bool bProgress;
53
- bool bForceY4m;
54
- bool bDither;
55
- uint32_t seek; // number of frames to skip from the beginning
56
- uint32_t framesToBeEncoded; // number of frames to encode
57
- uint64_t totalbytes;
58
- int64_t startTime;
59
- int64_t prevUpdateTime;
60
-
61
- /* in microseconds */
62
- static const int UPDATE_INTERVAL = 250000;
63
-
64
- CLIOptions()
65
- {
66
- input = NULL;
67
- recon = NULL;
68
- output = NULL;
69
- qpfile = NULL;
70
- zoneFile = NULL;
71
- dolbyVisionRpu = NULL;
72
- reconPlayCmd = NULL;
73
- api = NULL;
74
- param = NULL;
75
- vmafData = NULL;
76
- framesToBeEncoded = seek = 0;
77
- totalbytes = 0;
78
- bProgress = true;
79
- bForceY4m = false;
80
- startTime = x265_mdate();
81
- prevUpdateTime = 0;
82
- bDither = false;
83
- }
84
+#define X265_HEAD_ENTRIES 3
85
86
- void destroy();
87
- void printStatus(uint32_t frameNum);
88
- bool parse(int argc, char **argv);
89
- bool parseZoneParam(int argc, char **argv, x265_param* globalParam, int zonefileCount);
90
- bool parseQPFile(x265_picture &pic_org);
91
- bool parseZoneFile();
92
-};
93
-
94
-void CLIOptions::destroy()
95
-{
96
- if (input)
97
- input->release();
98
- input = NULL;
99
- if (recon)
100
- recon->release();
101
- recon = NULL;
102
- if (qpfile)
103
- fclose(qpfile);
104
- qpfile = NULL;
105
- if (zoneFile)
106
- fclose(zoneFile);
107
- zoneFile = NULL;
108
- if (dolbyVisionRpu)
109
- fclose(dolbyVisionRpu);
110
- dolbyVisionRpu = NULL;
111
- if (output)
112
- output->release();
113
- output = NULL;
114
-}
115
-
116
-void CLIOptions::printStatus(uint32_t frameNum)
117
-{
118
- char buf[200];
119
- int64_t time = x265_mdate();
120
-
121
- if (!bProgress || !frameNum || (prevUpdateTime && time - prevUpdateTime < UPDATE_INTERVAL))
122
- return;
123
-
124
- int64_t elapsed = time - startTime;
125
- double fps = elapsed > 0 ? frameNum * 1000000. / elapsed : 0;
126
- float bitrate = 0.008f * totalbytes * (param->fpsNum / param->fpsDenom) / ((float)frameNum);
127
- if (framesToBeEncoded)
128
- {
129
- int eta = (int)(elapsed * (framesToBeEncoded - frameNum) / ((int64_t)frameNum * 1000000));
130
- sprintf(buf, "x265 [%.1f%%] %d/%d frames, %.2f fps, %.2f kb/s, eta %d:%02d:%02d",
131
- 100. * frameNum / (param->chunkEnd ? param->chunkEnd : param->totalFrames), frameNum, (param->chunkEnd ? param->chunkEnd : param->totalFrames), fps, bitrate,
132
- eta / 3600, (eta / 60) % 60, eta % 60);
133
- }
134
- else
135
- sprintf(buf, "x265 %d frames: %.2f fps, %.2f kb/s", frameNum, fps, bitrate);
136
-
137
- fprintf(stderr, "%s \r", buf + 5);
138
- SetConsoleTitle(buf);
139
- fflush(stderr); // needed in windows
140
- prevUpdateTime = time;
141
-}
142
+#ifdef _WIN32
143
+#define strdup _strdup
144
+#endif
145
146
-bool CLIOptions::parseZoneParam(int argc, char **argv, x265_param* globalParam, int zonefileCount)
147
+#ifdef _WIN32
148
+/* Copy of x264 code, which allows for Unicode characters in the command line.
149
+ * Retrieve command line arguments as UTF-8. */
150
+static int get_argv_utf8(int *argc_ptr, char ***argv_ptr)
151
{
152
- bool bError = false;
153
- int bShowHelp = false;
154
- int outputBitDepth = 0;
155
- const char *profile = NULL;
156
-
157
- /* Presets are applied before all other options. */
158
- for (optind = 0;;)
159
- {
160
- int c = getopt_long(argc, argv, short_options, long_options, NULL);
161
- if (c == -1)
162
- break;
163
- else if (c == 'D')
164
- outputBitDepth = atoi(optarg);
165
- else if (c == 'P')
166
- profile = optarg;
167
- else if (c == '?')
168
- bShowHelp = true;
169
- }
170
-
171
- if (!outputBitDepth && profile)
172
- {
173
- /* try to derive the output bit depth from the requested profile */
174
- if (strstr(profile, "10"))
175
- outputBitDepth = 10;
176
- else if (strstr(profile, "12"))
177
- outputBitDepth = 12;
178
- else
179
- outputBitDepth = 8;
180
- }
181
-
182
- api = x265_api_get(outputBitDepth);
183
- if (!api)
184
+ int ret = 0;
185
+ wchar_t **argv_utf16 = CommandLineToArgvW(GetCommandLineW(), argc_ptr);
186
+ if (argv_utf16)
187
{
188
- x265_log(NULL, X265_LOG_WARNING, "falling back to default bit-depth\n");
189
- api = x265_api_get(0);
190
- }
191
+ int argc = *argc_ptr;
192
+ int offset = (argc + 1) * sizeof(char*);
193
+ int size = offset;
194
195
- if (bShowHelp)
196
- {
197
- printVersion(globalParam, api);
198
- showHelp(globalParam);
199
- }
200
+ for (int i = 0; i < argc; i++)
201
x265_3.3.tar.gz/source/x265.h -> x265_3.4.tar.gz/source/x265.h
Changed
60
1
2
int ctuDistortionRefine;
3
int rightOffset;
4
int bottomOffset;
5
+ int frameDuplication;
6
}x265_analysis_validate;
7
8
/* Stores intra analysis data for a single frame. This struct needs better packing */
9
10
double totalFrameTime;
11
double vmafFrameScore;
12
double bufferFillFinal;
13
+ double unclippedBufferFillFinal;
14
} x265_frame_stats;
15
16
typedef struct x265_ctu_info_t
17
18
* skip blocks. Default is disabled */
19
int bEnableEarlySkip;
20
21
- /* Enable early CU size decisions to avoid recursing to higher depths.
22
+ /* Enable early CU size decisions to avoid recursing to higher depths.
23
* Default is enabled */
24
- int bEnableRecursionSkip;
25
+ int recursionSkipMode;
26
27
/* Use a faster search method to find the best intra mode. Default is 0 */
28
int bEnableFastIntra;
29
30
double edgeTransitionThreshold;
31
32
/* Enables histogram based scenecut detection algorithm to detect scenecuts. Default disabled */
33
- int bHistBasedSceneCut;
34
+ int bHistBasedSceneCut;
35
36
/* Enable HME search ranges for L0, L1 and L2 respectively. */
37
int hmeRange[3];
38
39
* analysis information stored in analysis-save. Higher the refine level higher
40
* the information stored. Default is 5 */
41
int analysisSaveReuseLevel;
42
-
43
+
44
/* A value between 1 and 10 (both inclusive) determines the level of
45
* analysis information reused in analysis-load. Higher the refine level higher
46
* the information reused. Default is 5 */
47
48
* info is available from the corresponding analysis-save. */
49
50
int confWinBottomOffset;
51
+
52
+ /* Edge variance threshold for quad tree establishment. */
53
+ float edgeVarThreshold;
54
+
55
+ /* Maxrate that could be signaled to the decoder. Default 0. API only. */
56
+ int decoderVbvMaxRate;
57
} x265_param;
58
59
/* x265_param_alloc:
60
x265_3.4.tar.gz/source/x265cli.cpp
Added
201
1
2
+/*****************************************************************************
3
+ * Copyright (C) 2013-2020 MulticoreWare, Inc
4
+ *
5
+ * Authors: Steve Borho <steve@borho.org>
6
+ * Min Chen <chenm003@163.com>
7
+ *
8
+ * This program is free software; you can redistribute it and/or modify
9
+ * it under the terms of the GNU General Public License as published by
10
+ * the Free Software Foundation; either version 2 of the License, or
11
+ * (at your option) any later version.
12
+ *
13
+ * This program is distributed in the hope that it will be useful,
14
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
15
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16
+ * GNU General Public License for more details.
17
+ *
18
+ * You should have received a copy of the GNU General Public License
19
+ * along with this program; if not, write to the Free Software
20
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
21
+ *
22
+ * This program is also available under a commercial proprietary license.
23
+ * For more information, contact us at license @ x265.com.
24
+ *****************************************************************************/
25
+#if _MSC_VER
26
+#pragma warning(disable: 4127) // conditional expression is constant, yes I know
27
+#endif
28
+
29
+#include "x265cli.h"
30
+#include "svt.h"
31
+
32
+#define START_CODE 0x00000001
33
+#define START_CODE_BYTES 4
34
+
35
+#ifdef __cplusplus
36
+namespace X265_NS {
37
+#endif
38
+
39
+ static void printVersion(x265_param *param, const x265_api* api)
40
+ {
41
+ x265_log(param, X265_LOG_INFO, "HEVC encoder version %s\n", api->version_str);
42
+ x265_log(param, X265_LOG_INFO, "build info %s\n", api->build_info_str);
43
+ }
44
+
45
+ static void showHelp(x265_param *param)
46
+ {
47
+ int level = param->logLevel;
48
+
49
+#define OPT(value) (value ? "enabled" : "disabled")
50
+#define H0 printf
51
+#define H1 if (level >= X265_LOG_DEBUG) printf
52
+
53
+ H0("\nSyntax: x265 [options] infile [-o] outfile\n");
54
+ H0(" infile can be YUV or Y4M\n");
55
+ H0(" outfile is raw HEVC bitstream\n");
56
+ H0("\nExecutable Options:\n");
57
+ H0("-h/--help Show this help text and exit\n");
58
+ H0(" --fullhelp Show all options and exit\n");
59
+ H0("-V/--version Show version info and exit\n");
60
+ H0("\nOutput Options:\n");
61
+ H0("-o/--output <filename> Bitstream output file name\n");
62
+ H0("-D/--output-depth 8|10|12 Output bit depth (also internal bit depth). Default %d\n", param->internalBitDepth);
63
+ H0(" --log-level <string> Logging level: none error warning info debug full. Default %s\n", X265_NS::logLevelNames[param->logLevel + 1]);
64
+ H0(" --no-progress Disable CLI progress reports\n");
65
+ H0(" --csv <filename> Comma separated log file, if csv-log-level > 0 frame level statistics, else one line per run\n");
66
+ H0(" --csv-log-level <integer> Level of csv logging, if csv-log-level > 0 frame level statistics, else one line per run: 0-2\n");
67
+ H0("\nInput Options:\n");
68
+ H0(" --input <filename> Raw YUV or Y4M input file name. `-` for stdin\n");
69
+ H1(" --y4m Force parsing of input stream as YUV4MPEG2 regardless of file extension\n");
70
+ H0(" --fps <float|rational> Source frame rate (float or num/denom), auto-detected if Y4M\n");
71
+ H0(" --input-res WxH Source picture size [w x h], auto-detected if Y4M\n");
72
+ H1(" --input-depth <integer> Bit-depth of input file. Default 8\n");
73
+ H1(" --input-csp <string> Chroma subsampling, auto-detected if Y4M\n");
74
+ H1(" 0 - i400 (4:0:0 monochrome)\n");
75
+ H1(" 1 - i420 (4:2:0 default)\n");
76
+ H1(" 2 - i422 (4:2:2)\n");
77
+ H1(" 3 - i444 (4:4:4)\n");
78
+#if ENABLE_HDR10_PLUS
79
+ H0(" --dhdr10-info <filename> JSON file containing the Creative Intent Metadata to be encoded as Dynamic Tone Mapping\n");
80
+ H0(" --[no-]dhdr10-opt Insert tone mapping SEI only for IDR frames and when the tone mapping information changes. Default disabled\n");
81
+#endif
82
+ H0(" --dolby-vision-profile <float|integer> Specifies Dolby Vision profile ID. Currently only profile 5, profile 8.1 and profile 8.2 enabled. Specified as '5' or '50'. Default 0 (disabled).\n");
83
+ H0(" --dolby-vision-rpu <filename> File containing Dolby Vision RPU metadata.\n"
84
+ " If given, x265's Dolby Vision metadata parser will fill the RPU field of input pictures with the metadata read from the file. Default NULL(disabled).\n");
85
+ H0(" --nalu-file <filename> Text file containing SEI messages in the following format : <POC><space><PREFIX><space><NAL UNIT TYPE>/<SEI TYPE><space><SEI Payload>\n");
86
+ H0("-f/--frames <integer> Maximum number of frames to encode. Default all\n");
87
+ H0(" --seek <integer> First frame to encode\n");
88
+ H1(" --[no-]interlace <bff|tff> Indicate input pictures are interlace fields in temporal order. Default progressive\n");
89
+ H0(" --[no-]field Enable or disable field coding. Default %s\n", OPT(param->bField));
90
+ H1(" --dither Enable dither if downscaling to 8 bit pixels. Default disabled\n");
91
+ H0(" --[no-]copy-pic Copy buffers of input picture in frame. Default %s\n", OPT(param->bCopyPicToFrame));
92
+ H0("\nQuality reporting metrics:\n");
93
+ H0(" --[no-]ssim Enable reporting SSIM metric scores. Default %s\n", OPT(param->bEnableSsim));
94
+ H0(" --[no-]psnr Enable reporting PSNR metric scores. Default %s\n", OPT(param->bEnablePsnr));
95
+ H0("\nProfile, Level, Tier:\n");
96
+ H0("-P/--profile <string> Enforce an encode profile: main, main10, mainstillpicture\n");
97
+ H0(" --level-idc <integer|float> Force a minimum required decoder level (as '5.0' or '50')\n");
98
+ H0(" --[no-]high-tier If a decoder level is specified, this modifier selects High tier of that level\n");
99
+ H0(" --uhd-bd Enable UHD Bluray compatibility support\n");
100
+ H0(" --[no-]allow-non-conformance Allow the encoder to generate profile NONE bitstreams. Default %s\n", OPT(param->bAllowNonConformance));
101
+ H0("\nThreading, performance:\n");
102
+ H0(" --pools <integer,...> Comma separated thread count per thread pool (pool per NUMA node)\n");
103
+ H0(" '-' implies no threads on node, '+' implies one thread per core on node\n");
104
+ H0("-F/--frame-threads <integer> Number of concurrently encoded frames. 0: auto-determined by core count\n");
105
+ H0(" --[no-]wpp Enable Wavefront Parallel Processing. Default %s\n", OPT(param->bEnableWavefront));
106
+ H0(" --[no-]slices <integer> Enable Multiple Slices feature. Default %d\n", param->maxSlices);
107
+ H0(" --[no-]pmode Parallel mode analysis. Default %s\n", OPT(param->bDistributeModeAnalysis));
108
+ H0(" --[no-]pme Parallel motion estimation. Default %s\n", OPT(param->bDistributeMotionEstimation));
109
+ H0(" --[no-]asm <bool|int|string> Override CPU detection. Default: auto\n");
110
+ H0("\nPresets:\n");
111
+ H0("-p/--preset <string> Trade off performance for compression efficiency. Default medium\n");
112
+ H0(" ultrafast, superfast, veryfast, faster, fast, medium, slow, slower, veryslow, or placebo\n");
113
+ H0("-t/--tune <string> Tune the settings for a particular type of source or situation:\n");
114
+ H0(" psnr, ssim, grain, zerolatency, fastdecode\n");
115
+ H0("\nQuad-Tree size and depth:\n");
116
+ H0("-s/--ctu <64|32|16> Maximum CU size (WxH). Default %d\n", param->maxCUSize);
117
+ H0(" --min-cu-size <64|32|16|8> Minimum CU size (WxH). Default %d\n", param->minCUSize);
118
+ H0(" --max-tu-size <32|16|8|4> Maximum TU size (WxH). Default %d\n", param->maxTUSize);
119
+ H0(" --tu-intra-depth <integer> Max TU recursive depth for intra CUs. Default %d\n", param->tuQTMaxIntraDepth);
120
+ H0(" --tu-inter-depth <integer> Max TU recursive depth for inter CUs. Default %d\n", param->tuQTMaxInterDepth);
121
+ H0(" --limit-tu <0..4> Enable early exit from TU recursion for inter coded blocks. Default %d\n", param->limitTU);
122
+ H0("\nAnalysis:\n");
123
+ H0(" --rd <1..6> Level of RDO in mode decision 1:least....6:full RDO. Default %d\n", param->rdLevel);
124
+ H0(" --[no-]psy-rd <0..5.0> Strength of psycho-visual rate distortion optimization, 0 to disable. Default %.1f\n", param->psyRd);
125
+ H0(" --[no-]rdoq-level <0|1|2> Level of RDO in quantization 0:none, 1:levels, 2:levels & coding groups. Default %d\n", param->rdoqLevel);
126
+ H0(" --[no-]psy-rdoq <0..50.0> Strength of psycho-visual optimization in RDO quantization, 0 to disable. Default %.1f\n", param->psyRdoq);
127
+ H0(" --dynamic-rd <0..4.0> Strength of dynamic RD, 0 to disable. Default %.2f\n", param->dynamicRd);
128
+ H0(" --[no-]ssim-rd Enable ssim rate distortion optimization, 0 to disable. Default %s\n", OPT(param->bSsimRd));
129
+ H0(" --[no-]rd-refine Enable QP based RD refinement for rd levels 5 and 6. Default %s\n", OPT(param->bEnableRdRefine));
130
+ H0(" --[no-]early-skip Enable early SKIP detection. Default %s\n", OPT(param->bEnableEarlySkip));
131
+ H0(" --rskip <mode> Set mode for early exit from recursion. Mode 1: exit using rdcost & CU homogenity. Mode 2: exit using CU edge density.\n"
132
+ " Mode 0: disabled. Default %d\n", param->recursionSkipMode);
133
+ H1(" --rskip-edge-threshold Threshold in terms of percentage (integer of range [0,100]) for minimum edge density in CUs used to prun the recursion depth. Applicable only for rskip mode 2. Value is preset dependent. Default: %.f\n", param->edgeVarThreshold*100.0f);
134
+ H1(" --[no-]tskip-fast Enable fast intra transform skipping. Default %s\n", OPT(param->bEnableTSkipFast));
135
+ H1(" --[no-]splitrd-skip Enable skipping split RD analysis when sum of split CU rdCost larger than one split CU rdCost for Intra CU. Default %s\n", OPT(param->bEnableSplitRdSkip));
136
+ H1(" --nr-intra <integer> An integer value in range of 0 to 2000, which denotes strength of noise reduction in intra CUs. Default 0\n");
137
+ H1(" --nr-inter <integer> An integer value in range of 0 to 2000, which denotes strength of noise reduction in inter CUs. Default 0\n");
138
+ H0(" --ctu-info <integer> Enable receiving ctu information asynchronously and determine reaction to the CTU information (0, 1, 2, 4, 6) Default 0\n"
139
+ " - 1: force the partitions if CTU information is present\n"
140
+ " - 2: functionality of (1) and reduce qp if CTU information has changed\n"
141
+ " - 4: functionality of (1) and force Inter modes when CTU Information has changed, merge/skip otherwise\n"
142
+ " Enable this option only when planning to invoke the API function x265_encoder_ctu_info to copy ctu-info asynchronously\n");
143
+ H0("\nCoding tools:\n");
144
+ H0("-w/--[no-]weightp Enable weighted prediction in P slices. Default %s\n", OPT(param->bEnableWeightedPred));
145
+ H0(" --[no-]weightb Enable weighted prediction in B slices. Default %s\n", OPT(param->bEnableWeightedBiPred));
146
+ H0(" --[no-]cu-lossless Consider lossless mode in CU RDO decisions. Default %s\n", OPT(param->bCULossless));
147
+ H0(" --[no-]signhide Hide sign bit of one coeff per TU (rdo). Default %s\n", OPT(param->bEnableSignHiding));
148
+ H1(" --[no-]tskip Enable intra 4x4 transform skipping. Default %s\n", OPT(param->bEnableTransformSkip));
149
+ H0("\nTemporal / motion search options:\n");
150
+ H0(" --max-merge <1..5> Maximum number of merge candidates. Default %d\n", param->maxNumMergeCand);
151
+ H0(" --ref <integer> max number of L0 references to be allowed (1 .. 16) Default %d\n", param->maxNumReferences);
152
+ H0(" --limit-refs <0|1|2|3> Limit references per depth (1) or CU (2) or both (3). Default %d\n", param->limitReferences);
153
+ H0(" --me <string> Motion search method dia hex umh star full. Default %d\n", param->searchMethod);
154
+ H0("-m/--subme <integer> Amount of subpel refinement to perform (0:least .. 7:most). Default %d \n", param->subpelRefine);
155
+ H0(" --merange <integer> Motion search range. Default %d\n", param->searchRange);
156
+ H0(" --[no-]rect Enable rectangular motion partitions Nx2N and 2NxN. Default %s\n", OPT(param->bEnableRectInter));
157
+ H0(" --[no-]amp Enable asymmetric motion partitions, requires --rect. Default %s\n", OPT(param->bEnableAMP));
158
+ H0(" --[no-]limit-modes Limit rectangular and asymmetric motion predictions. Default %d\n", param->limitModes);
159
+ H1(" --[no-]temporal-mvp Enable temporal MV predictors. Default %s\n", OPT(param->bEnableTemporalMvp));
160
+ H1(" --[no-]hme Enable Hierarchical Motion Estimation. Default %s\n", OPT(param->bEnableHME));
161
+ H1(" --hme-search <string> Motion search-method for HME L0,L1 and L2. Default(L0,L1,L2) is %d,%d,%d\n", param->hmeSearchMethod[0], param->hmeSearchMethod[1], param->hmeSearchMethod[2]);
162
+ H1(" --hme-range <int>,<int>,<int> Motion search-range for HME L0,L1 and L2. Default(L0,L1,L2) is %d,%d,%d\n", param->hmeRange[0], param->hmeRange[1], param->hmeRange[2]);
163
+ H0("\nSpatial / intra options:\n");
164
+ H0(" --[no-]strong-intra-smoothing Enable strong intra smoothing for 32x32 blocks. Default %s\n", OPT(param->bEnableStrongIntraSmoothing));
165
+ H0(" --[no-]constrained-intra Constrained intra prediction (use only intra coded reference pixels) Default %s\n", OPT(param->bEnableConstrainedIntra));
166
+ H0(" --[no-]b-intra Enable intra in B frames in veryslow presets. Default %s\n", OPT(param->bIntraInBFrames));
167
+ H0(" --[no-]fast-intra Enable faster search method for angular intra predictions. Default %s\n", OPT(param->bEnableFastIntra));
168
+ H0(" --rdpenalty <0..2> penalty for 32x32 intra TU in non-I slices. 0:disabled 1:RD-penalty 2:maximum. Default %d\n", param->rdPenalty);
169
+ H0("\nSlice decision options:\n");
170
+ H0(" --[no-]open-gop Enable open-GOP, allows I slices to be non-IDR. Default %s\n", OPT(param->bOpenGOP));
171
+ H0("-I/--keyint <integer> Max IDR period in frames. -1 for infinite-gop. Default %d\n", param->keyframeMax);
172
+ H0("-i/--min-keyint <integer> Scenecuts closer together than this are coded as I, not IDR. Default: auto\n");
173
+ H0(" --gop-lookahead <integer> Extends gop boundary if a scenecut is found within this from keyint boundary. Default 0\n");
174
+ H0(" --no-scenecut Disable adaptive I-frame decision\n");
175
+ H0(" --scenecut <integer> How aggressively to insert extra I-frames. Default %d\n", param->scenecutThreshold);
176
+ H1(" --scenecut-bias <0..100.0> Bias for scenecut detection. Default %.2f\n", param->scenecutBias);
177
+ H0(" --hist-scenecut Enables histogram based scene-cut detection using histogram based algorithm.\n");
178
+ H0(" --no-hist-scenecut Disables histogram based scene-cut detection using histogram based algorithm.\n");
179
+ H1(" --hist-threshold <0.0..2.0> Luma Edge histogram's Normalized SAD threshold for histogram based scenecut detection Default %.2f\n", param->edgeTransitionThreshold);
180
+ H0(" --[no-]fades Enable detection and handling of fade-in regions. Default %s\n", OPT(param->bEnableFades));
181
+ H1(" --[no-]scenecut-aware-qp Enable increasing QP for frames inside the scenecut window after scenecut. Default %s\n", OPT(param->bEnableSceneCutAwareQp));
182
+ H1(" --scenecut-window <0..1000> QP incremental duration(in milliseconds) when scenecut-aware-qp is enabled. Default %d\n", param->scenecutWindow);
183
+ H1(" --max-qp-delta <0..10> QP offset to increment with base QP for inter-frames. Default %d\n", param->maxQpDelta);
184
+ H0(" --radl <integer> Number of RADL pictures allowed in front of IDR. Default %d\n", param->radl);
185
+ H0(" --intra-refresh Use Periodic Intra Refresh instead of IDR frames\n");
186
+ H0(" --rc-lookahead <integer> Number of frames for frame-type lookahead (determines encoder latency) Default %d\n", param->lookaheadDepth);
187
+ H1(" --lookahead-slices <0..16> Number of slices to use per lookahead cost estimate. Default %d\n", param->lookaheadSlices);
188
+ H0(" --lookahead-threads <integer> Number of threads to be dedicated to perform lookahead only. Default %d\n", param->lookaheadThreads);
189
+ H0("-b/--bframes <0..16> Maximum number of consecutive b-frames. Default %d\n", param->bframes);
190
+ H1(" --bframe-bias <integer> Bias towards B frame decisions. Default %d\n", param->bFrameBias);
191
+ H0(" --b-adapt <0..2> 0 - none, 1 - fast, 2 - full (trellis) adaptive B frame scheduling. Default %d\n", param->bFrameAdaptive);
192
+ H0(" --[no-]b-pyramid Use B-frames as references. Default %s\n", OPT(param->bBPyramid));
193
+ H1(" --qpfile <string> Force frametypes and QPs for some or all frames\n");
194
+ H1(" Format of each line: framenumber frametype QP\n");
195
+ H1(" QP is optional (none lets x265 choose). Frametypes: I,i,K,P,B,b.\n");
196
+ H1(" QPs are restricted by qpmin/qpmax.\n");
197
+ H1(" --force-flush <integer> Force the encoder to flush frames. Default %d\n", param->forceFlush);
198
+ H1(" 0 - flush the encoder only when all the input pictures are over.\n");
199
+ H1(" 1 - flush all the frames even when the input is not over. Slicetype decision may change with this option.\n");
200
+ H1(" 2 - flush the slicetype decided frames only.\n");
201
x265_3.3.tar.gz/source/x265cli.h -> x265_3.4.tar.gz/source/x265cli.h
Changed
201
1
2
3
#include "common.h"
4
#include "param.h"
5
+#include "input/input.h"
6
+#include "output/output.h"
7
+#include "output/reconplay.h"
8
9
#include <getopt.h>
10
11
+#define CONSOLE_TITLE_SIZE 200
12
+#ifdef _WIN32
13
+#include <windows.h>
14
+#define SetThreadExecutionState(es)
15
+static char orgConsoleTitle[CONSOLE_TITLE_SIZE] = "";
16
+#else
17
+#define GetConsoleTitle(t, n)
18
+#define SetConsoleTitle(t)
19
+#define SetThreadExecutionState(es)
20
+#endif
21
+
22
#ifdef __cplusplus
23
namespace X265_NS {
24
#endif
25
26
{ "amp", no_argument, NULL, 0 },
27
{ "no-early-skip", no_argument, NULL, 0 },
28
{ "early-skip", no_argument, NULL, 0 },
29
- { "no-rskip", no_argument, NULL, 0 },
30
- { "rskip", no_argument, NULL, 0 },
31
+ { "rskip", required_argument, NULL, 0 },
32
+ { "rskip-edge-threshold", required_argument, NULL, 0 },
33
{ "no-fast-cbf", no_argument, NULL, 0 },
34
{ "fast-cbf", no_argument, NULL, 0 },
35
{ "no-tskip", no_argument, NULL, 0 },
36
37
{ "cll", no_argument, NULL, 0 },
38
{ "no-cll", no_argument, NULL, 0 },
39
{ "hme-range", required_argument, NULL, 0 },
40
+ { "abr-ladder", required_argument, NULL, 0 },
41
{ 0, 0, 0, 0 },
42
{ 0, 0, 0, 0 },
43
{ 0, 0, 0, 0 },
44
45
{ 0, 0, 0, 0 }
46
};
47
48
-static void printVersion(x265_param *param, const x265_api* api)
49
-{
50
- x265_log(param, X265_LOG_INFO, "HEVC encoder version %s\n", api->version_str);
51
- x265_log(param, X265_LOG_INFO, "build info %s\n", api->build_info_str);
52
-}
53
+ struct CLIOptions
54
+ {
55
+ InputFile* input;
56
+ ReconFile* recon;
57
+ OutputFile* output;
58
+ FILE* qpfile;
59
+ FILE* zoneFile;
60
+ FILE* dolbyVisionRpu; /* File containing Dolby Vision BL RPU metadata */
61
+ const char* reconPlayCmd;
62
+ const x265_api* api;
63
+ x265_param* param;
64
+ x265_vmaf_data* vmafData;
65
+ bool bProgress;
66
+ bool bForceY4m;
67
+ bool bDither;
68
+ uint32_t seek; // number of frames to skip from the beginning
69
+ uint32_t framesToBeEncoded; // number of frames to encode
70
+ uint64_t totalbytes;
71
+ int64_t startTime;
72
+ int64_t prevUpdateTime;
73
74
-static void showHelp(x265_param *param)
75
-{
76
- int level = param->logLevel;
77
+ int argCnt;
78
+ char** argString;
79
80
-#define OPT(value) (value ? "enabled" : "disabled")
81
-#define H0 printf
82
-#define H1 if (level >= X265_LOG_DEBUG) printf
83
+ /* ABR ladder settings */
84
+ bool isAbrLadderConfig;
85
+ bool enableScaler;
86
+ char* encName;
87
+ char* reuseName;
88
+ uint32_t encId;
89
+ int refId;
90
+ uint32_t loadLevel;
91
+ uint32_t saveLevel;
92
+ uint32_t numRefs;
93
94
- H0("\nSyntax: x265 [options] infile [-o] outfile\n");
95
- H0(" infile can be YUV or Y4M\n");
96
- H0(" outfile is raw HEVC bitstream\n");
97
- H0("\nExecutable Options:\n");
98
- H0("-h/--help Show this help text and exit\n");
99
- H0(" --fullhelp Show all options and exit\n");
100
- H0("-V/--version Show version info and exit\n");
101
- H0("\nOutput Options:\n");
102
- H0("-o/--output <filename> Bitstream output file name\n");
103
- H0("-D/--output-depth 8|10|12 Output bit depth (also internal bit depth). Default %d\n", param->internalBitDepth);
104
- H0(" --log-level <string> Logging level: none error warning info debug full. Default %s\n", X265_NS::logLevelNames[param->logLevel + 1]);
105
- H0(" --no-progress Disable CLI progress reports\n");
106
- H0(" --csv <filename> Comma separated log file, if csv-log-level > 0 frame level statistics, else one line per run\n");
107
- H0(" --csv-log-level <integer> Level of csv logging, if csv-log-level > 0 frame level statistics, else one line per run: 0-2\n");
108
- H0("\nInput Options:\n");
109
- H0(" --input <filename> Raw YUV or Y4M input file name. `-` for stdin\n");
110
- H1(" --y4m Force parsing of input stream as YUV4MPEG2 regardless of file extension\n");
111
- H0(" --fps <float|rational> Source frame rate (float or num/denom), auto-detected if Y4M\n");
112
- H0(" --input-res WxH Source picture size [w x h], auto-detected if Y4M\n");
113
- H1(" --input-depth <integer> Bit-depth of input file. Default 8\n");
114
- H1(" --input-csp <string> Chroma subsampling, auto-detected if Y4M\n");
115
- H1(" 0 - i400 (4:0:0 monochrome)\n");
116
- H1(" 1 - i420 (4:2:0 default)\n");
117
- H1(" 2 - i422 (4:2:2)\n");
118
- H1(" 3 - i444 (4:4:4)\n");
119
-#if ENABLE_HDR10_PLUS
120
- H0(" --dhdr10-info <filename> JSON file containing the Creative Intent Metadata to be encoded as Dynamic Tone Mapping\n");
121
- H0(" --[no-]dhdr10-opt Insert tone mapping SEI only for IDR frames and when the tone mapping information changes. Default disabled\n");
122
-#endif
123
- H0(" --dolby-vision-profile <float|integer> Specifies Dolby Vision profile ID. Currently only profile 5, profile 8.1 and profile 8.2 enabled. Specified as '5' or '50'. Default 0 (disabled).\n");
124
- H0(" --dolby-vision-rpu <filename> File containing Dolby Vision RPU metadata.\n"
125
- " If given, x265's Dolby Vision metadata parser will fill the RPU field of input pictures with the metadata read from the file. Default NULL(disabled).\n");
126
- H0(" --nalu-file <filename> Text file containing SEI messages in the following format : <POC><space><PREFIX><space><NAL UNIT TYPE>/<SEI TYPE><space><SEI Payload>\n");
127
- H0("-f/--frames <integer> Maximum number of frames to encode. Default all\n");
128
- H0(" --seek <integer> First frame to encode\n");
129
- H1(" --[no-]interlace <bff|tff> Indicate input pictures are interlace fields in temporal order. Default progressive\n");
130
- H0(" --[no-]field Enable or disable field coding. Default %s\n", OPT( param->bField));
131
- H1(" --dither Enable dither if downscaling to 8 bit pixels. Default disabled\n");
132
- H0(" --[no-]copy-pic Copy buffers of input picture in frame. Default %s\n", OPT(param->bCopyPicToFrame));
133
- H0("\nQuality reporting metrics:\n");
134
- H0(" --[no-]ssim Enable reporting SSIM metric scores. Default %s\n", OPT(param->bEnableSsim));
135
- H0(" --[no-]psnr Enable reporting PSNR metric scores. Default %s\n", OPT(param->bEnablePsnr));
136
- H0("\nProfile, Level, Tier:\n");
137
- H0("-P/--profile <string> Enforce an encode profile: main, main10, mainstillpicture\n");
138
- H0(" --level-idc <integer|float> Force a minimum required decoder level (as '5.0' or '50')\n");
139
- H0(" --[no-]high-tier If a decoder level is specified, this modifier selects High tier of that level\n");
140
- H0(" --uhd-bd Enable UHD Bluray compatibility support\n");
141
- H0(" --[no-]allow-non-conformance Allow the encoder to generate profile NONE bitstreams. Default %s\n", OPT(param->bAllowNonConformance));
142
- H0("\nThreading, performance:\n");
143
- H0(" --pools <integer,...> Comma separated thread count per thread pool (pool per NUMA node)\n");
144
- H0(" '-' implies no threads on node, '+' implies one thread per core on node\n");
145
- H0("-F/--frame-threads <integer> Number of concurrently encoded frames. 0: auto-determined by core count\n");
146
- H0(" --[no-]wpp Enable Wavefront Parallel Processing. Default %s\n", OPT(param->bEnableWavefront));
147
- H0(" --[no-]slices <integer> Enable Multiple Slices feature. Default %d\n", param->maxSlices);
148
- H0(" --[no-]pmode Parallel mode analysis. Default %s\n", OPT(param->bDistributeModeAnalysis));
149
- H0(" --[no-]pme Parallel motion estimation. Default %s\n", OPT(param->bDistributeMotionEstimation));
150
- H0(" --[no-]asm <bool|int|string> Override CPU detection. Default: auto\n");
151
- H0("\nPresets:\n");
152
- H0("-p/--preset <string> Trade off performance for compression efficiency. Default medium\n");
153
- H0(" ultrafast, superfast, veryfast, faster, fast, medium, slow, slower, veryslow, or placebo\n");
154
- H0("-t/--tune <string> Tune the settings for a particular type of source or situation:\n");
155
- H0(" psnr, ssim, grain, zerolatency, fastdecode\n");
156
- H0("\nQuad-Tree size and depth:\n");
157
- H0("-s/--ctu <64|32|16> Maximum CU size (WxH). Default %d\n", param->maxCUSize);
158
- H0(" --min-cu-size <64|32|16|8> Minimum CU size (WxH). Default %d\n", param->minCUSize);
159
- H0(" --max-tu-size <32|16|8|4> Maximum TU size (WxH). Default %d\n", param->maxTUSize);
160
- H0(" --tu-intra-depth <integer> Max TU recursive depth for intra CUs. Default %d\n", param->tuQTMaxIntraDepth);
161
- H0(" --tu-inter-depth <integer> Max TU recursive depth for inter CUs. Default %d\n", param->tuQTMaxInterDepth);
162
- H0(" --limit-tu <0..4> Enable early exit from TU recursion for inter coded blocks. Default %d\n", param->limitTU);
163
- H0("\nAnalysis:\n");
164
- H0(" --rd <1..6> Level of RDO in mode decision 1:least....6:full RDO. Default %d\n", param->rdLevel);
165
- H0(" --[no-]psy-rd <0..5.0> Strength of psycho-visual rate distortion optimization, 0 to disable. Default %.1f\n", param->psyRd);
166
- H0(" --[no-]rdoq-level <0|1|2> Level of RDO in quantization 0:none, 1:levels, 2:levels & coding groups. Default %d\n", param->rdoqLevel);
167
- H0(" --[no-]psy-rdoq <0..50.0> Strength of psycho-visual optimization in RDO quantization, 0 to disable. Default %.1f\n", param->psyRdoq);
168
- H0(" --dynamic-rd <0..4.0> Strength of dynamic RD, 0 to disable. Default %.2f\n", param->dynamicRd);
169
- H0(" --[no-]ssim-rd Enable ssim rate distortion optimization, 0 to disable. Default %s\n", OPT(param->bSsimRd));
170
- H0(" --[no-]rd-refine Enable QP based RD refinement for rd levels 5 and 6. Default %s\n", OPT(param->bEnableRdRefine));
171
- H0(" --[no-]early-skip Enable early SKIP detection. Default %s\n", OPT(param->bEnableEarlySkip));
172
- H0(" --[no-]rskip Enable early exit from recursion. Default %s\n", OPT(param->bEnableRecursionSkip));
173
- H1(" --[no-]tskip-fast Enable fast intra transform skipping. Default %s\n", OPT(param->bEnableTSkipFast));
174
- H1(" --[no-]splitrd-skip Enable skipping split RD analysis when sum of split CU rdCost larger than one split CU rdCost for Intra CU. Default %s\n", OPT(param->bEnableSplitRdSkip));
175
- H1(" --nr-intra <integer> An integer value in range of 0 to 2000, which denotes strength of noise reduction in intra CUs. Default 0\n");
176
- H1(" --nr-inter <integer> An integer value in range of 0 to 2000, which denotes strength of noise reduction in inter CUs. Default 0\n");
177
- H0(" --ctu-info <integer> Enable receiving ctu information asynchronously and determine reaction to the CTU information (0, 1, 2, 4, 6) Default 0\n"
178
- " - 1: force the partitions if CTU information is present\n"
179
- " - 2: functionality of (1) and reduce qp if CTU information has changed\n"
180
- " - 4: functionality of (1) and force Inter modes when CTU Information has changed, merge/skip otherwise\n"
181
- " Enable this option only when planning to invoke the API function x265_encoder_ctu_info to copy ctu-info asynchronously\n");
182
- H0("\nCoding tools:\n");
183
- H0("-w/--[no-]weightp Enable weighted prediction in P slices. Default %s\n", OPT(param->bEnableWeightedPred));
184
- H0(" --[no-]weightb Enable weighted prediction in B slices. Default %s\n", OPT(param->bEnableWeightedBiPred));
185
- H0(" --[no-]cu-lossless Consider lossless mode in CU RDO decisions. Default %s\n", OPT(param->bCULossless));
186
- H0(" --[no-]signhide Hide sign bit of one coeff per TU (rdo). Default %s\n", OPT(param->bEnableSignHiding));
187
- H1(" --[no-]tskip Enable intra 4x4 transform skipping. Default %s\n", OPT(param->bEnableTransformSkip));
188
- H0("\nTemporal / motion search options:\n");
189
- H0(" --max-merge <1..5> Maximum number of merge candidates. Default %d\n", param->maxNumMergeCand);
190
- H0(" --ref <integer> max number of L0 references to be allowed (1 .. 16) Default %d\n", param->maxNumReferences);
191
- H0(" --limit-refs <0|1|2|3> Limit references per depth (1) or CU (2) or both (3). Default %d\n", param->limitReferences);
192
- H0(" --me <string> Motion search method dia hex umh star full. Default %d\n", param->searchMethod);
193
- H0("-m/--subme <integer> Amount of subpel refinement to perform (0:least .. 7:most). Default %d \n", param->subpelRefine);
194
- H0(" --merange <integer> Motion search range. Default %d\n", param->searchRange);
195
- H0(" --[no-]rect Enable rectangular motion partitions Nx2N and 2NxN. Default %s\n", OPT(param->bEnableRectInter));
196
- H0(" --[no-]amp Enable asymmetric motion partitions, requires --rect. Default %s\n", OPT(param->bEnableAMP));
197
- H0(" --[no-]limit-modes Limit rectangular and asymmetric motion predictions. Default %d\n", param->limitModes);
198
- H1(" --[no-]temporal-mvp Enable temporal MV predictors. Default %s\n", OPT(param->bEnableTemporalMvp));
199
- H1(" --[no-]hme Enable Hierarchical Motion Estimation. Default %s\n", OPT(param->bEnableHME));
200
- H1(" --hme-search <string> Motion search-method for HME L0,L1 and L2. Default(L0,L1,L2) is %d,%d,%d\n", param->hmeSearchMethod[0], param->hmeSearchMethod[1], param->hmeSearchMethod[2]);
201