Changes of Revision 39

x265.changes Changed
x
 
1
@@ -1,4 +1,40 @@
2
 -------------------------------------------------------------------
3
+Mon Jun  1 17:51:22 UTC 2020 - Luigi Baldoni <aloisio@gmx.com>
4
+
5
+- Update to version 3.4
6
+  New features:
7
+  * Edge-aware quadtree partitioning to terminate CU depth
8
+    recursion based on edge information. --rskip level 2 enables
9
+    the feature and --rskip-edge-threshold denotes the minimum
10
+    expected edge-density percentage within the CU, below which
11
+    the recursion is skipped. Experimental feature.
12
+  * Application-level feature --abr-ladder for automating
13
+    efficient ABR ladder generation. Shows ~65% savings in the
14
+    over-all turn-around time required for the generation of a
15
+    typical Apple HLS ladder in Intel(R) Xeon(R) Platinum 8280
16
+    CPU @ 2.70GHz over a sequential ABR-ladder generation
17
+    approach that leverages save-load architecture.
18
+  Enhancements to existing features:
19
+  * Improved efficiency in 2-pass rate-control algorithm. The
20
+    savings in the bitrate is ~1.72% with visual improvement in
21
+    quality in the initial 1-2 secs.
22
+  Encoder enhancements:
23
+  * Faster ARM64 encodes enabled by ASM contributions from
24
+    Huawei. The speed-up over no-asm version for 1080p encodes @
25
+    medium preset is ~15% in a 16 core H/W.
26
+  * Strict VBV conformance in zone encoding.
27
+  Bug fixes:
28
+  * Multi-pass encode failures with --frame-dup.
29
+  * Corrupted bitstreams with --hist-scenecut when input depth
30
+    and internal bit-depth differ.
31
+  * Incorrect analysis propagation in multi-level save-load
32
+    architecture.
33
+  * Failure in detecting NUMA packages installed in non-standard
34
+    directories.
35
+
36
+- Refreshed arm.patch
37
+
38
+-------------------------------------------------------------------
39
 Sat Mar 28 14:28:56 UTC 2020 - Luigi Baldoni <aloisio@gmx.com>
40
 
41
 - Update to version 3.3
42
x265.spec Changed
23
 
1
@@ -17,11 +17,11 @@
2
 #
3
 
4
 
5
-%define sover  188
6
+%define sover  192
7
 %define libname lib%{name}
8
 %define libsoname %{libname}-%{sover}
9
 Name:           x265
10
-Version:        3.3
11
+Version:        3.4
12
 Release:        0
13
 Summary:        A free h265/HEVC encoder - encoder binary
14
 License:        GPL-2.0-or-later
15
@@ -67,7 +67,6 @@
16
 %patch0 -p1
17
 %patch1 -p1
18
 %patch2 -p1
19
-
20
 sed -i -e "s/0.0/%{sover}.0/g" source/cmake/version.cmake
21
 
22
 
23
arm.patch Changed
129
 
1
@@ -1,8 +1,8 @@
2
-Index: x265_2.2/source/CMakeLists.txt
3
+Index: x265_3.4/source/CMakeLists.txt
4
 ===================================================================
5
---- x265_2.2.orig/source/CMakeLists.txt
6
-+++ x265_2.2/source/CMakeLists.txt
7
-@@ -65,15 +65,22 @@ elseif(POWERMATCH GREATER "-1")
8
+--- x265_3.4.orig/source/CMakeLists.txt
9
++++ x265_3.4/source/CMakeLists.txt
10
+@@ -64,26 +64,26 @@ elseif(POWERMATCH GREATER "-1")
11
          add_definitions(-DPPC64=1)
12
          message(STATUS "Detected POWER PPC64 target processor")
13
      endif()
14
@@ -12,41 +12,62 @@
15
 -    else()
16
 -        set(CROSS_COMPILE_ARM 0)
17
 -    endif()
18
--    message(STATUS "Detected ARM target processor")
19
 -    set(ARM 1)
20
--    add_definitions(-DX265_ARCH_ARM=1 -DHAVE_ARMV6=1)
21
+-    if("${CMAKE_SIZEOF_VOID_P}" MATCHES 8)
22
+-        message(STATUS "Detected ARM64 target processor")
23
+-        set(ARM64 1)
24
+-        add_definitions(-DX265_ARCH_ARM=1 -DX265_ARCH_ARM64=1 -DHAVE_ARMV6=0)
25
+-    else()
26
+-        message(STATUS "Detected ARM target processor")
27
+-        add_definitions(-DX265_ARCH_ARM=1 -DX265_ARCH_ARM64=0 -DHAVE_ARMV6=1)
28
+-    endif()
29
 +elseif(${SYSPROC} MATCHES "armv5.*")
30
 +    message(STATUS "Detected ARMV5 system processor")
31
 +    set(ARMV5 1)
32
-+    add_definitions(-DX265_ARCH_ARM=1 -DHAVE_ARMV6=0 -DHAVE_NEON=0)
33
++    add_definitions(-DX265_ARCH_ARM=1 -DX265_ARCH_ARM64=0 -DHAVE_ARMV6=0 -DHAVE_NEON=0)
34
 +elseif(${SYSPROC} STREQUAL "armv6l")
35
 +    message(STATUS "Detected ARMV6 system processor")
36
 +    set(ARMV6 1)
37
-+    add_definitions(-DX265_ARCH_ARM=1 -DHAVE_ARMV6=1 -DHAVE_NEON=0)
38
++    add_definitions(-DX265_ARCH_ARM=1 -DX265_ARCH_ARM64=0 -DHAVE_ARMV6=1 -DHAVE_NEON=0)
39
 +elseif(${SYSPROC} STREQUAL "armv7l")
40
 +    message(STATUS "Detected ARMV7 system processor")
41
 +    set(ARMV7 1)
42
-+    add_definitions(-DX265_ARCH_ARM=1 -DHAVE_ARMV6=1 -DHAVE_NEON=0)
43
++    add_definitions(-DX265_ARCH_ARM=1 -DX265_ARCH_ARM64=0 -DHAVE_ARMV6=1 -DHAVE_NEON=0)
44
 +elseif(${SYSPROC} STREQUAL "aarch64")
45
 +    message(STATUS "Detected AArch64 system processor")
46
 +    set(ARMV7 1)
47
-+    add_definitions(-DX265_ARCH_ARM=1 -DHAVE_ARMV6=1 -DHAVE_NEON=0)
48
++    add_definitions(-DX265_ARCH_ARM=1 -DX265_ARCH_ARM64=1 -DHAVE_ARMV6=0 -DHAVE_NEON=0)
49
  else()
50
      message(STATUS "CMAKE_SYSTEM_PROCESSOR value `${CMAKE_SYSTEM_PROCESSOR}` is unknown")
51
      message(STATUS "Please add this value near ${CMAKE_CURRENT_LIST_FILE}:${CMAKE_CURRENT_LIST_LINE}")
52
-@@ -208,18 +215,9 @@ if(GCC)
53
+ endif()
54
+-
55
+ if(UNIX)
56
+     list(APPEND PLATFORM_LIBS pthread)
57
+     find_library(LIBRT rt)
58
+@@ -238,28 +238,9 @@ if(GCC)
59
              endif()
60
          endif()
61
      endif()
62
 -    if(ARM AND CROSS_COMPILE_ARM)
63
--        set(ARM_ARGS -march=armv6 -mfloat-abi=soft -mfpu=vfp -marm -fPIC)
64
+-        if(ARM64)
65
+-            set(ARM_ARGS -fPIC)
66
+-        else()
67
+-            set(ARM_ARGS -march=armv6 -mfloat-abi=soft -mfpu=vfp -marm -fPIC)
68
+-        endif()
69
+-        message(STATUS "cross compile arm")
70
 -    elseif(ARM)
71
--        find_package(Neon)
72
--        if(CPU_HAS_NEON)
73
--            set(ARM_ARGS -mcpu=native -mfloat-abi=hard -mfpu=neon -marm -fPIC)
74
+-        if(ARM64)
75
+-            set(ARM_ARGS -fPIC)
76
 -            add_definitions(-DHAVE_NEON)
77
 -        else()
78
--            set(ARM_ARGS -mcpu=native -mfloat-abi=hard -mfpu=vfp -marm)
79
+-            find_package(Neon)
80
+-            if(CPU_HAS_NEON)
81
+-                set(ARM_ARGS -mcpu=native -mfloat-abi=hard -mfpu=neon -marm -fPIC)
82
+-                add_definitions(-DHAVE_NEON)
83
+-            else()
84
+-                set(ARM_ARGS -mcpu=native -mfloat-abi=hard -mfpu=vfp -marm)
85
+-            endif()
86
 -        endif()
87
 +    if(ARMV7)
88
 +        add_definitions(-fPIC)
89
@@ -55,11 +76,11 @@
90
      if(FPROFILE_GENERATE)
91
          if(INTEL_CXX)
92
              add_definitions(-prof-gen -prof-dir="${CMAKE_CURRENT_BINARY_DIR}")
93
-Index: x265_2.2/source/common/cpu.cpp
94
+Index: x265_3.4/source/common/cpu.cpp
95
 ===================================================================
96
---- x265_2.2.orig/source/common/cpu.cpp
97
-+++ x265_2.2/source/common/cpu.cpp
98
-@@ -37,7 +37,7 @@
99
+--- x265_3.4.orig/source/common/cpu.cpp
100
++++ x265_3.4/source/common/cpu.cpp
101
+@@ -39,7 +39,7 @@
102
  #include <machine/cpu.h>
103
  #endif
104
  
105
@@ -68,7 +89,7 @@
106
  #include <signal.h>
107
  #include <setjmp.h>
108
  static sigjmp_buf jmpbuf;
109
-@@ -344,7 +344,6 @@ uint32_t cpu_detect(void)
110
+@@ -350,7 +350,6 @@ uint32_t cpu_detect(bool benableavx512)
111
      }
112
  
113
      canjump = 1;
114
@@ -76,7 +97,7 @@
115
      canjump = 0;
116
      signal(SIGILL, oldsig);
117
  #endif // if !HAVE_NEON
118
-@@ -360,7 +359,7 @@ uint32_t cpu_detect(void)
119
+@@ -366,7 +365,7 @@ uint32_t cpu_detect(bool benableavx512)
120
      // which may result in incorrect detection and the counters stuck enabled.
121
      // right now Apple does not seem to support performance counters for this test
122
  #ifndef __MACH__
123
@@ -84,4 +105,4 @@
124
 +    //flags |= PFX(cpu_fast_neon_mrc_test)() ? X265_CPU_FAST_NEON_MRC : 0;
125
  #endif
126
      // TODO: write dual issue test? currently it's A8 (dual issue) vs. A9 (fast mrc)
127
- #endif // if HAVE_ARMV6
128
+ #elif X265_ARCH_ARM64
129
baselibs.conf Changed
4
 
1
@@ -1,1 +1,1 @@
2
-libx265-179
3
+libx265-192
4
x265_3.3.tar.gz/.hg_archival.txt -> x265_3.4.tar.gz/.hg_archival.txt Changed
10
 
1
@@ -1,5 +1,4 @@
2
 repo: 09fe40627f03a0f9c3e6ac78b22ac93da23f9fdf
3
-node: f94b0d32737d40b2b9a9d74df57fee45e6be5cb0
4
-branch: Release_3.3
5
-latesttag: 3.3
6
-latesttagdistance: 1
7
+node: 2a65b720985096bcb1664f7cb05c3d04aeb576f5
8
+branch: Release_3.4
9
+tag: 3.4
10
x265_3.3.tar.gz/.hgtags -> x265_3.4.tar.gz/.hgtags Changed
6
 
1
@@ -40,3 +40,4 @@
2
 5ee3593ebd82b4d8957909bbc1b68b99b59ba773 3.3_RC1
3
 96a10df63c0b778b480330bdf3be8da7db8a5fb1 3.3_RC2
4
 057215961bc4b51b6260a584ff3d506e6d65cfd6 3.3
5
+ee92f36782800f145970131e01c79955a3ed5c10 3.4_RC1
6
x265_3.4.tar.gz/build/aarch64-linux/crosscompile.cmake Added
17
 
1
@@ -0,0 +1,15 @@
2
+# CMake toolchain file for cross compiling x265 for aarch64
3
+# This feature is only supported as experimental. Use with caution.
4
+# Please report bugs on bitbucket
5
+# Run cmake with: cmake -DCMAKE_TOOLCHAIN_FILE=crosscompile.cmake -G "Unix Makefiles" ../../source && ccmake ../../source
6
+
7
+set(CROSS_COMPILE_ARM 1)
8
+set(CMAKE_SYSTEM_NAME Linux)
9
+set(CMAKE_SYSTEM_PROCESSOR aarch64)
10
+
11
+# specify the cross compiler
12
+set(CMAKE_C_COMPILER aarch64-linux-gnu-gcc)
13
+set(CMAKE_CXX_COMPILER aarch64-linux-gnu-g++)
14
+
15
+# specify the target environment
16
+SET(CMAKE_FIND_ROOT_PATH  /usr/aarch64-linux-gnu)
17
x265_3.4.tar.gz/build/aarch64-linux/make-Makefiles.bash Added
6
 
1
@@ -0,0 +1,4 @@
2
+#!/bin/bash
3
+# Run this from within a bash shell
4
+
5
+cmake -DCMAKE_TOOLCHAIN_FILE="crosscompile.cmake" -G "Unix Makefiles" ../../source && ccmake ../../source
6
x265_3.3.tar.gz/doc/reST/cli.rst -> x265_3.4.tar.gz/doc/reST/cli.rst Changed
79
 
1
@@ -107,6 +107,9 @@
2
    
3
    **BufferFillFinal** Buffer bits available after removing the frame out of CPB.
4
    
5
+   **UnclippedBufferFillFinal** Unclipped buffer bits available after removing the frame 
6
+   out of CPB. Only used for CSV logging purposes.
7
+   
8
    **Latency** Latency in terms of number of frames between when the frame 
9
    was given in and when the frame is given out.
10
    
11
@@ -842,15 +845,31 @@
12
    Measure 2Nx2N merge candidates first; if no residual is found, 
13
    additional modes at that depth are not analysed. Default disabled
14
 
15
-.. option:: --rskip, --no-rskip
16
+.. option:: --rskip <0|1|2>
17
+
18
+   This option determines early exit from CU depth recursion in modes 1 and 2. When a skip CU is
19
+   found, additional heuristics (depending on the RD level and rskip mode) are used to decide whether
20
+   to terminate recursion. The following table summarizes the behavior.
21
+   
22
+   +----------+------------+----------------------------------------------------------------+
23
+   | RD Level | Rskip Mode |   Skip Recursion Heuristic                                     |
24
+   +==========+============+================================================================+
25
+   |   0 - 4  |      1     |   Neighbour costs and CU homogeneity.                          |
26
+   +----------+------------+----------------------------------------------------------------+
27
+   |   5 - 6  |      1     |   Comparison with inter2Nx2N.                                  |
28
+   +----------+------------+----------------------------------------------------------------+
29
+   |   0 - 6  |      2     |   CU edge density.                                             |
30
+   +----------+------------+----------------------------------------------------------------+
31
+
32
+   Provides minimal quality degradation at good performance gains for non-zero modes.
33
+   :option:`--rskip mode 0` means disabled. Default: 1, disabled when :option:`--tune grain` is used.
34
+   This is an integer value representing the edge-density percentage within the CU. Internally normalized to a number between 0.0 and 1.0 in x265.
35
+   Recommended low thresholds for slow encodes and high for fast encodes.
36
 
37
-   This option determines early exit from CU depth recursion. When a skip CU is
38
-   found, additional heuristics (depending on rd-level) are used to decide whether
39
-   to terminate recursion. In rdlevels 5 and 6, comparison with inter2Nx2N is used, 
40
-   while at rdlevels 4 and neighbour costs are used to skip recursion.
41
-   Provides minimal quality degradation at good performance gains when enabled. 
42
+.. option:: --rskip-edge-threshold <0..100>
43
 
44
-   Default: enabled, disabled for :option:`--tune grain`
45
+   Denotes the minimum expected edge-density percentage within the CU, below which the recursion is skipped.
46
+   Default: 5, requires :option:`--rskip mode 2` to be enabled.
47
 
48
 .. option:: --splitrd-skip, --no-splitrd-skip
49
 
50
@@ -2501,6 +2520,28 @@
51
    --recon-y4m-exec "ffplay -i pipe:0 -autoexit"
52
 
53
    **CLI ONLY**
54
+   
55
+ABR-ladder Options
56
+==================
57
+
58
+.. option:: --abr-ladder <filename>
59
+
60
+   File containing the encoder configurations to generate ABR ladder.
61
+   The format of each line is:
62
+
63
+   **<encID:reuse-level:refID> <CLI>**
64
+   
65
+   where, encID indicates the unique name given to the encode, refID indicates
66
+   the name of the encode from which analysis info has to be re-used ( set to 'nil'
67
+   if analysis reuse isn't preferred ), and reuse-level indicates the level ( :option:`--analysis-load-reuse-level`)
68
+   at which analysis info has to be reused.
69
+   
70
+   A sample config file is available in `the downloads page <https://bitbucket.org/multicoreware/x265/downloads/Sample_ABR_ladder_config>`_
71
+   
72
+   Default: Disabled ( Conventional single encode generation ). Experimental feature.
73
+
74
+   **CLI ONLY**
75
+
76
 
77
 SVT-HEVC Encoder Options
78
 ========================
79
x265_3.3.tar.gz/doc/reST/releasenotes.rst -> x265_3.4.tar.gz/doc/reST/releasenotes.rst Changed
34
 
1
@@ -2,6 +2,32 @@
2
 Release Notes
3
 *************
4
 
5
+Version 3.4
6
+===========
7
+
8
+Release date - 29th May, 2020.
9
+
10
+New features
11
+------------
12
+1. **Edge-aware quadtree partitioning** to terminate CU depth recursion based on edge information. :option:`--rskip` level 2 enables the feature and  :option:`--rskip-edge-threshold` denotes the minimum expected edge-density percentage within the CU, below which the recursion is skipped. Experimental feature.
13
+2. Application-level feature :option:`--abr-ladder` for automating efficient ABR ladder generation. Shows ~65% savings in the over-all turn-around time required for the generation of a typical Apple HLS ladder in Intel(R) Xeon(R) Platinum 8280 CPU @ 2.70GHz over a sequential ABR-ladder generation approach that leverages save-load architecture.
14
+
15
+Enhancements to existing features
16
+---------------------------------
17
+1. Improved efficiency in 2-pass rate-control algorithm. The savings in the bitrate is ~1.72% with visual improvement in quality in the initial 1-2 secs.
18
+
19
+Encoder enhancements
20
+--------------------
21
+1. Faster ARM64 encodes enabled by ASM contributions from Huawei. The speed-up over no-asm version for 1080p encodes @ medium preset is ~15% in a 16 core H/W.
22
+2. Strict VBV conformance in zone encoding.
23
+
24
+Bug fixes
25
+---------
26
+1. Multi-pass encode failures with :option:`--frame-dup`.
27
+2. Corrupted bitstreams with :option:`--hist-scenecut` when input depth and internal bit-depth differ.
28
+3. Incorrect analysis propagation in multi-level save-load architecture.
29
+4. Failure in detecting NUMA packages installed in non-standard directories.
30
+
31
 Version 3.3
32
 ===========
33
 
34
x265_3.3.tar.gz/source/CMakeLists.txt -> x265_3.4.tar.gz/source/CMakeLists.txt Changed
109
 
1
@@ -29,7 +29,7 @@
2
 option(STATIC_LINK_CRT "Statically link C runtime for release builds" OFF)
3
 mark_as_advanced(FPROFILE_USE FPROFILE_GENERATE NATIVE_BUILD)
4
 # X265_BUILD must be incremented each time the public API is changed
5
-set(X265_BUILD 188)
6
+set(X265_BUILD 192)
7
 configure_file("${PROJECT_SOURCE_DIR}/x265.def.in"
8
                "${PROJECT_BINARY_DIR}/x265.def")
9
 configure_file("${PROJECT_SOURCE_DIR}/x265_config.h.in"
10
@@ -40,7 +40,7 @@
11
 # System architecture detection
12
 string(TOLOWER "${CMAKE_SYSTEM_PROCESSOR}" SYSPROC)
13
 set(X86_ALIASES x86 i386 i686 x86_64 amd64)
14
-set(ARM_ALIASES armv6l armv7l)
15
+set(ARM_ALIASES armv6l armv7l aarch64)
16
 list(FIND X86_ALIASES "${SYSPROC}" X86MATCH)
17
 list(FIND ARM_ALIASES "${SYSPROC}" ARMMATCH)
18
 set(POWER_ALIASES ppc64 ppc64le)
19
@@ -70,9 +70,15 @@
20
     else()
21
         set(CROSS_COMPILE_ARM 0)
22
     endif()
23
-    message(STATUS "Detected ARM target processor")
24
     set(ARM 1)
25
-    add_definitions(-DX265_ARCH_ARM=1 -DHAVE_ARMV6=1)
26
+    if("${CMAKE_SIZEOF_VOID_P}" MATCHES 8)
27
+        message(STATUS "Detected ARM64 target processor")
28
+        set(ARM64 1)
29
+        add_definitions(-DX265_ARCH_ARM=1 -DX265_ARCH_ARM64=1 -DHAVE_ARMV6=0)
30
+    else()
31
+        message(STATUS "Detected ARM target processor")
32
+        add_definitions(-DX265_ARCH_ARM=1 -DX265_ARCH_ARM64=0 -DHAVE_ARMV6=1)
33
+    endif()
34
 else()
35
     message(STATUS "CMAKE_SYSTEM_PROCESSOR value `${CMAKE_SYSTEM_PROCESSOR}` is unknown")
36
     message(STATUS "Please add this value near ${CMAKE_CURRENT_LIST_FILE}:${CMAKE_CURRENT_LIST_LINE}")
37
@@ -95,6 +101,8 @@
38
         if(NUMA_FOUND)
39
             link_directories(${NUMA_LIBRARY_DIR})
40
             list(APPEND CMAKE_REQUIRED_LIBRARIES numa)
41
+            list(APPEND CMAKE_REQUIRED_INCLUDES ${NUMA_INCLUDE_DIR})
42
+            list(APPEND CMAKE_REQUIRED_LINK_OPTIONS "-L${NUMA_LIBRARY_DIR}")
43
             check_symbol_exists(numa_node_of_cpu numa.h NUMA_V2)
44
             if(NUMA_V2)
45
                 add_definitions(-DHAVE_LIBNUMA)
46
@@ -231,14 +239,24 @@
47
         endif()
48
     endif()
49
     if(ARM AND CROSS_COMPILE_ARM)
50
-        set(ARM_ARGS -march=armv6 -mfloat-abi=soft -mfpu=vfp -marm -fPIC)
51
+        if(ARM64)
52
+            set(ARM_ARGS -fPIC)
53
+        else()
54
+            set(ARM_ARGS -march=armv6 -mfloat-abi=soft -mfpu=vfp -marm -fPIC)
55
+        endif()
56
+        message(STATUS "cross compile arm")
57
     elseif(ARM)
58
-        find_package(Neon)
59
-        if(CPU_HAS_NEON)
60
-            set(ARM_ARGS -mcpu=native -mfloat-abi=hard -mfpu=neon -marm -fPIC)
61
+        if(ARM64)
62
+            set(ARM_ARGS -fPIC)
63
             add_definitions(-DHAVE_NEON)
64
         else()
65
-            set(ARM_ARGS -mcpu=native -mfloat-abi=hard -mfpu=vfp -marm)
66
+            find_package(Neon)
67
+            if(CPU_HAS_NEON)
68
+                set(ARM_ARGS -mcpu=native -mfloat-abi=hard -mfpu=neon -marm -fPIC)
69
+                add_definitions(-DHAVE_NEON)
70
+            else()
71
+                set(ARM_ARGS -mcpu=native -mfloat-abi=hard -mfpu=vfp -marm)
72
+            endif()
73
         endif()
74
     endif()
75
     add_definitions(${ARM_ARGS})
76
@@ -518,7 +536,11 @@
77
     # compile ARM arch asm files here
78
         enable_language(ASM)
79
         foreach(ASM ${ARM_ASMS})
80
-            set(ASM_SRC ${CMAKE_CURRENT_SOURCE_DIR}/common/arm/${ASM})
81
+            if(ARM64)
82
+                set(ASM_SRC ${CMAKE_CURRENT_SOURCE_DIR}/common/aarch64/${ASM})
83
+            else()
84
+                set(ASM_SRC ${CMAKE_CURRENT_SOURCE_DIR}/common/arm/${ASM})
85
+            endif()
86
             list(APPEND ASM_SRCS ${ASM_SRC})
87
             list(APPEND ASM_OBJS ${ASM}.${SUFFIX})
88
             add_custom_command(
89
@@ -725,16 +747,16 @@
90
         # Xcode seems unable to link the CLI with libs, so link as one target
91
         if(ENABLE_HDR10_PLUS)
92
         add_executable(cli ../COPYING ${InputFiles} ${OutputFiles} ${GETOPT}
93
-                        x265.cpp x265.h x265cli.h
94
+                        x265.cpp x265.h x265cli.cpp x265cli.h abrEncApp.cpp abrEncApp.h
95
                         $<TARGET_OBJECTS:encoder> $<TARGET_OBJECTS:common> $<TARGET_OBJECTS:dynamicHDR10> ${ASM_OBJS})
96
         else()
97
             add_executable(cli ../COPYING ${InputFiles} ${OutputFiles} ${GETOPT}
98
-                        x265.cpp x265.h x265cli.h
99
+                        x265.cpp x265.h x265cli.cpp x265cli.h abrEncApp.cpp abrEncApp.h
100
                         $<TARGET_OBJECTS:encoder> $<TARGET_OBJECTS:common> ${ASM_OBJS})
101
         endif()
102
     else()
103
         add_executable(cli ../COPYING ${InputFiles} ${OutputFiles} ${GETOPT} ${X265_RC_FILE}
104
-                       ${ExportDefs} x265.cpp x265.h x265cli.h)
105
+                       ${ExportDefs} x265.cpp x265.h x265cli.cpp x265cli.h abrEncApp.cpp abrEncApp.h)
106
         if(WIN32 OR NOT ENABLE_SHARED OR INTEL_CXX)
107
             # The CLI cannot link to the shared library on Windows, it
108
             # requires internal APIs not exported from the DLL
109
x265_3.4.tar.gz/source/abrEncApp.cpp Added
201
 
1
@@ -0,0 +1,1108 @@
2
+/*****************************************************************************
3
+* Copyright (C) 2013-2020 MulticoreWare, Inc
4
+*
5
+* Authors: Pooja Venkatesan <pooja@multicorewareinc.com>
6
+*          Aruna Matheswaran <aruna@multicorewareinc.com>
7
+*
8
+* This program is free software; you can redistribute it and/or modify
9
+* it under the terms of the GNU General Public License as published by
10
+* the Free Software Foundation; either version 2 of the License, or
11
+* (at your option) any later version.
12
+*
13
+* This program is distributed in the hope that it will be useful,
14
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
15
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16
+* GNU General Public License for more details.
17
+*
18
+* You should have received a copy of the GNU General Public License
19
+* along with this program; if not, write to the Free Software
20
+* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
21
+*
22
+* This program is also available under a commercial proprietary license.
23
+* For more information, contact us at license @ x265.com.
24
+*****************************************************************************/
25
+
26
+#include "abrEncApp.h"
27
+#include "mv.h"
28
+#include "slice.h"
29
+#include "param.h"
30
+
31
+#include <signal.h>
32
+#include <errno.h>
33
+
34
+#include <queue>
35
+
36
+using namespace X265_NS;
37
+
38
+/* Ctrl-C handler */
39
+static volatile sig_atomic_t b_ctrl_c /* = 0 */;
40
+static void sigint_handler(int)
41
+{
42
+    b_ctrl_c = 1;
43
+}
44
+
45
+namespace X265_NS {
46
+    // private namespace
47
+#define X265_INPUT_QUEUE_SIZE 250
48
+
49
+    AbrEncoder::AbrEncoder(CLIOptions cliopt[], uint8_t numEncodes, int &ret)
50
+    {
51
+        m_numEncodes = numEncodes;
52
+        m_numActiveEncodes.set(numEncodes);
53
+        m_queueSize = (numEncodes > 1) ? X265_INPUT_QUEUE_SIZE : 1;
54
+        m_passEnc = X265_MALLOC(PassEncoder*, m_numEncodes);
55
+
56
+        for (uint8_t i = 0; i < m_numEncodes; i++)
57
+        {
58
+            m_passEnc[i] = new PassEncoder(i, cliopt[i], this);
59
+            if (!m_passEnc[i])
60
+            {
61
+                x265_log(NULL, X265_LOG_ERROR, "Unable to allocate memory for passEncoder\n");
62
+                ret = 4;
63
+            }
64
+            m_passEnc[i]->init(ret);
65
+        }
66
+
67
+        if (!allocBuffers())
68
+        {
69
+            x265_log(NULL, X265_LOG_ERROR, "Unable to allocate memory for buffers\n");
70
+            ret = 4;
71
+        }
72
+
73
+        /* start passEncoder worker threads */
74
+        for (uint8_t pass = 0; pass < m_numEncodes; pass++)
75
+            m_passEnc[pass]->startThreads();
76
+    }
77
+
78
+    bool AbrEncoder::allocBuffers()
79
+    {
80
+        m_inputPicBuffer = X265_MALLOC(x265_picture**, m_numEncodes);
81
+        m_analysisBuffer = X265_MALLOC(x265_analysis_data*, m_numEncodes);
82
+
83
+        m_picWriteCnt = new ThreadSafeInteger[m_numEncodes];
84
+        m_picReadCnt = new ThreadSafeInteger[m_numEncodes];
85
+        m_analysisWriteCnt = new ThreadSafeInteger[m_numEncodes];
86
+        m_analysisReadCnt = new ThreadSafeInteger[m_numEncodes];
87
+
88
+        m_picIdxReadCnt = X265_MALLOC(ThreadSafeInteger*, m_numEncodes);
89
+        m_analysisWrite = X265_MALLOC(ThreadSafeInteger*, m_numEncodes);
90
+        m_analysisRead = X265_MALLOC(ThreadSafeInteger*, m_numEncodes);
91
+        m_readFlag = X265_MALLOC(int*, m_numEncodes);
92
+
93
+        for (uint8_t pass = 0; pass < m_numEncodes; pass++)
94
+        {
95
+            m_inputPicBuffer[pass] = X265_MALLOC(x265_picture*, m_queueSize);
96
+            for (uint32_t idx = 0; idx < m_queueSize; idx++)
97
+            {
98
+                m_inputPicBuffer[pass][idx] = x265_picture_alloc();
99
+                x265_picture_init(m_passEnc[pass]->m_param, m_inputPicBuffer[pass][idx]);
100
+            }
101
+
102
+            m_analysisBuffer[pass] = X265_MALLOC(x265_analysis_data, m_queueSize);
103
+            m_picIdxReadCnt[pass] = new ThreadSafeInteger[m_queueSize];
104
+            m_analysisWrite[pass] = new ThreadSafeInteger[m_queueSize];
105
+            m_analysisRead[pass] = new ThreadSafeInteger[m_queueSize];
106
+            m_readFlag[pass] = X265_MALLOC(int, m_queueSize);
107
+        }
108
+        return true;
109
+    }
110
+
111
+    void AbrEncoder::destroy()
112
+    {
113
+        x265_cleanup(); /* Free library singletons */
114
+        for (uint8_t pass = 0; pass < m_numEncodes; pass++)
115
+        {
116
+            for (uint32_t index = 0; index < m_queueSize; index++)
117
+            {
118
+                X265_FREE(m_inputPicBuffer[pass][index]->planes[0]);
119
+                x265_picture_free(m_inputPicBuffer[pass][index]);
120
+            }
121
+
122
+            X265_FREE(m_inputPicBuffer[pass]);
123
+            X265_FREE(m_analysisBuffer[pass]);
124
+            X265_FREE(m_readFlag[pass]);
125
+            delete[] m_picIdxReadCnt[pass];
126
+            delete[] m_analysisWrite[pass];
127
+            delete[] m_analysisRead[pass];
128
+            m_passEnc[pass]->destroy();
129
+            delete m_passEnc[pass];
130
+        }
131
+        X265_FREE(m_inputPicBuffer);
132
+        X265_FREE(m_analysisBuffer);
133
+        X265_FREE(m_readFlag);
134
+
135
+        delete[] m_picWriteCnt;
136
+        delete[] m_picReadCnt;
137
+        delete[] m_analysisWriteCnt;
138
+        delete[] m_analysisReadCnt;
139
+
140
+        X265_FREE(m_picIdxReadCnt);
141
+        X265_FREE(m_analysisWrite);
142
+        X265_FREE(m_analysisRead);
143
+
144
+        X265_FREE(m_passEnc);
145
+    }
146
+
147
+    PassEncoder::PassEncoder(uint32_t id, CLIOptions cliopt, AbrEncoder *parent)
148
+    {
149
+        m_id = id;
150
+        m_cliopt = cliopt;
151
+        m_parent = parent;
152
+        if(!(m_cliopt.enableScaler && m_id))
153
+            m_input = m_cliopt.input;
154
+        m_param = cliopt.param;
155
+        m_inputOver = false;
156
+        m_lastIdx = -1;
157
+        m_encoder = NULL;
158
+        m_scaler = NULL;
159
+        m_reader = NULL;
160
+        m_ret = 0;
161
+    }
162
+
163
+    int PassEncoder::init(int &result)
164
+    {
165
+        if (m_parent->m_numEncodes > 1)
166
+            setReuseLevel();
167
+                
168
+        if (!(m_cliopt.enableScaler && m_id))
169
+            m_reader = new Reader(m_id, this);
170
+        else
171
+        {
172
+            VideoDesc *src = NULL, *dst = NULL;
173
+            dst = new VideoDesc(m_param->sourceWidth, m_param->sourceHeight, m_param->internalCsp, m_param->internalBitDepth);
174
+            int dstW = m_parent->m_passEnc[m_id - 1]->m_param->sourceWidth;
175
+            int dstH = m_parent->m_passEnc[m_id - 1]->m_param->sourceHeight;
176
+            src = new VideoDesc(dstW, dstH, m_param->internalCsp, m_param->internalBitDepth);
177
+            if (src != NULL && dst != NULL)
178
+            {
179
+                m_scaler = new Scaler(0, 1, m_id, src, dst, this);
180
+                if (!m_scaler)
181
+                {
182
+                    x265_log(m_param, X265_LOG_ERROR, "\n MALLOC failure in Scaler");
183
+                    result = 4;
184
+                }
185
+            }
186
+        }
187
+
188
+        /* note: we could try to acquire a different libx265 API here based on
189
+        * the profile found during option parsing, but it must be done before
190
+        * opening an encoder */
191
+
192
+        if (m_param)
193
+            m_encoder = m_cliopt.api->encoder_open(m_param);
194
+        if (!m_encoder)
195
+        {
196
+            x265_log(NULL, X265_LOG_ERROR, "x265_encoder_open() failed for Enc, \n");
197
+            m_ret = 2;
198
+            return -1;
199
+        }
200
+
201
x265_3.4.tar.gz/source/abrEncApp.h Added
155
 
1
@@ -0,0 +1,153 @@
2
+/*****************************************************************************
3
+* Copyright (C) 2013-2020 MulticoreWare, Inc
4
+*
5
+* Authors: Pooja Venkatesan <pooja@multicorewareinc.com>
6
+*          Aruna Matheswaran <aruna@multicorewareinc.com>
7
+*           
8
+*
9
+* This program is free software; you can redistribute it and/or modify
10
+* it under the terms of the GNU General Public License as published by
11
+* the Free Software Foundation; either version 2 of the License, or
12
+* (at your option) any later version.
13
+*
14
+* This program is distributed in the hope that it will be useful,
15
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
16
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17
+* GNU General Public License for more details.
18
+*
19
+* You should have received a copy of the GNU General Public License
20
+* along with this program; if not, write to the Free Software
21
+* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
22
+*
23
+* This program is also available under a commercial proprietary license.
24
+* For more information, contact us at license @ x265.com.
25
+*****************************************************************************/
26
+
27
+#ifndef ABR_ENCODE_H
28
+#define ABR_ENCODE_H
29
+
30
+#include "x265.h"
31
+#include "scaler.h"
32
+#include "threading.h"
33
+#include "x265cli.h"
34
+
35
+namespace X265_NS {
36
+    // private namespace
37
+
38
+    class PassEncoder;
39
+    class Scaler;
40
+    class Reader;
41
+
42
+    class AbrEncoder
43
+    {
44
+    public:
45
+        uint8_t           m_numEncodes;
46
+        PassEncoder        **m_passEnc;
47
+        uint32_t           m_queueSize;
48
+        ThreadSafeInteger  m_numActiveEncodes;
49
+
50
+        x265_picture       ***m_inputPicBuffer; //[numEncodes][queueSize]
51
+        x265_analysis_data **m_analysisBuffer; //[numEncodes][queueSize]
52
+        int                **m_readFlag;
53
+
54
+        ThreadSafeInteger  *m_picWriteCnt;
55
+        ThreadSafeInteger  *m_picReadCnt;
56
+        ThreadSafeInteger  **m_picIdxReadCnt;
57
+        ThreadSafeInteger  *m_analysisWriteCnt; //[numEncodes][queueSize]
58
+        ThreadSafeInteger  *m_analysisReadCnt; //[numEncodes][queueSize]
59
+        ThreadSafeInteger  **m_analysisWrite; //[numEncodes][queueSize]
60
+        ThreadSafeInteger  **m_analysisRead; //[numEncodes][queueSize]
61
+
62
+        AbrEncoder(CLIOptions cliopt[], uint8_t numEncodes, int& ret);
63
+        bool allocBuffers();
64
+        void destroy();
65
+
66
+    };
67
+
68
+    class PassEncoder : public Thread
69
+    {
70
+    public:
71
+
72
+        uint32_t m_id;
73
+        x265_param *m_param;
74
+        AbrEncoder *m_parent;
75
+        x265_encoder *m_encoder;
76
+        Reader *m_reader;
77
+        Scaler *m_scaler;
78
+        bool m_inputOver;
79
+
80
+        int m_threadActive;
81
+        int m_lastIdx;
82
+        uint32_t m_outputNalsCount;
83
+
84
+        x265_picture **m_inputPicBuffer;
85
+        x265_analysis_data **m_analysisBuffer;
86
+        x265_nal **m_outputNals;
87
+        x265_picture **m_outputRecon;
88
+
89
+        CLIOptions m_cliopt;
90
+        InputFile* m_input;
91
+        const char* m_reconPlayCmd;
92
+        FILE*    m_qpfile;
93
+        FILE*    m_zoneFile;
94
+        FILE*    m_dolbyVisionRpu;/* File containing Dolby Vision BL RPU metadata */
95
+
96
+        int m_ret;
97
+
98
+        PassEncoder(uint32_t id, CLIOptions cliopt, AbrEncoder *parent);
99
+        int init(int &result);
100
+        void setReuseLevel();
101
+
102
+        void startThreads();
103
+        void copyInfo(x265_analysis_data *src);
104
+
105
+        bool readPicture(x265_picture*);
106
+        void destroy();
107
+
108
+    private:
109
+        void threadMain();
110
+    };
111
+
112
+    class Scaler : public Thread
113
+    {
114
+    public:
115
+        PassEncoder *m_parentEnc;
116
+        int m_id;
117
+        int m_scalePlanes[3];
118
+        int m_scaleFrameSize;
119
+        uint32_t m_threadId;
120
+        uint32_t m_threadTotal;
121
+        ThreadSafeInteger m_scaledWriteCnt;
122
+        VideoDesc* m_srcFormat;
123
+        VideoDesc* m_dstFormat;
124
+        int m_threadActive;
125
+        ScalerFilterManager* m_filterManager;
126
+
127
+        Scaler(int threadId, int threadNum, int id, VideoDesc *src, VideoDesc * dst, PassEncoder *parentEnc);
128
+        bool scalePic(x265_picture *destination, x265_picture *source);
129
+        void threadMain();
130
+        void destroy()
131
+        {
132
+            if (m_filterManager)
133
+            {
134
+                delete m_filterManager;
135
+                m_filterManager = NULL;
136
+            }
137
+        }
138
+    };
139
+
140
+    class Reader : public Thread
141
+    {
142
+    public:
143
+        PassEncoder *m_parentEnc;
144
+        int m_id;
145
+        InputFile* m_input;
146
+        int m_threadActive;
147
+
148
+        Reader(int id, PassEncoder *parentEnc);
149
+        void threadMain();
150
+    };
151
+}
152
+
153
+#endif // ifndef ABR_ENCODE_H
154
+#pragma once
155
x265_3.3.tar.gz/source/common/CMakeLists.txt -> x265_3.4.tar.gz/source/common/CMakeLists.txt Changed
59
 
1
@@ -14,7 +14,7 @@
2
 endif(EXTRA_LIB)
3
 
4
 if(ENABLE_ASSEMBLY)
5
-    set_source_files_properties(threading.cpp primitives.cpp PROPERTIES COMPILE_FLAGS -DENABLE_ASSEMBLY=1)
6
+    set_source_files_properties(threading.cpp primitives.cpp pixel.cpp PROPERTIES COMPILE_FLAGS -DENABLE_ASSEMBLY=1)
7
     list(APPEND VFLAGS "-DENABLE_ASSEMBLY=1")
8
 endif(ENABLE_ASSEMBLY)
9
 
10
@@ -84,16 +84,33 @@
11
 endif(ENABLE_ASSEMBLY AND X86)
12
 
13
 if(ENABLE_ASSEMBLY AND (ARM OR CROSS_COMPILE_ARM))
14
-    set(C_SRCS asm-primitives.cpp pixel.h mc.h ipfilter8.h blockcopy8.h dct8.h loopfilter.h)
15
+    if(ARM64)
16
+        if(GCC AND (CMAKE_CXX_FLAGS_RELEASE MATCHES "-O3"))
17
+            message(STATUS "Detected CXX compiler using -O3 optimization level")
18
+            add_definitions(-DAUTO_VECTORIZE=1)
19
+        endif()
20
+        set(C_SRCS asm-primitives.cpp pixel.h ipfilter8.h)
21
 
22
-    # add ARM assembly/intrinsic files here
23
-    set(A_SRCS asm.S cpu-a.S mc-a.S sad-a.S pixel-util.S ssd-a.S blockcopy8.S ipfilter8.S dct-a.S)
24
-    set(VEC_PRIMITIVES)
25
+        # add ARM assembly/intrinsic files here
26
+        set(A_SRCS asm.S mc-a.S sad-a.S pixel-util.S ipfilter8.S)
27
+        set(VEC_PRIMITIVES)
28
 
29
-    set(ARM_ASMS "${A_SRCS}" CACHE INTERNAL "ARM Assembly Sources")
30
-    foreach(SRC ${C_SRCS})
31
-        set(ASM_PRIMITIVES ${ASM_PRIMITIVES} arm/${SRC})
32
-    endforeach()
33
+        set(ARM_ASMS "${A_SRCS}" CACHE INTERNAL "ARM Assembly Sources")
34
+        foreach(SRC ${C_SRCS})
35
+            set(ASM_PRIMITIVES ${ASM_PRIMITIVES} aarch64/${SRC})
36
+        endforeach()
37
+    else()
38
+        set(C_SRCS asm-primitives.cpp pixel.h mc.h ipfilter8.h blockcopy8.h dct8.h loopfilter.h)
39
+
40
+        # add ARM assembly/intrinsic files here
41
+        set(A_SRCS asm.S cpu-a.S mc-a.S sad-a.S pixel-util.S ssd-a.S blockcopy8.S ipfilter8.S dct-a.S)
42
+        set(VEC_PRIMITIVES)
43
+
44
+        set(ARM_ASMS "${A_SRCS}" CACHE INTERNAL "ARM Assembly Sources")
45
+        foreach(SRC ${C_SRCS})
46
+            set(ASM_PRIMITIVES ${ASM_PRIMITIVES} arm/${SRC})
47
+        endforeach()
48
+    endif()
49
     source_group(Assembly FILES ${ASM_PRIMITIVES})
50
 endif(ENABLE_ASSEMBLY AND (ARM OR CROSS_COMPILE_ARM))
51
 
52
@@ -151,4 +168,5 @@
53
     predict.cpp  predict.h
54
     scalinglist.cpp scalinglist.h
55
     quant.cpp quant.h contexts.h
56
-    deblock.cpp deblock.h)
57
+    deblock.cpp deblock.h
58
+    scaler.cpp scaler.h)
59
x265_3.4.tar.gz/source/common/aarch64/asm-primitives.cpp Added
201
 
1
@@ -0,0 +1,219 @@
2
+/*****************************************************************************
3
+ * Copyright (C) 2020 MulticoreWare, Inc
4
+ *
5
+ * Authors: Hongbin Liu <liuhongbin1@huawei.com>
6
+ *          Yimeng Su <yimeng.su@huawei.com>
7
+ *
8
+ * This program is free software; you can redistribute it and/or modify
9
+ * it under the terms of the GNU General Public License as published by
10
+ * the Free Software Foundation; either version 2 of the License, or
11
+ * (at your option) any later version.
12
+ *
13
+ * This program is distributed in the hope that it will be useful,
14
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
15
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16
+ * GNU General Public License for more details.
17
+ *
18
+ * You should have received a copy of the GNU General Public License
19
+ * along with this program; if not, write to the Free Software
20
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
21
+ *
22
+ * This program is also available under a commercial proprietary license.
23
+ * For more information, contact us at license @ x265.com.
24
+ *****************************************************************************/
25
+
26
+#include "common.h"
27
+#include "primitives.h"
28
+#include "x265.h"
29
+#include "cpu.h"
30
+
31
+
32
+#if defined(__GNUC__)
33
+#define GCC_VERSION (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__)
34
+#endif
35
+
36
+#define GCC_4_9_0 40900
37
+#define GCC_5_1_0 50100
38
+
39
+extern "C" {
40
+#include "pixel.h"
41
+#include "pixel-util.h"
42
+#include "ipfilter8.h"
43
+}
44
+
45
+namespace X265_NS {
46
+// private x265 namespace
47
+
48
+
49
+template<int size>
50
+void interp_8tap_hv_pp_cpu(const pixel* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int idxX, int idxY)
51
+{
52
+    ALIGN_VAR_32(int16_t, immed[MAX_CU_SIZE * (MAX_CU_SIZE + NTAPS_LUMA - 1)]);
53
+    const int halfFilterSize = NTAPS_LUMA >> 1;
54
+    const int immedStride = MAX_CU_SIZE;
55
+
56
+    primitives.pu[size].luma_hps(src, srcStride, immed, immedStride, idxX, 1);
57
+    primitives.pu[size].luma_vsp(immed + (halfFilterSize - 1) * immedStride, immedStride, dst, dstStride, idxY);
58
+}
59
+
60
+
61
+/* Temporary workaround because luma_vsp assembly primitive has not been completed
62
+ * but interp_8tap_hv_pp_cpu uses mixed C primitive and assembly primitive.
63
+ * Otherwise, segment fault occurs. */
64
+void setupAliasCPrimitives(EncoderPrimitives &cp, EncoderPrimitives &asmp, int cpuMask)
65
+{
66
+    if (cpuMask & X265_CPU_NEON)
67
+    {
68
+        asmp.pu[LUMA_8x4].luma_vsp   = cp.pu[LUMA_8x4].luma_vsp;
69
+        asmp.pu[LUMA_8x8].luma_vsp   = cp.pu[LUMA_8x8].luma_vsp;
70
+        asmp.pu[LUMA_8x16].luma_vsp  = cp.pu[LUMA_8x16].luma_vsp;
71
+        asmp.pu[LUMA_8x32].luma_vsp  = cp.pu[LUMA_8x32].luma_vsp;
72
+        asmp.pu[LUMA_12x16].luma_vsp = cp.pu[LUMA_12x16].luma_vsp;
73
+#if !AUTO_VECTORIZE || GCC_VERSION < GCC_5_1_0 /* gcc_version < gcc-5.1.0 */
74
+        asmp.pu[LUMA_16x4].luma_vsp  = cp.pu[LUMA_16x4].luma_vsp;
75
+        asmp.pu[LUMA_16x8].luma_vsp  = cp.pu[LUMA_16x8].luma_vsp;
76
+        asmp.pu[LUMA_16x12].luma_vsp = cp.pu[LUMA_16x12].luma_vsp;
77
+        asmp.pu[LUMA_16x16].luma_vsp = cp.pu[LUMA_16x16].luma_vsp;
78
+        asmp.pu[LUMA_16x32].luma_vsp = cp.pu[LUMA_16x32].luma_vsp;
79
+        asmp.pu[LUMA_16x64].luma_vsp = cp.pu[LUMA_16x64].luma_vsp;
80
+        asmp.pu[LUMA_32x16].luma_vsp = cp.pu[LUMA_32x16].luma_vsp;
81
+        asmp.pu[LUMA_32x24].luma_vsp = cp.pu[LUMA_32x24].luma_vsp;
82
+        asmp.pu[LUMA_32x32].luma_vsp = cp.pu[LUMA_32x32].luma_vsp;
83
+        asmp.pu[LUMA_32x64].luma_vsp = cp.pu[LUMA_32x64].luma_vsp;
84
+        asmp.pu[LUMA_48x64].luma_vsp = cp.pu[LUMA_48x64].luma_vsp;
85
+        asmp.pu[LUMA_64x16].luma_vsp = cp.pu[LUMA_64x16].luma_vsp;
86
+        asmp.pu[LUMA_64x32].luma_vsp = cp.pu[LUMA_64x32].luma_vsp;
87
+        asmp.pu[LUMA_64x48].luma_vsp = cp.pu[LUMA_64x48].luma_vsp;
88
+        asmp.pu[LUMA_64x64].luma_vsp = cp.pu[LUMA_64x64].luma_vsp;    
89
+#if !AUTO_VECTORIZE || GCC_VERSION < GCC_4_9_0 /* gcc_version < gcc-4.9.0 */
90
+        asmp.pu[LUMA_4x4].luma_vsp   = cp.pu[LUMA_4x4].luma_vsp;
91
+        asmp.pu[LUMA_4x8].luma_vsp   = cp.pu[LUMA_4x8].luma_vsp;
92
+        asmp.pu[LUMA_4x16].luma_vsp  = cp.pu[LUMA_4x16].luma_vsp;
93
+        asmp.pu[LUMA_24x32].luma_vsp = cp.pu[LUMA_24x32].luma_vsp;
94
+        asmp.pu[LUMA_32x8].luma_vsp  = cp.pu[LUMA_32x8].luma_vsp;
95
+#endif
96
+#endif
97
+    }
98
+}
99
+
100
+
101
+void setupAssemblyPrimitives(EncoderPrimitives &p, int cpuMask) 
102
+{
103
+    if (cpuMask & X265_CPU_NEON)
104
+    {
105
+        p.pu[LUMA_4x4].satd   = PFX(pixel_satd_4x4_neon);
106
+        p.pu[LUMA_4x8].satd   = PFX(pixel_satd_4x8_neon);
107
+        p.pu[LUMA_4x16].satd  = PFX(pixel_satd_4x16_neon);
108
+        p.pu[LUMA_8x4].satd   = PFX(pixel_satd_8x4_neon);
109
+        p.pu[LUMA_8x8].satd   = PFX(pixel_satd_8x8_neon);
110
+        p.pu[LUMA_12x16].satd = PFX(pixel_satd_12x16_neon);
111
+        
112
+        p.chroma[X265_CSP_I420].pu[CHROMA_420_4x4].satd    = PFX(pixel_satd_4x4_neon);
113
+        p.chroma[X265_CSP_I420].pu[CHROMA_420_4x8].satd    = PFX(pixel_satd_4x8_neon);
114
+        p.chroma[X265_CSP_I420].pu[CHROMA_420_4x16].satd   = PFX(pixel_satd_4x16_neon);
115
+        p.chroma[X265_CSP_I420].pu[CHROMA_420_8x4].satd    = PFX(pixel_satd_8x4_neon);
116
+        p.chroma[X265_CSP_I420].pu[CHROMA_420_8x8].satd    = PFX(pixel_satd_8x8_neon);
117
+        p.chroma[X265_CSP_I420].pu[CHROMA_420_12x16].satd  = PFX(pixel_satd_12x16_neon);
118
+        
119
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_4x4].satd    = PFX(pixel_satd_4x4_neon);
120
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_4x8].satd    = PFX(pixel_satd_4x8_neon);
121
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_4x16].satd   = PFX(pixel_satd_4x16_neon);
122
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_4x32].satd   = PFX(pixel_satd_4x32_neon);
123
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_8x4].satd    = PFX(pixel_satd_8x4_neon);
124
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_8x8].satd    = PFX(pixel_satd_8x8_neon);
125
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_12x32].satd  = PFX(pixel_satd_12x32_neon);
126
+
127
+        p.pu[LUMA_4x4].pixelavg_pp[NONALIGNED]   = PFX(pixel_avg_pp_4x4_neon);
128
+        p.pu[LUMA_4x8].pixelavg_pp[NONALIGNED]   = PFX(pixel_avg_pp_4x8_neon);
129
+        p.pu[LUMA_4x16].pixelavg_pp[NONALIGNED]  = PFX(pixel_avg_pp_4x16_neon);
130
+        p.pu[LUMA_8x4].pixelavg_pp[NONALIGNED]   = PFX(pixel_avg_pp_8x4_neon);
131
+        p.pu[LUMA_8x8].pixelavg_pp[NONALIGNED]   = PFX(pixel_avg_pp_8x8_neon);
132
+        p.pu[LUMA_8x16].pixelavg_pp[NONALIGNED]  = PFX(pixel_avg_pp_8x16_neon);
133
+        p.pu[LUMA_8x32].pixelavg_pp[NONALIGNED]  = PFX(pixel_avg_pp_8x32_neon);
134
+
135
+        p.pu[LUMA_4x4].pixelavg_pp[ALIGNED]   = PFX(pixel_avg_pp_4x4_neon);
136
+        p.pu[LUMA_4x8].pixelavg_pp[ALIGNED]   = PFX(pixel_avg_pp_4x8_neon);
137
+        p.pu[LUMA_4x16].pixelavg_pp[ALIGNED]  = PFX(pixel_avg_pp_4x16_neon);
138
+        p.pu[LUMA_8x4].pixelavg_pp[ALIGNED]   = PFX(pixel_avg_pp_8x4_neon);
139
+        p.pu[LUMA_8x8].pixelavg_pp[ALIGNED]   = PFX(pixel_avg_pp_8x8_neon);
140
+        p.pu[LUMA_8x16].pixelavg_pp[ALIGNED]  = PFX(pixel_avg_pp_8x16_neon);
141
+        p.pu[LUMA_8x32].pixelavg_pp[ALIGNED]  = PFX(pixel_avg_pp_8x32_neon);
142
+
143
+        p.pu[LUMA_8x4].sad_x3   = PFX(sad_x3_8x4_neon);
144
+        p.pu[LUMA_8x8].sad_x3   = PFX(sad_x3_8x8_neon);
145
+        p.pu[LUMA_8x16].sad_x3  = PFX(sad_x3_8x16_neon);
146
+        p.pu[LUMA_8x32].sad_x3  = PFX(sad_x3_8x32_neon);
147
+
148
+        p.pu[LUMA_8x4].sad_x4   = PFX(sad_x4_8x4_neon);
149
+        p.pu[LUMA_8x8].sad_x4   = PFX(sad_x4_8x8_neon);
150
+        p.pu[LUMA_8x16].sad_x4  = PFX(sad_x4_8x16_neon);
151
+        p.pu[LUMA_8x32].sad_x4  = PFX(sad_x4_8x32_neon);
152
+
153
+        // quant
154
+        p.quant = PFX(quant_neon);
155
+        // luma_hps
156
+        p.pu[LUMA_4x4].luma_hps   = PFX(interp_8tap_horiz_ps_4x4_neon);
157
+        p.pu[LUMA_4x8].luma_hps   = PFX(interp_8tap_horiz_ps_4x8_neon);
158
+        p.pu[LUMA_4x16].luma_hps  = PFX(interp_8tap_horiz_ps_4x16_neon);
159
+        p.pu[LUMA_8x4].luma_hps   = PFX(interp_8tap_horiz_ps_8x4_neon);
160
+        p.pu[LUMA_8x8].luma_hps   = PFX(interp_8tap_horiz_ps_8x8_neon);
161
+        p.pu[LUMA_8x16].luma_hps  = PFX(interp_8tap_horiz_ps_8x16_neon);
162
+        p.pu[LUMA_8x32].luma_hps  = PFX(interp_8tap_horiz_ps_8x32_neon);
163
+        p.pu[LUMA_12x16].luma_hps = PFX(interp_8tap_horiz_ps_12x16_neon);
164
+        p.pu[LUMA_24x32].luma_hps = PFX(interp_8tap_horiz_ps_24x32_neon);
165
+#if !AUTO_VECTORIZE || GCC_VERSION < GCC_5_1_0 /* gcc_version < gcc-5.1.0 */
166
+        p.pu[LUMA_16x4].luma_hps  = PFX(interp_8tap_horiz_ps_16x4_neon);
167
+        p.pu[LUMA_16x8].luma_hps  = PFX(interp_8tap_horiz_ps_16x8_neon);
168
+        p.pu[LUMA_16x12].luma_hps = PFX(interp_8tap_horiz_ps_16x12_neon);
169
+        p.pu[LUMA_16x16].luma_hps = PFX(interp_8tap_horiz_ps_16x16_neon);
170
+        p.pu[LUMA_16x32].luma_hps = PFX(interp_8tap_horiz_ps_16x32_neon);
171
+        p.pu[LUMA_16x64].luma_hps = PFX(interp_8tap_horiz_ps_16x64_neon);
172
+        p.pu[LUMA_32x8].luma_hps  = PFX(interp_8tap_horiz_ps_32x8_neon);
173
+        p.pu[LUMA_32x16].luma_hps = PFX(interp_8tap_horiz_ps_32x16_neon);
174
+        p.pu[LUMA_32x24].luma_hps = PFX(interp_8tap_horiz_ps_32x24_neon);
175
+        p.pu[LUMA_32x32].luma_hps = PFX(interp_8tap_horiz_ps_32x32_neon);
176
+        p.pu[LUMA_32x64].luma_hps = PFX(interp_8tap_horiz_ps_32x64_neon);
177
+        p.pu[LUMA_48x64].luma_hps = PFX(interp_8tap_horiz_ps_48x64_neon);
178
+        p.pu[LUMA_64x16].luma_hps = PFX(interp_8tap_horiz_ps_64x16_neon);
179
+        p.pu[LUMA_64x32].luma_hps = PFX(interp_8tap_horiz_ps_64x32_neon);
180
+        p.pu[LUMA_64x48].luma_hps = PFX(interp_8tap_horiz_ps_64x48_neon);
181
+        p.pu[LUMA_64x64].luma_hps = PFX(interp_8tap_horiz_ps_64x64_neon);
182
+#endif
183
+
184
+        p.pu[LUMA_8x4].luma_hvpp   =  interp_8tap_hv_pp_cpu<LUMA_8x4>;
185
+        p.pu[LUMA_8x8].luma_hvpp   =  interp_8tap_hv_pp_cpu<LUMA_8x8>;
186
+        p.pu[LUMA_8x16].luma_hvpp  =  interp_8tap_hv_pp_cpu<LUMA_8x16>;
187
+        p.pu[LUMA_8x32].luma_hvpp  =  interp_8tap_hv_pp_cpu<LUMA_8x32>;
188
+        p.pu[LUMA_12x16].luma_hvpp =  interp_8tap_hv_pp_cpu<LUMA_12x16>;
189
+#if !AUTO_VECTORIZE || GCC_VERSION < GCC_5_1_0 /* gcc_version < gcc-5.1.0 */
190
+        p.pu[LUMA_16x4].luma_hvpp  =  interp_8tap_hv_pp_cpu<LUMA_16x4>;
191
+        p.pu[LUMA_16x8].luma_hvpp  =  interp_8tap_hv_pp_cpu<LUMA_16x8>;
192
+        p.pu[LUMA_16x12].luma_hvpp =  interp_8tap_hv_pp_cpu<LUMA_16x12>;
193
+        p.pu[LUMA_16x16].luma_hvpp =  interp_8tap_hv_pp_cpu<LUMA_16x16>;
194
+        p.pu[LUMA_16x32].luma_hvpp =  interp_8tap_hv_pp_cpu<LUMA_16x32>;
195
+        p.pu[LUMA_16x64].luma_hvpp =  interp_8tap_hv_pp_cpu<LUMA_16x64>;
196
+        p.pu[LUMA_32x16].luma_hvpp =  interp_8tap_hv_pp_cpu<LUMA_32x16>;
197
+        p.pu[LUMA_32x24].luma_hvpp =  interp_8tap_hv_pp_cpu<LUMA_32x24>;
198
+        p.pu[LUMA_32x32].luma_hvpp =  interp_8tap_hv_pp_cpu<LUMA_32x32>;
199
+        p.pu[LUMA_32x64].luma_hvpp =  interp_8tap_hv_pp_cpu<LUMA_32x64>;
200
+        p.pu[LUMA_48x64].luma_hvpp =  interp_8tap_hv_pp_cpu<LUMA_48x64>;
201
x265_3.4.tar.gz/source/common/aarch64/asm.S Added
71
 
1
@@ -0,0 +1,69 @@
2
+/*****************************************************************************
3
+ * Copyright (C) 2020 MulticoreWare, Inc
4
+ *
5
+ * Authors: Hongbin Liu <liuhongbin1@huawei.com>
6
+ *
7
+ * This program is free software; you can redistribute it and/or modify
8
+ * it under the terms of the GNU General Public License as published by
9
+ * the Free Software Foundation; either version 2 of the License, or
10
+ * (at your option) any later version.
11
+ *
12
+ * This program is distributed in the hope that it will be useful,
13
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
+ * GNU General Public License for more details.
16
+ *
17
+ * You should have received a copy of the GNU General Public License
18
+ * along with this program; if not, write to the Free Software
19
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
20
+ *
21
+ * This program is also available under a commercial proprietary license.
22
+ * For more information, contact us at license @ x265.com.
23
+ *****************************************************************************/
24
+
25
+.arch           armv8-a
26
+
27
+#ifdef PREFIX
28
+#define EXTERN_ASM _
29
+#else
30
+#define EXTERN_ASM
31
+#endif
32
+
33
+#ifdef __ELF__
34
+#define ELF
35
+#else
36
+#define ELF @
37
+#endif
38
+
39
+#define HAVE_AS_FUNC 1
40
+
41
+#if HAVE_AS_FUNC
42
+#define FUNC
43
+#else
44
+#define FUNC @
45
+#endif
46
+
47
+.macro function name, export=1
48
+    .macro endfunc
49
+ELF     .size   \name, . - \name
50
+FUNC    .endfunc
51
+        .purgem endfunc
52
+    .endm
53
+        .align  2
54
+.if \export == 1
55
+        .global EXTERN_ASM\name
56
+ELF     .hidden EXTERN_ASM\name
57
+ELF     .type   EXTERN_ASM\name, %function
58
+FUNC    .func   EXTERN_ASM\name
59
+EXTERN_ASM\name:
60
+.else
61
+ELF     .hidden \name
62
+ELF     .type   \name, %function
63
+FUNC    .func   \name
64
+\name:
65
+.endif
66
+.endm
67
+
68
+
69
+#define FENC_STRIDE 64
70
+#define FDEC_STRIDE 32
71
x265_3.4.tar.gz/source/common/aarch64/ipfilter8.S Added
201
 
1
@@ -0,0 +1,414 @@
2
+/*****************************************************************************
3
+ * Copyright (C) 2020 MulticoreWare, Inc
4
+ *
5
+ * Authors: Yimeng Su <yimeng.su@huawei.com>
6
+ *
7
+ * This program is free software; you can redistribute it and/or modify
8
+ * it under the terms of the GNU General Public License as published by
9
+ * the Free Software Foundation; either version 2 of the License, or
10
+ * (at your option) any later version.
11
+ *
12
+ * This program is distributed in the hope that it will be useful,
13
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
+ * GNU General Public License for more details.
16
+ *
17
+ * You should have received a copy of the GNU General Public License
18
+ * along with this program; if not, write to the Free Software
19
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
20
+ *
21
+ * This program is also available under a commercial proprietary license.
22
+ * For more information, contact us at license @ x265.com.
23
+ *****************************************************************************/
24
+
25
+#include "asm.S"
26
+
27
+.section .rodata
28
+
29
+.align 4
30
+
31
+.text
32
+
33
+
34
+
35
+.macro qpel_filter_0_32b
36
+    movi            v24.8h, #64
37
+    uxtl            v19.8h, v5.8b
38
+    smull           v17.4s, v19.4h, v24.4h
39
+    smull2          v18.4s, v19.8h, v24.8h
40
+.endm
41
+
42
+.macro qpel_filter_1_32b
43
+    movi            v16.8h, #58
44
+    uxtl            v19.8h, v5.8b
45
+    smull           v17.4s, v19.4h, v16.4h
46
+    smull2          v18.4s, v19.8h, v16.8h
47
+
48
+    movi            v24.8h, #10
49
+    uxtl            v21.8h, v1.8b
50
+    smull           v19.4s, v21.4h, v24.4h
51
+    smull2          v20.4s, v21.8h, v24.8h
52
+
53
+    movi            v16.8h, #17
54
+    uxtl            v23.8h, v2.8b
55
+    smull           v21.4s, v23.4h, v16.4h
56
+    smull2          v22.4s, v23.8h, v16.8h
57
+
58
+    movi            v24.8h, #5
59
+    uxtl            v1.8h, v6.8b
60
+    smull           v23.4s, v1.4h, v24.4h
61
+    smull2          v16.4s, v1.8h, v24.8h
62
+
63
+    sub             v17.4s, v17.4s, v19.4s
64
+    sub             v18.4s, v18.4s, v20.4s
65
+
66
+    uxtl            v1.8h, v4.8b
67
+    sshll           v19.4s, v1.4h, #2
68
+    sshll2          v20.4s, v1.8h, #2
69
+
70
+    add             v17.4s, v17.4s, v21.4s
71
+    add             v18.4s, v18.4s, v22.4s
72
+
73
+    uxtl            v1.8h, v0.8b
74
+    uxtl            v2.8h, v3.8b
75
+    ssubl           v21.4s, v2.4h, v1.4h
76
+    ssubl2          v22.4s, v2.8h, v1.8h
77
+
78
+    add             v17.4s, v17.4s, v19.4s
79
+    add             v18.4s, v18.4s, v20.4s
80
+    sub             v21.4s, v21.4s, v23.4s
81
+    sub             v22.4s, v22.4s, v16.4s
82
+    add             v17.4s, v17.4s, v21.4s
83
+    add             v18.4s, v18.4s, v22.4s
84
+.endm
85
+
86
+.macro qpel_filter_2_32b
87
+    movi            v16.4s, #11
88
+    uxtl            v19.8h, v5.8b
89
+    uxtl            v20.8h, v2.8b
90
+    saddl           v17.4s, v19.4h, v20.4h
91
+    saddl2          v18.4s, v19.8h, v20.8h
92
+
93
+    uxtl            v21.8h, v1.8b
94
+    uxtl            v22.8h, v6.8b
95
+    saddl           v19.4s, v21.4h, v22.4h
96
+    saddl2          v20.4s, v21.8h, v22.8h
97
+
98
+    mul             v19.4s, v19.4s, v16.4s
99
+    mul             v20.4s, v20.4s, v16.4s
100
+
101
+    movi            v16.4s, #40
102
+    mul             v17.4s, v17.4s, v16.4s
103
+    mul             v18.4s, v18.4s, v16.4s
104
+
105
+    uxtl            v21.8h, v4.8b
106
+    uxtl            v22.8h, v3.8b
107
+    saddl           v23.4s, v21.4h, v22.4h
108
+    saddl2          v16.4s, v21.8h, v22.8h
109
+
110
+    uxtl            v1.8h, v0.8b
111
+    uxtl            v2.8h, v7.8b
112
+    saddl           v21.4s, v1.4h, v2.4h
113
+    saddl2          v22.4s, v1.8h, v2.8h
114
+
115
+    shl             v23.4s, v23.4s, #2
116
+    shl             v16.4s, v16.4s, #2
117
+
118
+    add             v19.4s, v19.4s, v21.4s
119
+    add             v20.4s, v20.4s, v22.4s
120
+    add             v17.4s, v17.4s, v23.4s
121
+    add             v18.4s, v18.4s, v16.4s
122
+    sub             v17.4s, v17.4s, v19.4s
123
+    sub             v18.4s, v18.4s, v20.4s
124
+.endm
125
+
126
+.macro qpel_filter_3_32b
127
+    movi            v16.8h, #17
128
+    movi            v24.8h, #5
129
+
130
+    uxtl            v19.8h, v5.8b
131
+    smull           v17.4s, v19.4h, v16.4h
132
+    smull2          v18.4s, v19.8h, v16.8h
133
+
134
+    uxtl            v21.8h, v1.8b
135
+    smull           v19.4s, v21.4h, v24.4h
136
+    smull2          v20.4s, v21.8h, v24.8h
137
+
138
+    movi            v16.8h, #58
139
+    uxtl            v23.8h, v2.8b
140
+    smull           v21.4s, v23.4h, v16.4h
141
+    smull2          v22.4s, v23.8h, v16.8h
142
+
143
+    movi            v24.8h, #10
144
+    uxtl            v1.8h, v6.8b
145
+    smull           v23.4s, v1.4h, v24.4h
146
+    smull2          v16.4s, v1.8h, v24.8h
147
+
148
+    sub             v17.4s, v17.4s, v19.4s
149
+    sub             v18.4s, v18.4s, v20.4s
150
+
151
+    uxtl            v1.8h, v3.8b
152
+    sshll           v19.4s, v1.4h, #2
153
+    sshll2          v20.4s, v1.8h, #2
154
+
155
+    add             v17.4s, v17.4s, v21.4s
156
+    add             v18.4s, v18.4s, v22.4s
157
+
158
+    uxtl            v1.8h, v4.8b
159
+    uxtl            v2.8h, v7.8b
160
+    ssubl           v21.4s, v1.4h, v2.4h
161
+    ssubl2          v22.4s, v1.8h, v2.8h
162
+
163
+    add             v17.4s, v17.4s, v19.4s
164
+    add             v18.4s, v18.4s, v20.4s
165
+    sub             v21.4s, v21.4s, v23.4s
166
+    sub             v22.4s, v22.4s, v16.4s
167
+    add             v17.4s, v17.4s, v21.4s
168
+    add             v18.4s, v18.4s, v22.4s
169
+.endm
170
+
171
+
172
+
173
+
174
+.macro vextin8
175
+    ld1             {v3.16b}, [x11], #16
176
+    mov             v7.d[0], v3.d[1]
177
+    ext             v0.8b, v3.8b, v7.8b, #1
178
+    ext             v4.8b, v3.8b, v7.8b, #2
179
+    ext             v1.8b, v3.8b, v7.8b, #3
180
+    ext             v5.8b, v3.8b, v7.8b, #4
181
+    ext             v2.8b, v3.8b, v7.8b, #5
182
+    ext             v6.8b, v3.8b, v7.8b, #6
183
+    ext             v3.8b, v3.8b, v7.8b, #7
184
+.endm
185
+
186
+
187
+
188
+// void interp_horiz_ps_c(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt)
189
+.macro HPS_FILTER a b filterhps
190
+    mov             w12, #8192
191
+    mov             w6, w10
192
+    sub             x3, x3, #\a
193
+    lsl             x3, x3, #1
194
+    mov             w9, #\a
195
+    cmp             w9, #4
196
+    b.eq            14f
197
+    cmp             w9, #12
198
+    b.eq            15f
199
+    b               7f
200
+14:
201
x265_3.4.tar.gz/source/common/aarch64/ipfilter8.h Added
57
 
1
@@ -0,0 +1,55 @@
2
+/*****************************************************************************
3
+ * Copyright (C) 2020 MulticoreWare, Inc
4
+ *
5
+ * Authors: Yimeng Su <yimeng.su@huawei.com>
6
+ *
7
+ * This program is free software; you can redistribute it and/or modify
8
+ * it under the terms of the GNU General Public License as published by
9
+ * the Free Software Foundation; either version 2 of the License, or
10
+ * (at your option) any later version.
11
+ *
12
+ * This program is distributed in the hope that it will be useful,
13
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
+ * GNU General Public License for more details.
16
+ *
17
+ * You should have received a copy of the GNU General Public License
18
+ * along with this program; if not, write to the Free Software
19
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
20
+ *
21
+ * This program is also available under a commercial proprietary license.
22
+ * For more information, contact us at license @ x265.com.
23
+ *****************************************************************************/
24
+
25
+#ifndef X265_IPFILTER8_AARCH64_H
26
+#define X265_IPFILTER8_AARCH64_H
27
+
28
+
29
+void x265_interp_8tap_horiz_ps_4x4_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
30
+void x265_interp_8tap_horiz_ps_4x8_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
31
+void x265_interp_8tap_horiz_ps_4x16_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
32
+void x265_interp_8tap_horiz_ps_8x4_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
33
+void x265_interp_8tap_horiz_ps_8x8_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
34
+void x265_interp_8tap_horiz_ps_8x16_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
35
+void x265_interp_8tap_horiz_ps_8x32_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
36
+void x265_interp_8tap_horiz_ps_12x16_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
37
+void x265_interp_8tap_horiz_ps_16x4_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
38
+void x265_interp_8tap_horiz_ps_16x8_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
39
+void x265_interp_8tap_horiz_ps_16x12_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
40
+void x265_interp_8tap_horiz_ps_16x16_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
41
+void x265_interp_8tap_horiz_ps_16x32_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
42
+void x265_interp_8tap_horiz_ps_16x64_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
43
+void x265_interp_8tap_horiz_ps_24x32_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
44
+void x265_interp_8tap_horiz_ps_32x8_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
45
+void x265_interp_8tap_horiz_ps_32x16_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
46
+void x265_interp_8tap_horiz_ps_32x24_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
47
+void x265_interp_8tap_horiz_ps_32x32_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
48
+void x265_interp_8tap_horiz_ps_32x64_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
49
+void x265_interp_8tap_horiz_ps_48x64_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
50
+void x265_interp_8tap_horiz_ps_64x16_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
51
+void x265_interp_8tap_horiz_ps_64x32_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
52
+void x265_interp_8tap_horiz_ps_64x48_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
53
+void x265_interp_8tap_horiz_ps_64x64_neon(const pixel* src, intptr_t srcStride, int16_t* dst, intptr_t dstStride, int coeffIdx, int isRowExt);
54
+
55
+
56
+#endif // ifndef X265_IPFILTER8_AARCH64_H
57
x265_3.4.tar.gz/source/common/aarch64/mc-a.S Added
65
 
1
@@ -0,0 +1,63 @@
2
+/*****************************************************************************
3
+ * Copyright (C) 2020 MulticoreWare, Inc
4
+ *
5
+ * Authors: Hongbin Liu <liuhongbin1@huawei.com>
6
+ *
7
+ * This program is free software; you can redistribute it and/or modify
8
+ * it under the terms of the GNU General Public License as published by
9
+ * the Free Software Foundation; either version 2 of the License, or
10
+ * (at your option) any later version.
11
+ *
12
+ * This program is distributed in the hope that it will be useful,
13
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
+ * GNU General Public License for more details.
16
+ *
17
+ * You should have received a copy of the GNU General Public License
18
+ * along with this program; if not, write to the Free Software
19
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
20
+ *
21
+ * This program is also available under a commercial proprietary license.
22
+ * For more information, contact us at license @ x265.com.
23
+ *****************************************************************************/
24
+
25
+#include "asm.S"
26
+
27
+.section .rodata
28
+
29
+.align 4
30
+
31
+.text
32
+
33
+.macro pixel_avg_pp_4xN_neon h
34
+function x265_pixel_avg_pp_4x\h\()_neon
35
+.rept \h
36
+    ld1             {v0.s}[0], [x2], x3
37
+    ld1             {v1.s}[0], [x4], x5
38
+    urhadd          v2.8b, v0.8b, v1.8b
39
+    st1             {v2.s}[0], [x0], x1
40
+.endr
41
+    ret
42
+endfunc
43
+.endm
44
+
45
+pixel_avg_pp_4xN_neon 4
46
+pixel_avg_pp_4xN_neon 8
47
+pixel_avg_pp_4xN_neon 16
48
+
49
+.macro pixel_avg_pp_8xN_neon h
50
+function x265_pixel_avg_pp_8x\h\()_neon
51
+.rept \h
52
+    ld1             {v0.8b}, [x2], x3
53
+    ld1             {v1.8b}, [x4], x5
54
+    urhadd          v2.8b, v0.8b, v1.8b
55
+    st1             {v2.8b}, [x0], x1
56
+.endr
57
+    ret
58
+endfunc
59
+.endm
60
+
61
+pixel_avg_pp_8xN_neon 4
62
+pixel_avg_pp_8xN_neon 8
63
+pixel_avg_pp_8xN_neon 16
64
+pixel_avg_pp_8xN_neon 32
65
x265_3.4.tar.gz/source/common/aarch64/pixel-util.S Added
201
 
1
@@ -0,0 +1,419 @@
2
+/*****************************************************************************
3
+ * Copyright (C) 2020 MulticoreWare, Inc
4
+ *
5
+ * Authors: Yimeng Su <yimeng.su@huawei.com>
6
+ *          Hongbin Liu <liuhongbin1@huawei.com>
7
+ *
8
+ * This program is free software; you can redistribute it and/or modify
9
+ * it under the terms of the GNU General Public License as published by
10
+ * the Free Software Foundation; either version 2 of the License, or
11
+ * (at your option) any later version.
12
+ *
13
+ * This program is distributed in the hope that it will be useful,
14
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
15
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16
+ * GNU General Public License for more details.
17
+ *
18
+ * You should have received a copy of the GNU General Public License
19
+ * along with this program; if not, write to the Free Software
20
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
21
+ *
22
+ * This program is also available under a commercial proprietary license.
23
+ * For more information, contact us at license @ x265.com.
24
+ *****************************************************************************/
25
+
26
+#include "asm.S"
27
+
28
+.section .rodata
29
+
30
+.align 4
31
+
32
+.text
33
+
34
+.macro x265_satd_4x8_8x4_end_neon  // Shared tail of the SATD kernels (per its name): combines v4-v7 and reduces to a 32-bit sum in s0; overwrites v0-v7 and v16-v19
35
+    add             v0.8h, v4.8h, v6.8h  // v0 = v4 + v6
36
+    add             v1.8h, v5.8h, v7.8h  // v1 = v5 + v7
37
+    sub             v2.8h, v4.8h, v6.8h  // v2 = v4 - v6
38
+    sub             v3.8h, v5.8h, v7.8h  // v3 = v5 - v7
39
+
40
+    trn1            v16.8h, v0.8h, v1.8h  // even 16-bit lanes of v0 and v1, interleaved
41
+    trn2            v17.8h, v0.8h, v1.8h  // odd 16-bit lanes of v0 and v1, interleaved
42
+    add             v4.8h, v16.8h, v17.8h  // even lane + odd lane, i.e. sums of horizontally adjacent elements
43
+    trn1            v18.8h, v2.8h, v3.8h  // even 16-bit lanes of v2 and v3, interleaved
44
+    trn2            v19.8h, v2.8h, v3.8h  // odd 16-bit lanes of v2 and v3, interleaved
45
+    sub             v5.8h, v16.8h, v17.8h  // even lane - odd lane for the v0/v1 pair
46
+    add             v6.8h, v18.8h, v19.8h  // even lane + odd lane for the v2/v3 pair
47
+    sub             v7.8h, v18.8h, v19.8h  // even lane - odd lane for the v2/v3 pair
48
+    trn1            v0.4s, v4.4s, v6.4s  // even 32-bit lanes of v4 and v6, interleaved
49
+    trn2            v2.4s, v4.4s, v6.4s  // odd 32-bit lanes of v4 and v6, interleaved
50
+    abs             v0.8h, v0.8h  // absolute values, taken in place
51
+    trn1            v1.4s, v5.4s, v7.4s  // even 32-bit lanes of v5 and v7, interleaved
52
+    trn2            v3.4s, v5.4s, v7.4s  // odd 32-bit lanes of v5 and v7, interleaved
53
+    abs             v2.8h, v2.8h
54
+    abs             v1.8h, v1.8h
55
+    abs             v3.8h, v3.8h
56
+    umax            v0.8h, v0.8h, v2.8h  // NOTE(review): looks like the max(|a|,|b|) == (|a+b| + |a-b|) / 2 identity standing in for a last butterfly stage - confirm against the C reference
57
+    umax            v1.8h, v1.8h, v3.8h
58
+    add             v0.8h, v0.8h, v1.8h  // merge both halves before the horizontal reduction
59
+    uaddlv          s0, v0.8h  // widen and sum all eight 16-bit lanes into the 32-bit scalar s0
60
+.endm
61
+
62
+.macro pixel_satd_4x8_neon  // One 4x8 block: x0/x1 = pix1 and its stride, x2/x3 = pix2 and its stride; result left in s0; x0 and x2 end up advanced by 8 rows
63
+    ld1r             {v1.2s}, [x2], x3  // pix2 row 0: 4 bytes replicated into both 32-bit lanes (lane 1 is overwritten below)
64
+    ld1r            {v0.2s}, [x0], x1  // pix1 row 0
65
+    ld1r            {v3.2s}, [x2], x3  // pix2 row 1
66
+    ld1r            {v2.2s}, [x0], x1  // pix1 row 1
67
+    ld1r            {v5.2s}, [x2], x3  // pix2 row 2
68
+    ld1r            {v4.2s}, [x0], x1  // pix1 row 2
69
+    ld1r            {v7.2s}, [x2], x3  // pix2 row 3
70
+    ld1r            {v6.2s}, [x0], x1  // pix1 row 3
71
+
72
+    ld1             {v1.s}[1], [x2], x3  // pix2 row 4 into lane 1, so v1 = row 0 | row 4
73
+    ld1             {v0.s}[1], [x0], x1  // pix1 row 4 into lane 1, so v0 = row 0 | row 4
74
+    usubl           v0.8h, v0.8b, v1.8b  // widen to 16 bit: pix1 - pix2 for rows 0 and 4
75
+    ld1             {v3.s}[1], [x2], x3  // pix2 row 5
76
+    ld1             {v2.s}[1], [x0], x1  // pix1 row 5
77
+    usubl           v1.8h, v2.8b, v3.8b  // pix1 - pix2 for rows 1 and 5
78
+    ld1             {v5.s}[1], [x2], x3  // pix2 row 6
79
+    ld1             {v4.s}[1], [x0], x1  // pix1 row 6
80
+    usubl           v2.8h, v4.8b, v5.8b  // pix1 - pix2 for rows 2 and 6
81
+    ld1             {v7.s}[1], [x2], x3  // pix2 row 7
82
+    add             v4.8h, v0.8h, v1.8h  // first vertical butterfly: rows (0,4) + rows (1,5)
83
+    sub             v5.8h, v0.8h, v1.8h  // rows (0,4) - rows (1,5)
84
+    ld1             {v6.s}[1], [x0], x1  // pix1 row 7
85
+    usubl           v3.8h, v6.8b, v7.8b  // pix1 - pix2 for rows 3 and 7
86
+    add         v6.8h, v2.8h, v3.8h  // rows (2,6) + rows (3,7)
87
+    sub         v7.8h, v2.8h, v3.8h  // rows (2,6) - rows (3,7)
88
+    x265_satd_4x8_8x4_end_neon  // consumes v4-v7 and leaves the 32-bit sum in s0
89
+.endm
90
+
91
+// template<int w, int h>
92
+// int satd4(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2)
93
+function x265_pixel_satd_4x8_neon  // x0=pix1, x1=stride_pix1, x2=pix2, x3=stride_pix2 (prototype in pixel-util.h); a single 4x8 block
94
+    pixel_satd_4x8_neon  // leaves the block sum in s0
95
+    mov               w0, v0.s[0]  // move the sum to w0 as the int return value
96
+    ret
97
+endfunc
98
+
99
+// template<int w, int h>
100
+// int satd4(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2)
101
+function x265_pixel_satd_4x16_neon  // x0=pix1, x1=stride_pix1, x2=pix2, x3=stride_pix2 (prototype in pixel-util.h); two vertically stacked 4x8 blocks
102
+    eor             w4, w4, w4  // w4 = 0, running total
103
+    pixel_satd_4x8_neon  // rows 0-7; the post-indexed loads leave x0 and x2 at row 8
104
+    mov               w5, v0.s[0]  // fetch the block sum from s0
105
+    add             w4, w4, w5
106
+    pixel_satd_4x8_neon  // rows 8-15
107
+    mov               w5, v0.s[0]
108
+    add             w0, w5, w4  // return value = sum of both blocks
109
+    ret
110
+endfunc
111
+
112
+// template<int w, int h>
113
+// int satd4(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2)
114
+function x265_pixel_satd_4x32_neon  // x0=pix1, x1=stride_pix1, x2=pix2, x3=stride_pix2 (prototype in pixel-util.h); four vertically stacked 4x8 blocks
115
+    eor             w4, w4, w4  // w4 = 0, running total
116
+.rept 4
117
+    pixel_satd_4x8_neon  // next 8 rows; x0 and x2 are advanced by the macro itself
118
+    mov             w5, v0.s[0]  // fetch the block sum from s0
119
+    add             w4, w4, w5
120
+.endr
121
+    mov             w0, w4  // return the accumulated total
122
+    ret
123
+endfunc
124
+
125
+// template<int w, int h>
126
+// int satd4(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2)
127
+function x265_pixel_satd_12x16_neon  // x0=pix1, x1=stride_pix1, x2=pix2, x3=stride_pix2 (prototype in pixel-util.h); three 4-byte-wide columns of two 4x8 blocks each
128
+    mov             x4, x0  // keep the pix1 base, since the macro advances x0
129
+    mov             x5, x2  // keep the pix2 base, since the macro advances x2
130
+    eor             w7, w7, w7  // w7 = 0, running total
131
+    pixel_satd_4x8_neon  // column 0, rows 0-7
132
+    mov             w6, v0.s[0]
133
+    add             w7, w7, w6
134
+    pixel_satd_4x8_neon  // column 0, rows 8-15
135
+    mov             w6, v0.s[0]
136
+    add             w7, w7, w6
137
+
138
+    add             x0, x4, #4  // rewind to row 0 at byte offset 4 (column 1)
139
+    add             x2, x5, #4
140
+    pixel_satd_4x8_neon  // column 1, rows 0-7
141
+    mov             w6, v0.s[0]
142
+    add             w7, w7, w6
143
+    pixel_satd_4x8_neon  // column 1, rows 8-15
144
+    mov             w6, v0.s[0]
145
+    add             w7, w7, w6
146
+
147
+    add             x0, x4, #8  // rewind to row 0 at byte offset 8 (column 2)
148
+    add             x2, x5, #8
149
+    pixel_satd_4x8_neon  // column 2, rows 0-7
150
+    mov             w6, v0.s[0]
151
+    add             w7, w7, w6
152
+    pixel_satd_4x8_neon  // column 2, rows 8-15
153
+    mov             w6, v0.s[0]
154
+    add             w0, w7, w6  // return value = total over all six blocks
155
+    ret
156
+endfunc
157
+
158
+// template<int w, int h>
159
+// int satd4(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2)
160
+function x265_pixel_satd_12x32_neon  // x0=pix1, x1=stride_pix1, x2=pix2, x3=stride_pix2 (prototype in pixel-util.h); three 4-byte-wide columns of four 4x8 blocks each
161
+    mov             x4, x0  // keep the pix1 base, since the macro advances x0
162
+    mov             x5, x2  // keep the pix2 base, since the macro advances x2
163
+    eor             w7, w7, w7  // w7 = 0, running total
164
+.rept 4
165
+    pixel_satd_4x8_neon  // column 0, next 8 rows
166
+    mov             w6, v0.s[0]
167
+    add             w7, w7, w6
168
+.endr
169
+
170
+    add             x0, x4, #4  // rewind to row 0 at byte offset 4 (column 1)
171
+    add             x2, x5, #4
172
+.rept 4
173
+    pixel_satd_4x8_neon  // column 1, next 8 rows
174
+    mov             w6, v0.s[0]
175
+    add             w7, w7, w6
176
+.endr
177
+
178
+    add             x0, x4, #8  // rewind to row 0 at byte offset 8 (column 2)
179
+    add             x2, x5, #8
180
+.rept 4
181
+    pixel_satd_4x8_neon  // column 2, next 8 rows
182
+    mov             w6, v0.s[0]
183
+    add             w7, w7, w6
184
+.endr
185
+
186
+    mov             w0, w7  // return the accumulated total
187
+    ret
188
+endfunc
189
+
190
+// template<int w, int h>
191
+// int satd4(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2)
192
+function x265_pixel_satd_8x8_neon
193
+    eor             w4, w4, w4
194
+    mov             x6, x0
195
+    mov             x7, x2
196
+    pixel_satd_4x8_neon
197
+    mov             w5, v0.s[0]
198
+    add             w4, w4, w5
199
+    add             x0, x6, #4
200
+    add             x2, x7, #4
201
x265_3.4.tar.gz/source/common/aarch64/pixel-util.h Added
42
 
1
@@ -0,0 +1,40 @@
2
+/*****************************************************************************
3
+ * Copyright (C) 2020 MulticoreWare, Inc
4
+ *
5
+ * Authors: Yimeng Su <yimeng.su@huawei.com>
6
+ *          Hongbin Liu <liuhongbin1@huawei.com>
7
+ *
8
+ * This program is free software; you can redistribute it and/or modify
9
+ * it under the terms of the GNU General Public License as published by
10
+ * the Free Software Foundation; either version 2 of the License, or
11
+ * (at your option) any later version.
12
+ *
13
+ * This program is distributed in the hope that it will be useful,
14
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
15
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16
+ * GNU General Public License for more details.
17
+ *
18
+ * You should have received a copy of the GNU General Public License
19
+ * along with this program; if not, write to the Free Software
20
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
21
+ *
22
+ * This program is also available under a commercial proprietary license.
23
+ * For more information, contact us at license @ x265.com.
24
+ *****************************************************************************/
25
+
26
+#ifndef X265_PIXEL_UTIL_AARCH64_H
27
+#define X265_PIXEL_UTIL_AARCH64_H
28
+
29
+int x265_pixel_satd_4x4_neon(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2);
30
+int x265_pixel_satd_4x8_neon(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2);
31
+int x265_pixel_satd_4x16_neon(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2);
32
+int x265_pixel_satd_4x32_neon(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2);
33
+int x265_pixel_satd_8x4_neon(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2);
34
+int x265_pixel_satd_8x8_neon(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2);
35
+int x265_pixel_satd_12x16_neon(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2);
36
+int x265_pixel_satd_12x32_neon(const pixel* pix1, intptr_t stride_pix1, const pixel* pix2, intptr_t stride_pix2);
37
+
38
+uint32_t x265_quant_neon(const int16_t* coef, const int32_t* quantCoeff, int32_t* deltaU, int16_t* qCoef, int qBits, int add, int numCoeff);
39
+int PFX(psyCost_4x4_neon)(const pixel* source, intptr_t sstride, const pixel* recon, intptr_t rstride);
40
+
41
+#endif // ifndef X265_PIXEL_UTIL_AARCH64_H
42
x265_3.4.tar.gz/source/common/aarch64/pixel.h Added
107
 
1
@@ -0,0 +1,105 @@
2
+/*****************************************************************************
3
+ * Copyright (C) 2020 MulticoreWare, Inc
4
+ *
5
+ * Authors: Hongbin Liu <liuhongbin1@huawei.com>
6
+ *
7
+ * This program is free software; you can redistribute it and/or modify
8
+ * it under the terms of the GNU General Public License as published by
9
+ * the Free Software Foundation; either version 2 of the License, or
10
+ * (at your option) any later version.
11
+ *
12
+ * This program is distributed in the hope that it will be useful,
13
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
+ * GNU General Public License for more details.
16
+ *
17
+ * You should have received a copy of the GNU General Public License
18
+ * along with this program; if not, write to the Free Software
19
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
20
+ *
21
+ * This program is also available under a commercial proprietary license.
22
+ * For more information, contact us at license @ x265.com.
23
+ *****************************************************************************/
24
+
25
+#ifndef X265_I386_PIXEL_AARCH64_H
26
+#define X265_I386_PIXEL_AARCH64_H
27
+
28
+void x265_pixel_avg_pp_4x4_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int);
29
+void x265_pixel_avg_pp_4x8_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int);
30
+void x265_pixel_avg_pp_4x16_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int);
31
+void x265_pixel_avg_pp_8x4_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int);
32
+void x265_pixel_avg_pp_8x8_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int);
33
+void x265_pixel_avg_pp_8x16_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int);
34
+void x265_pixel_avg_pp_8x32_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int);
35
+void x265_pixel_avg_pp_12x16_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int);
36
+void x265_pixel_avg_pp_16x4_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int);
37
+void x265_pixel_avg_pp_16x8_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int);
38
+void x265_pixel_avg_pp_16x12_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int);
39
+void x265_pixel_avg_pp_16x16_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int);
40
+void x265_pixel_avg_pp_16x32_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int);
41
+void x265_pixel_avg_pp_16x64_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int);
42
+void x265_pixel_avg_pp_24x32_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int);
43
+void x265_pixel_avg_pp_32x8_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int);
44
+void x265_pixel_avg_pp_32x16_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int);
45
+void x265_pixel_avg_pp_32x24_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int);
46
+void x265_pixel_avg_pp_32x32_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int);
47
+void x265_pixel_avg_pp_32x64_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int);
48
+void x265_pixel_avg_pp_48x64_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int);
49
+void x265_pixel_avg_pp_64x16_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int);
50
+void x265_pixel_avg_pp_64x32_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int);
51
+void x265_pixel_avg_pp_64x48_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int);
52
+void x265_pixel_avg_pp_64x64_neon (pixel* dst, intptr_t dstride, const pixel* src0, intptr_t sstride0, const pixel* src1, intptr_t sstride1, int);
53
+
54
+void x265_sad_x3_4x4_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, intptr_t frefstride, int32_t* res);
55
+void x265_sad_x3_4x8_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, intptr_t frefstride, int32_t* res);
56
+void x265_sad_x3_4x16_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, intptr_t frefstride, int32_t* res);
57
+void x265_sad_x3_8x4_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, intptr_t frefstride, int32_t* res);
58
+void x265_sad_x3_8x8_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, intptr_t frefstride, int32_t* res);
59
+void x265_sad_x3_8x16_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, intptr_t frefstride, int32_t* res);
60
+void x265_sad_x3_8x32_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, intptr_t frefstride, int32_t* res);
61
+void x265_sad_x3_12x16_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, intptr_t frefstride, int32_t* res);
62
+void x265_sad_x3_16x4_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, intptr_t frefstride, int32_t* res);
63
+void x265_sad_x3_16x8_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, intptr_t frefstride, int32_t* res);
64
+void x265_sad_x3_16x12_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, intptr_t frefstride, int32_t* res);
65
+void x265_sad_x3_16x16_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, intptr_t frefstride, int32_t* res);
66
+void x265_sad_x3_16x32_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, intptr_t frefstride, int32_t* res);
67
+void x265_sad_x3_16x64_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, intptr_t frefstride, int32_t* res);
68
+void x265_sad_x3_24x32_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, intptr_t frefstride, int32_t* res);
69
+void x265_sad_x3_32x8_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, intptr_t frefstride, int32_t* res);
70
+void x265_sad_x3_32x16_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, intptr_t frefstride, int32_t* res);
71
+void x265_sad_x3_32x24_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, intptr_t frefstride, int32_t* res);
72
+void x265_sad_x3_32x32_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, intptr_t frefstride, int32_t* res);
73
+void x265_sad_x3_32x64_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, intptr_t frefstride, int32_t* res);
74
+void x265_sad_x3_48x64_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, intptr_t frefstride, int32_t* res);
75
+void x265_sad_x3_64x16_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, intptr_t frefstride, int32_t* res);
76
+void x265_sad_x3_64x32_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, intptr_t frefstride, int32_t* res);
77
+void x265_sad_x3_64x48_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, intptr_t frefstride, int32_t* res);
78
+void x265_sad_x3_64x64_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, intptr_t frefstride, int32_t* res);
79
+
80
+void x265_sad_x4_4x4_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, const pixel* fref3, intptr_t frefstride, int32_t* res);
81
+void x265_sad_x4_4x8_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, const pixel* fref3, intptr_t frefstride, int32_t* res);
82
+void x265_sad_x4_4x16_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, const pixel* fref3, intptr_t frefstride, int32_t* res);
83
+void x265_sad_x4_8x4_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, const pixel* fref3, intptr_t frefstride, int32_t* res);
84
+void x265_sad_x4_8x8_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, const pixel* fref3, intptr_t frefstride, int32_t* res);
85
+void x265_sad_x4_8x16_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, const pixel* fref3, intptr_t frefstride, int32_t* res);
86
+void x265_sad_x4_8x32_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, const pixel* fref3, intptr_t frefstride, int32_t* res);
87
+void x265_sad_x4_12x16_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, const pixel* fref3, intptr_t frefstride, int32_t* res);
88
+void x265_sad_x4_16x4_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, const pixel* fref3, intptr_t frefstride, int32_t* res);
89
+void x265_sad_x4_16x8_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, const pixel* fref3, intptr_t frefstride, int32_t* res);
90
+void x265_sad_x4_16x12_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, const pixel* fref3, intptr_t frefstride, int32_t* res);
91
+void x265_sad_x4_16x16_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, const pixel* fref3, intptr_t frefstride, int32_t* res);
92
+void x265_sad_x4_16x32_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, const pixel* fref3, intptr_t frefstride, int32_t* res);
93
+void x265_sad_x4_16x64_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, const pixel* fref3, intptr_t frefstride, int32_t* res);
94
+void x265_sad_x4_24x32_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, const pixel* fref3, intptr_t frefstride, int32_t* res);
95
+void x265_sad_x4_32x8_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, const pixel* fref3, intptr_t frefstride, int32_t* res);
96
+void x265_sad_x4_32x16_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, const pixel* fref3, intptr_t frefstride, int32_t* res);
97
+void x265_sad_x4_32x24_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, const pixel* fref3, intptr_t frefstride, int32_t* res);
98
+void x265_sad_x4_32x32_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, const pixel* fref3, intptr_t frefstride, int32_t* res);
99
+void x265_sad_x4_32x64_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, const pixel* fref3, intptr_t frefstride, int32_t* res);
100
+void x265_sad_x4_48x64_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, const pixel* fref3, intptr_t frefstride, int32_t* res);
101
+void x265_sad_x4_64x16_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, const pixel* fref3, intptr_t frefstride, int32_t* res);
102
+void x265_sad_x4_64x32_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, const pixel* fref3, intptr_t frefstride, int32_t* res);
103
+void x265_sad_x4_64x48_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, const pixel* fref3, intptr_t frefstride, int32_t* res);
104
+void x265_sad_x4_64x64_neon(const pixel* fenc, const pixel* fref0, const pixel* fref1, const pixel* fref2, const pixel* fref3, intptr_t frefstride, int32_t* res);
105
+
106
+#endif // ifndef X265_I386_PIXEL_AARCH64_H
107
x265_3.4.tar.gz/source/common/aarch64/sad-a.S Added
107
 
1
@@ -0,0 +1,105 @@
2
+/*****************************************************************************
3
+ * Copyright (C) 2020 MulticoreWare, Inc
4
+ *
5
+ * Authors: Hongbin Liu <liuhongbin1@huawei.com>
6
+ *
7
+ * This program is free software; you can redistribute it and/or modify
8
+ * it under the terms of the GNU General Public License as published by
9
+ * the Free Software Foundation; either version 2 of the License, or
10
+ * (at your option) any later version.
11
+ *
12
+ * This program is distributed in the hope that it will be useful,
13
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
+ * GNU General Public License for more details.
16
+ *
17
+ * You should have received a copy of the GNU General Public License
18
+ * along with this program; if not, write to the Free Software
19
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
20
+ *
21
+ * This program is also available under a commercial proprietary license.
22
+ * For more information, contact us at license @ x265.com.
23
+ *****************************************************************************/
24
+
25
+#include "asm.S"
26
+
27
+.section .rodata
28
+
29
+.align 4
30
+
31
+.text
32
+
33
+.macro SAD_X_START_8 x  // First row of an 8-byte-wide multi-reference SAD: initialises the accumulators v16-v18 (and v19 when x == 4); x is the number of references, 3 or 4
34
+    ld1             {v0.8b}, [x0], x9  // fenc row, then x0 += x9 (x9 is set by the enclosing function)
35
+.if \x == 3
36
+    ld1             {v1.8b}, [x1], x4  // three references: the shared stride is in x4
37
+    ld1             {v2.8b}, [x2], x4
38
+    ld1             {v3.8b}, [x3], x4
39
+.elseif \x == 4
40
+    ld1             {v1.8b}, [x1], x5  // four references: x4 is the fourth pointer, so the shared stride is in x5
41
+    ld1             {v2.8b}, [x2], x5
42
+    ld1             {v3.8b}, [x3], x5
43
+    ld1             {v4.8b}, [x4], x5
44
+.endif
45
+    uabdl           v16.8h, v0.8b, v1.8b  // |fenc - ref0| widened to 16 bit; overwrites the accumulator rather than adding to it
46
+    uabdl           v17.8h, v0.8b, v2.8b  // |fenc - ref1|
47
+    uabdl           v18.8h, v0.8b, v3.8b  // |fenc - ref2|
48
+.if \x == 4
49
+    uabdl           v19.8h, v0.8b, v4.8b  // |fenc - ref3|
50
+.endif
51
+.endm
52
+
53
+.macro SAD_X_8 x  // One further row of an 8-byte-wide multi-reference SAD: adds into the accumulators set up by SAD_X_START_8; x is the number of references, 3 or 4
54
+    ld1             {v0.8b}, [x0], x9  // fenc row, then x0 += x9 (x9 is set by the enclosing function)
55
+.if \x == 3
56
+    ld1             {v1.8b}, [x1], x4  // three references: the shared stride is in x4
57
+    ld1             {v2.8b}, [x2], x4
58
+    ld1             {v3.8b}, [x3], x4
59
+.elseif \x == 4
60
+    ld1             {v1.8b}, [x1], x5  // four references: x4 is the fourth pointer, so the shared stride is in x5
61
+    ld1             {v2.8b}, [x2], x5
62
+    ld1             {v3.8b}, [x3], x5
63
+    ld1             {v4.8b}, [x4], x5
64
+.endif
65
+    uabal           v16.8h, v0.8b, v1.8b  // v16 += |fenc - ref0|, widened to 16 bit
66
+    uabal           v17.8h, v0.8b, v2.8b  // v17 += |fenc - ref1|
67
+    uabal           v18.8h, v0.8b, v3.8b  // v18 += |fenc - ref2|
68
+.if \x == 4
69
+    uabal           v19.8h, v0.8b, v4.8b  // v19 += |fenc - ref3|
70
+.endif
71
+.endm
72
+
73
+.macro SAD_X_8xN x, h  // Emits x265_sad_x<x>_8x<h>_neon: SAD of one 8-byte-wide, h-row fenc block against x reference blocks, one int32 result per reference
74
+function x265_sad_x\x\()_8x\h\()_neon  // x==3: x0=fenc, x1-x3=fref0-2, x4=frefstride, x5=res; x==4: x0=fenc, x1-x4=fref0-3, x5=frefstride, x6=res (prototypes in pixel.h)
75
+    mov             x9, #FENC_STRIDE  // row stride of the fenc block; FENC_STRIDE is defined outside this view (presumably in asm.S)
76
+    SAD_X_START_8 \x
77
+.rept \h - 1
78
+    SAD_X_8 \x
79
+.endr
80
+    uaddlv          s0, v16.8h  // horizontal sum of the eight 16-bit lanes: total for ref0
81
+    uaddlv          s1, v17.8h  // total for ref1
82
+    uaddlv          s2, v18.8h  // total for ref2
83
+.if \x == 4
84
+    uaddlv          s3, v19.8h  // total for ref3
85
+.endif
86
+
87
+.if \x == 3
88
+    stp             s0, s1, [x5]  // res[0], res[1]
89
+    str             s2, [x5, #8]  // res[2]
90
+.elseif \x == 4
91
+    stp             s0, s1, [x6]  // res[0], res[1]
92
+    stp             s2, s3, [x6, #8]  // res[2], res[3]
93
+.endif
94
+    ret
95
+endfunc
96
+.endm
97
+
98
+SAD_X_8xN 3 4
99
+SAD_X_8xN 3 8
100
+SAD_X_8xN 3 16
101
+SAD_X_8xN 3 32
102
+
103
+SAD_X_8xN 4 4
104
+SAD_X_8xN 4 8
105
+SAD_X_8xN 4 16
106
+SAD_X_8xN 4 32
107
x265_3.3.tar.gz/source/common/arm/asm-primitives.cpp -> x265_3.4.tar.gz/source/common/arm/asm-primitives.cpp Changed
201
 
1
@@ -5,6 +5,7 @@
2
  *          Praveen Kumar Tiwari <praveen@multicorewareinc.com>
3
  *          Min Chen <chenm003@163.com> <min.chen@multicorewareinc.com>
4
  *          Dnyaneshwar Gorade <dnyaneshwar@multicorewareinc.com>
5
+ *          Hongbin Liu <liuhongbin1@huawei.com>
6
  *
7
  * This program is free software; you can redistribute it and/or modify
8
  * it under the terms of the GNU General Public License as published by
9
@@ -48,77 +49,77 @@
10
         p.ssim_4x4x2_core = PFX(ssim_4x4x2_core_neon);
11
 
12
         // addAvg
13
-         p.pu[LUMA_4x4].addAvg   = PFX(addAvg_4x4_neon);
14
-         p.pu[LUMA_4x8].addAvg   = PFX(addAvg_4x8_neon);
15
-         p.pu[LUMA_4x16].addAvg  = PFX(addAvg_4x16_neon);
16
-         p.pu[LUMA_8x4].addAvg   = PFX(addAvg_8x4_neon);
17
-         p.pu[LUMA_8x8].addAvg   = PFX(addAvg_8x8_neon);
18
-         p.pu[LUMA_8x16].addAvg  = PFX(addAvg_8x16_neon);
19
-         p.pu[LUMA_8x32].addAvg  = PFX(addAvg_8x32_neon);
20
-         p.pu[LUMA_12x16].addAvg = PFX(addAvg_12x16_neon);
21
-         p.pu[LUMA_16x4].addAvg  = PFX(addAvg_16x4_neon);
22
-         p.pu[LUMA_16x8].addAvg  = PFX(addAvg_16x8_neon);
23
-         p.pu[LUMA_16x12].addAvg = PFX(addAvg_16x12_neon);
24
-         p.pu[LUMA_16x16].addAvg = PFX(addAvg_16x16_neon);
25
-         p.pu[LUMA_16x32].addAvg = PFX(addAvg_16x32_neon);
26
-         p.pu[LUMA_16x64].addAvg = PFX(addAvg_16x64_neon);
27
-         p.pu[LUMA_24x32].addAvg = PFX(addAvg_24x32_neon);
28
-         p.pu[LUMA_32x8].addAvg  = PFX(addAvg_32x8_neon);
29
-         p.pu[LUMA_32x16].addAvg = PFX(addAvg_32x16_neon);
30
-         p.pu[LUMA_32x24].addAvg = PFX(addAvg_32x24_neon);
31
-         p.pu[LUMA_32x32].addAvg = PFX(addAvg_32x32_neon);
32
-         p.pu[LUMA_32x64].addAvg = PFX(addAvg_32x64_neon);
33
-         p.pu[LUMA_48x64].addAvg = PFX(addAvg_48x64_neon);
34
-         p.pu[LUMA_64x16].addAvg = PFX(addAvg_64x16_neon);
35
-         p.pu[LUMA_64x32].addAvg = PFX(addAvg_64x32_neon);
36
-         p.pu[LUMA_64x48].addAvg = PFX(addAvg_64x48_neon);
37
-         p.pu[LUMA_64x64].addAvg = PFX(addAvg_64x64_neon);
38
+         p.pu[LUMA_4x4].addAvg[NONALIGNED]   = PFX(addAvg_4x4_neon);
39
+         p.pu[LUMA_4x8].addAvg[NONALIGNED]   = PFX(addAvg_4x8_neon);
40
+         p.pu[LUMA_4x16].addAvg[NONALIGNED]  = PFX(addAvg_4x16_neon);
41
+         p.pu[LUMA_8x4].addAvg[NONALIGNED]   = PFX(addAvg_8x4_neon);
42
+         p.pu[LUMA_8x8].addAvg[NONALIGNED]   = PFX(addAvg_8x8_neon);
43
+         p.pu[LUMA_8x16].addAvg[NONALIGNED]  = PFX(addAvg_8x16_neon);
44
+         p.pu[LUMA_8x32].addAvg[NONALIGNED]  = PFX(addAvg_8x32_neon);
45
+         p.pu[LUMA_12x16].addAvg[NONALIGNED] = PFX(addAvg_12x16_neon);
46
+         p.pu[LUMA_16x4].addAvg[NONALIGNED]  = PFX(addAvg_16x4_neon);
47
+         p.pu[LUMA_16x8].addAvg[NONALIGNED]  = PFX(addAvg_16x8_neon);
48
+         p.pu[LUMA_16x12].addAvg[NONALIGNED] = PFX(addAvg_16x12_neon);
49
+         p.pu[LUMA_16x16].addAvg[NONALIGNED] = PFX(addAvg_16x16_neon);
50
+         p.pu[LUMA_16x32].addAvg[NONALIGNED] = PFX(addAvg_16x32_neon);
51
+         p.pu[LUMA_16x64].addAvg[NONALIGNED] = PFX(addAvg_16x64_neon);
52
+         p.pu[LUMA_24x32].addAvg[NONALIGNED] = PFX(addAvg_24x32_neon);
53
+         p.pu[LUMA_32x8].addAvg[NONALIGNED]  = PFX(addAvg_32x8_neon);
54
+         p.pu[LUMA_32x16].addAvg[NONALIGNED] = PFX(addAvg_32x16_neon);
55
+         p.pu[LUMA_32x24].addAvg[NONALIGNED] = PFX(addAvg_32x24_neon);
56
+         p.pu[LUMA_32x32].addAvg[NONALIGNED] = PFX(addAvg_32x32_neon);
57
+         p.pu[LUMA_32x64].addAvg[NONALIGNED] = PFX(addAvg_32x64_neon);
58
+         p.pu[LUMA_48x64].addAvg[NONALIGNED] = PFX(addAvg_48x64_neon);
59
+         p.pu[LUMA_64x16].addAvg[NONALIGNED] = PFX(addAvg_64x16_neon);
60
+         p.pu[LUMA_64x32].addAvg[NONALIGNED] = PFX(addAvg_64x32_neon);
61
+         p.pu[LUMA_64x48].addAvg[NONALIGNED] = PFX(addAvg_64x48_neon);
62
+         p.pu[LUMA_64x64].addAvg[NONALIGNED] = PFX(addAvg_64x64_neon);
63
 
64
         // chroma addAvg
65
-        p.chroma[X265_CSP_I420].pu[CHROMA_420_4x2].addAvg   = PFX(addAvg_4x2_neon);
66
-        p.chroma[X265_CSP_I420].pu[CHROMA_420_4x4].addAvg   = PFX(addAvg_4x4_neon);
67
-        p.chroma[X265_CSP_I420].pu[CHROMA_420_4x8].addAvg   = PFX(addAvg_4x8_neon);
68
-        p.chroma[X265_CSP_I420].pu[CHROMA_420_4x16].addAvg  = PFX(addAvg_4x16_neon);
69
-        p.chroma[X265_CSP_I420].pu[CHROMA_420_6x8].addAvg   = PFX(addAvg_6x8_neon);
70
-        p.chroma[X265_CSP_I420].pu[CHROMA_420_8x2].addAvg   = PFX(addAvg_8x2_neon);
71
-        p.chroma[X265_CSP_I420].pu[CHROMA_420_8x4].addAvg   = PFX(addAvg_8x4_neon);
72
-        p.chroma[X265_CSP_I420].pu[CHROMA_420_8x6].addAvg   = PFX(addAvg_8x6_neon);
73
-        p.chroma[X265_CSP_I420].pu[CHROMA_420_8x8].addAvg   = PFX(addAvg_8x8_neon);
74
-        p.chroma[X265_CSP_I420].pu[CHROMA_420_8x16].addAvg  = PFX(addAvg_8x16_neon);
75
-        p.chroma[X265_CSP_I420].pu[CHROMA_420_8x32].addAvg  = PFX(addAvg_8x32_neon);
76
-        p.chroma[X265_CSP_I420].pu[CHROMA_420_12x16].addAvg = PFX(addAvg_12x16_neon);
77
-        p.chroma[X265_CSP_I420].pu[CHROMA_420_16x4].addAvg  = PFX(addAvg_16x4_neon);
78
-        p.chroma[X265_CSP_I420].pu[CHROMA_420_16x8].addAvg  = PFX(addAvg_16x8_neon);
79
-        p.chroma[X265_CSP_I420].pu[CHROMA_420_16x12].addAvg = PFX(addAvg_16x12_neon);
80
-        p.chroma[X265_CSP_I420].pu[CHROMA_420_16x16].addAvg = PFX(addAvg_16x16_neon);
81
-        p.chroma[X265_CSP_I420].pu[CHROMA_420_16x32].addAvg = PFX(addAvg_16x32_neon);
82
-        p.chroma[X265_CSP_I420].pu[CHROMA_420_24x32].addAvg = PFX(addAvg_24x32_neon);
83
-        p.chroma[X265_CSP_I420].pu[CHROMA_420_32x8].addAvg  = PFX(addAvg_32x8_neon);
84
-        p.chroma[X265_CSP_I420].pu[CHROMA_420_32x16].addAvg = PFX(addAvg_32x16_neon);
85
-        p.chroma[X265_CSP_I420].pu[CHROMA_420_32x24].addAvg = PFX(addAvg_32x24_neon);
86
-        p.chroma[X265_CSP_I420].pu[CHROMA_420_32x32].addAvg = PFX(addAvg_32x32_neon);
87
-
88
-        p.chroma[X265_CSP_I422].pu[CHROMA_422_4x8].addAvg   = PFX(addAvg_4x8_neon);
89
-        p.chroma[X265_CSP_I422].pu[CHROMA_422_4x16].addAvg  = PFX(addAvg_4x16_neon);
90
-        p.chroma[X265_CSP_I422].pu[CHROMA_422_4x32].addAvg  = PFX(addAvg_4x32_neon);
91
-        p.chroma[X265_CSP_I422].pu[CHROMA_422_6x16].addAvg  = PFX(addAvg_6x16_neon);
92
-        p.chroma[X265_CSP_I422].pu[CHROMA_422_8x4].addAvg   = PFX(addAvg_8x4_neon);
93
-        p.chroma[X265_CSP_I422].pu[CHROMA_422_8x8].addAvg   = PFX(addAvg_8x8_neon);
94
-        p.chroma[X265_CSP_I422].pu[CHROMA_422_8x12].addAvg  = PFX(addAvg_8x12_neon);
95
-        p.chroma[X265_CSP_I422].pu[CHROMA_422_8x16].addAvg  = PFX(addAvg_8x16_neon);
96
-        p.chroma[X265_CSP_I422].pu[CHROMA_422_8x32].addAvg  = PFX(addAvg_8x32_neon);
97
-        p.chroma[X265_CSP_I422].pu[CHROMA_422_8x64].addAvg  = PFX(addAvg_8x64_neon);
98
-        p.chroma[X265_CSP_I422].pu[CHROMA_422_12x32].addAvg = PFX(addAvg_12x32_neon);
99
-        p.chroma[X265_CSP_I422].pu[CHROMA_422_16x8].addAvg  = PFX(addAvg_16x8_neon);
100
-        p.chroma[X265_CSP_I422].pu[CHROMA_422_16x16].addAvg = PFX(addAvg_16x16_neon);
101
-        p.chroma[X265_CSP_I422].pu[CHROMA_422_16x24].addAvg = PFX(addAvg_16x24_neon);
102
-        p.chroma[X265_CSP_I422].pu[CHROMA_422_16x32].addAvg = PFX(addAvg_16x32_neon);
103
-        p.chroma[X265_CSP_I422].pu[CHROMA_422_16x64].addAvg = PFX(addAvg_16x64_neon);
104
-        p.chroma[X265_CSP_I422].pu[CHROMA_422_24x64].addAvg = PFX(addAvg_24x64_neon);
105
-        p.chroma[X265_CSP_I422].pu[CHROMA_422_32x16].addAvg = PFX(addAvg_32x16_neon);
106
-        p.chroma[X265_CSP_I422].pu[CHROMA_422_32x32].addAvg = PFX(addAvg_32x32_neon);
107
-        p.chroma[X265_CSP_I422].pu[CHROMA_422_32x48].addAvg = PFX(addAvg_32x48_neon);
108
-        p.chroma[X265_CSP_I422].pu[CHROMA_422_32x64].addAvg = PFX(addAvg_32x64_neon);
109
+        p.chroma[X265_CSP_I420].pu[CHROMA_420_4x2].addAvg[NONALIGNED]   = PFX(addAvg_4x2_neon);
110
+        p.chroma[X265_CSP_I420].pu[CHROMA_420_4x4].addAvg[NONALIGNED]   = PFX(addAvg_4x4_neon);
111
+        p.chroma[X265_CSP_I420].pu[CHROMA_420_4x8].addAvg[NONALIGNED]   = PFX(addAvg_4x8_neon);
112
+        p.chroma[X265_CSP_I420].pu[CHROMA_420_4x16].addAvg[NONALIGNED]  = PFX(addAvg_4x16_neon);
113
+        p.chroma[X265_CSP_I420].pu[CHROMA_420_6x8].addAvg[NONALIGNED]   = PFX(addAvg_6x8_neon);
114
+        p.chroma[X265_CSP_I420].pu[CHROMA_420_8x2].addAvg[NONALIGNED]   = PFX(addAvg_8x2_neon);
115
+        p.chroma[X265_CSP_I420].pu[CHROMA_420_8x4].addAvg[NONALIGNED]   = PFX(addAvg_8x4_neon);
116
+        p.chroma[X265_CSP_I420].pu[CHROMA_420_8x6].addAvg[NONALIGNED]   = PFX(addAvg_8x6_neon);
117
+        p.chroma[X265_CSP_I420].pu[CHROMA_420_8x8].addAvg[NONALIGNED]   = PFX(addAvg_8x8_neon);
118
+        p.chroma[X265_CSP_I420].pu[CHROMA_420_8x16].addAvg[NONALIGNED]  = PFX(addAvg_8x16_neon);
119
+        p.chroma[X265_CSP_I420].pu[CHROMA_420_8x32].addAvg[NONALIGNED]  = PFX(addAvg_8x32_neon);
120
+        p.chroma[X265_CSP_I420].pu[CHROMA_420_12x16].addAvg[NONALIGNED] = PFX(addAvg_12x16_neon);
121
+        p.chroma[X265_CSP_I420].pu[CHROMA_420_16x4].addAvg[NONALIGNED]  = PFX(addAvg_16x4_neon);
122
+        p.chroma[X265_CSP_I420].pu[CHROMA_420_16x8].addAvg[NONALIGNED]  = PFX(addAvg_16x8_neon);
123
+        p.chroma[X265_CSP_I420].pu[CHROMA_420_16x12].addAvg[NONALIGNED] = PFX(addAvg_16x12_neon);
124
+        p.chroma[X265_CSP_I420].pu[CHROMA_420_16x16].addAvg[NONALIGNED] = PFX(addAvg_16x16_neon);
125
+        p.chroma[X265_CSP_I420].pu[CHROMA_420_16x32].addAvg[NONALIGNED] = PFX(addAvg_16x32_neon);
126
+        p.chroma[X265_CSP_I420].pu[CHROMA_420_24x32].addAvg[NONALIGNED] = PFX(addAvg_24x32_neon);
127
+        p.chroma[X265_CSP_I420].pu[CHROMA_420_32x8].addAvg[NONALIGNED]  = PFX(addAvg_32x8_neon);
128
+        p.chroma[X265_CSP_I420].pu[CHROMA_420_32x16].addAvg[NONALIGNED] = PFX(addAvg_32x16_neon);
129
+        p.chroma[X265_CSP_I420].pu[CHROMA_420_32x24].addAvg[NONALIGNED] = PFX(addAvg_32x24_neon);
130
+        p.chroma[X265_CSP_I420].pu[CHROMA_420_32x32].addAvg[NONALIGNED] = PFX(addAvg_32x32_neon);
131
+
132
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_4x8].addAvg[NONALIGNED]   = PFX(addAvg_4x8_neon);
133
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_4x16].addAvg[NONALIGNED]  = PFX(addAvg_4x16_neon);
134
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_4x32].addAvg[NONALIGNED]  = PFX(addAvg_4x32_neon);
135
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_6x16].addAvg[NONALIGNED]  = PFX(addAvg_6x16_neon);
136
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_8x4].addAvg[NONALIGNED]   = PFX(addAvg_8x4_neon);
137
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_8x8].addAvg[NONALIGNED]   = PFX(addAvg_8x8_neon);
138
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_8x12].addAvg[NONALIGNED]  = PFX(addAvg_8x12_neon);
139
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_8x16].addAvg[NONALIGNED]  = PFX(addAvg_8x16_neon);
140
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_8x32].addAvg[NONALIGNED]  = PFX(addAvg_8x32_neon);
141
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_8x64].addAvg[NONALIGNED]  = PFX(addAvg_8x64_neon);
142
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_12x32].addAvg[NONALIGNED] = PFX(addAvg_12x32_neon);
143
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_16x8].addAvg[NONALIGNED]  = PFX(addAvg_16x8_neon);
144
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_16x16].addAvg[NONALIGNED] = PFX(addAvg_16x16_neon);
145
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_16x24].addAvg[NONALIGNED] = PFX(addAvg_16x24_neon);
146
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_16x32].addAvg[NONALIGNED] = PFX(addAvg_16x32_neon);
147
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_16x64].addAvg[NONALIGNED] = PFX(addAvg_16x64_neon);
148
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_24x64].addAvg[NONALIGNED] = PFX(addAvg_24x64_neon);
149
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_32x16].addAvg[NONALIGNED] = PFX(addAvg_32x16_neon);
150
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_32x32].addAvg[NONALIGNED] = PFX(addAvg_32x32_neon);
151
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_32x48].addAvg[NONALIGNED] = PFX(addAvg_32x48_neon);
152
+        p.chroma[X265_CSP_I422].pu[CHROMA_422_32x64].addAvg[NONALIGNED] = PFX(addAvg_32x64_neon);
153
 
154
         // quant
155
          p.quant = PFX(quant_neon);
156
@@ -402,7 +403,7 @@
157
         p.scale2D_64to32  = PFX(scale2D_64to32_neon);
158
 
159
         // scale1D_128to64
160
-        p.scale1D_128to64 = PFX(scale1D_128to64_neon);
161
+        p.scale1D_128to64[NONALIGNED] = PFX(scale1D_128to64_neon);
162
 
163
         // copy_count
164
         p.cu[BLOCK_4x4].copy_cnt     = PFX(copy_cnt_4_neon);
165
@@ -411,37 +412,37 @@
166
         p.cu[BLOCK_32x32].copy_cnt   = PFX(copy_cnt_32_neon);
167
 
168
         // filterPixelToShort
169
-        p.pu[LUMA_4x4].convert_p2s   = PFX(filterPixelToShort_4x4_neon);
170
-        p.pu[LUMA_4x8].convert_p2s   = PFX(filterPixelToShort_4x8_neon);
171
-        p.pu[LUMA_4x16].convert_p2s  = PFX(filterPixelToShort_4x16_neon);
172
-        p.pu[LUMA_8x4].convert_p2s   = PFX(filterPixelToShort_8x4_neon);
173
-        p.pu[LUMA_8x8].convert_p2s   = PFX(filterPixelToShort_8x8_neon);
174
-        p.pu[LUMA_8x16].convert_p2s  = PFX(filterPixelToShort_8x16_neon);
175
-        p.pu[LUMA_8x32].convert_p2s  = PFX(filterPixelToShort_8x32_neon);
176
-        p.pu[LUMA_12x16].convert_p2s = PFX(filterPixelToShort_12x16_neon);
177
-        p.pu[LUMA_16x4].convert_p2s  = PFX(filterPixelToShort_16x4_neon);
178
-        p.pu[LUMA_16x8].convert_p2s  = PFX(filterPixelToShort_16x8_neon);
179
-        p.pu[LUMA_16x12].convert_p2s = PFX(filterPixelToShort_16x12_neon);
180
-        p.pu[LUMA_16x16].convert_p2s = PFX(filterPixelToShort_16x16_neon);
181
-        p.pu[LUMA_16x32].convert_p2s = PFX(filterPixelToShort_16x32_neon);
182
-        p.pu[LUMA_16x64].convert_p2s = PFX(filterPixelToShort_16x64_neon);
183
-        p.pu[LUMA_24x32].convert_p2s = PFX(filterPixelToShort_24x32_neon);
184
-        p.pu[LUMA_32x8].convert_p2s  = PFX(filterPixelToShort_32x8_neon);
185
-        p.pu[LUMA_32x16].convert_p2s = PFX(filterPixelToShort_32x16_neon);
186
-        p.pu[LUMA_32x24].convert_p2s = PFX(filterPixelToShort_32x24_neon);
187
-        p.pu[LUMA_32x32].convert_p2s = PFX(filterPixelToShort_32x32_neon);
188
-        p.pu[LUMA_32x64].convert_p2s = PFX(filterPixelToShort_32x64_neon);
189
-        p.pu[LUMA_48x64].convert_p2s = PFX(filterPixelToShort_48x64_neon);
190
-        p.pu[LUMA_64x16].convert_p2s = PFX(filterPixelToShort_64x16_neon);
191
-        p.pu[LUMA_64x32].convert_p2s = PFX(filterPixelToShort_64x32_neon);
192
-        p.pu[LUMA_64x48].convert_p2s = PFX(filterPixelToShort_64x48_neon);
193
-        p.pu[LUMA_64x64].convert_p2s = PFX(filterPixelToShort_64x64_neon);
194
+        p.pu[LUMA_4x4].convert_p2s[NONALIGNED]   = PFX(filterPixelToShort_4x4_neon);
195
+        p.pu[LUMA_4x8].convert_p2s[NONALIGNED]   = PFX(filterPixelToShort_4x8_neon);
196
+        p.pu[LUMA_4x16].convert_p2s[NONALIGNED]  = PFX(filterPixelToShort_4x16_neon);
197
+        p.pu[LUMA_8x4].convert_p2s[NONALIGNED]   = PFX(filterPixelToShort_8x4_neon);
198
+        p.pu[LUMA_8x8].convert_p2s[NONALIGNED]   = PFX(filterPixelToShort_8x8_neon);
199
+        p.pu[LUMA_8x16].convert_p2s[NONALIGNED]  = PFX(filterPixelToShort_8x16_neon);
200
+        p.pu[LUMA_8x32].convert_p2s[NONALIGNED]  = PFX(filterPixelToShort_8x32_neon);
201
x265_3.3.tar.gz/source/common/common.h -> x265_3.4.tar.gz/source/common/common.h Changed
27
 
1
@@ -129,6 +129,7 @@
2
 typedef uint64_t sum2_t;
3
 typedef uint64_t pixel4;
4
 typedef int64_t  ssum2_t;
5
+#define SHIFT_TO_BITPLANE 9
6
 #define HISTOGRAM_BINS 1024
7
 #else
8
 typedef uint8_t  pixel;
9
@@ -136,6 +137,7 @@
10
 typedef uint32_t sum2_t;
11
 typedef uint32_t pixel4;
12
 typedef int32_t  ssum2_t; // Signed sum
13
+#define SHIFT_TO_BITPLANE 7
14
 #define HISTOGRAM_BINS 256
15
 #endif // if HIGH_BIT_DEPTH
16
 
17
@@ -270,6 +272,9 @@
18
 #define MAX_TR_SIZE (1 << MAX_LOG2_TR_SIZE)
19
 #define MAX_TS_SIZE (1 << MAX_LOG2_TS_SIZE)
20
 
21
+#define RDCOST_BASED_RSKIP 1
22
+#define EDGE_BASED_RSKIP 2
23
+
24
 #define COEF_REMAIN_BIN_REDUCTION   3 // indicates the level at which the VLC
25
                                       // transitions from Golomb-Rice to TU+EG(k)
26
 
27
x265_3.3.tar.gz/source/common/cpu.cpp -> x265_3.4.tar.gz/source/common/cpu.cpp Changed
19
 
1
@@ -5,6 +5,8 @@
2
  *          Laurent Aimar <fenrir@via.ecp.fr>
3
  *          Fiona Glaser <fiona@x264.com>
4
  *          Steve Borho <steve@borho.org>
5
+ *          Hongbin Liu <liuhongbin1@huawei.com>
6
+ *          Yimeng Su <yimeng.su@huawei.com>
7
  *
8
  * This program is free software; you can redistribute it and/or modify
9
  * it under the terms of the GNU General Public License as published by
10
@@ -367,6 +369,8 @@
11
     flags |= PFX(cpu_fast_neon_mrc_test)() ? X265_CPU_FAST_NEON_MRC : 0;
12
 #endif
13
     // TODO: write dual issue test? currently it's A8 (dual issue) vs. A9 (fast mrc)
14
+#elif X265_ARCH_ARM64
15
+    flags |= X265_CPU_NEON;
16
 #endif // if HAVE_ARMV6
17
     return flags;
18
 }
19
x265_3.3.tar.gz/source/common/frame.cpp -> x265_3.4.tar.gz/source/common/frame.cpp Changed
41
 
1
@@ -61,6 +61,8 @@
2
     m_edgePic = NULL;
3
     m_gaussianPic = NULL;
4
     m_thetaPic = NULL;
5
+    m_edgeBitPlane = NULL;
6
+    m_edgeBitPic = NULL;
7
 }
8
 
9
 bool Frame::create(x265_param *param, float* quantOffsets)
10
@@ -115,6 +117,19 @@
11
         m_thetaPic = X265_MALLOC(pixel, m_stride * (maxHeight + (m_lumaMarginY * 2)));
12
     }
13
 
14
+    if (param->recursionSkipMode == EDGE_BASED_RSKIP)
15
+    {
16
+        uint32_t numCuInWidth = (param->sourceWidth + param->maxCUSize - 1) / param->maxCUSize;
17
+        uint32_t numCuInHeight = (param->sourceHeight + param->maxCUSize - 1) / param->maxCUSize;
18
+        uint32_t lumaMarginX = param->maxCUSize + 32;
19
+        uint32_t lumaMarginY = param->maxCUSize + 16;
20
+        uint32_t stride = (numCuInWidth * param->maxCUSize) + (lumaMarginX << 1);
21
+        uint32_t maxHeight = numCuInHeight * param->maxCUSize;
22
+        uint32_t bitPlaneSize = stride * (maxHeight + (lumaMarginY * 2));
23
+        CHECKED_MALLOC_ZERO(m_edgeBitPlane, pixel, bitPlaneSize);
24
+        m_edgeBitPic = m_edgeBitPlane + lumaMarginY * stride + lumaMarginX;
25
+    }
26
+
27
     if (m_fencPic->create(param, !!m_param->bCopyPicToFrame) && m_lowres.create(param, m_fencPic, param->rc.qgSize))
28
     {
29
         X265_CHECK((m_reconColCount == NULL), "m_reconColCount was initialized");
30
@@ -267,4 +282,10 @@
31
         X265_FREE(m_gaussianPic);
32
         X265_FREE(m_thetaPic);
33
     }
34
+
35
+    if (m_param->recursionSkipMode == EDGE_BASED_RSKIP)
36
+    {
37
+        X265_FREE_ZERO(m_edgeBitPlane);
38
+        m_edgeBitPic = NULL;
39
+    }
40
 }
41
x265_3.3.tar.gz/source/common/frame.h -> x265_3.4.tar.gz/source/common/frame.h Changed
21
 
1
@@ -99,7 +99,7 @@
2
     float*                 m_quantOffsets;       // points to quantOffsets in x265_picture
3
     x265_sei               m_userSEI;
4
     uint32_t               m_picStruct;          // picture structure SEI message
5
-    x265_dolby_vision_rpu            m_rpu;
6
+    x265_dolby_vision_rpu  m_rpu;
7
 
8
     /* Frame Parallelism - notification between FrameEncoders of available motion reference rows */
9
     ThreadSafeInteger*     m_reconRowFlag;       // flag of CTU rows completely reconstructed and extended for motion reference
10
@@ -137,6 +137,10 @@
11
     pixel*                 m_gaussianPic;
12
     pixel*                 m_thetaPic;
13
 
14
+    /* edge bit plane for rskips 2 and 3 */
15
+    pixel*                 m_edgeBitPlane;
16
+    pixel*                 m_edgeBitPic;
17
+
18
     Frame();
19
 
20
     bool create(x265_param *param, float* quantOffsets);
21
x265_3.3.tar.gz/source/common/param.cpp -> x265_3.4.tar.gz/source/common/param.cpp Changed
145
 
1
@@ -198,7 +198,8 @@
2
     param->bEnableWeightedPred = 1;
3
     param->bEnableWeightedBiPred = 0;
4
     param->bEnableEarlySkip = 1;
5
-    param->bEnableRecursionSkip = 1;
6
+    param->recursionSkipMode = 1;
7
+    param->edgeVarThreshold = 0.05f;
8
     param->bEnableAMP = 0;
9
     param->bEnableRectInter = 0;
10
     param->rdLevel = 3;
11
@@ -285,6 +286,7 @@
12
     param->rc.bEnableConstVbv = 0;
13
     param->bResetZoneConfig = 1;
14
     param->reconfigWindowSize = 0;
15
+    param->decoderVbvMaxRate = 0;
16
 
17
     /* Video Usability Information (VUI) */
18
     param->vui.aspectRatioIdc = 0;
19
@@ -546,7 +548,7 @@
20
             param->maxNumMergeCand = 5;
21
             param->searchMethod = X265_STAR_SEARCH;
22
             param->bEnableTransformSkip = 1;
23
-            param->bEnableRecursionSkip = 0;
24
+            param->recursionSkipMode = 0;
25
             param->maxNumReferences = 5;
26
             param->limitReferences = 0;
27
             param->lookaheadSlices = 0; // disabled for best quality
28
@@ -598,7 +600,7 @@
29
             param->rc.hevcAq = 0;
30
             param->rc.qpStep = 1;
31
             param->rc.bEnableGrain = 1;
32
-            param->bEnableRecursionSkip = 0;
33
+            param->recursionSkipMode = 0;
34
             param->psyRd = 4.0;
35
             param->psyRdoq = 10.0;
36
             param->bEnableSAO = 0;
37
@@ -702,8 +704,9 @@
38
     OPT("ref") p->maxNumReferences = atoi(value);
39
     OPT("fast-intra") p->bEnableFastIntra = atobool(value);
40
     OPT("early-skip") p->bEnableEarlySkip = atobool(value);
41
-    OPT("rskip") p->bEnableRecursionSkip = atobool(value);
42
-    OPT("me")p->searchMethod = parseName(value, x265_motion_est_names, bError);
43
+    OPT("rskip") p->recursionSkipMode = atoi(value);
44
+    OPT("rskip-edge-threshold") p->edgeVarThreshold = atoi(value)/100.0f;
45
+    OPT("me") p->searchMethod = parseName(value, x265_motion_est_names, bError);
46
     OPT("subme") p->subpelRefine = atoi(value);
47
     OPT("merange") p->searchRange = atoi(value);
48
     OPT("rect") p->bEnableRectInter = atobool(value);
49
@@ -919,7 +922,7 @@
50
     OPT("max-merge") p->maxNumMergeCand = (uint32_t)atoi(value);
51
     OPT("temporal-mvp") p->bEnableTemporalMvp = atobool(value);
52
     OPT("early-skip") p->bEnableEarlySkip = atobool(value);
53
-    OPT("rskip") p->bEnableRecursionSkip = atobool(value);
54
+    OPT("rskip") p->recursionSkipMode = atoi(value);
55
     OPT("rdpenalty") p->rdPenalty = atoi(value);
56
     OPT("tskip") p->bEnableTransformSkip = atobool(value);
57
     OPT("no-tskip-fast") p->bEnableTSkipFast = atobool(value);
58
@@ -1221,6 +1224,7 @@
59
             }
60
         }
61
         OPT("hist-threshold") p->edgeTransitionThreshold = atof(value);
62
+        OPT("rskip-edge-threshold") p->edgeVarThreshold = atoi(value)/100.0f;
63
         OPT("lookahead-threads") p->lookaheadThreads = atoi(value);
64
         OPT("opt-cu-delta-qp") p->bOptCUDeltaQP = atobool(value);
65
         OPT("multi-pass-opt-analysis") p->analysisMultiPassRefine = atobool(value);
66
@@ -1596,9 +1600,16 @@
67
     CHECK(param->rdLevel < 1 || param->rdLevel > 6,
68
           "RD Level is out of range");
69
     CHECK(param->rdoqLevel < 0 || param->rdoqLevel > 2,
70
-        "RDOQ Level is out of range");
71
+          "RDOQ Level is out of range");
72
     CHECK(param->dynamicRd < 0 || param->dynamicRd > x265_ADAPT_RD_STRENGTH,
73
-        "Dynamic RD strength must be between 0 and 4");
74
+          "Dynamic RD strength must be between 0 and 4");
75
+    CHECK(param->recursionSkipMode > 2 || param->recursionSkipMode < 0,
76
+          "Invalid Recursion skip mode. Valid modes 0,1,2");
77
+    if (param->recursionSkipMode == EDGE_BASED_RSKIP)
78
+    {
79
+        CHECK(param->edgeVarThreshold < 0.0f || param->edgeVarThreshold > 1.0f,
80
+              "Minimum edge density percentage for a CU should be an integer between 0 to 100");
81
+    }
82
     CHECK(param->bframes && param->bframes >= param->lookaheadDepth && !param->rc.bStatRead,
83
           "Lookahead depth must be greater than the max consecutive bframe count");
84
     CHECK(param->bframes < 0,
85
@@ -1789,6 +1800,7 @@
86
     }
87
     CHECK(param->confWinRightOffset < 0, "Conformance Window Right Offset must be 0 or greater");
88
     CHECK(param->confWinBottomOffset < 0, "Conformance Window Bottom Offset must be 0 or greater");
89
+    CHECK(param->decoderVbvMaxRate < 0, "Invalid Decoder Vbv Maxrate. Value can not be less than zero");
90
     return check_failed;
91
 }
92
 
93
@@ -1908,7 +1920,9 @@
94
     TOOLVAL(param->psyRdoq, "psy-rdoq=%.2lf");
95
     TOOLOPT(param->bEnableRdRefine, "rd-refine");
96
     TOOLOPT(param->bEnableEarlySkip, "early-skip");
97
-    TOOLOPT(param->bEnableRecursionSkip, "rskip");
98
+    TOOLVAL(param->recursionSkipMode, "rskip mode=%d");
99
+    if (param->recursionSkipMode == EDGE_BASED_RSKIP)
100
+        TOOLVAL(param->edgeVarThreshold, "rskip-edge-threshold=%.2f");
101
     TOOLOPT(param->bEnableSplitRdSkip, "splitrd-skip");
102
     TOOLVAL(param->noiseReductionIntra, "nr-intra=%d");
103
     TOOLVAL(param->noiseReductionInter, "nr-inter=%d");
104
@@ -2066,7 +2080,10 @@
105
     s += sprintf(s, " rd=%d", p->rdLevel);
106
     s += sprintf(s, " selective-sao=%d", p->selectiveSAO);
107
     BOOL(p->bEnableEarlySkip, "early-skip");
108
-    BOOL(p->bEnableRecursionSkip, "rskip");
109
+    BOOL(p->recursionSkipMode, "rskip");
110
+    if (p->recursionSkipMode == EDGE_BASED_RSKIP)
111
+        s += sprintf(s, " rskip-edge-threshold=%f", p->edgeVarThreshold);
112
+
113
     BOOL(p->bEnableFastIntra, "fast-intra");
114
     BOOL(p->bEnableTSkipFast, "tskip-fast");
115
     BOOL(p->bCULossless, "cu-lossless");
116
@@ -2204,6 +2221,7 @@
117
     if (p->bEnableSceneCutAwareQp)
118
         s += sprintf(s, " scenecut-window=%d max-qp-delta=%d", p->scenecutWindow, p->maxQpDelta);
119
     s += sprintf(s, "conformance-window-offsets right=%d bottom=%d", p->confWinRightOffset, p->confWinBottomOffset);
120
+    s += sprintf(s, " decoder-max-rate=%d", p->decoderVbvMaxRate);
121
 #undef BOOL
122
     return buf;
123
 }
124
@@ -2373,7 +2391,8 @@
125
     dst->bSaoNonDeblocked = src->bSaoNonDeblocked;
126
     dst->rdLevel = src->rdLevel;
127
     dst->bEnableEarlySkip = src->bEnableEarlySkip;
128
-    dst->bEnableRecursionSkip = src->bEnableRecursionSkip;
129
+    dst->recursionSkipMode = src->recursionSkipMode;
130
+    dst->edgeVarThreshold = src->edgeVarThreshold;
131
     dst->bEnableFastIntra = src->bEnableFastIntra;
132
     dst->bEnableTSkipFast = src->bEnableTSkipFast;
133
     dst->bCULossless = src->bCULossless;
134
@@ -2419,8 +2438,9 @@
135
     dst->rc.zonefileCount = src->rc.zonefileCount;
136
     dst->reconfigWindowSize = src->reconfigWindowSize;
137
     dst->bResetZoneConfig = src->bResetZoneConfig;
138
+    dst->decoderVbvMaxRate = src->decoderVbvMaxRate;
139
 
140
-    if (src->rc.zonefileCount && src->rc.zones)
141
+    if (src->rc.zonefileCount && src->rc.zones && src->bResetZoneConfig)
142
     {
143
         for (int i = 0; i < src->rc.zonefileCount; i++)
144
         {
145
x265_3.3.tar.gz/source/common/pixel.cpp -> x265_3.4.tar.gz/source/common/pixel.cpp Changed
58
 
1
@@ -5,6 +5,7 @@
2
  *          Mandar Gurav <mandar@multicorewareinc.com>
3
  *          Mahesh Pittala <mahesh@multicorewareinc.com>
4
  *          Min Chen <min.chen@multicorewareinc.com>
5
+ *          Hongbin Liu<liuhongbin1@huawei.com>
6
  *
7
  * This program is free software; you can redistribute it and/or modify
8
  * it under the terms of the GNU General Public License as published by
9
@@ -265,6 +266,10 @@
10
 {
11
     int satd = 0;
12
 
13
+#if ENABLE_ASSEMBLY && X265_ARCH_ARM64
14
+    pixelcmp_t satd_4x4 = x265_pixel_satd_4x4_neon;
15
+#endif
16
+
17
     for (int row = 0; row < h; row += 4)
18
         for (int col = 0; col < w; col += 4)
19
             satd += satd_4x4(pix1 + row * stride_pix1 + col, stride_pix1,
20
@@ -279,6 +284,10 @@
21
 {
22
     int satd = 0;
23
 
24
+#if ENABLE_ASSEMBLY && X265_ARCH_ARM64
25
+    pixelcmp_t satd_8x4 = x265_pixel_satd_8x4_neon;
26
+#endif
27
+
28
     for (int row = 0; row < h; row += 4)
29
         for (int col = 0; col < w; col += 8)
30
             satd += satd_8x4(pix1 + row * stride_pix1 + col, stride_pix1,
31
@@ -876,6 +885,18 @@
32
     }
33
 }
34
 
35
+static void planecopy_pp_shr_c(const pixel* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int width, int height, int shift)
36
+{
37
+    for (int r = 0; r < height; r++)
38
+    {
39
+        for (int c = 0; c < width; c++)
40
+            dst[c] = (pixel)((src[c] >> shift));
41
+
42
+        dst += dstStride;
43
+        src += srcStride;
44
+    }
45
+}
46
+
47
 static void planecopy_sp_shl_c(const uint16_t* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int width, int height, int shift, uint16_t mask)
48
 {
49
     for (int r = 0; r < height; r++)
50
@@ -1316,6 +1337,7 @@
51
     p.planecopy_cp = planecopy_cp_c;
52
     p.planecopy_sp = planecopy_sp_c;
53
     p.planecopy_sp_shl = planecopy_sp_shl_c;
54
+    p.planecopy_pp_shr = planecopy_pp_shr_c;
55
 #if HIGH_BIT_DEPTH
56
     p.planeClipAndMax = planeClipAndMax_c;
57
 #endif
58
x265_3.3.tar.gz/source/common/primitives.h -> x265_3.4.tar.gz/source/common/primitives.h Changed
47
 
1
@@ -8,6 +8,8 @@
2
  *          Rajesh Paulraj <rajesh@multicorewareinc.com>
3
  *          Praveen Kumar Tiwari <praveen@multicorewareinc.com>
4
  *          Min Chen <chenm003@163.com>
5
+ *          Hongbin Liu<liuhongbin1@huawei.com>
6
+ *          Yimeng Su <yimeng.su@huawei.com>
7
  *
8
  * This program is free software; you can redistribute it and/or modify
9
  * it under the terms of the GNU General Public License as published by
10
@@ -204,6 +206,7 @@
11
 typedef void (*sign_t)(int8_t *dst, const pixel *src1, const pixel *src2, const int endX);
12
 typedef void (*planecopy_cp_t) (const uint8_t* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int width, int height, int shift);
13
 typedef void (*planecopy_sp_t) (const uint16_t* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int width, int height, int shift, uint16_t mask);
14
+typedef void (*planecopy_pp_t) (const pixel* src, intptr_t srcStride, pixel* dst, intptr_t dstStride, int width, int height, int shift);
15
 typedef pixel (*planeClipAndMax_t)(pixel *src, intptr_t stride, int width, int height, uint64_t *outsum, const pixel minPix, const pixel maxPix);
16
 
17
 typedef void (*cutree_propagate_cost) (int* dst, const uint16_t* propagateIn, const int32_t* intraCosts, const uint16_t* interCosts, const int32_t* invQscales, const double* fpsFactor, int len);
18
@@ -358,6 +361,7 @@
19
     planecopy_cp_t        planecopy_cp;
20
     planecopy_sp_t        planecopy_sp;
21
     planecopy_sp_t        planecopy_sp_shl;
22
+    planecopy_pp_t        planecopy_pp_shr;
23
     planeClipAndMax_t     planeClipAndMax;
24
 
25
     weightp_sp_t          weight_sp;
26
@@ -465,6 +469,9 @@
27
 void setupInstrinsicPrimitives(EncoderPrimitives &p, int cpuMask);
28
 void setupAssemblyPrimitives(EncoderPrimitives &p, int cpuMask);
29
 void setupAliasPrimitives(EncoderPrimitives &p);
30
+#if X265_ARCH_ARM64
31
+void setupAliasCPrimitives(EncoderPrimitives &cp, EncoderPrimitives &asmp, int cpuMask);
32
+#endif
33
 #if HAVE_ALTIVEC
34
 void setupPixelPrimitives_altivec(EncoderPrimitives &p);
35
 void setupDCTPrimitives_altivec(EncoderPrimitives &p);
36
@@ -479,4 +486,10 @@
37
 extern const char* PFX(build_info_str);
38
 #endif
39
 
40
+#if ENABLE_ASSEMBLY && X265_ARCH_ARM64
41
+extern "C" {
42
+#include "aarch64/pixel-util.h"
43
+}
44
+#endif
45
+
46
 #endif // ifndef X265_PRIMITIVES_H
47
x265_3.4.tar.gz/source/common/scaler.cpp Added
201
 
1
@@ -0,0 +1,1110 @@
2
+/*****************************************************************************
3
+* Copyright (C) 2013-2020 MulticoreWare, Inc
4
+*
5
+* Authors: Pooja Venkatesan <pooja@multicorewareinc.com>
6
+*
7
+* This program is free software; you can redistribute it and/or modify
8
+* it under the terms of the GNU General Public License as published by
9
+* the Free Software Foundation; either version 2 of the License, or
10
+* (at your option) any later version.
11
+*
12
+* This program is distributed in the hope that it will be useful,
13
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15
+* GNU General Public License for more details.
16
+*
17
+* You should have received a copy of the GNU General Public License
18
+* along with this program; if not, write to the Free Software
19
+* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
20
+*
21
+* This program is also available under a commercial proprietary license.
22
+* For more information, contact us at license @ x265.com.
23
+*****************************************************************************/
24
+
25
+#include "scaler.h"
26
+
27
+#if _MSC_VER
28
+#pragma warning(disable: 4706) // assignment within conditional
29
+#pragma warning(disable: 4244) // '=' : possible loss of data
30
+#endif
31
+
32
+#define SHORT_MIN (-(1 << 15))
33
+#define SHORT_MAX ((1 << 15) - 1)
34
+#define SHORT_MAX_10 ((1 << 10) - 1)
35
+
36
+namespace X265_NS{
37
+
38
+ScalerFilterManager::ScalerFilterManager() :
39
+    m_bitDepth(0),
40
+    m_algorithmFlags(0),
41
+    m_srcW(0),
42
+    m_srcH(0),
43
+    m_dstW(0),
44
+    m_dstH(0),
45
+    m_crSrcW(0),
46
+    m_crSrcH(0),
47
+    m_crDstW(0),
48
+    m_crDstH(0),
49
+    m_crSrcHSubSample(0),
50
+    m_crSrcVSubSample(0),
51
+    m_crDstHSubSample(0),
52
+    m_crDstVSubSample(0)
53
+{
54
+    for (int i = 0; i < m_numSlice; i++)
55
+        m_slices[i] = NULL;
56
+    for (int i = 0; i < m_numFilter; i++)
57
+        m_ScalerFilters[i] = NULL;
58
+}
59
+
60
+inline static void filter_copy_c(int64_t* filter, int64_t* filter2, int size)
61
+{
62
+    for (int i = 0; i < size; i++)
63
+        filter2[i] = filter[i];
64
+}
65
+
66
+#if X265_DEPTH == 8
67
+static void doScaling_c(int16_t *dst, int dstW, const uint8_t *src, const int16_t *filter, const int32_t *filterPos, int filterSize)
68
+{
69
+    for (int i = 0; i < dstW; i++)
70
+    {
71
+        int val = 0;
72
+        int sourcePos = filterPos[i];
73
+        for (int j = 0; j < filterSize; j++)
74
+            val += ((int)src[sourcePos + j]) * filter[filterSize * i + j];
75
+        // the cubic equation does overflow ...
76
+        dst[i] = x265_clip3(SHORT_MIN, SHORT_MAX, val >> 7);
77
+    }
78
+}
79
+static uint8_t clipUint8(int a)
80
+{
81
+    if (a&(~0xFF))
82
+        return (-a) >> 31;
83
+    else
84
+        return a;
85
+}
86
+
87
+static void yuv2PlaneX_c(const int16_t *filter, int filterSize, const int16_t **src, uint8_t *dest, int dstW)
88
+{
89
+    for (int i = 0; i < dstW; i++)
90
+    {
91
+        int val = 64 << 12;
92
+        for (int j = 0; j < filterSize; j++)
93
+            val += src[j][i] * filter[j];
94
+        dest[i] = clipUint8(val >> 19);
95
+    }
96
+}
97
+#else
98
+static void yuv2PlaneX_c_h(const int16_t *filter, int filterSize, const int16_t **src, uint8_t *dest, int dstW)
99
+{
100
+    for (int i = 0; i < dstW; i++)
101
+    {
102
+        int val = 1 << 16;
103
+        uint16_t* dst16bit = (uint16_t *)dest;
104
+        for (int j = 0; j < filterSize; j++)
105
+            val += src[j][i] * filter[j];
106
+        uint16_t d = x265_clip3(0, SHORT_MAX_10, val >> 17);
107
+        ((uint8_t*)(&dst16bit[i]))[0] = (d);
108
+        ((uint8_t*)(&dst16bit[i]))[1] = (d) >> 8;
109
+    }
110
+}
111
+static void doScaling_c_h(int16_t *dst, int dstW, const uint8_t *src, const int16_t *filter, const int32_t *filterPos, int filterSize)
112
+{
113
+    const uint16_t *srcLocal = (const uint16_t *)src;
114
+    for (int i = 0; i < dstW; i++)
115
+    {
116
+        int val = 0;
117
+        int sourcePos = filterPos[i];
118
+        for (int j = 0; j < filterSize; j++)
119
+            val += ((int)srcLocal[sourcePos + j]) * filter[filterSize * i + j];
120
+        // the cubic equation does overflow
121
+        dst[i] = x265_clip3(SHORT_MIN, SHORT_MAX, val >> 9);
122
+    }
123
+}
124
+#endif
125
+
126
+ScalerFilter::ScalerFilter() :
127
+    m_filtLen(0),
128
+    m_filtPos(NULL),
129
+    m_filt(NULL),
130
+    m_sourceSlice(NULL),
131
+    m_destSlice(NULL)
132
+{
133
+}
134
+
135
+ScalerFilter::~ScalerFilter()
136
+{
137
+    if (m_filtPos) {
138
+        delete[] m_filtPos; m_filtPos = NULL;
139
+    }
140
+    if (m_filt) {
141
+        delete[] m_filt; m_filt = NULL;
142
+    }
143
+}
144
+
145
+void ScalerHLumFilter::process(int sliceVer, int sliceHor)
146
+{
147
+    uint8_t ** src = m_sourceSlice->m_plane[0].lineBuf;
148
+    uint8_t ** dst = m_destSlice->m_plane[0].lineBuf;
149
+    int sourcePos = sliceVer - m_sourceSlice->m_plane[0].sliceVer;
150
+    int destPos = sliceVer - m_destSlice->m_plane[0].sliceVer;
151
+    int dstW = m_destSlice->m_width;
152
+    for (int i = 0; i < sliceHor; ++i)
153
+    {
154
+        m_hFilterScaler->doScaling((int16_t*)dst[destPos + i], dstW, (const uint8_t *)src[sourcePos + i], m_filt, m_filtPos, m_filtLen);
155
+        m_destSlice->m_plane[0].sliceHor += 1;
156
+    }
157
+}
158
+
159
+void ScalerHCrFilter::process(int sliceVer, int sliceHor)
160
+{
161
+    uint8_t ** src1 = m_sourceSlice->m_plane[1].lineBuf;
162
+    uint8_t ** dst1 = m_destSlice->m_plane[1].lineBuf;
163
+    uint8_t ** src2 = m_sourceSlice->m_plane[2].lineBuf;
164
+    uint8_t ** dst2 = m_destSlice->m_plane[2].lineBuf;
165
+
166
+    int sourcePos1 = sliceVer - m_sourceSlice->m_plane[1].sliceVer;
167
+    int destPos1 = sliceVer - m_destSlice->m_plane[1].sliceVer;
168
+    int sourcePos2 = sliceVer - m_sourceSlice->m_plane[2].sliceVer;
169
+    int destPos2 = sliceVer - m_destSlice->m_plane[2].sliceVer;
170
+
171
+    int dstW = m_destSlice->m_width >> m_destSlice->m_hCrSubSample;
172
+
173
+    for (int i = 0; i < sliceHor; ++i)
174
+    {
175
+        m_hFilterScaler->doScaling((int16_t*)dst1[destPos1 + i], dstW, src1[sourcePos1 + i], m_filt, m_filtPos, m_filtLen);
176
+        m_hFilterScaler->doScaling((int16_t*)dst2[destPos2 + i], dstW, src2[sourcePos2 + i], m_filt, m_filtPos, m_filtLen);
177
+        m_destSlice->m_plane[1].sliceHor += 1;
178
+        m_destSlice->m_plane[2].sliceHor += 1;
179
+    }
180
+}
181
+
182
+void VFilterScaler8Bit::yuv2PlaneX(const int16_t *filter, int filterSize, const int16_t **src, uint8_t *dest, int dstW)
183
+{
184
+    int IdxW = FACTOR_4;
185
+    int IdxF = FIL_DEF;
186
+
187
+    (dstW % 4 == 0) && (filterSize == 6) && (IdxF = FIL_6) && (IdxW = FACTOR_4);
188
+    (dstW % 4 == 0) && (filterSize == 8) && (IdxF = FIL_8) && (IdxW = FACTOR_4);
189
+
190
+#if X265_DEPTH == 8
191
+    yuv2PlaneX_c(filter, filterSize, src, dest, dstW);
192
+#else
193
+    yuv2PlaneX_c_h(filter, filterSize, src, dest, dstW);
194
+#endif
195
+}
196
+
197
+void VFilterScaler10Bit::yuv2PlaneX(const int16_t *filter, int filterSize, const int16_t **src, uint8_t *dest, int dstW)
198
+{
199
+    int IdxW = FACTOR_4;
200
+    int IdxF = FIL_DEF;
201
x265_3.4.tar.gz/source/common/scaler.h Added
201
 
1
@@ -0,0 +1,254 @@
2
+/*****************************************************************************
3
+ * Copyright (C) 2013-2020 MulticoreWare, Inc
4
+ *
5
+ * Authors: Pooja Venkatesan <pooja@multicorewareinc.com>
6
+ *
7
+ * This program is free software; you can redistribute it and/or modify
8
+ * it under the terms of the GNU General Public License as published by
9
+ * the Free Software Foundation; either version 2 of the License, or
10
+ * (at your option) any later version.
11
+ *
12
+ * This program is distributed in the hope that it will be useful,
13
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15
+ * GNU General Public License for more details.
16
+ *
17
+ * You should have received a copy of the GNU General Public License
18
+ * along with this program; if not, write to the Free Software
19
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
20
+ *
21
+ * This program is also available under a commercial proprietary license.
22
+ * For more information, contact us at license @ x265.com.
23
+ *****************************************************************************/
24
+
25
+#ifndef X265_SCALER_H
26
+#define X265_SCALER_H
27
+
28
+#include "common.h"
29
+
30
+namespace X265_NS {
31
+//x265 private namespace
32
+
33
+class ScalerSlice;
34
+class VideoDesc;
35
+
36
+#define MAX_NUM_LINES_AHEAD 4
37
+#define SCALER_ALIGN(x, j) (((x)+(j)-1)&~((j)-1))
38
+#define X265_ABS(j) ((j) >= 0 ? (j) : (-(j)))
39
+#define SCALER_MAX_REDUCE_CUTOFF 0.002
40
+#define SCALER_BITEXACT  0x80000
41
+#define ROUNDED_DIVISION(i,j) (((i)>0 ? (i) + ((j)>>1) : (i) - ((j)>>1))/(j))
42
+#define UH_CEIL_SHIFTR(i,j) (!scale_builtin_constant_p(j) ? -((-(i)) >> (j)) \
43
+                                                          : ((i) + (1<<(j)) - 1) >> (j))
44
+
45
+#if defined(__GNUC__) || defined(__clang__)
46
+#    define scale_builtin_constant_p __builtin_constant_p
47
+#else
48
+#    define scale_builtin_constant_p(x) 0
49
+#endif
50
+
51
+enum ResFactor
52
+{
53
+    RES_FACTOR_64, RES_FACTOR_32, RES_FACTOR_16, RES_FACTOR_8,
54
+    RES_FACTOR_4, RES_FACTOR_DEF, NUM_RES_FACTOR
55
+};
56
+
57
+enum ScalerFactor
58
+{
59
+    FACTOR_4, FACTOR_8, NUM_FACTOR
60
+};
61
+
62
+enum FilterSize
63
+{
64
+    FIL_4, FIL_6, FIL_8, FIL_9, FIL_10, FIL_11, FIL_13, FIL_15,
65
+    FIL_16, FIL_17, FIL_19, FIL_22, FIL_24, FIL_DEF, NUM_FIL
66
+};
67
+
68
+class ScalerFilter {
69
+public:
70
+    int             m_filtLen;
71
+    int32_t*        m_filtPos;      // Array of horizontal/vertical starting pos for each dst for luma / chroma planes.
72
+    int16_t*        m_filt;         // Array of horizontal/vertical filter coefficients for luma / chroma planes.
73
+    ScalerSlice*    m_sourceSlice;  // Source slice
74
+    ScalerSlice*    m_destSlice;    // Output slice
75
+    ScalerFilter();
76
+    virtual ~ScalerFilter();
77
+    virtual void process(int sliceVer, int sliceHor) = 0;
78
+    int initCoeff(int flag, int inc, int srcW, int dstW, int filtAlign, int one, int sourcePos, int destPos);
79
+    void setSlice(ScalerSlice* source, ScalerSlice* dest) { m_sourceSlice = source; m_destSlice = dest; }
80
+};
81
+
82
+class VideoDesc { // plain value holder: the constructor stores its four arguments unchanged
83
+public:
84
+    int         m_width;
85
+    int         m_height;
86
+    int         m_csp;        // csp value as passed in (presumably a colour-space id; confirm against callers)
87
+    int         m_inputDepth; // set from the constructor's bitDepth argument
88
+
89
+    VideoDesc(int w, int h, int csp, int bitDepth)
90
+    {
91
+        m_width = w;
92
+        m_height = h;
93
+        m_csp = csp;
94
+        m_inputDepth = bitDepth;
95
+    }
96
+};
97
+
98
+typedef struct ScalerPlane
99
+{
100
+    int       availLines; // max number of lines that can be held by this plane
101
+    int       sliceVer;   // index of first line
102
+    int       sliceHor;   // number of lines
103
+    uint8_t** lineBuf;    // line buffer
104
+} ScalerPlane;
105
+
106
+// Assist horizontal filtering, base class: abstract interface whose doScaling() is implemented per bit depth by HFilterScaler8Bit and HFilterScaler10Bit
107
+class HFilterScaler {
108
+public:
109
+    int m_bitDepth; // 0 in the base class; the 8-bit and 10-bit subclasses set it to 8 or 10 in their constructors
110
+public:
111
+    HFilterScaler() :m_bitDepth(0) {};
112
+    virtual ~HFilterScaler() {}; // virtual so a subclass can be destroyed through a HFilterScaler pointer
113
+    virtual void doScaling(int16_t *dst, int dstW, const uint8_t *src, const int16_t *filter, const int32_t *filterPos, int filterSize) = 0;
114
+};
115
+
116
+// Assist vertical filtering, base class: abstract interface whose yuv2PlaneX() is implemented per bit depth by VFilterScaler8Bit and VFilterScaler10Bit
117
+class VFilterScaler {
118
+public:
119
+    int m_bitDepth; // 0 in the base class; the 8-bit and 10-bit subclasses set it to 8 or 10 in their constructors
120
+public:
121
+    VFilterScaler() :m_bitDepth(0) {};
122
+    virtual ~VFilterScaler() {}; // virtual so a subclass can be destroyed through a VFilterScaler pointer
123
+    virtual void yuv2PlaneX(const int16_t *filter, int filterSize, const int16_t **src, uint8_t *dest, int dstW) = 0;
124
+};
125
+
126
+//  Assist horizontal filtering, process 8 bit case
127
+class HFilterScaler8Bit : public HFilterScaler {
128
+public:
129
+    HFilterScaler8Bit() { m_bitDepth = 8; }
130
+    virtual void doScaling(int16_t *dst, int dstW, const uint8_t *src, const int16_t *filter, const int32_t *filterPos, int filterSize);
131
+};
132
+
133
+//  Assist horizontal filtering, process 10 bit case
134
+class HFilterScaler10Bit : public HFilterScaler {
135
+public:
136
+    HFilterScaler10Bit() { m_bitDepth = 10; }
137
+    virtual void doScaling(int16_t *dst, int dstW, const uint8_t *src, const int16_t *filter, const int32_t *filterPos, int filterSize);
138
+};
139
+
140
+//  Assist vertical filtering, process 8 bit case
141
+class VFilterScaler8Bit : public VFilterScaler {
142
+public:
143
+    VFilterScaler8Bit() { m_bitDepth = 8; }
144
+    virtual void yuv2PlaneX(const int16_t *filter, int filterSize, const int16_t **src, uint8_t *dest, int dstW);
145
+};
146
+
147
+//  Assist vertical filtering, process 10 bit case
148
+class VFilterScaler10Bit : public VFilterScaler {
149
+public:
150
+    VFilterScaler10Bit() { m_bitDepth = 10; }
151
+    virtual void yuv2PlaneX(const int16_t *filter, int filterSize, const int16_t **src, uint8_t *dest, int dstW);
152
+};
153
+
154
+// Horizontal filter for luma; owns a bit-depth-specific HFilterScaler (8-bit or 10-bit) created in the constructor
155
+class ScalerHLumFilter : public ScalerFilter {
156
+private:
157
+    HFilterScaler* m_hFilterScaler; // owned; stays NULL when bitDepth is neither 8 nor 10
158
+public:
159
+    ScalerHLumFilter(int bitDepth) : m_hFilterScaler(NULL) { if (bitDepth == 8) m_hFilterScaler = new HFilterScaler8Bit; else if (bitDepth == 10) m_hFilterScaler = new HFilterScaler10Bit; } // pointer is initialised on every path, so the destructor never reads an indeterminate value
160
+    ~ScalerHLumFilter() { delete m_hFilterScaler; } // created with new, so released with delete (runs the virtual destructor; deleting NULL is a no-op)
161
+    virtual void process(int sliceVer, int sliceHor);
162
+};
163
+
164
+// Horizontal filter for chroma; owns a bit-depth-specific HFilterScaler (8-bit or 10-bit) whose doScaling() is called from process()
165
+class ScalerHCrFilter : public ScalerFilter {
166
+private:
167
+    HFilterScaler* m_hFilterScaler; // owned; stays NULL when bitDepth is neither 8 nor 10
168
+public:
169
+    ScalerHCrFilter(int bitDepth) : m_hFilterScaler(NULL) { if (bitDepth == 8) m_hFilterScaler = new HFilterScaler8Bit; else if (bitDepth == 10) m_hFilterScaler = new HFilterScaler10Bit; } // pointer is initialised on every path, so the destructor never reads an indeterminate value
170
+    ~ScalerHCrFilter() { delete m_hFilterScaler; } // created with new, so released with delete (runs the virtual destructor; deleting NULL is a no-op)
171
+    virtual void process(int sliceVer, int sliceHor);
172
+};
173
+
174
+// Vertical filter for luma; owns a bit-depth-specific VFilterScaler (8-bit or 10-bit) created in the constructor
175
+class ScalerVLumFilter : public ScalerFilter {
176
+private:
177
+    VFilterScaler* m_vFilterScaler; // owned; stays NULL when bitDepth is neither 8 nor 10
178
+public:
179
+    ScalerVLumFilter(int bitDepth) : m_vFilterScaler(NULL) { if (bitDepth == 8) m_vFilterScaler = new VFilterScaler8Bit; else if (bitDepth == 10) m_vFilterScaler = new VFilterScaler10Bit; } // pointer is initialised on every path, so the destructor never reads an indeterminate value
180
+    ~ScalerVLumFilter() { delete m_vFilterScaler; } // created with new, so released with delete (runs the virtual destructor; deleting NULL is a no-op)
181
+    virtual void process(int sliceVer, int sliceHor);
182
+};
183
+
184
+// Vertical filter for chroma; owns a bit-depth-specific VFilterScaler (8-bit or 10-bit) created in the constructor
185
+class ScalerVCrFilter : public ScalerFilter {
186
+private:
187
+    VFilterScaler*    m_vFilterScaler; // owned; stays NULL when bitDepth is neither 8 nor 10
188
+public:
189
+    ScalerVCrFilter(int bitDepth) : m_vFilterScaler(NULL) { if (bitDepth == 8) m_vFilterScaler = new VFilterScaler8Bit; else if (bitDepth == 10) m_vFilterScaler = new VFilterScaler10Bit; } // pointer is initialised on every path, so the destructor never reads an indeterminate value
190
+    ~ScalerVCrFilter() { delete m_vFilterScaler; } // created with new, so released with delete (runs the virtual destructor; deleting NULL is a no-op)
191
+    virtual void process(int sliceVer, int sliceHor);
192
+};
193
+
194
+class ScalerSlice
195
+{
196
+private:
197
+    enum ScalerSlicePlaneNum { m_numSlicePlane = 4 };
198
+public:
199
+    int m_width;        // Slice line width
200
+    int m_hCrSubSample; // horizontal Chroma subsampling factor
201
x265_3.3.tar.gz/source/common/threading.h -> x265_3.4.tar.gz/source/common/threading.h Changed
31
 
1
@@ -238,6 +238,14 @@
2
         LeaveCriticalSection(&m_cs);
3
     }
4
 
5
+    void decr() // decrements m_val while holding m_cs and wakes every thread waiting on m_cv
6
+    {
7
+        EnterCriticalSection(&m_cs);
8
+        m_val--;
9
+        WakeAllConditionVariable(&m_cv); // signalled before the critical section is released
10
+        LeaveCriticalSection(&m_cs);
11
+    }
12
+
13
 protected:
14
 
15
     CRITICAL_SECTION   m_cs;
16
@@ -436,6 +444,14 @@
17
         pthread_mutex_unlock(&m_mutex);
18
     }
19
 
20
+    void decr() // decrements m_val while holding m_mutex and wakes every thread waiting on m_cond
21
+    {
22
+        pthread_mutex_lock(&m_mutex);
23
+        m_val--;
24
+        pthread_cond_broadcast(&m_cond); // broadcast before the mutex is released
25
+        pthread_mutex_unlock(&m_mutex);
26
+    }
27
+
28
 protected:
29
 
30
     pthread_mutex_t m_mutex;
31
x265_3.3.tar.gz/source/encoder/analysis.cpp -> x265_3.4.tar.gz/source/encoder/analysis.cpp Changed
151
 
1
@@ -1272,7 +1272,7 @@
2
                     md.pred[PRED_SKIP].cu.initSubCU(parentCTU, cuGeom, qp);
3
                     checkMerge2Nx2N_rd0_4(md.pred[PRED_SKIP], md.pred[PRED_MERGE], cuGeom);
4
 
5
-                    skipRecursion = !!m_param->bEnableRecursionSkip && md.bestMode;
6
+                    skipRecursion = !!m_param->recursionSkipMode && md.bestMode;
7
                     if (m_param->rdLevel)
8
                         skipModes = m_param->bEnableEarlySkip && md.bestMode;
9
                 }
10
@@ -1296,7 +1296,7 @@
11
                     md.pred[PRED_SKIP].cu.initSubCU(parentCTU, cuGeom, qp);
12
                     checkMerge2Nx2N_rd0_4(md.pred[PRED_SKIP], md.pred[PRED_MERGE], cuGeom);
13
 
14
-                    skipRecursion = !!m_param->bEnableRecursionSkip && md.bestMode;
15
+                    skipRecursion = !!m_param->recursionSkipMode && md.bestMode;
16
                     if (m_param->rdLevel)
17
                         skipModes = m_param->bEnableEarlySkip && md.bestMode;
18
                 }
19
@@ -1314,15 +1314,23 @@
20
                 skipModes = (m_param->bEnableEarlySkip || m_refineLevel == 2)
21
                 && md.bestMode && md.bestMode->cu.isSkipped(0); // TODO: sa8d threshold per depth
22
         }
23
-        if (md.bestMode && m_param->bEnableRecursionSkip && !bCtuInfoCheck && !(m_param->bAnalysisType == AVC_INFO && m_param->analysisLoadReuseLevel == 7 && (m_modeFlag[0] || m_modeFlag[1])))
24
+        if (md.bestMode && m_param->recursionSkipMode && !bCtuInfoCheck && !(m_param->bAnalysisType == AVC_INFO && m_param->analysisLoadReuseLevel == 7 && (m_modeFlag[0] || m_modeFlag[1])))
25
         {
26
             skipRecursion = md.bestMode->cu.isSkipped(0);
27
-            if (mightSplit && depth >= minDepth && !skipRecursion)
28
+            if (mightSplit && !skipRecursion)
29
             {
30
-                if (depth)
31
-                    skipRecursion = recursionDepthCheck(parentCTU, cuGeom, *md.bestMode);
32
-                if (m_bHD && !skipRecursion && m_param->rdLevel == 2 && md.fencYuv.m_size != MAX_CU_SIZE)
33
+                if (depth >= minDepth && m_param->recursionSkipMode == RDCOST_BASED_RSKIP)
34
+                {
35
+                    if (depth)
36
+                        skipRecursion = recursionDepthCheck(parentCTU, cuGeom, *md.bestMode);
37
+                    if (m_bHD && !skipRecursion && m_param->rdLevel == 2 && md.fencYuv.m_size != MAX_CU_SIZE)
38
+                        skipRecursion = complexityCheckCU(*md.bestMode);
39
+                }
40
+                else if (cuGeom.log2CUSize >= MAX_LOG2_CU_SIZE - 1 && m_param->recursionSkipMode == EDGE_BASED_RSKIP)
41
+                {
42
                     skipRecursion = complexityCheckCU(*md.bestMode);
43
+                }
44
+
45
             }
46
         }
47
         if (m_param->bAnalysisType == AVC_INFO && md.bestMode && cuGeom.numPartitions <= 16 && m_param->analysisLoadReuseLevel == 7)
48
@@ -1972,7 +1980,7 @@
49
                     checkInter_rd5_6(md.pred[PRED_2Nx2N], cuGeom, SIZE_2Nx2N, refMasks);
50
                     checkBestMode(md.pred[PRED_2Nx2N], cuGeom.depth);
51
 
52
-                    if (m_param->bEnableRecursionSkip && depth && m_modeDepth[depth - 1].bestMode)
53
+                    if (m_param->recursionSkipMode && depth && m_modeDepth[depth - 1].bestMode)
54
                         skipRecursion = md.bestMode && !md.bestMode->cu.getQtRootCbf(0);
55
                 }
56
                 if (m_param->analysisLoadReuseLevel > 4 && m_reusePartSize[cuGeom.absPartIdx] == SIZE_2Nx2N)
57
@@ -1996,7 +2004,7 @@
58
                     checkInter_rd5_6(md.pred[PRED_2Nx2N], cuGeom, SIZE_2Nx2N, refMasks);
59
                     checkBestMode(md.pred[PRED_2Nx2N], cuGeom.depth);
60
 
61
-                    if (m_param->bEnableRecursionSkip && depth && m_modeDepth[depth - 1].bestMode)
62
+                    if (m_param->recursionSkipMode && depth && m_modeDepth[depth - 1].bestMode)
63
                         skipRecursion = md.bestMode && !md.bestMode->cu.getQtRootCbf(0);
64
                 }
65
             }
66
@@ -2015,8 +2023,10 @@
67
             checkInter_rd5_6(md.pred[PRED_2Nx2N], cuGeom, SIZE_2Nx2N, refMasks);
68
             checkBestMode(md.pred[PRED_2Nx2N], cuGeom.depth);
69
 
70
-            if (m_param->bEnableRecursionSkip && depth && m_modeDepth[depth - 1].bestMode)
71
+            if (m_param->recursionSkipMode == RDCOST_BASED_RSKIP && depth && m_modeDepth[depth - 1].bestMode)
72
                 skipRecursion = md.bestMode && !md.bestMode->cu.getQtRootCbf(0);
73
+            else if (cuGeom.log2CUSize >= MAX_LOG2_CU_SIZE - 1 && m_param->recursionSkipMode == EDGE_BASED_RSKIP)
74
+                skipRecursion = md.bestMode && complexityCheckCU(*md.bestMode);
75
         }
76
         if (m_param->bAnalysisType == AVC_INFO && md.bestMode && cuGeom.numPartitions <= 16 && m_param->analysisLoadReuseLevel == 7)
77
             skipRecursion = true;
78
@@ -3525,27 +3535,47 @@
79
 
80
 bool Analysis::complexityCheckCU(const Mode& bestMode)
81
 {
82
-    uint32_t mean = 0;
83
-    uint32_t homo = 0;
84
-    uint32_t cuSize = bestMode.fencYuv->m_size;
85
-    for (uint32_t y = 0; y < cuSize; y++) {
86
-        for (uint32_t x = 0; x < cuSize; x++) {
87
-            mean += (bestMode.fencYuv->m_buf[0][y * cuSize + x]);
88
+    if (m_param->recursionSkipMode == RDCOST_BASED_RSKIP)
89
+    {
90
+        uint32_t mean = 0;
91
+        uint32_t homo = 0;
92
+        uint32_t cuSize = bestMode.fencYuv->m_size;
93
+        for (uint32_t y = 0; y < cuSize; y++) {
94
+            for (uint32_t x = 0; x < cuSize; x++) {
95
+                mean += (bestMode.fencYuv->m_buf[0][y * cuSize + x]);
96
+            }
97
         }
98
-    }
99
-    mean = mean / (cuSize * cuSize);
100
-    for (uint32_t y = 0 ; y < cuSize; y++){
101
-        for (uint32_t x = 0 ; x < cuSize; x++){
102
-            homo += abs(int(bestMode.fencYuv->m_buf[0][y * cuSize + x] - mean));
103
+        mean = mean / (cuSize * cuSize);
104
+        for (uint32_t y = 0; y < cuSize; y++) {
105
+            for (uint32_t x = 0; x < cuSize; x++) {
106
+                homo += abs(int(bestMode.fencYuv->m_buf[0][y * cuSize + x] - mean));
107
+            }
108
         }
109
-    }
110
-    homo = homo / (cuSize * cuSize);
111
+        homo = homo / (cuSize * cuSize);
112
 
113
-    if (homo < (.1 * mean))
114
-        return true;
115
+        if (homo < (.1 * mean))
116
+            return true;
117
 
118
-    return false;
119
-}
120
+        return false;
121
+    }
122
+    else
123
+    {
124
+        int blockType = bestMode.cu.m_log2CUSize[0] - LOG2_UNIT_SIZE;
125
+        int shift = bestMode.cu.m_log2CUSize[0] * LOG2_UNIT_SIZE;
126
+        intptr_t stride = m_frame->m_fencPic->m_stride;
127
+        intptr_t blockOffsetLuma = bestMode.cu.m_cuPelX + bestMode.cu.m_cuPelY * stride;
128
+        uint64_t sum_ss = primitives.cu[blockType].var(m_frame->m_edgeBitPic + blockOffsetLuma, stride);
129
+        uint32_t sum = (uint32_t)sum_ss;
130
+        uint32_t ss = (uint32_t)(sum_ss >> 32);
131
+        uint32_t pixelCount = 1 << shift;
132
+        double cuEdgeVariance = (ss - ((double)sum * sum / pixelCount)) / pixelCount;
133
+
134
+        if (cuEdgeVariance > (double)m_param->edgeVarThreshold)
135
+            return false;
136
+        else
137
+            return true;
138
+    }
139
+ }
140
 
141
 uint32_t Analysis::calculateCUVariance(const CUData& ctu, const CUGeom& cuGeom)
142
 {
143
@@ -3570,7 +3600,6 @@
144
             cnt++;
145
         }
146
     }
147
-    
148
     return cuVariance / cnt;
149
 }
150
 
151
x265_3.3.tar.gz/source/encoder/analysis.h -> x265_3.4.tar.gz/source/encoder/analysis.h Changed
18
 
1
@@ -52,7 +52,7 @@
2
         splitRefs = 0;
3
         mvCost[0] = 0; // L0
4
         mvCost[1] = 0; // L1
5
-        sa8dCost    = 0;
6
+        sa8dCost  = 0;
7
     }
8
 };
9
 
10
@@ -120,7 +120,6 @@
11
 
12
     Mode& compressCTU(CUData& ctu, Frame& frame, const CUGeom& cuGeom, const Entropy& initialContext);
13
     int32_t loadTUDepth(CUGeom cuGeom, CUData parentCTU);
14
-
15
 protected:
16
     /* Analysis data for save/load mode, writes/reads data based on absPartIdx */
17
     x265_analysis_inter_data*  m_reuseInterDataCTU;
18
x265_3.3.tar.gz/source/encoder/api.cpp -> x265_3.4.tar.gz/source/encoder/api.cpp Changed
35
 
1
@@ -1016,12 +1016,12 @@
2
 
3
 void x265_zone_free(x265_param *param)
4
 {
5
-    if (param && param->rc.zonefileCount) {
6
+    if (param && param->rc.zones && (param->rc.zoneCount || param->rc.zonefileCount))
7
+    {
8
         for (int i = 0; i < param->rc.zonefileCount; i++)
9
             x265_free(param->rc.zones[i].zoneParam);
10
-    }
11
-    if (param && (param->rc.zoneCount || param->rc.zonefileCount))
12
         x265_free(param->rc.zones);
13
+    }
14
 }
15
 
16
 static const x265_api libapi =
17
@@ -1294,6 +1294,8 @@
18
                     fprintf(csvfp, "RateFactor, ");
19
                 if (param->rc.vbvBufferSize)
20
                     fprintf(csvfp, "BufferFill, BufferFillFinal, ");
21
+                if (param->rc.vbvBufferSize && param->csvLogLevel >= 2)
22
+                    fprintf(csvfp, "UnclippedBufferFillFinal, ");
23
                 if (param->bEnablePsnr)
24
                     fprintf(csvfp, "Y PSNR, U PSNR, V PSNR, YUV PSNR, ");
25
                 if (param->bEnableSsim)
26
@@ -1405,6 +1407,8 @@
27
         fprintf(param->csvfpt, "%.3lf,", frameStats->rateFactor);
28
     if (param->rc.vbvBufferSize)
29
         fprintf(param->csvfpt, "%.3lf, %.3lf,", frameStats->bufferFill, frameStats->bufferFillFinal);
30
+    if (param->rc.vbvBufferSize && param->csvLogLevel >= 2)
31
+        fprintf(param->csvfpt, "%.3lf,", frameStats->unclippedBufferFillFinal);
32
     if (param->bEnablePsnr)
33
         fprintf(param->csvfpt, "%.3lf, %.3lf, %.3lf, %.3lf,", frameStats->psnrY, frameStats->psnrU, frameStats->psnrV, frameStats->psnr);
34
     if (param->bEnableSsim)
35
x265_3.3.tar.gz/source/encoder/encoder.cpp -> x265_3.4.tar.gz/source/encoder/encoder.cpp Changed
201
 
1
@@ -218,10 +218,7 @@
2
 
3
     if (m_param->bHistBasedSceneCut)
4
     {
5
-        for (int i = 0; i < x265_cli_csps[m_param->internalCsp].planes; i++)
6
-        {
7
-            m_planeSizes[i] = (m_param->sourceWidth >> x265_cli_csps[p->internalCsp].width[i]) * (m_param->sourceHeight >> x265_cli_csps[m_param->internalCsp].height[i]);
8
-        }
9
+        m_planeSizes[0] = (m_param->sourceWidth >> x265_cli_csps[p->internalCsp].width[0]) * (m_param->sourceHeight >> x265_cli_csps[m_param->internalCsp].height[0]);
10
         uint32_t pixelbytes = m_param->internalBitDepth > 8 ? 2 : 1;
11
         m_edgePic = X265_MALLOC(pixel, m_planeSizes[0] * pixelbytes);
12
         m_edgeHistThreshold = m_param->edgeTransitionThreshold;
13
@@ -1443,9 +1440,9 @@
14
     int32_t planeCount = x265_cli_csps[m_param->internalCsp].planes;
15
     memset(m_edgePic, 0, bufSize);
16
 
17
-    if (!computeEdge(m_edgePic, src, NULL, pic->width, pic->height, pic->width, false))
18
+    if (!computeEdge(m_edgePic, src, NULL, pic->width, pic->height, pic->width, false, 1))
19
     {
20
-        x265_log(m_param, X265_LOG_ERROR, "Failed edge computation!");
21
+        x265_log(m_param, X265_LOG_ERROR, "Failed to compute edge!");
22
         return false;
23
     }
24
 
25
@@ -1605,6 +1602,14 @@
26
         if (m_param->bHistBasedSceneCut && pic_in)
27
         {
28
             x265_picture *pic = (x265_picture *) pic_in;
29
+
30
+            if (pic->poc == 0)
31
+            {
32
+                /* for entire encode compute the chroma plane sizes only once */
33
+                for (int i = 1; i < x265_cli_csps[m_param->internalCsp].planes; i++)
34
+                    m_planeSizes[i] = (pic->width >> x265_cli_csps[m_param->internalCsp].width[i]) * (pic->height >> x265_cli_csps[m_param->internalCsp].height[i]);
35
+            }
36
+
37
             if (computeHistograms(pic))
38
             {
39
                 double maxUVSad = 0.0, edgeSad = 0.0;
40
@@ -1752,6 +1757,12 @@
41
                         }
42
                     }
43
                 }
44
+                if (m_param->recursionSkipMode == EDGE_BASED_RSKIP && m_param->bHistBasedSceneCut)
45
+                {
46
+                    pixel* src = m_edgePic;
47
+                    primitives.planecopy_pp_shr(src, inFrame->m_fencPic->m_picWidth, inFrame->m_edgeBitPic, inFrame->m_fencPic->m_stride,
48
+                        inFrame->m_fencPic->m_picWidth, inFrame->m_fencPic->m_picHeight, 0);
49
+                }
50
             }
51
             else
52
             {
53
@@ -2414,7 +2425,7 @@
54
         encParam->maxNumReferences = param->maxNumReferences; // never uses more refs than specified in stream headers
55
         encParam->bEnableFastIntra = param->bEnableFastIntra;
56
         encParam->bEnableEarlySkip = param->bEnableEarlySkip;
57
-        encParam->bEnableRecursionSkip = param->bEnableRecursionSkip;
58
+        encParam->recursionSkipMode = param->recursionSkipMode;
59
         encParam->searchMethod = param->searchMethod;
60
         /* Scratch buffer prevents me_range from being increased for esa/tesa */
61
         if (param->searchRange < encParam->searchRange)
62
@@ -3006,6 +3017,8 @@
63
             frameStats->ipCostRatio = curFrame->m_lowres.ipCostRatio;
64
         frameStats->bufferFill = m_rateControl->m_bufferFillActual;
65
         frameStats->bufferFillFinal = m_rateControl->m_bufferFillFinal;
66
+        if (m_param->csvLogLevel >= 2)
67
+            frameStats->unclippedBufferFillFinal = m_rateControl->m_unclippedBufferFillFinal;
68
         frameStats->frameLatency = inPoc - poc;
69
         if (m_param->rc.rateControlMode == X265_RC_CRF)
70
             frameStats->rateFactor = curEncData.m_rateFactor;
71
@@ -3400,7 +3413,7 @@
72
         p->maxNumReferences = zone->maxNumReferences;
73
         p->bEnableFastIntra = zone->bEnableFastIntra;
74
         p->bEnableEarlySkip = zone->bEnableEarlySkip;
75
-        p->bEnableRecursionSkip = zone->bEnableRecursionSkip;
76
+        p->recursionSkipMode = zone->recursionSkipMode;
77
         p->searchMethod = zone->searchMethod;
78
         p->searchRange = zone->searchRange;
79
         p->subpelRefine = zone->subpelRefine;
80
@@ -3681,20 +3694,6 @@
81
     if (p->analysisLoad && !p->analysisLoadReuseLevel)
82
         p->analysisLoadReuseLevel = 5;
83
 
84
-    if ((p->bAnalysisType == DEFAULT) && p->rc.cuTree)
85
-    {
86
-        if (p->analysisSaveReuseLevel && p->analysisSaveReuseLevel < 10)
87
-        {
88
-            x265_log(p, X265_LOG_WARNING, "cu-tree works only with analysis-save-reuse-level 10, Disabling cu-tree\n");
89
-            p->rc.cuTree = 0;
90
-        }
91
-        if (p->analysisLoadReuseLevel && p->analysisLoadReuseLevel < 10)
92
-        {
93
-            x265_log(p, X265_LOG_WARNING, "cu-tree works only with analysis-load-reuse-level 10, Disabling cu-tree\n");
94
-            p->rc.cuTree = 0;
95
-        }
96
-    }
97
-
98
     if ((p->analysisLoad || p->analysisSave) && (p->bDistributeModeAnalysis || p->bDistributeMotionEstimation))
99
     {
100
         x265_log(p, X265_LOG_WARNING, "Analysis load/save options incompatible with pmode/pme, Disabling pmode/pme\n");
101
@@ -3867,29 +3866,30 @@
102
         }
103
         else
104
         {
105
-            if (fread(&m_conformanceWindow.rightOffset, sizeof(int), 1, m_analysisFileIn) != 1)
106
+            int rightOffset, bottomOffset;
107
+            if (fread(&rightOffset, sizeof(int), 1, m_analysisFileIn) != 1)
108
             {
109
                 x265_log(NULL, X265_LOG_ERROR, "Error reading analysis data. Conformance window right offset missing\n");
110
                 m_aborted = true;
111
             }
112
-            else if (m_conformanceWindow.rightOffset && p->analysisLoadReuseLevel > 1)
113
+            else if (rightOffset && p->analysisLoadReuseLevel > 1)
114
             {
115
                 int scaleFactor = p->scaleFactor < 2 ? 1 : p->scaleFactor;
116
-                padsize = m_conformanceWindow.rightOffset * scaleFactor;
117
+                padsize = rightOffset * scaleFactor;
118
                 p->sourceWidth += padsize;
119
                 m_conformanceWindow.bEnabled = true;
120
                 m_conformanceWindow.rightOffset = padsize;
121
             }
122
 
123
-            if (fread(&m_conformanceWindow.bottomOffset, sizeof(int), 1, m_analysisFileIn) != 1)
124
+            if (fread(&bottomOffset, sizeof(int), 1, m_analysisFileIn) != 1)
125
             {
126
                 x265_log(NULL, X265_LOG_ERROR, "Error reading analysis data. Conformance window bottom offset missing\n");
127
                 m_aborted = true;
128
             }
129
-            else if (m_conformanceWindow.bottomOffset && p->analysisLoadReuseLevel > 1)
130
+            else if (bottomOffset && p->analysisLoadReuseLevel > 1)
131
             {
132
                 int scaleFactor = p->scaleFactor < 2 ? 1 : p->scaleFactor;
133
-                padsize = m_conformanceWindow.bottomOffset * scaleFactor;
134
+                padsize = bottomOffset * scaleFactor;
135
                 p->sourceHeight += padsize;
136
                 m_conformanceWindow.bEnabled = true;
137
                 m_conformanceWindow.bottomOffset = padsize;
138
@@ -4196,7 +4196,7 @@
139
         x265_log(p, X265_LOG_WARNING, "Radl requires fixed gop-length (keyint == min-keyint). Disabling radl.\n");
140
     }
141
 
142
-    if ((p->chunkStart || p->chunkEnd) && p->bOpenGOP)
143
+    if ((p->chunkStart || p->chunkEnd) && p->bOpenGOP && m_param->bResetZoneConfig)
144
     {
145
         p->chunkStart = p->chunkEnd = 0;
146
         x265_log(p, X265_LOG_WARNING, "Chunking requires closed gop structure. Disabling chunking.\n");
147
@@ -4229,12 +4229,6 @@
148
         x265_log(p, X265_LOG_WARNING, "Turning on repeat - headers for zone encoding\n");
149
     }
150
 
151
-    if (!m_param->bResetZoneConfig && (p->keyframeMax != p->keyframeMin))
152
-        x265_log(p, X265_LOG_WARNING, "External zone reconfiguration requires a fixed GOP size to enable appropriate signaling of HRD info\n");
153
-
154
-    if (!m_param->bResetZoneConfig && (p->reconfigWindowSize != (uint64_t)p->keyframeMax))
155
-        x265_log(p, X265_LOG_WARNING, "Zone size must be multiple of GOP size to enable appropriate signaling of HRD info\n");
156
-
157
     if (m_param->bEnableHME)
158
     {
159
         if (m_param->sourceHeight < 540)
160
@@ -4311,18 +4305,27 @@
161
         }
162
     }
163
 
164
+    uint32_t numCUsLoad, numCUsInHeightLoad;
165
+
166
     /* Now arrived at the right frame, read the record */
167
     analysis->poc = poc;
168
     analysis->frameRecordSize = frameRecordSize;
169
     X265_FREAD(&analysis->sliceType, sizeof(int), 1, m_analysisFileIn, &(picData->sliceType));
170
     X265_FREAD(&analysis->bScenecut, sizeof(int), 1, m_analysisFileIn, &(picData->bScenecut));
171
     X265_FREAD(&analysis->satdCost, sizeof(int64_t), 1, m_analysisFileIn, &(picData->satdCost));
172
-    X265_FREAD(&analysis->numCUsInFrame, sizeof(int), 1, m_analysisFileIn, &(picData->numCUsInFrame));
173
+    X265_FREAD(&numCUsLoad, sizeof(int), 1, m_analysisFileIn, &(picData->numCUsInFrame));
174
     X265_FREAD(&analysis->numPartitions, sizeof(int), 1, m_analysisFileIn, &(picData->numPartitions));
175
 
176
+    /* Update analysis info to save current settings */
177
+    uint32_t widthInCU = (m_param->sourceWidth + m_param->maxCUSize - 1) >> m_param->maxLog2CUSize;
178
+    uint32_t heightInCU = (m_param->sourceHeight + m_param->maxCUSize - 1) >> m_param->maxLog2CUSize;
179
+    uint32_t numCUsInFrame = widthInCU * heightInCU;
180
+    analysis->numCUsInFrame = numCUsInFrame;
181
+    analysis->numCuInHeight = heightInCU;
182
+
183
     if (m_param->bDisableLookahead)
184
     {
185
-        X265_FREAD(&analysis->numCuInHeight, sizeof(uint32_t), 1, m_analysisFileIn, &(picData->numCuInHeight));
186
+        X265_FREAD(&numCUsInHeightLoad, sizeof(uint32_t), 1, m_analysisFileIn, &(picData->numCuInHeight));
187
         X265_FREAD(&analysis->lookahead, sizeof(x265_lookahead_data), 1, m_analysisFileIn, &(picData->lookahead));
188
     }
189
     int scaledNumPartition = analysis->numPartitions;
190
@@ -4335,16 +4338,16 @@
191
 
192
     if (m_param->ctuDistortionRefine == CTU_DISTORTION_INTERNAL)
193
     {
194
-        X265_FREAD((analysis->distortionData)->ctuDistortion, sizeof(sse_t), analysis->numCUsInFrame, m_analysisFileIn, picDistortion);
195
+        X265_FREAD((analysis->distortionData)->ctuDistortion, sizeof(sse_t), numCUsLoad, m_analysisFileIn, picDistortion);
196
         computeDistortionOffset(analysis);
197
     }
198
     if (m_param->bDisableLookahead && m_rateControl->m_isVbv)
199
     {
200
         size_t vbvCount = m_param->lookaheadDepth + m_param->bframes + 2;
201
x265_3.3.tar.gz/source/encoder/frameencoder.cpp -> x265_3.4.tar.gz/source/encoder/frameencoder.cpp Changed
29
 
1
@@ -130,7 +130,7 @@
2
         {
3
             rowSum += sliceGroupSizeAccu;
4
             m_sliceBaseRow[++sidx] = i;
5
-        }        
6
+        }
7
     }
8
     X265_CHECK(sidx < m_param->maxSlices, "sliceID check failed!");
9
     m_sliceBaseRow[0] = 0;
10
@@ -448,6 +448,18 @@
11
     m_ssimCnt = 0;
12
     memset(&(m_frame->m_encData->m_frameStats), 0, sizeof(m_frame->m_encData->m_frameStats));
13
 
14
+    if (!m_param->bHistBasedSceneCut && m_param->rc.aqMode != X265_AQ_EDGE && m_param->recursionSkipMode == EDGE_BASED_RSKIP)
15
+    {
16
+        int height = m_frame->m_fencPic->m_picHeight;
17
+        int width = m_frame->m_fencPic->m_picWidth;
18
+        intptr_t stride = m_frame->m_fencPic->m_stride;
19
+
20
+        if (!computeEdge(m_frame->m_edgeBitPic, m_frame->m_fencPic->m_picOrg[0], NULL, stride, height, width, false, 1))
21
+        {
22
+            x265_log(m_param, X265_LOG_ERROR, " Failed to compute edge !");
23
+        }
24
+    }
25
+
26
     /* Emit access unit delimiter unless this is the first frame and the user is
27
      * not repeating headers (since AUD is supposed to be the first NAL in the access
28
      * unit) */
29
x265_3.3.tar.gz/source/encoder/ratecontrol.cpp -> x265_3.4.tar.gz/source/encoder/ratecontrol.cpp Changed
173
 
1
@@ -269,7 +269,7 @@
2
         x265_log(m_param, X265_LOG_WARNING, "NAL HRD parameters require VBV parameters, ignored\n");
3
         m_param->bEmitHRDSEI = 0;
4
     }
5
-    m_isCbr = m_param->rc.rateControlMode == X265_RC_ABR && m_isVbv && !m_2pass && m_param->rc.vbvMaxBitrate <= m_param->rc.bitrate;
6
+    m_isCbr = m_param->rc.rateControlMode == X265_RC_ABR && m_isVbv && m_param->rc.vbvMaxBitrate <= m_param->rc.bitrate;
7
     if (m_param->rc.bStrictCbr && !m_isCbr)
8
     {
9
         x265_log(m_param, X265_LOG_WARNING, "strict CBR set without CBR mode, ignored\n");
10
@@ -335,7 +335,7 @@
11
         int vbvBufferSize = m_param->rc.vbvBufferSize * 1000;
12
         int vbvMaxBitrate = m_param->rc.vbvMaxBitrate * 1000;
13
 
14
-        if (m_param->bEmitHRDSEI)
15
+        if (m_param->bEmitHRDSEI && !m_param->decoderVbvMaxRate)
16
         {
17
             const HRDInfo* hrd = &sps.vuiParameters.hrdParameters;
18
             vbvBufferSize = hrd->cpbSizeValue << (hrd->cpbSizeScale + CPB_SHIFT);
19
@@ -509,6 +509,7 @@
20
                 CMP_OPT_FIRST_PASS(" keyint", m_param->keyframeMax);
21
                 CMP_OPT_FIRST_PASS("scenecut", m_param->scenecutThreshold);
22
                 CMP_OPT_FIRST_PASS("intra-refresh", m_param->bIntraRefresh);
23
+                CMP_OPT_FIRST_PASS("frame-dup", m_param->bEnableFrameDuplication);
24
                 if (m_param->bMultiPassOptRPS)
25
                 {
26
                     CMP_OPT_FIRST_PASS("multi-pass-opt-rps", m_param->bMultiPassOptRPS);
27
@@ -546,7 +547,7 @@
28
                 x265_log(m_param, X265_LOG_WARNING, "2nd pass has fewer frames than 1st pass (%d vs %d)\n",
29
                          m_param->totalFrames, m_numEntries);
30
             }
31
-            if (m_param->totalFrames > m_numEntries)
32
+            if (m_param->totalFrames > m_numEntries && !m_param->bEnableFrameDuplication)
33
             {
34
                 x265_log(m_param, X265_LOG_ERROR, "2nd pass has more frames than 1st pass (%d vs %d)\n",
35
                          m_param->totalFrames, m_numEntries);
36
@@ -781,6 +782,10 @@
37
     // Init HRD
38
     HRDInfo* hrd = &sps.vuiParameters.hrdParameters;
39
     hrd->cbrFlag = m_isCbr;
40
+    if (m_param->reconfigWindowSize) {
41
+        hrd->cbrFlag = 0;
42
+        vbvMaxBitrate = m_param->decoderVbvMaxRate * 1000;
43
+    }
44
 
45
     // normalize HRD size and rate to the value / scale notation
46
     hrd->bitRateScale = x265_clip3(0, 15, calcScale(vbvMaxBitrate) - BR_SHIFT);
47
@@ -829,7 +834,7 @@
48
         /* weighted average of cplx of future frames */
49
         for (int j = 1; j < cplxBlur * 2 && j < m_numEntries - i; j++)
50
         {
51
-            int index = m_encOrder[i + j];
52
+            int index = i+j;
53
             RateControlEntry *rcj = &m_rce2Pass[index];
54
             weight *= 1 - pow(rcj->iCuCount / m_ncu, 2);
55
             if (weight < 0.0001)
56
@@ -842,7 +847,7 @@
57
         weight = 1.0;
58
         for (int j = 0; j <= cplxBlur * 2 && j <= i; j++)
59
         {
60
-            int index = m_encOrder[i - j];
61
+            int index = i-j;
62
             RateControlEntry *rcj = &m_rce2Pass[index];
63
             gaussianWeight = weight * exp(-j * j / 200.0);
64
             weightSum += gaussianWeight;
65
@@ -851,7 +856,7 @@
66
             if (weight < .0001)
67
                 break;
68
         }
69
-        m_rce2Pass[m_encOrder[i]].blurredComplexity = cplxSum / weightSum;
70
+        m_rce2Pass[i].blurredComplexity= cplxSum / weightSum;
71
     }
72
     CHECKED_MALLOC(qScale, double, m_numEntries);
73
     if (filterSize > 1)
74
@@ -870,7 +875,7 @@
75
     expectedBits = 1;
76
     for (int i = 0; i < m_numEntries; i++)
77
     {
78
-        RateControlEntry* rce = &m_rce2Pass[m_encOrder[i]];
79
+        RateControlEntry* rce = &m_rce2Pass[i];
80
         double q = getQScale(rce, 1.0);
81
         expectedBits += qScale2bits(rce, q);
82
         m_lastQScaleFor[rce->sliceType] = q;
83
@@ -893,15 +898,15 @@
84
         /* find qscale */
85
         for (int i = 0; i < m_numEntries; i++)
86
         {
87
-            RateControlEntry *rce = &m_rce2Pass[m_encOrder[i]];
88
+            RateControlEntry *rce = &m_rce2Pass[i];
89
             qScale[i] = getQScale(rce, rateFactor);
90
             m_lastQScaleFor[rce->sliceType] = qScale[i];
91
         }
92
 
93
         /* fixed I/B qscale relative to P */
94
-        for (int i = m_numEntries - 1; i >= 0; i--)
95
+        for (int i = 0; i < m_numEntries; i++)
96
         {
97
-            qScale[i] = getDiffLimitedQScale(&m_rce2Pass[m_encOrder[i]], qScale[i]);
98
+            qScale[i] = getDiffLimitedQScale(&m_rce2Pass[i], qScale[i]);
99
             X265_CHECK(qScale[i] >= 0, "qScale became negative\n");
100
         }
101
 
102
@@ -912,7 +917,6 @@
103
             for (int i = 0; i < m_numEntries; i++)
104
             {
105
                 double q = 0.0, sum = 0.0;
106
-
107
                 for (int j = 0; j < filterSize; j++)
108
                 {
109
                     int idx = i + j - filterSize / 2;
110
@@ -920,7 +924,7 @@
111
                     double coeff = qBlur == 0 ? 1.0 : exp(-d * d / (qBlur * qBlur));
112
                     if (idx < 0 || idx >= m_numEntries)
113
                         continue;
114
-                    if (m_rce2Pass[m_encOrder[i]].sliceType != m_rce2Pass[m_encOrder[idx]].sliceType)
115
+                    if (m_rce2Pass[i].sliceType != m_rce2Pass[idx].sliceType)
116
                         continue;
117
                     q += qScale[idx] * coeff;
118
                     sum += coeff;
119
@@ -932,7 +936,7 @@
120
         /* find expected bits */
121
         for (int i = 0; i < m_numEntries; i++)
122
         {
123
-            RateControlEntry *rce = &m_rce2Pass[m_encOrder[i]];
124
+            RateControlEntry *rce = &m_rce2Pass[i];
125
             rce->newQScale = clipQscale(NULL, rce, blurredQscale[i]); // check if needed
126
             X265_CHECK(rce->newQScale >= 0, "new Qscale is negative\n");
127
             expectedBits += qScale2bits(rce, rce->newQScale);
128
@@ -1279,6 +1283,7 @@
129
                 m_param->rc.vbvMaxBitrate = m_param->rc.zones[i].zoneParam->rc.vbvMaxBitrate;
130
                 memcpy(m_relativeComplexity, m_param->rc.zones[i].relativeComplexity, sizeof(double) * m_param->reconfigWindowSize);
131
                 reconfigureRC();
132
+                m_isCbr = 1; /* Always vbvmaxrate == bitrate here*/
133
                 m_top->zoneReadCount[i].incr();
134
             }
135
         }
136
@@ -1951,7 +1956,7 @@
137
                 /* Adjust quant based on the difference between
138
                  * achieved and expected bitrate so far */
139
                 double curTime = (double)rce->encodeOrder / m_numEntries;
140
-                double w = x265_clip3(0.0, 1.0, curTime * 100);
141
+                double w = x265_clip3(0.0, 1.0, curTime);
142
                 q *= pow((double)m_totalBits / m_expectedBitsSum, w);
143
             }
144
             if (m_framesDone == 0 && m_param->rc.rateControlMode == X265_RC_ABR && m_isGrainEnabled)
145
@@ -2742,7 +2747,9 @@
146
         x265_log(m_param, X265_LOG_WARNING, "poc:%d, VBV underflow (%.0f bits)\n", rce->poc, m_bufferFillFinal);
147
 
148
     m_bufferFillFinal = X265_MAX(m_bufferFillFinal, 0);
149
-    m_bufferFillFinal += m_bufferRate;
150
+    m_bufferFillFinal += rce->bufferRate;
151
+    if (m_param->csvLogLevel >= 2)
152
+        m_unclippedBufferFillFinal = m_bufferFillFinal;
153
 
154
     if (m_param->rc.bStrictCbr)
155
     {
156
@@ -2752,14 +2759,14 @@
157
             filler += FILLER_OVERHEAD * 8;
158
         }
159
         m_bufferFillFinal -= filler;
160
-        bufferBits = X265_MIN(bits + filler + m_bufferExcess, m_bufferRate);
161
+        bufferBits = X265_MIN(bits + filler + m_bufferExcess, rce->bufferRate);
162
         m_bufferExcess = X265_MAX(m_bufferExcess - bufferBits + bits + filler, 0);
163
         m_bufferFillActual += bufferBits - bits - filler;
164
     }
165
     else
166
     {
167
         m_bufferFillFinal = X265_MIN(m_bufferFillFinal, m_bufferSize);
168
-        bufferBits = X265_MIN(bits + m_bufferExcess, m_bufferRate);
169
+        bufferBits = X265_MIN(bits + m_bufferExcess, rce->bufferRate);
170
         m_bufferExcess = X265_MAX(m_bufferExcess - bufferBits + bits, 0);
171
         m_bufferFillActual += bufferBits - bits;
172
         m_bufferFillActual = X265_MIN(m_bufferFillActual, m_bufferSize);
173
x265_3.3.tar.gz/source/encoder/ratecontrol.h -> x265_3.4.tar.gz/source/encoder/ratecontrol.h Changed
9
 
1
@@ -157,6 +157,7 @@
2
     double m_rateFactorConstant;
3
     double m_bufferSize;
4
     double m_bufferFillFinal;  /* real buffer as of the last finished frame */
5
+    double m_unclippedBufferFillFinal; /* real unclipped buffer fill as of the last finished frame; used for CSV logging */
6
     double m_bufferFill;       /* planned buffer, if all in-progress frames hit their bit budget */
7
     double m_bufferRate;       /* # of bits added to buffer_fill after each frame */
8
     double m_vbvMaxRate;       /* in kbps */
9
x265_3.3.tar.gz/source/encoder/slicetype.cpp -> x265_3.4.tar.gz/source/encoder/slicetype.cpp Changed
33
 
1
@@ -87,7 +87,7 @@
2
 
3
 namespace X265_NS {
4
 
5
-bool computeEdge(pixel *edgePic, pixel *refPic, pixel *edgeTheta, intptr_t stride, int height, int width, bool bcalcTheta)
6
+bool computeEdge(pixel* edgePic, pixel* refPic, pixel* edgeTheta, intptr_t stride, int height, int width, bool bcalcTheta, pixel whitePixel)
7
 {
8
     intptr_t rowOne = 0, rowTwo = 0, rowThree = 0, colOne = 0, colTwo = 0, colThree = 0;
9
     intptr_t middle = 0, topLeft = 0, topRight = 0, bottomLeft = 0, bottomRight = 0;
10
@@ -141,7 +141,7 @@
11
                        theta = 180 + theta;
12
                     edgeTheta[middle] = (pixel)theta;
13
                 }
14
-                edgePic[middle] = (pixel)(gradientMagnitude >= edgeThreshold ? edgeThreshold : blackPixel);
15
+                edgePic[middle] = (pixel)(gradientMagnitude >= EDGE_THRESHOLD ? whitePixel : blackPixel);
16
             }
17
         }
18
         return true;
19
@@ -519,6 +519,13 @@
20
                 if (param->rc.aqMode == X265_AQ_EDGE)
21
                     edgeFilter(curFrame, param);
22
 
23
+                if (param->rc.aqMode == X265_AQ_EDGE && !param->bHistBasedSceneCut && param->recursionSkipMode == EDGE_BASED_RSKIP)
24
+                {
25
+                    pixel* src = curFrame->m_edgePic + curFrame->m_fencPic->m_lumaMarginY * curFrame->m_fencPic->m_stride + curFrame->m_fencPic->m_lumaMarginX;
26
+                    primitives.planecopy_pp_shr(src, curFrame->m_fencPic->m_stride, curFrame->m_edgeBitPic,
27
+                        curFrame->m_fencPic->m_stride, curFrame->m_fencPic->m_picWidth, curFrame->m_fencPic->m_picHeight, SHIFT_TO_BITPLANE);
28
+                }
29
+
30
                 if (param->rc.aqMode == X265_AQ_AUTO_VARIANCE || param->rc.aqMode == X265_AQ_AUTO_VARIANCE_BIASED || param->rc.aqMode == X265_AQ_EDGE)
31
                 {
32
                     double bit_depth_correction = 1.f / (1 << (2 * (X265_DEPTH - 8)));
33
x265_3.3.tar.gz/source/encoder/slicetype.h -> x265_3.4.tar.gz/source/encoder/slicetype.h Changed
31
 
1
@@ -44,9 +44,9 @@
2
 #define EDGE_INCLINATION 45
3
 
4
 #if HIGH_BIT_DEPTH
5
-#define edgeThreshold 1023.0
6
+#define EDGE_THRESHOLD 1023.0
7
 #else
8
-#define edgeThreshold 255.0
9
+#define EDGE_THRESHOLD 255.0
10
 #endif
11
 #define PI 3.14159265
12
 
13
@@ -101,7 +101,7 @@
14
 protected:
15
 
16
     uint32_t acEnergyCu(Frame* curFrame, uint32_t blockX, uint32_t blockY, int csp, uint32_t qgSize);
17
-    uint32_t edgeDensityCu(Frame*curFrame, uint32_t &avgAngle, uint32_t blockX, uint32_t blockY, uint32_t qgSize);
18
+    uint32_t edgeDensityCu(Frame* curFrame, uint32_t &avgAngle, uint32_t blockX, uint32_t blockY, uint32_t qgSize);
19
     uint32_t lumaSumCu(Frame* curFrame, uint32_t blockX, uint32_t blockY, uint32_t qgSize);
20
     uint32_t weightCostLuma(Lowres& fenc, Lowres& ref, WeightParam& wp);
21
     bool     allocWeightedRef(Lowres& fenc);
22
@@ -265,7 +265,6 @@
23
     CostEstimateGroup& operator=(const CostEstimateGroup&);
24
 };
25
 
26
-bool computeEdge(pixel *edgePic, pixel *refPic, pixel *edgeTheta, intptr_t stride, int height, int width, bool bcalcTheta);
27
-
28
+bool computeEdge(pixel* edgePic, pixel* refPic, pixel* edgeTheta, intptr_t stride, int height, int width, bool bcalcTheta, pixel whitePixel = EDGE_THRESHOLD);
29
 }
30
 #endif // ifndef X265_SLICETYPE_H
31
x265_3.3.tar.gz/source/test/CMakeLists.txt -> x265_3.4.tar.gz/source/test/CMakeLists.txt Changed
24
 
1
@@ -23,13 +23,15 @@
2
 
3
 # add ARM assembly files
4
 if(ARM OR CROSS_COMPILE_ARM)
5
-    enable_language(ASM)
6
-    set(NASM_SRC checkasm-arm.S)
7
-    add_custom_command(
8
-        OUTPUT checkasm-arm.obj
9
-        COMMAND ${CMAKE_CXX_COMPILER}
10
-        ARGS ${NASM_FLAGS} ${CMAKE_CURRENT_SOURCE_DIR}/checkasm-arm.S -o checkasm-arm.obj
11
-        DEPENDS checkasm-arm.S)
12
+    if(NOT ARM64)
13
+        enable_language(ASM)
14
+        set(NASM_SRC checkasm-arm.S)
15
+        add_custom_command(
16
+            OUTPUT checkasm-arm.obj
17
+            COMMAND ${CMAKE_CXX_COMPILER}
18
+            ARGS ${NASM_FLAGS} ${CMAKE_CURRENT_SOURCE_DIR}/checkasm-arm.S -o checkasm-arm.obj
19
+            DEPENDS checkasm-arm.S)
20
+    endif()
21
 endif(ARM OR CROSS_COMPILE_ARM)
22
 
23
 # add PowerPC assembly files
24
x265_3.3.tar.gz/source/test/regression-tests.txt -> x265_3.4.tar.gz/source/test/regression-tests.txt Changed
23
 
1
@@ -75,7 +75,7 @@
2
 News-4k.y4m,--preset superfast --lookahead-slices 6 --aq-mode 0
3
 News-4k.y4m,--preset superfast --slices 4 --aq-mode 0 
4
 News-4k.y4m,--preset medium --tune ssim --no-sao --qg-size 16
5
-News-4k.y4m,--preset veryslow --no-rskip
6
+News-4k.y4m,--preset veryslow --rskip 0
7
 News-4k.y4m,--preset veryslow --pme --crf 40
8
 OldTownCross_1920x1080_50_10bit_422.yuv,--preset superfast --weightp
9
 OldTownCross_1920x1080_50_10bit_422.yuv,--preset medium --no-weightp
10
@@ -162,7 +162,11 @@
11
 sintel_trailer_2k_1920x1080_24.yuv, --preset medium --hist-scenecut --hist-threshold 0.02 --frame-dup --dup-threshold 60 --hrd --bitrate 10000 --vbv-bufsize 15000 --vbv-maxrate 12000
12
 sintel_trailer_2k_1920x1080_24.yuv, --preset medium --hist-scenecut --hist-threshold 0.02
13
 sintel_trailer_2k_1920x1080_24.yuv, --preset ultrafast --hist-scenecut --hist-threshold 0.02
14
-
15
+crowd_run_1920x1080_50.yuv, --preset faster --ctu 32 --rskip 2 --rskip-edge-threshold 5
16
+crowd_run_1920x1080_50.yuv, --preset fast --ctu 64 --rskip 2 --rskip-edge-threshold 5 --aq-mode 4
17
+crowd_run_1920x1080_50.yuv, --preset slow --ctu 32 --rskip 2 --rskip-edge-threshold 5 --hist-scenecut --hist-threshold 0.1
18
+crowd_run_1920x1080_50.yuv, --preset slower --ctu 16 --rskip 2 --rskip-edge-threshold 5 --hist-scenecut --hist-threshold 0.1 --aq-mode 4
19
+ 
20
 # Main12 intraCost overflow bug test
21
 720p50_parkrun_ter.y4m,--preset medium
22
 
23
x265_3.3.tar.gz/source/test/save-load-tests.txt -> x265_3.4.tar.gz/source/test/save-load-tests.txt Changed
6
 
1
@@ -18,3 +18,4 @@
2
 RaceHorses_416x240_30.y4m,   --preset slow --no-cutree --ctu 16 --analysis-save x265_analysis.dat --analysis-save-reuse-level 10 --scale-factor 2 --crf 22  --vbv-maxrate 1000 --vbv-bufsize 1000::RaceHorses_832x480_30.y4m,    --preset slow --no-cutree --ctu 32 --analysis-load x265_analysis.dat  --analysis-save x265_analysis_2.dat --analysis-load-reuse-level 10 --analysis-save-reuse-level 10 --scale-factor 2 --crf 16 --vbv-maxrate 4000 --vbv-bufsize 4000 --refine-intra 0 --refine-inter 1::RaceHorses_1664x960_30.y4m,   --preset slow --no-cutree --ctu 64 --analysis-load x265_analysis_2.dat  --analysis-load-reuse-level 10 --scale-factor 2 --crf 12 --vbv-maxrate 7000 --vbv-bufsize 7000 --refine-intra 2 --refine-inter 2
3
 crowd_run_540p50.y4m,   --preset veryslow --no-cutree --analysis-save x265_analysis_540.dat  --analysis-save-reuse-level 10 --scale-factor 2 --bitrate 5000 --vbv-bufsize 15000 --vbv-maxrate 9000::crowd_run_1080p50.y4m,   --preset veryslow --no-cutree --analysis-save x265_analysis_1080.dat  --analysis-save-reuse-level 10 --scale-factor 2 --bitrate 10000 --vbv-bufsize 30000 --vbv-maxrate 17500::crowd_run_1080p50.y4m,  --preset veryslow --no-cutree --analysis-save x265_analysis_1080.dat --analysis-load x265_analysis_540.dat --refine-intra 4 --dynamic-refine --analysis-load-reuse-level 10 --analysis-save-reuse-level 10 --scale-factor 2 --bitrate 10000 --vbv-bufsize 30000 --vbv-maxrate 17500::crowd_run_2160p50.y4m,  --preset veryslow --no-cutree --analysis-save x265_analysis_2160.dat --analysis-load x265_analysis_1080.dat --refine-intra 3 --dynamic-refine --analysis-load-reuse-level 10 --analysis-save-reuse-level 10 --scale-factor 2 --bitrate 24000 --vbv-bufsize 84000 --vbv-maxrate 49000::crowd_run_2160p50.y4m,  --preset veryslow --no-cutree --analysis-load x265_analysis_2160.dat --refine-intra 2 --dynamic-refine --analysis-load-reuse-level 10 --scale-factor 1 --bitrate 24000 --vbv-bufsize 84000 --vbv-maxrate 49000
4
 crowd_run_540p50.y4m,  --preset medium --no-cutree --analysis-save x265_analysis_540.dat  --analysis-save-reuse-level 10 --scale-factor 2 --bitrate 5000 --vbv-bufsize 15000 --vbv-maxrate 9000::crowd_run_1080p50.y4m,  --preset medium --no-cutree --analysis-save x265_analysis_1080.dat  --analysis-save-reuse-level 10 --scale-factor 2 --bitrate 10000 --vbv-bufsize 30000 --vbv-maxrate 17500::crowd_run_1080p50.y4m,  --preset medium --no-cutree --analysis-save x265_analysis_1080.dat --analysis-load x265_analysis_540.dat --refine-intra 4 --dynamic-refine --analysis-load-reuse-level 10 --analysis-save-reuse-level 10 --scale-factor 2 --bitrate 10000 --vbv-bufsize 30000 --vbv-maxrate 17500::crowd_run_2160p50.y4m,  --preset medium --no-cutree --analysis-save x265_analysis_2160.dat --analysis-load x265_analysis_1080.dat --refine-intra 3 --dynamic-refine --analysis-load-reuse-level 10 --analysis-save-reuse-level 10 --scale-factor 2 --bitrate 24000 --vbv-bufsize 84000 --vbv-maxrate 49000::crowd_run_2160p50.y4m,  --preset medium --no-cutree --analysis-load x265_analysis_2160.dat --refine-intra 2 --dynamic-refine --analysis-load-reuse-level 10 --scale-factor 1 --bitrate 24000 --vbv-bufsize 84000 --vbv-maxrate 49000
5
+News-4k.y4m,  --preset medium --analysis-save x265_analysis_fdup.dat --frame-dup --hrd --bitrate 10000 --vbv-bufsize 15000 --vbv-maxrate 12000::News-4k.y4m, --analysis-load x265_analysis_fdup.dat --frame-dup --hrd --bitrate 10000 --vbv-bufsize 15000 --vbv-maxrate 12000
6
x265_3.3.tar.gz/source/test/testbench.cpp -> x265_3.4.tar.gz/source/test/testbench.cpp Changed
38
 
1
@@ -5,6 +5,7 @@
2
  *          Mandar Gurav <mandar@multicorewareinc.com>
3
  *          Mahesh Pittala <mahesh@multicorewareinc.com>
4
  *          Min Chen <chenm003@163.com>
5
+ *          Yimeng Su <yimeng.su@huawei.com>
6
  *
7
  * This program is free software; you can redistribute it and/or modify
8
  * it under the terms of the GNU General Public License as published by
9
@@ -208,6 +209,14 @@
10
         EncoderPrimitives asmprim;
11
         memset(&asmprim, 0, sizeof(asmprim));
12
         setupAssemblyPrimitives(asmprim, test_arch[i].flag);
13
+
14
+#if X265_ARCH_ARM64
15
+        /* Temporary workaround because luma_vsp assembly primitive has not been completed
16
+         * but interp_8tap_hv_pp_cpu uses mixed C primitive and assembly primitive.
17
+         * Otherwise, a segmentation fault occurs. */
18
+        setupAliasCPrimitives(cprim, asmprim, test_arch[i].flag);
19
+#endif
20
+
21
         setupAliasPrimitives(asmprim);
22
         memcpy(&primitives, &asmprim, sizeof(EncoderPrimitives));
23
         for (size_t h = 0; h < sizeof(harness) / sizeof(TestHarness*); h++)
24
@@ -232,6 +241,13 @@
25
 #endif
26
     setupAssemblyPrimitives(optprim, cpuid);
27
 
28
+#if X265_ARCH_ARM64
29
+    /* Temporary workaround because luma_vsp assembly primitive has not been completed
30
+     * but interp_8tap_hv_pp_cpu uses mixed C primitive and assembly primitive.
31
+     * Otherwise, a segmentation fault occurs. */
32
+    setupAliasCPrimitives(cprim, optprim, cpuid);
33
+#endif
34
+
35
     /* Note that we do not setup aliases for performance tests, that would be
36
      * redundant. The testbench only verifies they are correctly aliased */
37
 
38
x265_3.3.tar.gz/source/test/testharness.h -> x265_3.4.tar.gz/source/test/testharness.h Changed
26
 
1
@@ -3,6 +3,7 @@
2
  *
3
  * Authors: Steve Borho <steve@borho.org>
4
  *          Min Chen <chenm003@163.com>
5
+ *          Yimeng Su <yimeng.su@huawei.com>
6
  *
7
  * This program is free software; you can redistribute it and/or modify
8
  * it under the terms of the GNU General Public License as published by
9
@@ -81,12 +82,16 @@
10
 #if X265_ARCH_X86
11
     asm volatile("rdtsc" : "=a" (a) ::"edx");
12
 #elif X265_ARCH_ARM
13
+#if X265_ARCH_ARM64
14
+    asm volatile("mrs %0, cntvct_el0" : "=r"(a));
15
+#else
16
     // TOD-DO: verify following inline asm to get cpu Timestamp Counter for ARM arch
17
     // asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r"(a));
18
 
19
     // TO-DO: replace clock() function with appropriate ARM cpu instructions
20
     a = clock();
21
 #endif
22
+#endif
23
     return a;
24
 }
25
 #endif // ifdef _MSC_VER
26
x265_3.3.tar.gz/source/x265.cpp -> x265_3.4.tar.gz/source/x265.cpp Changed
201
 
1
@@ -27,11 +27,7 @@
2
 
3
 #include "x265.h"
4
 #include "x265cli.h"
5
-
6
-#include "input/input.h"
7
-#include "output/output.h"
8
-#include "output/reconplay.h"
9
-#include "svt.h"
10
+#include "abrEncApp.h"
11
 
12
 #if HAVE_VLD
13
 /* Visual Leak Detector */
14
@@ -47,191 +43,59 @@
15
 #include <fstream>
16
 #include <queue>
17
 
18
-#define CONSOLE_TITLE_SIZE 200
19
-#ifdef _WIN32
20
-#include <windows.h>
21
-#define SetThreadExecutionState(es)
22
-static char orgConsoleTitle[CONSOLE_TITLE_SIZE] = "";
23
-#else
24
-#define GetConsoleTitle(t, n)
25
-#define SetConsoleTitle(t)
26
-#define SetThreadExecutionState(es)
27
-#endif
28
-
29
 using namespace X265_NS;
30
 
31
-/* Ctrl-C handler */
32
-static volatile sig_atomic_t b_ctrl_c /* = 0 */;
33
-static void sigint_handler(int)
34
-{
35
-    b_ctrl_c = 1;
36
-}
37
-#define START_CODE 0x00000001
38
-#define START_CODE_BYTES 4
39
-
40
-struct CLIOptions
41
-{
42
-    InputFile* input;
43
-    ReconFile* recon;
44
-    OutputFile* output;
45
-    FILE*       qpfile;
46
-    FILE*       zoneFile;
47
-    FILE*    dolbyVisionRpu;    /* File containing Dolby Vision BL RPU metadata */
48
-    const char* reconPlayCmd;
49
-    const x265_api* api;
50
-    x265_param* param;
51
-    x265_vmaf_data* vmafData;
52
-    bool bProgress;
53
-    bool bForceY4m;
54
-    bool bDither;
55
-    uint32_t seek;              // number of frames to skip from the beginning
56
-    uint32_t framesToBeEncoded; // number of frames to encode
57
-    uint64_t totalbytes;
58
-    int64_t startTime;
59
-    int64_t prevUpdateTime;
60
-
61
-    /* in microseconds */
62
-    static const int UPDATE_INTERVAL = 250000;
63
-
64
-    CLIOptions()
65
-    {
66
-        input = NULL;
67
-        recon = NULL;
68
-        output = NULL;
69
-        qpfile = NULL;
70
-        zoneFile = NULL;
71
-        dolbyVisionRpu = NULL;
72
-        reconPlayCmd = NULL;
73
-        api = NULL;
74
-        param = NULL;
75
-        vmafData = NULL;
76
-        framesToBeEncoded = seek = 0;
77
-        totalbytes = 0;
78
-        bProgress = true;
79
-        bForceY4m = false;
80
-        startTime = x265_mdate();
81
-        prevUpdateTime = 0;
82
-        bDither = false;
83
-    }
84
+#define X265_HEAD_ENTRIES 3
85
 
86
-    void destroy();
87
-    void printStatus(uint32_t frameNum);
88
-    bool parse(int argc, char **argv);
89
-    bool parseZoneParam(int argc, char **argv, x265_param* globalParam, int zonefileCount);
90
-    bool parseQPFile(x265_picture &pic_org);
91
-    bool parseZoneFile();
92
-};
93
-
94
-void CLIOptions::destroy()
95
-{
96
-    if (input)
97
-        input->release();
98
-    input = NULL;
99
-    if (recon)
100
-        recon->release();
101
-    recon = NULL;
102
-    if (qpfile)
103
-        fclose(qpfile);
104
-    qpfile = NULL;
105
-    if (zoneFile)
106
-        fclose(zoneFile);
107
-    zoneFile = NULL;
108
-    if (dolbyVisionRpu)
109
-        fclose(dolbyVisionRpu);
110
-    dolbyVisionRpu = NULL;
111
-    if (output)
112
-        output->release();
113
-    output = NULL;
114
-}
115
-
116
-void CLIOptions::printStatus(uint32_t frameNum)
117
-{
118
-    char buf[200];
119
-    int64_t time = x265_mdate();
120
-
121
-    if (!bProgress || !frameNum || (prevUpdateTime && time - prevUpdateTime < UPDATE_INTERVAL))
122
-        return;
123
-
124
-    int64_t elapsed = time - startTime;
125
-    double fps = elapsed > 0 ? frameNum * 1000000. / elapsed : 0;
126
-    float bitrate = 0.008f * totalbytes * (param->fpsNum / param->fpsDenom) / ((float)frameNum);
127
-    if (framesToBeEncoded)
128
-    {
129
-        int eta = (int)(elapsed * (framesToBeEncoded - frameNum) / ((int64_t)frameNum * 1000000));
130
-        sprintf(buf, "x265 [%.1f%%] %d/%d frames, %.2f fps, %.2f kb/s, eta %d:%02d:%02d",
131
-            100. * frameNum / (param->chunkEnd ? param->chunkEnd : param->totalFrames), frameNum, (param->chunkEnd ? param->chunkEnd : param->totalFrames), fps, bitrate,
132
-                eta / 3600, (eta / 60) % 60, eta % 60);
133
-    }
134
-    else
135
-        sprintf(buf, "x265 %d frames: %.2f fps, %.2f kb/s", frameNum, fps, bitrate);
136
-
137
-    fprintf(stderr, "%s  \r", buf + 5);
138
-    SetConsoleTitle(buf);
139
-    fflush(stderr); // needed in windows
140
-    prevUpdateTime = time;
141
-}
142
+#ifdef _WIN32
143
+#define strdup _strdup
144
+#endif
145
 
146
-bool CLIOptions::parseZoneParam(int argc, char **argv, x265_param* globalParam, int zonefileCount)
147
+#ifdef _WIN32
148
+/* Copy of x264 code, which allows for Unicode characters in the command line.
149
+ * Retrieve command line arguments as UTF-8. */
150
+static int get_argv_utf8(int *argc_ptr, char ***argv_ptr)
151
 {
152
-    bool bError = false;
153
-    int bShowHelp = false;
154
-    int outputBitDepth = 0;
155
-    const char *profile = NULL;
156
-
157
-    /* Presets are applied before all other options. */
158
-    for (optind = 0;;)
159
-    {
160
-        int c = getopt_long(argc, argv, short_options, long_options, NULL);
161
-        if (c == -1)
162
-            break;
163
-        else if (c == 'D')
164
-            outputBitDepth = atoi(optarg);
165
-        else if (c == 'P')
166
-            profile = optarg;
167
-        else if (c == '?')
168
-            bShowHelp = true;
169
-    }
170
-
171
-    if (!outputBitDepth && profile)
172
-    {
173
-        /* try to derive the output bit depth from the requested profile */
174
-        if (strstr(profile, "10"))
175
-            outputBitDepth = 10;
176
-        else if (strstr(profile, "12"))
177
-            outputBitDepth = 12;
178
-        else
179
-            outputBitDepth = 8;
180
-    }
181
-
182
-    api = x265_api_get(outputBitDepth);
183
-    if (!api)
184
+    int ret = 0;
185
+    wchar_t **argv_utf16 = CommandLineToArgvW(GetCommandLineW(), argc_ptr);
186
+    if (argv_utf16)
187
     {
188
-        x265_log(NULL, X265_LOG_WARNING, "falling back to default bit-depth\n");
189
-        api = x265_api_get(0);
190
-    }
191
+        int argc = *argc_ptr;
192
+        int offset = (argc + 1) * sizeof(char*);
193
+        int size = offset;
194
 
195
-    if (bShowHelp)
196
-    {
197
-        printVersion(globalParam, api);
198
-        showHelp(globalParam);
199
-    }
200
+        for (int i = 0; i < argc; i++)
201
x265_3.3.tar.gz/source/x265.h -> x265_3.4.tar.gz/source/x265.h Changed
60
 
1
@@ -134,6 +134,7 @@
2
     int     ctuDistortionRefine;
3
     int     rightOffset;
4
     int     bottomOffset;
5
+    int     frameDuplication;
6
 }x265_analysis_validate;
7
 
8
 /* Stores intra analysis data for a single frame. This struct needs better packing */
9
@@ -304,6 +305,7 @@
10
     double           totalFrameTime;
11
     double           vmafFrameScore;
12
     double           bufferFillFinal;
13
+    double           unclippedBufferFillFinal;
14
 } x265_frame_stats;
15
 
16
 typedef struct x265_ctu_info_t
17
@@ -1255,9 +1257,9 @@
18
      * skip blocks. Default is disabled */
19
     int       bEnableEarlySkip;
20
 
21
-    /* Enable early CU size decisions to avoid recursing to higher depths. 
22
+    /* Enable early CU size decisions to avoid recursing to higher depths.
23
      * Default is enabled */
24
-    int bEnableRecursionSkip;
25
+    int       recursionSkipMode;
26
 
27
     /* Use a faster search method to find the best intra mode. Default is 0 */
28
     int       bEnableFastIntra;
29
@@ -1857,7 +1859,7 @@
30
     double    edgeTransitionThreshold;
31
 
32
     /* Enables histogram based scenecut detection algorithm to detect scenecuts. Default disabled */
33
-    int      bHistBasedSceneCut;
34
+    int       bHistBasedSceneCut;
35
 
36
     /* Enable HME search ranges for L0, L1 and L2 respectively. */
37
     int       hmeRange[3];
38
@@ -1874,7 +1876,7 @@
39
     * analysis information stored in analysis-save. Higher the refine level higher
40
     * the information stored. Default is 5 */
41
     int       analysisSaveReuseLevel;
42
-    
43
+
44
     /* A value between 1 and 10 (both inclusive) determines the level of
45
     * analysis information reused in analysis-load. Higher the refine level higher
46
     * the information reused. Default is 5 */
47
@@ -1901,6 +1903,12 @@
48
     * info is available from the corresponding analysis-save. */
49
 
50
     int      confWinBottomOffset;
51
+
52
+    /* Edge variance threshold for quad tree establishment. */
53
+    float    edgeVarThreshold;
54
+
55
+    /* Maxrate that could be signaled to the decoder. Default 0. API only. */
56
+    int      decoderVbvMaxRate;
57
 } x265_param;
58
 
59
 /* x265_param_alloc:
60
x265_3.4.tar.gz/source/x265cli.cpp Added
201
 
1
@@ -0,0 +1,1062 @@
2
+/*****************************************************************************
3
+ * Copyright (C) 2013-2020 MulticoreWare, Inc
4
+ *
5
+ * Authors: Steve Borho <steve@borho.org>
6
+ *          Min Chen <chenm003@163.com>
7
+ *
8
+ * This program is free software; you can redistribute it and/or modify
9
+ * it under the terms of the GNU General Public License as published by
10
+ * the Free Software Foundation; either version 2 of the License, or
11
+ * (at your option) any later version.
12
+ *
13
+ * This program is distributed in the hope that it will be useful,
14
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
15
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16
+ * GNU General Public License for more details.
17
+ *
18
+ * You should have received a copy of the GNU General Public License
19
+ * along with this program; if not, write to the Free Software
20
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
21
+ *
22
+ * This program is also available under a commercial proprietary license.
23
+ * For more information, contact us at license @ x265.com.
24
+ *****************************************************************************/
25
+#if _MSC_VER
26
+#pragma warning(disable: 4127) // conditional expression is constant, yes I know
27
+#endif
28
+
29
+#include "x265cli.h"
30
+#include "svt.h"
31
+
32
+#define START_CODE 0x00000001
33
+#define START_CODE_BYTES 4
34
+
35
+#ifdef __cplusplus
36
+namespace X265_NS {
37
+#endif
38
+
39
+    static void printVersion(x265_param *param, const x265_api* api)
40
+    {
41
+        x265_log(param, X265_LOG_INFO, "HEVC encoder version %s\n", api->version_str);
42
+        x265_log(param, X265_LOG_INFO, "build info %s\n", api->build_info_str);
43
+    }
44
+
45
+    static void showHelp(x265_param *param)
46
+    {
47
+        int level = param->logLevel;
48
+
49
+#define OPT(value) (value ? "enabled" : "disabled")
50
+#define H0 printf
51
+#define H1 if (level >= X265_LOG_DEBUG) printf
52
+
53
+        H0("\nSyntax: x265 [options] infile [-o] outfile\n");
54
+        H0("    infile can be YUV or Y4M\n");
55
+        H0("    outfile is raw HEVC bitstream\n");
56
+        H0("\nExecutable Options:\n");
57
+        H0("-h/--help                        Show this help text and exit\n");
58
+        H0("   --fullhelp                    Show all options and exit\n");
59
+        H0("-V/--version                     Show version info and exit\n");
60
+        H0("\nOutput Options:\n");
61
+        H0("-o/--output <filename>           Bitstream output file name\n");
62
+        H0("-D/--output-depth 8|10|12        Output bit depth (also internal bit depth). Default %d\n", param->internalBitDepth);
63
+        H0("   --log-level <string>          Logging level: none error warning info debug full. Default %s\n", X265_NS::logLevelNames[param->logLevel + 1]);
64
+        H0("   --no-progress                 Disable CLI progress reports\n");
65
+        H0("   --csv <filename>              Comma separated log file, if csv-log-level > 0 frame level statistics, else one line per run\n");
66
+        H0("   --csv-log-level <integer>     Level of csv logging, if csv-log-level > 0 frame level statistics, else one line per run: 0-2\n");
67
+        H0("\nInput Options:\n");
68
+        H0("   --input <filename>            Raw YUV or Y4M input file name. `-` for stdin\n");
69
+        H1("   --y4m                         Force parsing of input stream as YUV4MPEG2 regardless of file extension\n");
70
+        H0("   --fps <float|rational>        Source frame rate (float or num/denom), auto-detected if Y4M\n");
71
+        H0("   --input-res WxH               Source picture size [w x h], auto-detected if Y4M\n");
72
+        H1("   --input-depth <integer>       Bit-depth of input file. Default 8\n");
73
+        H1("   --input-csp <string>          Chroma subsampling, auto-detected if Y4M\n");
74
+        H1("                                 0 - i400 (4:0:0 monochrome)\n");
75
+        H1("                                 1 - i420 (4:2:0 default)\n");
76
+        H1("                                 2 - i422 (4:2:2)\n");
77
+        H1("                                 3 - i444 (4:4:4)\n");
78
+#if ENABLE_HDR10_PLUS
79
+        H0("   --dhdr10-info <filename>      JSON file containing the Creative Intent Metadata to be encoded as Dynamic Tone Mapping\n");
80
+        H0("   --[no-]dhdr10-opt             Insert tone mapping SEI only for IDR frames and when the tone mapping information changes. Default disabled\n");
81
+#endif
82
+        H0("   --dolby-vision-profile <float|integer> Specifies Dolby Vision profile ID. Currently only profile 5, profile 8.1 and profile 8.2 enabled. Specified as '5' or '50'. Default 0 (disabled).\n");
83
+        H0("   --dolby-vision-rpu <filename> File containing Dolby Vision RPU metadata.\n"
84
+            "                                 If given, x265's Dolby Vision metadata parser will fill the RPU field of input pictures with the metadata read from the file. Default NULL(disabled).\n");
85
+        H0("   --nalu-file <filename>        Text file containing SEI messages in the following format : <POC><space><PREFIX><space><NAL UNIT TYPE>/<SEI TYPE><space><SEI Payload>\n");
86
+        H0("-f/--frames <integer>            Maximum number of frames to encode. Default all\n");
87
+        H0("   --seek <integer>              First frame to encode\n");
88
+        H1("   --[no-]interlace <bff|tff>    Indicate input pictures are interlace fields in temporal order. Default progressive\n");
89
+        H0("   --[no-]field                  Enable or disable field coding. Default %s\n", OPT(param->bField));
90
+        H1("   --dither                      Enable dither if downscaling to 8 bit pixels. Default disabled\n");
91
+        H0("   --[no-]copy-pic               Copy buffers of input picture in frame. Default %s\n", OPT(param->bCopyPicToFrame));
92
+        H0("\nQuality reporting metrics:\n");
93
+        H0("   --[no-]ssim                   Enable reporting SSIM metric scores. Default %s\n", OPT(param->bEnableSsim));
94
+        H0("   --[no-]psnr                   Enable reporting PSNR metric scores. Default %s\n", OPT(param->bEnablePsnr));
95
+        H0("\nProfile, Level, Tier:\n");
96
+        H0("-P/--profile <string>            Enforce an encode profile: main, main10, mainstillpicture\n");
97
+        H0("   --level-idc <integer|float>   Force a minimum required decoder level (as '5.0' or '50')\n");
98
+        H0("   --[no-]high-tier              If a decoder level is specified, this modifier selects High tier of that level\n");
99
+        H0("   --uhd-bd                      Enable UHD Bluray compatibility support\n");
100
+        H0("   --[no-]allow-non-conformance  Allow the encoder to generate profile NONE bitstreams. Default %s\n", OPT(param->bAllowNonConformance));
101
+        H0("\nThreading, performance:\n");
102
+        H0("   --pools <integer,...>         Comma separated thread count per thread pool (pool per NUMA node)\n");
103
+        H0("                                 '-' implies no threads on node, '+' implies one thread per core on node\n");
104
+        H0("-F/--frame-threads <integer>     Number of concurrently encoded frames. 0: auto-determined by core count\n");
105
+        H0("   --[no-]wpp                    Enable Wavefront Parallel Processing. Default %s\n", OPT(param->bEnableWavefront));
106
+        H0("   --[no-]slices <integer>       Enable Multiple Slices feature. Default %d\n", param->maxSlices);
107
+        H0("   --[no-]pmode                  Parallel mode analysis. Default %s\n", OPT(param->bDistributeModeAnalysis));
108
+        H0("   --[no-]pme                    Parallel motion estimation. Default %s\n", OPT(param->bDistributeMotionEstimation));
109
+        H0("   --[no-]asm <bool|int|string>  Override CPU detection. Default: auto\n");
110
+        H0("\nPresets:\n");
111
+        H0("-p/--preset <string>             Trade off performance for compression efficiency. Default medium\n");
112
+        H0("                                 ultrafast, superfast, veryfast, faster, fast, medium, slow, slower, veryslow, or placebo\n");
113
+        H0("-t/--tune <string>               Tune the settings for a particular type of source or situation:\n");
114
+        H0("                                 psnr, ssim, grain, zerolatency, fastdecode\n");
115
+        H0("\nQuad-Tree size and depth:\n");
116
+        H0("-s/--ctu <64|32|16>              Maximum CU size (WxH). Default %d\n", param->maxCUSize);
117
+        H0("   --min-cu-size <64|32|16|8>    Minimum CU size (WxH). Default %d\n", param->minCUSize);
118
+        H0("   --max-tu-size <32|16|8|4>     Maximum TU size (WxH). Default %d\n", param->maxTUSize);
119
+        H0("   --tu-intra-depth <integer>    Max TU recursive depth for intra CUs. Default %d\n", param->tuQTMaxIntraDepth);
120
+        H0("   --tu-inter-depth <integer>    Max TU recursive depth for inter CUs. Default %d\n", param->tuQTMaxInterDepth);
121
+        H0("   --limit-tu <0..4>             Enable early exit from TU recursion for inter coded blocks. Default %d\n", param->limitTU);
122
+        H0("\nAnalysis:\n");
123
+        H0("   --rd <1..6>                   Level of RDO in mode decision 1:least....6:full RDO. Default %d\n", param->rdLevel);
124
+        H0("   --[no-]psy-rd <0..5.0>        Strength of psycho-visual rate distortion optimization, 0 to disable. Default %.1f\n", param->psyRd);
125
+        H0("   --[no-]rdoq-level <0|1|2>     Level of RDO in quantization 0:none, 1:levels, 2:levels & coding groups. Default %d\n", param->rdoqLevel);
126
+        H0("   --[no-]psy-rdoq <0..50.0>     Strength of psycho-visual optimization in RDO quantization, 0 to disable. Default %.1f\n", param->psyRdoq);
127
+        H0("   --dynamic-rd <0..4.0>         Strength of dynamic RD, 0 to disable. Default %.2f\n", param->dynamicRd);
128
+        H0("   --[no-]ssim-rd                Enable ssim rate distortion optimization, 0 to disable. Default %s\n", OPT(param->bSsimRd));
129
+        H0("   --[no-]rd-refine              Enable QP based RD refinement for rd levels 5 and 6. Default %s\n", OPT(param->bEnableRdRefine));
130
+        H0("   --[no-]early-skip             Enable early SKIP detection. Default %s\n", OPT(param->bEnableEarlySkip));
131
+        H0("   --rskip <mode>                Set mode for early exit from recursion. Mode 1: exit using rdcost & CU homogenity. Mode 2: exit using CU edge density.\n"
132
+            "                                 Mode 0: disabled. Default %d\n", param->recursionSkipMode);
133
+        H1("   --rskip-edge-threshold        Threshold in terms of percentage (integer of range [0,100]) for minimum edge density in CUs used to prun the recursion depth. Applicable only for rskip mode 2. Value is preset dependent. Default: %.f\n", param->edgeVarThreshold*100.0f);
134
+        H1("   --[no-]tskip-fast             Enable fast intra transform skipping. Default %s\n", OPT(param->bEnableTSkipFast));
135
+        H1("   --[no-]splitrd-skip           Enable skipping split RD analysis when sum of split CU rdCost larger than one split CU rdCost for Intra CU. Default %s\n", OPT(param->bEnableSplitRdSkip));
136
+        H1("   --nr-intra <integer>          An integer value in range of 0 to 2000, which denotes strength of noise reduction in intra CUs. Default 0\n");
137
+        H1("   --nr-inter <integer>          An integer value in range of 0 to 2000, which denotes strength of noise reduction in inter CUs. Default 0\n");
138
+        H0("   --ctu-info <integer>          Enable receiving ctu information asynchronously and determine reaction to the CTU information (0, 1, 2, 4, 6) Default 0\n"
139
+            "                                    - 1: force the partitions if CTU information is present\n"
140
+            "                                    - 2: functionality of (1) and reduce qp if CTU information has changed\n"
141
+            "                                    - 4: functionality of (1) and force Inter modes when CTU Information has changed, merge/skip otherwise\n"
142
+            "                                    Enable this option only when planning to invoke the API function x265_encoder_ctu_info to copy ctu-info asynchronously\n");
143
+        H0("\nCoding tools:\n");
144
+        H0("-w/--[no-]weightp                Enable weighted prediction in P slices. Default %s\n", OPT(param->bEnableWeightedPred));
145
+        H0("   --[no-]weightb                Enable weighted prediction in B slices. Default %s\n", OPT(param->bEnableWeightedBiPred));
146
+        H0("   --[no-]cu-lossless            Consider lossless mode in CU RDO decisions. Default %s\n", OPT(param->bCULossless));
147
+        H0("   --[no-]signhide               Hide sign bit of one coeff per TU (rdo). Default %s\n", OPT(param->bEnableSignHiding));
148
+        H1("   --[no-]tskip                  Enable intra 4x4 transform skipping. Default %s\n", OPT(param->bEnableTransformSkip));
149
+        H0("\nTemporal / motion search options:\n");
150
+        H0("   --max-merge <1..5>            Maximum number of merge candidates. Default %d\n", param->maxNumMergeCand);
151
+        H0("   --ref <integer>               max number of L0 references to be allowed (1 .. 16) Default %d\n", param->maxNumReferences);
152
+        H0("   --limit-refs <0|1|2|3>        Limit references per depth (1) or CU (2) or both (3). Default %d\n", param->limitReferences);
153
+        H0("   --me <string>                 Motion search method dia hex umh star full. Default %d\n", param->searchMethod);
154
+        H0("-m/--subme <integer>             Amount of subpel refinement to perform (0:least .. 7:most). Default %d \n", param->subpelRefine);
155
+        H0("   --merange <integer>           Motion search range. Default %d\n", param->searchRange);
156
+        H0("   --[no-]rect                   Enable rectangular motion partitions Nx2N and 2NxN. Default %s\n", OPT(param->bEnableRectInter));
157
+        H0("   --[no-]amp                    Enable asymmetric motion partitions, requires --rect. Default %s\n", OPT(param->bEnableAMP));
158
+        H0("   --[no-]limit-modes            Limit rectangular and asymmetric motion predictions. Default %d\n", param->limitModes);
159
+        H1("   --[no-]temporal-mvp           Enable temporal MV predictors. Default %s\n", OPT(param->bEnableTemporalMvp));
160
+        H1("   --[no-]hme                    Enable Hierarchical Motion Estimation. Default %s\n", OPT(param->bEnableHME));
161
+        H1("   --hme-search <string>         Motion search-method for HME L0,L1 and L2. Default(L0,L1,L2) is %d,%d,%d\n", param->hmeSearchMethod[0], param->hmeSearchMethod[1], param->hmeSearchMethod[2]);
162
+        H1("   --hme-range <int>,<int>,<int> Motion search-range for HME L0,L1 and L2. Default(L0,L1,L2) is %d,%d,%d\n", param->hmeRange[0], param->hmeRange[1], param->hmeRange[2]);
163
+        H0("\nSpatial / intra options:\n");
164
+        H0("   --[no-]strong-intra-smoothing Enable strong intra smoothing for 32x32 blocks. Default %s\n", OPT(param->bEnableStrongIntraSmoothing));
165
+        H0("   --[no-]constrained-intra      Constrained intra prediction (use only intra coded reference pixels) Default %s\n", OPT(param->bEnableConstrainedIntra));
166
+        H0("   --[no-]b-intra                Enable intra in B frames in veryslow presets. Default %s\n", OPT(param->bIntraInBFrames));
167
+        H0("   --[no-]fast-intra             Enable faster search method for angular intra predictions. Default %s\n", OPT(param->bEnableFastIntra));
168
+        H0("   --rdpenalty <0..2>            penalty for 32x32 intra TU in non-I slices. 0:disabled 1:RD-penalty 2:maximum. Default %d\n", param->rdPenalty);
169
+        H0("\nSlice decision options:\n");
170
+        H0("   --[no-]open-gop               Enable open-GOP, allows I slices to be non-IDR. Default %s\n", OPT(param->bOpenGOP));
171
+        H0("-I/--keyint <integer>            Max IDR period in frames. -1 for infinite-gop. Default %d\n", param->keyframeMax);
172
+        H0("-i/--min-keyint <integer>        Scenecuts closer together than this are coded as I, not IDR. Default: auto\n");
173
+        H0("   --gop-lookahead <integer>     Extends gop boundary if a scenecut is found within this from keyint boundary. Default 0\n");
174
+        H0("   --no-scenecut                 Disable adaptive I-frame decision\n");
175
+        H0("   --scenecut <integer>          How aggressively to insert extra I-frames. Default %d\n", param->scenecutThreshold);
176
+        H1("   --scenecut-bias <0..100.0>    Bias for scenecut detection. Default %.2f\n", param->scenecutBias);
177
+        H0("   --hist-scenecut               Enables histogram based scene-cut detection using histogram based algorithm.\n");
178
+        H0("   --no-hist-scenecut            Disables histogram based scene-cut detection using histogram based algorithm.\n");
179
+        H1("   --hist-threshold <0.0..2.0>   Luma Edge histogram's Normalized SAD threshold for histogram based scenecut detection Default %.2f\n", param->edgeTransitionThreshold);
180
+        H0("   --[no-]fades                  Enable detection and handling of fade-in regions. Default %s\n", OPT(param->bEnableFades));
181
+        H1("   --[no-]scenecut-aware-qp      Enable increasing QP for frames inside the scenecut window after scenecut. Default %s\n", OPT(param->bEnableSceneCutAwareQp));
182
+        H1("   --scenecut-window <0..1000>   QP incremental duration(in milliseconds) when scenecut-aware-qp is enabled. Default %d\n", param->scenecutWindow);
183
+        H1("   --max-qp-delta <0..10>        QP offset to increment with base QP for inter-frames. Default %d\n", param->maxQpDelta);
184
+        H0("   --radl <integer>              Number of RADL pictures allowed in front of IDR. Default %d\n", param->radl);
185
+        H0("   --intra-refresh               Use Periodic Intra Refresh instead of IDR frames\n");
186
+        H0("   --rc-lookahead <integer>      Number of frames for frame-type lookahead (determines encoder latency) Default %d\n", param->lookaheadDepth);
187
+        H1("   --lookahead-slices <0..16>    Number of slices to use per lookahead cost estimate. Default %d\n", param->lookaheadSlices);
188
+        H0("   --lookahead-threads <integer> Number of threads to be dedicated to perform lookahead only. Default %d\n", param->lookaheadThreads);
189
+        H0("-b/--bframes <0..16>             Maximum number of consecutive b-frames. Default %d\n", param->bframes);
190
+        H1("   --bframe-bias <integer>       Bias towards B frame decisions. Default %d\n", param->bFrameBias);
191
+        H0("   --b-adapt <0..2>              0 - none, 1 - fast, 2 - full (trellis) adaptive B frame scheduling. Default %d\n", param->bFrameAdaptive);
192
+        H0("   --[no-]b-pyramid              Use B-frames as references. Default %s\n", OPT(param->bBPyramid));
193
+        H1("   --qpfile <string>             Force frametypes and QPs for some or all frames\n");
194
+        H1("                                 Format of each line: framenumber frametype QP\n");
195
+        H1("                                 QP is optional (none lets x265 choose). Frametypes: I,i,K,P,B,b.\n");
196
+        H1("                                 QPs are restricted by qpmin/qpmax.\n");
197
+        H1("   --force-flush <integer>       Force the encoder to flush frames. Default %d\n", param->forceFlush);
198
+        H1("                                 0 - flush the encoder only when all the input pictures are over.\n");
199
+        H1("                                 1 - flush all the frames even when the input is not over. Slicetype decision may change with this option.\n");
200
+        H1("                                 2 - flush the slicetype decided frames only.\n");
201
x265_3.3.tar.gz/source/x265cli.h -> x265_3.4.tar.gz/source/x265cli.h Changed
201
 
1
@@ -27,9 +27,23 @@
2
 
3
 #include "common.h"
4
 #include "param.h"
5
+#include "input/input.h"
6
+#include "output/output.h"
7
+#include "output/reconplay.h"
8
 
9
 #include <getopt.h>
10
 
11
+#define CONSOLE_TITLE_SIZE 200
12
+#ifdef _WIN32
13
+#include <windows.h>
14
+#define SetThreadExecutionState(es)
15
+static char orgConsoleTitle[CONSOLE_TITLE_SIZE] = "";
16
+#else
17
+#define GetConsoleTitle(t, n)
18
+#define SetConsoleTitle(t)
19
+#define SetThreadExecutionState(es)
20
+#endif
21
+
22
 #ifdef __cplusplus
23
 namespace X265_NS {
24
 #endif
25
@@ -105,8 +119,8 @@
26
     { "amp",                  no_argument, NULL, 0 },
27
     { "no-early-skip",        no_argument, NULL, 0 },
28
     { "early-skip",           no_argument, NULL, 0 },
29
-    { "no-rskip",             no_argument, NULL, 0 },
30
-    { "rskip",                no_argument, NULL, 0 },
31
+    { "rskip",                required_argument, NULL, 0 },
32
+    { "rskip-edge-threshold", required_argument, NULL, 0 },
33
     { "no-fast-cbf",          no_argument, NULL, 0 },
34
     { "fast-cbf",             no_argument, NULL, 0 },
35
     { "no-tskip",             no_argument, NULL, 0 },
36
@@ -358,6 +372,7 @@
37
     { "cll", no_argument, NULL, 0 },
38
     { "no-cll", no_argument, NULL, 0 },
39
     { "hme-range", required_argument, NULL, 0 },
40
+    { "abr-ladder", required_argument, NULL, 0 },
41
     { 0, 0, 0, 0 },
42
     { 0, 0, 0, 0 },
43
     { 0, 0, 0, 0 },
44
@@ -365,336 +380,82 @@
45
     { 0, 0, 0, 0 }
46
 };
47
 
48
-static void printVersion(x265_param *param, const x265_api* api)
49
-{
50
-    x265_log(param, X265_LOG_INFO, "HEVC encoder version %s\n", api->version_str);
51
-    x265_log(param, X265_LOG_INFO, "build info %s\n", api->build_info_str);
52
-}
53
+    struct CLIOptions
54
+    {
55
+        InputFile* input;
56
+        ReconFile* recon;
57
+        OutputFile* output;
58
+        FILE*       qpfile;
59
+        FILE*       zoneFile;
60
+        FILE*    dolbyVisionRpu;    /* File containing Dolby Vision BL RPU metadata */
61
+        const char* reconPlayCmd;
62
+        const x265_api* api;
63
+        x265_param* param;
64
+        x265_vmaf_data* vmafData;
65
+        bool bProgress;
66
+        bool bForceY4m;
67
+        bool bDither;
68
+        uint32_t seek;              // number of frames to skip from the beginning
69
+        uint32_t framesToBeEncoded; // number of frames to encode
70
+        uint64_t totalbytes;
71
+        int64_t startTime;
72
+        int64_t prevUpdateTime;
73
 
74
-static void showHelp(x265_param *param)
75
-{
76
-    int level = param->logLevel;
77
+        int argCnt;
78
+        char** argString;
79
 
80
-#define OPT(value) (value ? "enabled" : "disabled")
81
-#define H0 printf
82
-#define H1 if (level >= X265_LOG_DEBUG) printf
83
+        /* ABR ladder settings */
84
+        bool isAbrLadderConfig;
85
+        bool enableScaler;
86
+        char*    encName;
87
+        char*    reuseName;
88
+        uint32_t encId;
89
+        int      refId;
90
+        uint32_t loadLevel;
91
+        uint32_t saveLevel;
92
+        uint32_t numRefs;
93
 
94
-    H0("\nSyntax: x265 [options] infile [-o] outfile\n");
95
-    H0("    infile can be YUV or Y4M\n");
96
-    H0("    outfile is raw HEVC bitstream\n");
97
-    H0("\nExecutable Options:\n");
98
-    H0("-h/--help                        Show this help text and exit\n");
99
-    H0("   --fullhelp                    Show all options and exit\n");
100
-    H0("-V/--version                     Show version info and exit\n");
101
-    H0("\nOutput Options:\n");
102
-    H0("-o/--output <filename>           Bitstream output file name\n");
103
-    H0("-D/--output-depth 8|10|12        Output bit depth (also internal bit depth). Default %d\n", param->internalBitDepth);
104
-    H0("   --log-level <string>          Logging level: none error warning info debug full. Default %s\n", X265_NS::logLevelNames[param->logLevel + 1]);
105
-    H0("   --no-progress                 Disable CLI progress reports\n");
106
-    H0("   --csv <filename>              Comma separated log file, if csv-log-level > 0 frame level statistics, else one line per run\n");
107
-    H0("   --csv-log-level <integer>     Level of csv logging, if csv-log-level > 0 frame level statistics, else one line per run: 0-2\n");
108
-    H0("\nInput Options:\n");
109
-    H0("   --input <filename>            Raw YUV or Y4M input file name. `-` for stdin\n");
110
-    H1("   --y4m                         Force parsing of input stream as YUV4MPEG2 regardless of file extension\n");
111
-    H0("   --fps <float|rational>        Source frame rate (float or num/denom), auto-detected if Y4M\n");
112
-    H0("   --input-res WxH               Source picture size [w x h], auto-detected if Y4M\n");
113
-    H1("   --input-depth <integer>       Bit-depth of input file. Default 8\n");
114
-    H1("   --input-csp <string>          Chroma subsampling, auto-detected if Y4M\n");
115
-    H1("                                 0 - i400 (4:0:0 monochrome)\n");
116
-    H1("                                 1 - i420 (4:2:0 default)\n");
117
-    H1("                                 2 - i422 (4:2:2)\n");
118
-    H1("                                 3 - i444 (4:4:4)\n");
119
-#if ENABLE_HDR10_PLUS
120
-    H0("   --dhdr10-info <filename>      JSON file containing the Creative Intent Metadata to be encoded as Dynamic Tone Mapping\n");
121
-    H0("   --[no-]dhdr10-opt             Insert tone mapping SEI only for IDR frames and when the tone mapping information changes. Default disabled\n");
122
-#endif
123
-    H0("   --dolby-vision-profile <float|integer> Specifies Dolby Vision profile ID. Currently only profile 5, profile 8.1 and profile 8.2 enabled. Specified as '5' or '50'. Default 0 (disabled).\n");
124
-    H0("   --dolby-vision-rpu <filename> File containing Dolby Vision RPU metadata.\n"
125
-       "                                 If given, x265's Dolby Vision metadata parser will fill the RPU field of input pictures with the metadata read from the file. Default NULL(disabled).\n");
126
-    H0("   --nalu-file <filename>        Text file containing SEI messages in the following format : <POC><space><PREFIX><space><NAL UNIT TYPE>/<SEI TYPE><space><SEI Payload>\n");
127
-    H0("-f/--frames <integer>            Maximum number of frames to encode. Default all\n");
128
-    H0("   --seek <integer>              First frame to encode\n");
129
-    H1("   --[no-]interlace <bff|tff>    Indicate input pictures are interlace fields in temporal order. Default progressive\n");
130
-    H0("   --[no-]field                  Enable or disable field coding. Default %s\n", OPT( param->bField));
131
-    H1("   --dither                      Enable dither if downscaling to 8 bit pixels. Default disabled\n");
132
-    H0("   --[no-]copy-pic               Copy buffers of input picture in frame. Default %s\n", OPT(param->bCopyPicToFrame));
133
-    H0("\nQuality reporting metrics:\n");
134
-    H0("   --[no-]ssim                   Enable reporting SSIM metric scores. Default %s\n", OPT(param->bEnableSsim));
135
-    H0("   --[no-]psnr                   Enable reporting PSNR metric scores. Default %s\n", OPT(param->bEnablePsnr));
136
-    H0("\nProfile, Level, Tier:\n");
137
-    H0("-P/--profile <string>            Enforce an encode profile: main, main10, mainstillpicture\n");
138
-    H0("   --level-idc <integer|float>   Force a minimum required decoder level (as '5.0' or '50')\n");
139
-    H0("   --[no-]high-tier              If a decoder level is specified, this modifier selects High tier of that level\n");
140
-    H0("   --uhd-bd                      Enable UHD Bluray compatibility support\n");
141
-    H0("   --[no-]allow-non-conformance  Allow the encoder to generate profile NONE bitstreams. Default %s\n", OPT(param->bAllowNonConformance));
142
-    H0("\nThreading, performance:\n");
143
-    H0("   --pools <integer,...>         Comma separated thread count per thread pool (pool per NUMA node)\n");
144
-    H0("                                 '-' implies no threads on node, '+' implies one thread per core on node\n");
145
-    H0("-F/--frame-threads <integer>     Number of concurrently encoded frames. 0: auto-determined by core count\n");
146
-    H0("   --[no-]wpp                    Enable Wavefront Parallel Processing. Default %s\n", OPT(param->bEnableWavefront));
147
-    H0("   --[no-]slices <integer>       Enable Multiple Slices feature. Default %d\n", param->maxSlices);
148
-    H0("   --[no-]pmode                  Parallel mode analysis. Default %s\n", OPT(param->bDistributeModeAnalysis));
149
-    H0("   --[no-]pme                    Parallel motion estimation. Default %s\n", OPT(param->bDistributeMotionEstimation));
150
-    H0("   --[no-]asm <bool|int|string>  Override CPU detection. Default: auto\n");
151
-    H0("\nPresets:\n");
152
-    H0("-p/--preset <string>             Trade off performance for compression efficiency. Default medium\n");
153
-    H0("                                 ultrafast, superfast, veryfast, faster, fast, medium, slow, slower, veryslow, or placebo\n");
154
-    H0("-t/--tune <string>               Tune the settings for a particular type of source or situation:\n");
155
-    H0("                                 psnr, ssim, grain, zerolatency, fastdecode\n");
156
-    H0("\nQuad-Tree size and depth:\n");
157
-    H0("-s/--ctu <64|32|16>              Maximum CU size (WxH). Default %d\n", param->maxCUSize);
158
-    H0("   --min-cu-size <64|32|16|8>    Minimum CU size (WxH). Default %d\n", param->minCUSize);
159
-    H0("   --max-tu-size <32|16|8|4>     Maximum TU size (WxH). Default %d\n", param->maxTUSize);
160
-    H0("   --tu-intra-depth <integer>    Max TU recursive depth for intra CUs. Default %d\n", param->tuQTMaxIntraDepth);
161
-    H0("   --tu-inter-depth <integer>    Max TU recursive depth for inter CUs. Default %d\n", param->tuQTMaxInterDepth);
162
-    H0("   --limit-tu <0..4>             Enable early exit from TU recursion for inter coded blocks. Default %d\n", param->limitTU);
163
-    H0("\nAnalysis:\n");
164
-    H0("   --rd <1..6>                   Level of RDO in mode decision 1:least....6:full RDO. Default %d\n", param->rdLevel);
165
-    H0("   --[no-]psy-rd <0..5.0>        Strength of psycho-visual rate distortion optimization, 0 to disable. Default %.1f\n", param->psyRd);
166
-    H0("   --[no-]rdoq-level <0|1|2>     Level of RDO in quantization 0:none, 1:levels, 2:levels & coding groups. Default %d\n", param->rdoqLevel);
167
-    H0("   --[no-]psy-rdoq <0..50.0>     Strength of psycho-visual optimization in RDO quantization, 0 to disable. Default %.1f\n", param->psyRdoq);
168
-    H0("   --dynamic-rd <0..4.0>         Strength of dynamic RD, 0 to disable. Default %.2f\n", param->dynamicRd);
169
-    H0("   --[no-]ssim-rd                Enable ssim rate distortion optimization, 0 to disable. Default %s\n", OPT(param->bSsimRd));
170
-    H0("   --[no-]rd-refine              Enable QP based RD refinement for rd levels 5 and 6. Default %s\n", OPT(param->bEnableRdRefine));
171
-    H0("   --[no-]early-skip             Enable early SKIP detection. Default %s\n", OPT(param->bEnableEarlySkip));
172
-    H0("   --[no-]rskip                  Enable early exit from recursion. Default %s\n", OPT(param->bEnableRecursionSkip));
173
-    H1("   --[no-]tskip-fast             Enable fast intra transform skipping. Default %s\n", OPT(param->bEnableTSkipFast));
174
-    H1("   --[no-]splitrd-skip           Enable skipping split RD analysis when sum of split CU rdCost larger than one split CU rdCost for Intra CU. Default %s\n", OPT(param->bEnableSplitRdSkip));
175
-    H1("   --nr-intra <integer>          An integer value in range of 0 to 2000, which denotes strength of noise reduction in intra CUs. Default 0\n");
176
-    H1("   --nr-inter <integer>          An integer value in range of 0 to 2000, which denotes strength of noise reduction in inter CUs. Default 0\n");
177
-    H0("   --ctu-info <integer>          Enable receiving ctu information asynchronously and determine reaction to the CTU information (0, 1, 2, 4, 6) Default 0\n"
178
-       "                                    - 1: force the partitions if CTU information is present\n"
179
-       "                                    - 2: functionality of (1) and reduce qp if CTU information has changed\n"
180
-       "                                    - 4: functionality of (1) and force Inter modes when CTU Information has changed, merge/skip otherwise\n"
181
-       "                                    Enable this option only when planning to invoke the API function x265_encoder_ctu_info to copy ctu-info asynchronously\n");
182
-    H0("\nCoding tools:\n");
183
-    H0("-w/--[no-]weightp                Enable weighted prediction in P slices. Default %s\n", OPT(param->bEnableWeightedPred));
184
-    H0("   --[no-]weightb                Enable weighted prediction in B slices. Default %s\n", OPT(param->bEnableWeightedBiPred));
185
-    H0("   --[no-]cu-lossless            Consider lossless mode in CU RDO decisions. Default %s\n", OPT(param->bCULossless));
186
-    H0("   --[no-]signhide               Hide sign bit of one coeff per TU (rdo). Default %s\n", OPT(param->bEnableSignHiding));
187
-    H1("   --[no-]tskip                  Enable intra 4x4 transform skipping. Default %s\n", OPT(param->bEnableTransformSkip));
188
-    H0("\nTemporal / motion search options:\n");
189
-    H0("   --max-merge <1..5>            Maximum number of merge candidates. Default %d\n", param->maxNumMergeCand);
190
-    H0("   --ref <integer>               max number of L0 references to be allowed (1 .. 16) Default %d\n", param->maxNumReferences);
191
-    H0("   --limit-refs <0|1|2|3>        Limit references per depth (1) or CU (2) or both (3). Default %d\n", param->limitReferences);
192
-    H0("   --me <string>                 Motion search method dia hex umh star full. Default %d\n", param->searchMethod);
193
-    H0("-m/--subme <integer>             Amount of subpel refinement to perform (0:least .. 7:most). Default %d \n", param->subpelRefine);
194
-    H0("   --merange <integer>           Motion search range. Default %d\n", param->searchRange);
195
-    H0("   --[no-]rect                   Enable rectangular motion partitions Nx2N and 2NxN. Default %s\n", OPT(param->bEnableRectInter));
196
-    H0("   --[no-]amp                    Enable asymmetric motion partitions, requires --rect. Default %s\n", OPT(param->bEnableAMP));
197
-    H0("   --[no-]limit-modes            Limit rectangular and asymmetric motion predictions. Default %d\n", param->limitModes);
198
-    H1("   --[no-]temporal-mvp           Enable temporal MV predictors. Default %s\n", OPT(param->bEnableTemporalMvp));
199
-    H1("   --[no-]hme                    Enable Hierarchical Motion Estimation. Default %s\n", OPT(param->bEnableHME));
200
-    H1("   --hme-search <string>         Motion search-method for HME L0,L1 and L2. Default(L0,L1,L2) is %d,%d,%d\n", param->hmeSearchMethod[0], param->hmeSearchMethod[1], param->hmeSearchMethod[2]);
201