Changes of Revision 14

x265.changes Changed
x
 
1
@@ -1,4 +1,25 @@
2
 -------------------------------------------------------------------
3
+Thu Sep 29 12:26:59 UTC 2016 - idonmez@suse.com
4
+
5
+- Update to version 2.1
6
+  Encoder enhancements
7
+  * Support for qg-size of 8
8
+  * Support for inserting non-IDR I-frames at scenecuts and when
9
+    running with settings for fixed-GOP (min-keyint = max-keyint)
10
+  * Experimental support for slice-parallelism.
11
+  API changes
12
+  * Encode user-defined SEI messages passed in through x265_picture
13
+    object.
14
+  * Disable SEI and VUI messages from the bitstream
15
+  * Specify qpmin and qpmax
16
+  * Control number of bits to encode POC.
17
+  Bug fixes
18
+  * QP fluctuation fix for first B-frame in mini-GOP for 2-pass
19
+    encoding with tune-grain.
20
+  * Assembly fix for crashes in 32-bit from dct_sse4.
21
+  * Threadpool creation fix in windows platform.
22
+
23
+-------------------------------------------------------------------
24
 Sun Aug 28 11:51:23 UTC 2016 - joerg.lorenzen@ki.tng.de
25
 
26
 - Update to version 2.0
27
x265.spec Changed
14
 
1
@@ -1,10 +1,10 @@
2
 # based on the spec file from https://build.opensuse.org/package/view_file/home:Simmphonie/libx265/
3
 
4
 Name:           x265
5
-%define soname  87
6
+%define soname  95
7
 %define libname lib%{name}
8
 %define libsoname %{libname}-%{soname}
9
-Version:        2.0
10
+Version:        2.1
11
 Release:        0
12
 License:        GPL-2.0+
13
 Summary:        A free h265/HEVC encoder - encoder binary
14
arm.patch Changed
67
 
1
@@ -1,8 +1,8 @@
2
-Index: x265_2.0/source/CMakeLists.txt
3
+Index: x265_2.1/source/CMakeLists.txt
4
 ===================================================================
5
---- x265_2.0.orig/source/CMakeLists.txt
6
-+++ x265_2.0/source/CMakeLists.txt
7
-@@ -60,15 +60,22 @@
8
+--- x265_2.1.orig/source/CMakeLists.txt
9
++++ x265_2.1/source/CMakeLists.txt
10
+@@ -60,15 +60,22 @@ elseif(POWERMATCH GREATER "-1")
11
      message(STATUS "Detected POWER target processor")
12
      set(POWER 1)
13
      add_definitions(-DX265_ARCH_POWER=1)
14
@@ -34,8 +34,8 @@
15
  else()
16
      message(STATUS "CMAKE_SYSTEM_PROCESSOR value `${CMAKE_SYSTEM_PROCESSOR}` is unknown")
17
      message(STATUS "Please add this value near ${CMAKE_CURRENT_LIST_FILE}:${CMAKE_CURRENT_LIST_LINE}")
18
-@@ -186,18 +193,9 @@
19
-             add_definitions(-march=i686)
20
+@@ -190,18 +197,9 @@ if(GCC)
21
+             endif()
22
          endif()
23
      endif()
24
 -    if(ARM AND CROSS_COMPILE_ARM)
25
@@ -48,18 +48,17 @@
26
 -        else()
27
 -            set(ARM_ARGS -mcpu=native -mfloat-abi=hard -mfpu=vfp -marm)
28
 -        endif()
29
--    endif()
30
--    add_definitions(${ARM_ARGS})
31
 +    if(ARMV7)
32
 +        add_definitions(-fPIC)
33
-+    endif()
34
+     endif()
35
+-    add_definitions(${ARM_ARGS})
36
      if(FPROFILE_GENERATE)
37
          if(INTEL_CXX)
38
              add_definitions(-prof-gen -prof-dir="${CMAKE_CURRENT_BINARY_DIR}")
39
-Index: x265_2.0/source/common/cpu.cpp
40
+Index: x265_2.1/source/common/cpu.cpp
41
 ===================================================================
42
---- x265_2.0.orig/source/common/cpu.cpp
43
-+++ x265_2.0/source/common/cpu.cpp
44
+--- x265_2.1.orig/source/common/cpu.cpp
45
++++ x265_2.1/source/common/cpu.cpp
46
 @@ -37,7 +37,7 @@
47
  #include <machine/cpu.h>
48
  #endif
49
@@ -69,7 +68,7 @@
50
  #include <signal.h>
51
  #include <setjmp.h>
52
  static sigjmp_buf jmpbuf;
53
-@@ -340,7 +340,6 @@
54
+@@ -340,7 +340,6 @@ uint32_t cpu_detect(void)
55
      }
56
  
57
      canjump = 1;
58
@@ -77,7 +76,7 @@
59
      canjump = 0;
60
      signal(SIGILL, oldsig);
61
  #endif // if !HAVE_NEON
62
-@@ -356,7 +355,7 @@
63
+@@ -356,7 +355,7 @@ uint32_t cpu_detect(void)
64
      // which may result in incorrect detection and the counters stuck enabled.
65
      // right now Apple does not seem to support performance counters for this test
66
  #ifndef __MACH__
67
x265_2.0.tar.gz/.hg_archival.txt -> x265_2.1.tar.gz/.hg_archival.txt Changed
10
 
1
@@ -1,4 +1,6 @@
2
 repo: 09fe40627f03a0f9c3e6ac78b22ac93da23f9fdf
3
-node: 960c9991d0dcf46559c32e070418d3cbb7e8aa2f
4
+node: 3e8ce3b26319dbd53ab6369e4c4e986bf30f1315
5
 branch: stable
6
-tag: 2.0
7
+latesttag: 2.1
8
+latesttagdistance: 1
9
+changessincelatesttag: 1
10
x265_2.0.tar.gz/.hgtags -> x265_2.1.tar.gz/.hgtags Changed
7
 
1
@@ -18,3 +18,5 @@
2
 8425278def1edf0931dc33fc518e1950063e76b0 1.7
3
 e27327f5da35c5feb660360336fdc94bd0afe719 1.8
4
 1d3b6e448e01ec40b392ef78b7e55a86249fbe68 1.9
5
+960c9991d0dcf46559c32e070418d3cbb7e8aa2f 2.0
6
+981e3bfef16a997bce6f46ce1b15631a0e234747 2.1
7
x265_2.0.tar.gz/doc/reST/cli.rst -> x265_2.1.tar.gz/doc/reST/cli.rst Changed
196
 
1
@@ -59,10 +59,9 @@
2
 
3
 .. option:: --log-level <integer|string>
4
 
5
-   Logging level. Debug level enables per-frame QP, metric, and bitrate
6
-   logging. If a CSV file is being generated, frame level makes the log
7
-   be per-frame rather than per-encode. Full level enables hash and
8
-   weight logging. -1 disables all logging, except certain fatal
9
+   Controls the level of information displayed on the console. Debug level
10
+   enables per-frame QP, metric, and bitrate logging. Full level enables
11
+   hash and weight logging. -1 disables all logging, except certain fatal
12
    errors, and can be specified by the string "none".
13
 
14
    0. error
15
@@ -79,8 +78,8 @@
16
 
17
 .. option:: --csv <filename>
18
 
19
-   Writes encoding results to a comma separated value log file. Creates
20
-   the file if it doesnt already exist. If :option:`--csv-log-level` is 0, 
21
+   Write encoding statistics to a Comma Separated Values log file. Creates
22
+   the file if it doesn't already exist. If :option:`--csv-log-level` is 0, 
23
    it adds one line per run. If :option:`--csv-log-level` is greater than
24
    0, it writes one line per frame. Default none
25
 
26
@@ -128,12 +127,13 @@
27
 
28
 .. option:: --csv-log-level <integer>
29
 
30
-        CSV logging level. Default 0
31
-        0. summary
32
-        1. frame level logging
33
-        2. frame level logging with performance statistics
34
+    Controls the level of detail (and size) of --csv log files
35
+       
36
+    0. summary **(default)**
37
+    1. frame level logging
38
+    2. frame level logging with performance statistics
39
 
40
-        **CLI ONLY**
41
+    **CLI ONLY**
42
 
43
 .. option:: --ssim, --no-ssim
44
 
45
@@ -334,6 +334,17 @@
46
 
47
    **Values:** psnr, ssim, grain, zero-latency, fast-decode.
48
 
49
+.. option:: --slices <integer>
50
+
51
+   Encode each incoming frame as multiple parallel slices that may be decoded
52
+   independently. Support available only for rectangular slices that cover the
53
+   entire width of the image. 
54
+
55
+   Recommended for improving encoder performance only if frame-parallelism and
56
+   WPP are unable to maximize utilization on given hardware.
57
+
58
+   Default: 1 slice per frame. **Experimental feature**
59
+
60
 Input/Output File Options
61
 =========================
62
 
63
@@ -474,21 +485,22 @@
64
 
65
    8bit profiles::
66
 
67
-   main, main-intra, mainstillpicture (or msp for short)
68
-   main444-8 main444-intra main444-stillpicture
69
+   * main, main-intra, mainstillpicture (or msp for short)
70
+   * main444-8, main444-intra, main444-stillpicture
71
+
72
    See note below on signaling intra and stillpicture profiles.
73
    
74
    10bit profiles::
75
 
76
-   main10, main10-intra
77
-   main422-10, main422-10-intra
78
-   main444-10, main444-10-intra
79
+   * main10, main10-intra
80
+   * main422-10, main422-10-intra
81
+   * main444-10, main444-10-intra
82
 
83
    12bit profiles::
84
 
85
-   main12, main12-intra
86
-   main422-12, main422-12-intra
87
-   main444-12, main444-12-intra
88
+   * main12, main12-intra
89
+   * main422-12, main422-12-intra
90
+   * main444-12, main444-12-intra
91
 
92
 
93
    **CLI ONLY**
94
@@ -1009,6 +1021,11 @@
95
 
96
    Enable weighted prediction in B slices. Default disabled
97
 
98
+.. option:: --analyze-src-pics, --no-analyze-src-pics
99
+
100
+    Enable motion estimation with source frame pixels; in this mode,
101
+    motion estimation can be computed independently. Default disabled.
102
+
103
 Spatial/intra options
104
 =====================
105
 
106
@@ -1123,11 +1140,9 @@
107
 
108
 .. option:: --min-keyint, -i <integer>
109
 
110
-   Minimum GOP size. Scenecuts closer together than this are coded as I
111
-   or P, not IDR. Minimum keyint is clamped to be at least half of
112
-   :option:`--keyint`. If you wish to force regular keyframe intervals
113
-   and disable adaptive I frame placement, you must use
114
-   :option:`--no-scenecut`.
115
+   Minimum GOP size. Scenecuts beyond this interval are coded as IDR and start
116
+   a new keyframe, while scenecuts closer together are coded as I or P. For
117
+   fixed keyframe interval, set value to be equal to keyint.
118
 
119
    **Range of values:** >=0 (0: auto)
120
 
121
@@ -1314,20 +1329,25 @@
122
    0. disabled
123
    1. AQ enabled **(default)**
124
    2. AQ enabled with auto-variance
125
-   3. AQ enabled with auto-variance and bias to dark scenes
126
+   3. AQ enabled with auto-variance and bias to dark scenes. This is 
127
+   recommended for 8-bit encodes or low-bitrate 10-bit encodes, to 
128
+   prevent color banding/blocking. 
129
 
130
 .. option:: --aq-strength <float>
131
 
132
    Adjust the strength of the adaptive quantization offsets. Setting
133
-   :option:`--aq-strength` to 0 disables AQ. Default 1.0.
134
+   :option:`--aq-strength` to 0 disables AQ. At aq-modes 2 and 3, high 
135
+   aq-strengths will lead to high QP offsets resulting in a large 
136
+   difference in achieved bitrates. 
137
 
138
+   Default 1.0.
139
    **Range of values:** 0.0 to 3.0
140
 
141
-.. option:: --qg-size <64|32|16>
142
+.. option:: --qg-size <64|32|16|8>
143
 
144
    Enable adaptive quantization for sub-CTUs. This parameter specifies 
145
    the minimum CU size at which QP can be adjusted, ie. Quantization Group
146
-   size. Allowed range of values are 64, 32, 16 provided this falls within 
147
+   size. Allowed range of values are 64, 32, 16, 8 provided this falls within 
148
    the inclusive range [maxCUSize, minCUSize]. Experimental.
149
    Default: same as maxCUSize
150
 
151
@@ -1434,6 +1454,14 @@
152
    The maximum single adjustment in QP allowed to rate control. Default
153
    4
154
    
155
+.. option:: --qpmin <integer>
156
+
157
+   Sets a hard lower limit on the QP allowed to ratecontrol. Default 0
158
+
159
+.. option:: --qpmax <integer>
160
+
161
+   Sets a hard upper limit on the QP allowed to ratecontrol. Default 69
162
+   
163
 .. option:: --rc-grain, --no-rc-grain
164
 
165
    Enables a specialised ratecontrol algorithm for film grain content. This 
166
@@ -1722,7 +1750,7 @@
167
    Example for MaxCLL=1000 candela per square meter, MaxFALL=400
168
    candela per square meter:
169
 
170
-       --max-cll “1000,400”
171
+       --max-cll “1000,400”
172
 
173
    Note that this string value will need to be escaped or quoted to
174
    protect against shell expansion on many platforms. No default.
175
@@ -1801,6 +1829,20 @@
176
    PbBbP. You probably also want :option:`--no-scenecut` and a keyframe
177
    interval that is a multiple of 4.
178
 
179
+.. option:: --log2-max-poc-lsb <integer>
180
+
181
+  Number of bits used to encode the picture order count (POC). Default 8
182
+
183
+.. option:: --discard-sei
184
+
185
+  Discard SEI messages generated from the final bitstream. HDR-related SEI
186
+  messages are always dumped, irrespective of this option. Default disabled.
187
+   
188
+.. option:: --discard-vui
189
+
190
+   Discard optional VUI information (timing, HRD info) from the
191
+   bitstream. Default disabled.
192
+
193
 Debugging options
194
 =================
195
 
196
x265_2.0.tar.gz/source/CMakeLists.txt -> x265_2.1.tar.gz/source/CMakeLists.txt Changed
21
 
1
@@ -30,7 +30,7 @@
2
 mark_as_advanced(FPROFILE_USE FPROFILE_GENERATE NATIVE_BUILD)
3
 
4
 # X265_BUILD must be incremented each time the public API is changed
5
-set(X265_BUILD 87)
6
+set(X265_BUILD 95)
7
 configure_file("${PROJECT_SOURCE_DIR}/x265.def.in"
8
                "${PROJECT_BINARY_DIR}/x265.def")
9
 configure_file("${PROJECT_SOURCE_DIR}/x265_config.h.in"
10
@@ -184,6 +184,10 @@
11
         string(FIND "${CMAKE_CXX_FLAGS}" "-march" marchPos)
12
         if(marchPos LESS "0")
13
             add_definitions(-march=i686)
14
+            if(WIN32 AND NOT INTEL_CXX AND NOT CLANG AND
15
+               CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 6.0 AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 7.0)
16
+                add_definitions(-mpreferred-stack-boundary=2)
17
+            endif()
18
         endif()
19
     endif()
20
     if(ARM AND CROSS_COMPILE_ARM)
21
x265_2.0.tar.gz/source/common/arm/asm-primitives.cpp -> x265_2.1.tar.gz/source/common/arm/asm-primitives.cpp Changed
9
 
1
@@ -1010,6 +1010,7 @@
2
         p.cu[BLOCK_16x16].dct = PFX(dct_16x16_neon);
3
 #if !HIGH_BIT_DEPTH
4
         p.cu[BLOCK_4x4].psy_cost_pp = PFX(psyCost_4x4_neon);
5
+        p.cu[BLOCK_8x8].psy_cost_pp = PFX(psyCost_8x8_neon);
6
 #endif // !HIGH_BIT_DEPTH
7
     }
8
     if (cpuMask & X265_CPU_ARMV6)
9
x265_2.0.tar.gz/source/common/arm/dct-a.S -> x265_2.1.tar.gz/source/common/arm/dct-a.S Changed
24
 
1
@@ -215,9 +215,7 @@
2
     vqrshrn.s32 d22, q13, 2
3
     vqrshrn.s32 d19, q14, 2
4
     vqrshrn.s32 d23, q15, 2
5
-
6
-    vstm r1!, {d16-d23]
7
-
8
+    vstm r1!, {d16-d23}
9
     // bottom half
10
     vld1.16 {q12}, [r0], r2
11
     vld1.16 {q13}, [r0], r2
12
@@ -261,10 +259,8 @@
13
     vqrshrn.s32 d22, q13, 2
14
     vqrshrn.s32 d19, q14, 2
15
     vqrshrn.s32 d23, q15, 2
16
-
17
-    vstm r1, {d16-d23]
18
+    vstm r1, {d16-d23}
19
     mov r1, r3
20
-
21
     // DCT-2D
22
     // left half
23
     vld1.16 {d24}, [r1], r2
24
x265_2.0.tar.gz/source/common/arm/pixel-util.S -> x265_2.1.tar.gz/source/common/arm/pixel-util.S Changed
193
 
1
@@ -2449,3 +2449,191 @@
2
     bx              lr
3
 endfunc
4
 
5
+function x265_psyCost_8x8_neon
6
+
7
+    vpush           {q4-q7}
8
+
9
+    vld1.8          {d0}, [r0], r1
10
+    vld1.8          {d1}, [r0], r1
11
+    vmovl.u8        q8, d0
12
+    vld1.8          {d2}, [r0], r1
13
+    vmovl.u8        q9, d1
14
+    vld1.8          {d3}, [r0], r1
15
+    vmovl.u8        q10, d2
16
+    vld1.8          {d4}, [r0], r1
17
+    vmovl.u8        q11, d3
18
+    vld1.8          {d5}, [r0], r1
19
+    vmovl.u8        q12, d4
20
+    vld1.8          {d6}, [r0], r1
21
+    vmovl.u8        q13, d5
22
+    vld1.8          {d7}, [r0], r1
23
+    vmovl.u8        q14, d6
24
+    vmovl.u8        q15, d7
25
+
26
+    // SAD Stage-0
27
+    vadd.u16        q4, q8, q9
28
+    vadd.u16        q5, q10, q11
29
+    vadd.u16        q6, q12, q13
30
+    vadd.u16        q7, q14, q15
31
+
32
+    // SAD Stage-1
33
+    vadd.u16        q4, q5
34
+    vadd.u16        q6, q7
35
+    vadd.u16        q4, q6
36
+    vpadd.u16       d8, d9
37
+    vpaddl.u16      d8, d8
38
+    vpadd.u32       d8, d8
39
+    vshr.u32        d8, #2
40
+
41
+    // sa8d
42
+    SUMSUB_AB       q0,  q1,  q8,  q9
43
+    SUMSUB_AB       q2,  q3,  q10, q11
44
+    SUMSUB_AB       q8,  q10, q0,  q2
45
+    SUMSUB_AB       q9,  q11, q1,  q3
46
+
47
+    HADAMARD4_V     q12, q13, q14, q15,  q0,  q1,  q2,  q3
48
+
49
+    SUMSUB_ABCD     q0,  q8,  q1,  q9,   q8,  q12, q9,  q13
50
+    SUMSUB_AB       q2,  q10, q10, q14
51
+    vtrn.16         q8,  q9
52
+    SUMSUB_AB       q3,  q11, q11, q15
53
+    vtrn.16         q0,  q1
54
+    SUMSUB_AB       q12, q13, q8,  q9
55
+    vtrn.16         q10, q11
56
+    SUMSUB_AB       q8,  q9,  q0,  q1
57
+    vtrn.16         q2,  q3
58
+    SUMSUB_AB       q14, q15, q10, q11
59
+    vadd.i16        q10, q2,  q3
60
+    vtrn.32         q12, q14
61
+    vsub.i16        q11, q2,  q3
62
+    vtrn.32         q13, q15
63
+    SUMSUB_AB       q0,  q2,  q12, q14
64
+    vtrn.32         q8,  q10
65
+    SUMSUB_AB       q1,  q3,  q13, q15
66
+    vtrn.32         q9,  q11
67
+    SUMSUB_AB       q12, q14, q8,  q10
68
+    SUMSUB_AB       q13, q15, q9,  q11
69
+
70
+    vswp            d1,  d24
71
+    ABS2            q0,  q12
72
+    vswp            d3,  d26
73
+    ABS2            q1,  q13
74
+    vswp            d5,  d28
75
+    ABS2            q2,  q14
76
+    vswp            d7,  d30
77
+    ABS2            q3,  q15
78
+    vmax.s16        q8,  q0,  q12
79
+    vmax.s16        q9,  q1,  q13
80
+    vmax.s16        q10, q2,  q14
81
+    vmax.s16        q11, q3,  q15
82
+    vadd.i16        q8,  q8,  q9
83
+    vadd.i16        q9,  q10, q11
84
+    vadd.u16        q0, q8, q9
85
+    vadd.u16        d0, d1
86
+    vpaddl.u16      d0, d0
87
+    vpadd.u32       d0, d0
88
+    vmov.32         r0, d0[0]
89
+    add             r0, r0, #1
90
+    lsr             r0, r0, #1
91
+//-------------------------------------------------------------
92
+    vld1.8          d0, [r2], r3
93
+    vld1.8          d1, [r2], r3
94
+    vmovl.u8        q8, d0
95
+    vld1.8          d2, [r2], r3
96
+    vmovl.u8        q9, d1
97
+    vld1.8          d3, [r2], r3
98
+    vmovl.u8        q10, d2
99
+    vld1.8          d4, [r2], r3
100
+    vmovl.u8        q11, d3
101
+    vld1.8          d5, [r2], r3
102
+    vmovl.u8        q12, d4
103
+    vld1.8          d6, [r2], r3
104
+    vmovl.u8        q13, d5
105
+    vld1.8          d7, [r2], r3
106
+    vmovl.u8        q14, d6
107
+    vmovl.u8        q15, d7
108
+
109
+    // SAD Stage-0
110
+    vadd.u16       q5, q8, q9
111
+    vadd.u16       q6, q10, q11
112
+    vadd.u16       q7, q12, q13
113
+    vadd.u16       q0, q14, q15
114
+
115
+    // SAD Stage-1
116
+    vadd.u16        q5, q6
117
+    vadd.u16        q7, q0
118
+    vadd.u16        q5, q7
119
+    vadd.u16        d10, d11
120
+    vpaddl.u16      d10, d10
121
+    vpadd.u32       d10, d10
122
+    vshr.u32        d10, #2
123
+
124
+    // sa8d
125
+    SUMSUB_AB       q0,  q1,  q8,  q9
126
+    SUMSUB_AB       q2,  q3,  q10, q11
127
+    SUMSUB_AB       q8,  q10, q0,  q2
128
+    SUMSUB_AB       q9,  q11, q1,  q3
129
+
130
+    HADAMARD4_V     q12, q13, q14, q15,  q0,  q1,  q2,  q3
131
+
132
+    SUMSUB_ABCD     q0,  q8,  q1,  q9,   q8,  q12, q9,  q13
133
+    SUMSUB_AB       q2,  q10, q10, q14
134
+    vtrn.16         q8,  q9
135
+    SUMSUB_AB       q3,  q11, q11, q15
136
+    vtrn.16         q0,  q1
137
+    SUMSUB_AB       q12, q13, q8,  q9
138
+    vtrn.16         q10, q11
139
+    SUMSUB_AB       q8,  q9,  q0,  q1
140
+    vtrn.16         q2,  q3
141
+    SUMSUB_AB       q14, q15, q10, q11
142
+    vadd.i16        q10, q2,  q3
143
+    vtrn.32         q12, q14
144
+    vsub.i16        q11, q2,  q3
145
+    vtrn.32         q13, q15
146
+    SUMSUB_AB       q0,  q2,  q12, q14
147
+    vtrn.32         q8,  q10
148
+    SUMSUB_AB       q1,  q3,  q13, q15
149
+    vtrn.32         q9,  q11
150
+    SUMSUB_AB       q12, q14, q8,  q10
151
+    SUMSUB_AB       q13, q15, q9,  q11
152
+
153
+    vswp            d1,  d24
154
+    ABS2            q0,  q12
155
+    vswp            d3,  d26
156
+    ABS2            q1,  q13
157
+    vswp            d5,  d28
158
+    ABS2            q2,  q14
159
+    vswp            d7,  d30
160
+    ABS2            q3,  q15
161
+    vmax.s16        q8,  q0,  q12
162
+    vmax.s16        q9,  q1,  q13
163
+    vmax.s16        q10, q2,  q14
164
+    vmax.s16        q11, q3,  q15
165
+    vadd.i16        q8,  q8,  q9
166
+    vadd.i16        q9,  q10, q11
167
+    vadd.u16        q0, q8, q9
168
+    vadd.u16        d0, d1
169
+    vpaddl.u16      d0, d0
170
+    vpadd.u32       d0, d0
171
+    vmov.32         r2, d0[0]
172
+    add             r2, r2, #1
173
+    lsr             r2, r2, #1
174
+
175
+    // SAD & SA8D Final Stage
176
+    vmov.32         r1, d8[0]
177
+    sub             r0, r1
178
+    vmov.32         r3, d10[0]
179
+    sub             r2, r3
180
+    cmp             r0, r2
181
+    bgt             subr0
182
+    sub             r0, r2, r0
183
+    b               end
184
+subr0:
185
+    sub             r0, r2
186
+end:
187
+
188
+    vpop            {q4-q7}
189
+    bx              lr
190
+endfunc
191
+
192
+
193
x265_2.0.tar.gz/source/common/arm/pixel-util.h -> x265_2.1.tar.gz/source/common/arm/pixel-util.h Changed
8
 
1
@@ -88,5 +88,6 @@
2
 void x265_ssim_4x4x2_core_neon(const pixel* pix1, intptr_t stride1, const pixel* pix2, intptr_t stride2, int sums[2][4]);
3
 
4
 int PFX(psyCost_4x4_neon)(const pixel* source, intptr_t sstride, const pixel* recon, intptr_t rstride);
5
+int PFX(psyCost_8x8_neon)(const pixel* source, intptr_t sstride, const pixel* recon, intptr_t rstride);
6
 
7
 #endif // ifndef X265_PIXEL_UTIL_ARM_H
8
x265_2.0.tar.gz/source/common/common.h -> x265_2.1.tar.gz/source/common/common.h Changed
36
 
1
@@ -71,6 +71,7 @@
2
 #define NUM_INTRA_MODE 35
3
 
4
 #if defined(__GNUC__)
5
+#define ALIGN_VAR_4(T, var)  T var __attribute__((aligned(4)))
6
 #define ALIGN_VAR_8(T, var)  T var __attribute__((aligned(8)))
7
 #define ALIGN_VAR_16(T, var) T var __attribute__((aligned(16)))
8
 #define ALIGN_VAR_32(T, var) T var __attribute__((aligned(32)))
9
@@ -81,6 +82,7 @@
10
 
11
 #elif defined(_MSC_VER)
12
 
13
+#define ALIGN_VAR_4(T, var)  __declspec(align(4)) T var
14
 #define ALIGN_VAR_8(T, var)  __declspec(align(8)) T var
15
 #define ALIGN_VAR_16(T, var) __declspec(align(16)) T var
16
 #define ALIGN_VAR_32(T, var) __declspec(align(32)) T var
17
@@ -157,7 +159,6 @@
18
 #define MIN_QPSCALE     0.21249999999999999
19
 #define MAX_MAX_QPSCALE 615.46574234477100
20
 
21
-#define BITS_FOR_POC 8
22
 
23
 template<typename T>
24
 inline T x265_min(T a, T b) { return a < b ? a : b; }
25
@@ -255,7 +256,9 @@
26
 #define LOG2_UNIT_SIZE          2                           // log2(unitSize)
27
 #define UNIT_SIZE               (1 << LOG2_UNIT_SIZE)       // unit size of CU partition
28
 
29
-#define MAX_NUM_PARTITIONS      256
30
+#define LOG2_RASTER_SIZE        (MAX_LOG2_CU_SIZE - LOG2_UNIT_SIZE)
31
+#define RASTER_SIZE             (1 << LOG2_RASTER_SIZE)
32
+#define MAX_NUM_PARTITIONS      (RASTER_SIZE * RASTER_SIZE)
33
 #define NUM_4x4_PARTITIONS      (1U << (g_unitSizeDepth << 1)) // number of 4x4 units in max CU size
34
 
35
 #define MIN_PU_SIZE             4
36
x265_2.0.tar.gz/source/common/constants.cpp -> x265_2.1.tar.gz/source/common/constants.cpp Changed
87
 
1
@@ -166,9 +166,48 @@
2
 uint32_t g_maxCUSize     = MAX_CU_SIZE;
3
 uint32_t g_unitSizeDepth = NUM_CU_DEPTH;
4
 uint32_t g_maxCUDepth    = NUM_CU_DEPTH - 1;
5
-uint32_t g_zscanToRaster[MAX_NUM_PARTITIONS] = { 0, };
6
-uint32_t g_rasterToZscan[MAX_NUM_PARTITIONS] = { 0, };
7
-
8
+uint32_t g_maxSlices     = 1;
9
+
10
+const uint32_t g_zscanToRaster[MAX_NUM_PARTITIONS] =
11
+{
12
+    0x00, 0x01, 0x10, 0x11, 0x02, 0x03, 0x12, 0x13, 0x20, 0x21, 0x30, 0x31, 0x22, 0x23, 0x32, 0x33,
13
+    0x04, 0x05, 0x14, 0x15, 0x06, 0x07, 0x16, 0x17, 0x24, 0x25, 0x34, 0x35, 0x26, 0x27, 0x36, 0x37,
14
+    0x40, 0x41, 0x50, 0x51, 0x42, 0x43, 0x52, 0x53, 0x60, 0x61, 0x70, 0x71, 0x62, 0x63, 0x72, 0x73,
15
+    0x44, 0x45, 0x54, 0x55, 0x46, 0x47, 0x56, 0x57, 0x64, 0x65, 0x74, 0x75, 0x66, 0x67, 0x76, 0x77,
16
+    0x08, 0x09, 0x18, 0x19, 0x0A, 0x0B, 0x1A, 0x1B, 0x28, 0x29, 0x38, 0x39, 0x2A, 0x2B, 0x3A, 0x3B,
17
+    0x0C, 0x0D, 0x1C, 0x1D, 0x0E, 0x0F, 0x1E, 0x1F, 0x2C, 0x2D, 0x3C, 0x3D, 0x2E, 0x2F, 0x3E, 0x3F,
18
+    0x48, 0x49, 0x58, 0x59, 0x4A, 0x4B, 0x5A, 0x5B, 0x68, 0x69, 0x78, 0x79, 0x6A, 0x6B, 0x7A, 0x7B,
19
+    0x4C, 0x4D, 0x5C, 0x5D, 0x4E, 0x4F, 0x5E, 0x5F, 0x6C, 0x6D, 0x7C, 0x7D, 0x6E, 0x6F, 0x7E, 0x7F,
20
+    0x80, 0x81, 0x90, 0x91, 0x82, 0x83, 0x92, 0x93, 0xA0, 0xA1, 0xB0, 0xB1, 0xA2, 0xA3, 0xB2, 0xB3,
21
+    0x84, 0x85, 0x94, 0x95, 0x86, 0x87, 0x96, 0x97, 0xA4, 0xA5, 0xB4, 0xB5, 0xA6, 0xA7, 0xB6, 0xB7,
22
+    0xC0, 0xC1, 0xD0, 0xD1, 0xC2, 0xC3, 0xD2, 0xD3, 0xE0, 0xE1, 0xF0, 0xF1, 0xE2, 0xE3, 0xF2, 0xF3,
23
+    0xC4, 0xC5, 0xD4, 0xD5, 0xC6, 0xC7, 0xD6, 0xD7, 0xE4, 0xE5, 0xF4, 0xF5, 0xE6, 0xE7, 0xF6, 0xF7,
24
+    0x88, 0x89, 0x98, 0x99, 0x8A, 0x8B, 0x9A, 0x9B, 0xA8, 0xA9, 0xB8, 0xB9, 0xAA, 0xAB, 0xBA, 0xBB,
25
+    0x8C, 0x8D, 0x9C, 0x9D, 0x8E, 0x8F, 0x9E, 0x9F, 0xAC, 0xAD, 0xBC, 0xBD, 0xAE, 0xAF, 0xBE, 0xBF,
26
+    0xC8, 0xC9, 0xD8, 0xD9, 0xCA, 0xCB, 0xDA, 0xDB, 0xE8, 0xE9, 0xF8, 0xF9, 0xEA, 0xEB, 0xFA, 0xFB,
27
+    0xCC, 0xCD, 0xDC, 0xDD, 0xCE, 0xCF, 0xDE, 0xDF, 0xEC, 0xED, 0xFC, 0xFD, 0xEE, 0xEF, 0xFE, 0xFF
28
+};
29
+
30
+const uint32_t g_rasterToZscan[MAX_NUM_PARTITIONS] =
31
+{
32
+    0x00, 0x01, 0x04, 0x05, 0x10, 0x11, 0x14, 0x15, 0x40, 0x41, 0x44, 0x45, 0x50, 0x51, 0x54, 0x55,
33
+    0x02, 0x03, 0x06, 0x07, 0x12, 0x13, 0x16, 0x17, 0x42, 0x43, 0x46, 0x47, 0x52, 0x53, 0x56, 0x57,
34
+    0x08, 0x09, 0x0C, 0x0D, 0x18, 0x19, 0x1C, 0x1D, 0x48, 0x49, 0x4C, 0x4D, 0x58, 0x59, 0x5C, 0x5D,
35
+    0x0A, 0x0B, 0x0E, 0x0F, 0x1A, 0x1B, 0x1E, 0x1F, 0x4A, 0x4B, 0x4E, 0x4F, 0x5A, 0x5B, 0x5E, 0x5F,
36
+    0x20, 0x21, 0x24, 0x25, 0x30, 0x31, 0x34, 0x35, 0x60, 0x61, 0x64, 0x65, 0x70, 0x71, 0x74, 0x75,
37
+    0x22, 0x23, 0x26, 0x27, 0x32, 0x33, 0x36, 0x37, 0x62, 0x63, 0x66, 0x67, 0x72, 0x73, 0x76, 0x77,
38
+    0x28, 0x29, 0x2C, 0x2D, 0x38, 0x39, 0x3C, 0x3D, 0x68, 0x69, 0x6C, 0x6D, 0x78, 0x79, 0x7C, 0x7D,
39
+    0x2A, 0x2B, 0x2E, 0x2F, 0x3A, 0x3B, 0x3E, 0x3F, 0x6A, 0x6B, 0x6E, 0x6F, 0x7A, 0x7B, 0x7E, 0x7F,
40
+    0x80, 0x81, 0x84, 0x85, 0x90, 0x91, 0x94, 0x95, 0xC0, 0xC1, 0xC4, 0xC5, 0xD0, 0xD1, 0xD4, 0xD5,
41
+    0x82, 0x83, 0x86, 0x87, 0x92, 0x93, 0x96, 0x97, 0xC2, 0xC3, 0xC6, 0xC7, 0xD2, 0xD3, 0xD6, 0xD7,
42
+    0x88, 0x89, 0x8C, 0x8D, 0x98, 0x99, 0x9C, 0x9D, 0xC8, 0xC9, 0xCC, 0xCD, 0xD8, 0xD9, 0xDC, 0xDD,
43
+    0x8A, 0x8B, 0x8E, 0x8F, 0x9A, 0x9B, 0x9E, 0x9F, 0xCA, 0xCB, 0xCE, 0xCF, 0xDA, 0xDB, 0xDE, 0xDF,
44
+    0xA0, 0xA1, 0xA4, 0xA5, 0xB0, 0xB1, 0xB4, 0xB5, 0xE0, 0xE1, 0xE4, 0xE5, 0xF0, 0xF1, 0xF4, 0xF5,
45
+    0xA2, 0xA3, 0xA6, 0xA7, 0xB2, 0xB3, 0xB6, 0xB7, 0xE2, 0xE3, 0xE6, 0xE7, 0xF2, 0xF3, 0xF6, 0xF7,
46
+    0xA8, 0xA9, 0xAC, 0xAD, 0xB8, 0xB9, 0xBC, 0xBD, 0xE8, 0xE9, 0xEC, 0xED, 0xF8, 0xF9, 0xFC, 0xFD,
47
+    0xAA, 0xAB, 0xAE, 0xAF, 0xBA, 0xBB, 0xBE, 0xBF, 0xEA, 0xEB, 0xEE, 0xEF, 0xFA, 0xFB, 0xFE, 0xFF
48
+};
49
+    
50
 const uint8_t g_zscanToPelX[MAX_NUM_PARTITIONS] =
51
 {
52
     0, 4, 0, 4, 8, 12, 8, 12, 0, 4, 0, 4, 8, 12, 8, 12,
53
@@ -209,33 +248,6 @@
54
     48, 48, 52, 52, 48, 48, 52, 52, 56, 56, 60, 60, 56, 56, 60, 60
55
 };
56
 
57
-void initZscanToRaster(uint32_t maxFullDepth, uint32_t depth, uint32_t startVal, uint32_t*& curIdx)
58
-{
59
-    uint32_t stride = 1 << maxFullDepth;
60
-
61
-    if (depth > maxFullDepth)
62
-    {
63
-        curIdx[0] = startVal;
64
-        curIdx++;
65
-    }
66
-    else
67
-    {
68
-        int step = stride >> depth;
69
-        initZscanToRaster(maxFullDepth, depth + 1, startVal,                        curIdx);
70
-        initZscanToRaster(maxFullDepth, depth + 1, startVal + step,                 curIdx);
71
-        initZscanToRaster(maxFullDepth, depth + 1, startVal + step * stride,        curIdx);
72
-        initZscanToRaster(maxFullDepth, depth + 1, startVal + step * stride + step, curIdx);
73
-    }
74
-}
75
-
76
-void initRasterToZscan(uint32_t maxFullDepth)
77
-{
78
-    uint32_t numPartitions = 1 << (maxFullDepth * 2);
79
-
80
-    for (uint32_t i = 0; i < numPartitions; i++)
81
-        g_rasterToZscan[g_zscanToRaster[i]] = i;
82
-}
83
-
84
 const int16_t g_lumaFilter[4][NTAPS_LUMA] =
85
 {
86
     {  0, 0,   0, 64,  0,   0, 0,  0 },
87
x265_2.0.tar.gz/source/common/constants.h -> x265_2.1.tar.gz/source/common/constants.h Changed
40
 
1
@@ -32,9 +32,6 @@
2
 
3
 extern int g_ctuSizeConfigured;
4
 
5
-void initZscanToRaster(uint32_t maxFullDepth, uint32_t depth, uint32_t startVal, uint32_t*& curIdx);
6
-void initRasterToZscan(uint32_t maxFullDepth);
7
-
8
 extern double x265_lambda_tab[QP_MAX_MAX + 1];
9
 extern double x265_lambda2_tab[QP_MAX_MAX + 1];
10
 extern const uint16_t x265_chroma_lambda2_offset_tab[MAX_CHROMA_LAMBDA_OFFSET + 1];
11
@@ -46,8 +43,8 @@
12
 extern const uint8_t g_chroma422IntraAngleMappingTable[AngleMapping422TableSize];
13
 
14
 // flexible conversion from relative to absolute index
15
-extern uint32_t g_zscanToRaster[MAX_NUM_PARTITIONS];
16
-extern uint32_t g_rasterToZscan[MAX_NUM_PARTITIONS];
17
+extern const uint32_t g_zscanToRaster[MAX_NUM_PARTITIONS];
18
+extern const uint32_t g_rasterToZscan[MAX_NUM_PARTITIONS];
19
 
20
 // conversion of partition index to picture pel position
21
 extern const uint8_t g_zscanToPelX[MAX_NUM_PARTITIONS];
22
@@ -59,6 +56,7 @@
23
 extern uint32_t g_maxCUSize;
24
 extern uint32_t g_maxCUDepth;
25
 extern uint32_t g_unitSizeDepth; // Depth at which 4x4 unit occurs from max CU size
26
+extern uint32_t g_maxSlices; // number of Slices
27
 
28
 extern const int16_t g_t4[4][4];
29
 extern const int16_t g_t8[8][8];
30
@@ -84,8 +82,7 @@
31
 extern const uint16_t* const g_scanOrder[NUM_SCAN_TYPE][NUM_SCAN_SIZE];
32
 extern const uint16_t* const g_scanOrderCG[NUM_SCAN_TYPE][NUM_SCAN_SIZE];
33
 extern const uint16_t g_scan8x8diag[8 * 8];
34
-extern const uint16_t g_scan4x4[NUM_SCAN_TYPE + 1][4 * 4];  // +1 for safe buffer area for codeCoeffNxN assembly optimize, there have up to 15 bytes beyond bound read
35
-
36
+ALIGN_VAR_16(extern const uint16_t, g_scan4x4[NUM_SCAN_TYPE + 1][4 * 4]);  // +1 for safe buffer area for codeCoeffNxN assembly optimize, there have up to 15 bytes beyond bound read
37
 extern const uint8_t g_lastCoeffTable[32];
38
 extern const uint8_t g_goRiceRange[5]; // maximum value coded with Rice codes
39
 
40
x265_2.0.tar.gz/source/common/cudata.cpp -> x265_2.1.tar.gz/source/common/cudata.cpp Changed
201
 
1
@@ -58,51 +58,46 @@
2
 // file private namespace
3
 
4
 /* Check whether 2 addresses point to the same column */
5
-inline bool isEqualCol(int addrA, int addrB, int numUnits)
6
+inline bool isEqualCol(int addrA, int addrB)
7
 {
8
-    // addrA % numUnits == addrB % numUnits
9
-    return ((addrA ^ addrB) &  (numUnits - 1)) == 0;
10
+    return ((addrA ^ addrB) & (RASTER_SIZE - 1)) == 0;
11
 }
12
 
13
 /* Check whether 2 addresses point to the same row */
14
-inline bool isEqualRow(int addrA, int addrB, int numUnits)
15
+inline bool isEqualRow(int addrA, int addrB)
16
 {
17
-    // addrA / numUnits == addrB / numUnits
18
-    return ((addrA ^ addrB) & ~(numUnits - 1)) == 0;
19
+    return ((addrA ^ addrB) < RASTER_SIZE);
20
 }
21
 
22
 /* Check whether 2 addresses point to the same row or column */
23
-inline bool isEqualRowOrCol(int addrA, int addrB, int numUnits)
24
+inline bool isEqualRowOrCol(int addrA, int addrB)
25
 {
26
-    return isEqualCol(addrA, addrB, numUnits) | isEqualRow(addrA, addrB, numUnits);
27
+    return isEqualCol(addrA, addrB) | isEqualRow(addrA, addrB);
28
 }
29
 
30
 /* Check whether one address points to the first column */
31
-inline bool isZeroCol(int addr, int numUnits)
32
+inline bool isZeroCol(int addr)
33
 {
34
-    // addr % numUnits == 0
35
-    return (addr & (numUnits - 1)) == 0;
36
+    return (addr & (RASTER_SIZE - 1)) == 0;
37
 }
38
 
39
 /* Check whether one address points to the first row */
40
-inline bool isZeroRow(int addr, int numUnits)
41
+inline bool isZeroRow(int addr)
42
 {
43
-    // addr / numUnits == 0
44
-    return (addr & ~(numUnits - 1)) == 0;
45
+    return (addr < RASTER_SIZE);
46
 }
47
 
48
 /* Check whether one address points to a column whose index is smaller than a given value */
49
-inline bool lessThanCol(int addr, int val, int numUnits)
50
+inline bool lessThanCol(int addr, int val)
51
 {
52
-    // addr % numUnits < val
53
-    return (addr & (numUnits - 1)) < val;
54
+    return (addr & (RASTER_SIZE - 1)) < val;
55
 }
56
 
57
 /* Check whether one address points to a row whose index is smaller than a given value */
58
-inline bool lessThanRow(int addr, int val, int numUnits)
59
+inline bool lessThanRow(int addr, int val)
60
 {
61
     // addr / numUnits < val
62
-    return addr < val * numUnits;
63
+    return (addr >> LOG2_RASTER_SIZE) < val;
64
 }
65
 
66
 inline MV scaleMv(MV mv, int scale)
67
@@ -271,7 +266,7 @@
68
     }
69
 }
70
 
71
-void CUData::initCTU(const Frame& frame, uint32_t cuAddr, int qp)
72
+void CUData::initCTU(const Frame& frame, uint32_t cuAddr, int qp, uint32_t firstRowInSlice, uint32_t lastRowInSlice, uint32_t lastCuInSlice)
73
 {
74
     m_encData       = frame.m_encData;
75
     m_slice         = m_encData->m_slice;
76
@@ -280,6 +275,9 @@
77
     m_cuPelY        = (cuAddr / m_slice->m_sps->numCuInWidth) << g_maxLog2CUSize;
78
     m_absIdxInCTU   = 0;
79
     m_numPartitions = NUM_4x4_PARTITIONS;
80
+    m_bFirstRowInSlice = (uint8_t)firstRowInSlice;
81
+    m_bLastRowInSlice  = (uint8_t)lastRowInSlice;
82
+    m_bLastCuInSlice   = (uint8_t)lastCuInSlice;
83
 
84
     /* sequential memsets */
85
     m_partSet((uint8_t*)m_qp, (uint8_t)qp);
86
@@ -300,7 +298,7 @@
87
 
88
     uint32_t widthInCU = m_slice->m_sps->numCuInWidth;
89
     m_cuLeft = (m_cuAddr % widthInCU) ? m_encData->getPicCTU(m_cuAddr - 1) : NULL;
90
-    m_cuAbove = (m_cuAddr / widthInCU) ? m_encData->getPicCTU(m_cuAddr - widthInCU) : NULL;
91
+    m_cuAbove = (m_cuAddr >= widthInCU) && !m_bFirstRowInSlice ? m_encData->getPicCTU(m_cuAddr - widthInCU) : NULL;
92
     m_cuAboveLeft = (m_cuLeft && m_cuAbove) ? m_encData->getPicCTU(m_cuAddr - widthInCU - 1) : NULL;
93
     m_cuAboveRight = (m_cuAbove && ((m_cuAddr % widthInCU) < (widthInCU - 1))) ? m_encData->getPicCTU(m_cuAddr - widthInCU + 1) : NULL;
94
 }
95
@@ -318,6 +316,10 @@
96
     m_cuAbove       = ctu.m_cuAbove;
97
     m_cuAboveLeft   = ctu.m_cuAboveLeft;
98
     m_cuAboveRight  = ctu.m_cuAboveRight;
99
+    m_bFirstRowInSlice = ctu.m_bFirstRowInSlice;
100
+    m_bLastRowInSlice = ctu.m_bLastRowInSlice;
101
+    m_bLastCuInSlice = ctu.m_bLastCuInSlice;
102
+
103
     X265_CHECK(m_numPartitions == cuGeom.numPartitions, "initSubCU() size mismatch\n");
104
 
105
     m_partSet((uint8_t*)m_qp, (uint8_t)qp);
106
@@ -341,6 +343,9 @@
107
 
108
     uint32_t offset = childGeom.numPartitions * subPartIdx;
109
 
110
+    m_bFirstRowInSlice = subCU.m_bFirstRowInSlice;
111
+    m_bLastCuInSlice = subCU.m_bLastCuInSlice;
112
+
113
     m_subPartCopy((uint8_t*)m_qp + offset, (uint8_t*)subCU.m_qp);
114
     m_subPartCopy(m_log2CUSize + offset, subCU.m_log2CUSize);
115
     m_subPartCopy(m_lumaIntraDir + offset, subCU.m_lumaIntraDir);
116
@@ -561,11 +566,11 @@
117
 {
118
     uint32_t absPartIdx = g_zscanToRaster[curPartUnitIdx];
119
 
120
-    if (!isZeroCol(absPartIdx, s_numPartInCUSize))
121
+    if (!isZeroCol(absPartIdx))
122
     {
123
         uint32_t absZorderCUIdx   = g_zscanToRaster[m_absIdxInCTU];
124
         lPartUnitIdx = g_rasterToZscan[absPartIdx - 1];
125
-        if (isEqualCol(absPartIdx, absZorderCUIdx, s_numPartInCUSize))
126
+        if (isEqualCol(absPartIdx, absZorderCUIdx))
127
             return m_encData->getPicCTU(m_cuAddr);
128
         else
129
         {
130
@@ -582,18 +587,18 @@
131
 {
132
     uint32_t absPartIdx = g_zscanToRaster[curPartUnitIdx];
133
 
134
-    if (!isZeroRow(absPartIdx, s_numPartInCUSize))
135
+    if (!isZeroRow(absPartIdx))
136
     {
137
         uint32_t absZorderCUIdx = g_zscanToRaster[m_absIdxInCTU];
138
-        aPartUnitIdx = g_rasterToZscan[absPartIdx - s_numPartInCUSize];
139
-        if (isEqualRow(absPartIdx, absZorderCUIdx, s_numPartInCUSize))
140
+        aPartUnitIdx = g_rasterToZscan[absPartIdx - RASTER_SIZE];
141
+        if (isEqualRow(absPartIdx, absZorderCUIdx))
142
             return m_encData->getPicCTU(m_cuAddr);
143
         else
144
             aPartUnitIdx -= m_absIdxInCTU;
145
         return this;
146
     }
147
 
148
-    aPartUnitIdx = g_rasterToZscan[absPartIdx + NUM_4x4_PARTITIONS - s_numPartInCUSize];
149
+    aPartUnitIdx = g_rasterToZscan[absPartIdx + ((s_numPartInCUSize - 1) << LOG2_RASTER_SIZE)];
150
     return m_cuAbove;
151
 }
152
 
153
@@ -601,13 +606,13 @@
154
 {
155
     uint32_t absPartIdx = g_zscanToRaster[curPartUnitIdx];
156
 
157
-    if (!isZeroCol(absPartIdx, s_numPartInCUSize))
158
+    if (!isZeroCol(absPartIdx))
159
     {
160
-        if (!isZeroRow(absPartIdx, s_numPartInCUSize))
161
+        if (!isZeroRow(absPartIdx))
162
         {
163
             uint32_t absZorderCUIdx  = g_zscanToRaster[m_absIdxInCTU];
164
-            alPartUnitIdx = g_rasterToZscan[absPartIdx - s_numPartInCUSize - 1];
165
-            if (isEqualRowOrCol(absPartIdx, absZorderCUIdx, s_numPartInCUSize))
166
+            alPartUnitIdx = g_rasterToZscan[absPartIdx - RASTER_SIZE - 1];
167
+            if (isEqualRowOrCol(absPartIdx, absZorderCUIdx))
168
                 return m_encData->getPicCTU(m_cuAddr);
169
             else
170
             {
171
@@ -615,17 +620,17 @@
172
                 return this;
173
             }
174
         }
175
-        alPartUnitIdx = g_rasterToZscan[absPartIdx + NUM_4x4_PARTITIONS - s_numPartInCUSize - 1];
176
+        alPartUnitIdx = g_rasterToZscan[absPartIdx + ((s_numPartInCUSize - 1) << LOG2_RASTER_SIZE) - 1];
177
         return m_cuAbove;
178
     }
179
 
180
-    if (!isZeroRow(absPartIdx, s_numPartInCUSize))
181
+    if (!isZeroRow(absPartIdx))
182
     {
183
-        alPartUnitIdx = g_rasterToZscan[absPartIdx - 1];
184
+        alPartUnitIdx = g_rasterToZscan[absPartIdx - RASTER_SIZE + s_numPartInCUSize - 1];
185
         return m_cuLeft;
186
     }
187
 
188
-    alPartUnitIdx = g_rasterToZscan[NUM_4x4_PARTITIONS - 1];
189
+    alPartUnitIdx = NUM_4x4_PARTITIONS - 1;
190
     return m_cuAboveLeft;
191
 }
192
 
193
@@ -636,15 +641,15 @@
194
 
195
     uint32_t absPartIdxRT = g_zscanToRaster[curPartUnitIdx];
196
 
197
-    if (lessThanCol(absPartIdxRT, s_numPartInCUSize - 1, s_numPartInCUSize))
198
+    if (lessThanCol(absPartIdxRT, s_numPartInCUSize - 1))
199
     {
200
-        if (!isZeroRow(absPartIdxRT, s_numPartInCUSize))
201
x265_2.0.tar.gz/source/common/cudata.h -> x265_2.1.tar.gz/source/common/cudata.h Changed
22
 
1
@@ -180,6 +180,11 @@
2
     uint32_t      m_hChromaShift;
3
     uint32_t      m_vChromaShift;
4
 
5
+    /* multiple slices informations */
6
+    uint8_t      m_bFirstRowInSlice;
7
+    uint8_t      m_bLastRowInSlice;
8
+    uint8_t      m_bLastCuInSlice;
9
+
10
     /* Per-part data, stored contiguously */
11
     int8_t*       m_qp;               // array of QP values
12
     uint8_t*      m_log2CUSize;       // array of cu log2Size TODO: seems redundant to depth
13
@@ -214,7 +219,7 @@
14
     void     initialize(const CUDataMemPool& dataPool, uint32_t depth, int csp, int instance);
15
     static void calcCTUGeoms(uint32_t ctuWidth, uint32_t ctuHeight, uint32_t maxCUSize, uint32_t minCUSize, CUGeom cuDataArray[CUGeom::MAX_GEOMS]);
16
 
17
-    void     initCTU(const Frame& frame, uint32_t cuAddr, int qp);
18
+    void     initCTU(const Frame& frame, uint32_t cuAddr, int qp, uint32_t firstRowInSlice, uint32_t lastRowInSlice, uint32_t lastCUInSlice);
19
     void     initSubCU(const CUData& ctu, const CUGeom& cuGeom, int qp);
20
     void     initLosslessCU(const CUData& cu, const CUGeom& cuGeom);
21
 
22
x265_2.0.tar.gz/source/common/deblock.cpp -> x265_2.1.tar.gz/source/common/deblock.cpp Changed
105
 
1
@@ -90,7 +90,7 @@
2
     uint32_t numUnits = 1 << (cuGeom.log2CUSize - LOG2_UNIT_SIZE);
3
     setEdgefilterPU(cu, absPartIdx, dir, blockStrength, numUnits);
4
     setEdgefilterTU(cu, absPartIdx, 0, dir, blockStrength);
5
-    setEdgefilterMultiple(cu, absPartIdx, dir, 0, bsCuEdge(cu, absPartIdx, dir), blockStrength, numUnits);
6
+    setEdgefilterMultiple(absPartIdx, dir, 0, bsCuEdge(cu, absPartIdx, dir), blockStrength, numUnits);
7
 
8
     uint32_t numParts = cuGeom.numPartitions;
9
     for (uint32_t partIdx = absPartIdx; partIdx < absPartIdx + numParts; partIdx++)
10
@@ -114,22 +114,20 @@
11
     }
12
 }
13
 
14
-static inline uint32_t calcBsIdx(const CUData* cu, uint32_t absPartIdx, int32_t dir, int32_t edgeIdx, int32_t baseUnitIdx)
15
+static inline uint32_t calcBsIdx(uint32_t absPartIdx, int32_t dir, int32_t edgeIdx, int32_t baseUnitIdx)
16
 {
17
-    uint32_t numUnits = cu->m_slice->m_sps->numPartInCUSize;
18
-
19
     if (dir)
20
-        return g_rasterToZscan[g_zscanToRaster[absPartIdx] + edgeIdx * numUnits + baseUnitIdx];
21
+        return g_rasterToZscan[g_zscanToRaster[absPartIdx] + (edgeIdx << LOG2_RASTER_SIZE) + baseUnitIdx];
22
     else
23
-        return g_rasterToZscan[g_zscanToRaster[absPartIdx] + baseUnitIdx * numUnits + edgeIdx];
24
+        return g_rasterToZscan[g_zscanToRaster[absPartIdx] + (baseUnitIdx << LOG2_RASTER_SIZE) + edgeIdx];
25
 }
26
 
27
-void Deblock::setEdgefilterMultiple(const CUData* cu, uint32_t scanIdx, int32_t dir, int32_t edgeIdx, uint8_t value, uint8_t blockStrength[], uint32_t numUnits)
28
+void Deblock::setEdgefilterMultiple(uint32_t scanIdx, int32_t dir, int32_t edgeIdx, uint8_t value, uint8_t blockStrength[], uint32_t numUnits)
29
 {
30
     X265_CHECK(numUnits > 0, "numUnits edge filter check\n");
31
     for (uint32_t i = 0; i < numUnits; i++)
32
     {
33
-        const uint32_t bsidx = calcBsIdx(cu, scanIdx, dir, edgeIdx, i);
34
+        const uint32_t bsidx = calcBsIdx(scanIdx, dir, edgeIdx, i);
35
         blockStrength[bsidx] = value;
36
     }
37
 }
38
@@ -145,8 +143,8 @@
39
         return;
40
     }
41
 
42
-    uint32_t numUnits  = 1 << (log2TrSize - LOG2_UNIT_SIZE);
43
-    setEdgefilterMultiple(cu, absPartIdx, dir, 0, 2, blockStrength, numUnits);
44
+    uint32_t numUnits = 1 << (log2TrSize - LOG2_UNIT_SIZE);
45
+    setEdgefilterMultiple(absPartIdx, dir, 0, 2, blockStrength, numUnits);
46
 }
47
 
48
 void Deblock::setEdgefilterPU(const CUData* cu, uint32_t absPartIdx, int32_t dir, uint8_t blockStrength[], uint32_t numUnits)
49
@@ -158,30 +156,30 @@
50
     {
51
     case SIZE_2NxN:
52
         if (EDGE_HOR == dir)
53
-            setEdgefilterMultiple(cu, absPartIdx, dir, hNumUnits, 1, blockStrength, numUnits);
54
+            setEdgefilterMultiple(absPartIdx, dir, hNumUnits, 1, blockStrength, numUnits);
55
         break;
56
     case SIZE_Nx2N:
57
         if (EDGE_VER == dir)
58
-            setEdgefilterMultiple(cu, absPartIdx, dir, hNumUnits, 1, blockStrength, numUnits);
59
+            setEdgefilterMultiple(absPartIdx, dir, hNumUnits, 1, blockStrength, numUnits);
60
         break;
61
     case SIZE_NxN:
62
-        setEdgefilterMultiple(cu, absPartIdx, dir, hNumUnits, 1, blockStrength, numUnits);
63
+        setEdgefilterMultiple(absPartIdx, dir, hNumUnits, 1, blockStrength, numUnits);
64
         break;
65
     case SIZE_2NxnU:
66
         if (EDGE_HOR == dir)
67
-            setEdgefilterMultiple(cu, absPartIdx, dir, qNumUnits, 1, blockStrength, numUnits);
68
+            setEdgefilterMultiple(absPartIdx, dir, qNumUnits, 1, blockStrength, numUnits);
69
         break;
70
     case SIZE_nLx2N:
71
         if (EDGE_VER == dir)
72
-            setEdgefilterMultiple(cu, absPartIdx, dir, qNumUnits, 1, blockStrength, numUnits);
73
+            setEdgefilterMultiple(absPartIdx, dir, qNumUnits, 1, blockStrength, numUnits);
74
         break;
75
     case SIZE_2NxnD:
76
         if (EDGE_HOR == dir)
77
-            setEdgefilterMultiple(cu, absPartIdx, dir, numUnits - qNumUnits, 1, blockStrength, numUnits);
78
+            setEdgefilterMultiple(absPartIdx, dir, numUnits - qNumUnits, 1, blockStrength, numUnits);
79
         break;
80
     case SIZE_nRx2N:
81
         if (EDGE_VER == dir)
82
-            setEdgefilterMultiple(cu, absPartIdx, dir, numUnits - qNumUnits, 1, blockStrength, numUnits);
83
+            setEdgefilterMultiple(absPartIdx, dir, numUnits - qNumUnits, 1, blockStrength, numUnits);
84
         break;
85
 
86
     case SIZE_2Nx2N:
87
@@ -350,7 +348,7 @@
88
     uint32_t numUnits = cuQ->m_slice->m_sps->numPartInCUSize >> depth;
89
     for (uint32_t idx = 0; idx < numUnits; idx++)
90
     {
91
-        uint32_t partQ = calcBsIdx(cuQ, absPartIdx, dir, edge, idx);
92
+        uint32_t partQ = calcBsIdx(absPartIdx, dir, edge, idx);
93
         uint32_t bs = blockStrength[partQ];
94
 
95
         if (!bs)
96
@@ -461,7 +459,7 @@
97
     uint32_t numUnits = cuQ->m_slice->m_sps->numPartInCUSize >> (depth + chromaShift);
98
     for (uint32_t idx = 0; idx < numUnits; idx++)
99
     {
100
-        uint32_t partQ = calcBsIdx(cuQ, absPartIdx, dir, edge, idx << chromaShift);
101
+        uint32_t partQ = calcBsIdx(absPartIdx, dir, edge, idx << chromaShift);
102
         uint32_t bs = blockStrength[partQ];
103
 
104
         if (bs <= 1)
105
x265_2.0.tar.gz/source/common/deblock.h -> x265_2.1.tar.gz/source/common/deblock.h Changed
10
 
1
@@ -48,7 +48,7 @@
2
     // set filtering functions
3
     static void setEdgefilterTU(const CUData* cu, uint32_t absPartIdx, uint32_t tuDepth, int32_t dir, uint8_t blockStrength[]);
4
     static void setEdgefilterPU(const CUData* cu, uint32_t absPartIdx, int32_t dir, uint8_t blockStrength[], uint32_t numUnits);
5
-    static void setEdgefilterMultiple(const CUData* cu, uint32_t absPartIdx, int32_t dir, int32_t edgeIdx, uint8_t value, uint8_t blockStrength[], uint32_t numUnits);
6
+    static void setEdgefilterMultiple(uint32_t absPartIdx, int32_t dir, int32_t edgeIdx, uint8_t value, uint8_t blockStrength[], uint32_t numUnits);
7
 
8
     // get filtering functions
9
     static uint8_t getBoundaryStrength(const CUData* cuQ, int32_t dir, uint32_t partQ, const uint8_t blockStrength[]);
10
x265_2.0.tar.gz/source/common/frame.cpp -> x265_2.1.tar.gz/source/common/frame.cpp Changed
77
 
1
@@ -2,6 +2,7 @@
2
 * Copyright (C) 2013 x265 project
3
 *
4
 * Author: Steve Borho <steve@borho.org>
5
+*         Min Chen <chenm003@163.com>
6
 *
7
 * This program is free software; you can redistribute it and/or modify
8
 * it under the terms of the GNU General Public License as published by
9
@@ -32,7 +33,7 @@
10
 {
11
     m_bChromaExtended = false;
12
     m_lowresInit = false;
13
-    m_reconRowCount.set(0);
14
+    m_reconRowFlag = NULL;
15
     m_reconColCount = NULL;
16
     m_countRefEncoders = 0;
17
     m_encData = NULL;
18
@@ -41,6 +42,8 @@
19
     m_next = NULL;
20
     m_prev = NULL;
21
     m_param = NULL;
22
+    m_userSEI.numPayloads = 0;
23
+    m_userSEI.payloads = NULL;
24
     memset(&m_lowres, 0, sizeof(m_lowres));
25
     m_rcData = NULL;
26
 }
27
@@ -52,15 +55,20 @@
28
     CHECKED_MALLOC_ZERO(m_rcData, RcStats, 1);
29
 
30
     if (m_fencPic->create(param->sourceWidth, param->sourceHeight, param->internalCsp) &&
31
-        m_lowres.create(m_fencPic, param->bframes, !!param->rc.aqMode))
32
+        m_lowres.create(m_fencPic, param->bframes, !!param->rc.aqMode, param->rc.qgSize))
33
     {
34
         X265_CHECK((m_reconColCount == NULL), "m_reconColCount was initialized");
35
         m_numRows = (m_fencPic->m_picHeight + g_maxCUSize - 1)  / g_maxCUSize;
36
+        m_reconRowFlag = new ThreadSafeInteger[m_numRows];
37
         m_reconColCount = new ThreadSafeInteger[m_numRows];
38
 
39
         if (quantOffsets)
40
         {
41
-            int32_t cuCount = m_lowres.maxBlocksInRow * m_lowres.maxBlocksInCol;
42
+            int32_t cuCount;
43
+            if (param->rc.qgSize == 8)
44
+                cuCount = m_lowres.maxBlocksInRowFullRes * m_lowres.maxBlocksInColFullRes;
45
+            else
46
+                cuCount = m_lowres.maxBlocksInRow * m_lowres.maxBlocksInCol;
47
             m_quantOffsets = new float[cuCount];
48
         }
49
         return true;
50
@@ -132,6 +140,12 @@
51
         m_reconPic = NULL;
52
     }
53
 
54
+    if (m_reconRowFlag)
55
+    {
56
+        delete[] m_reconRowFlag;
57
+        m_reconRowFlag = NULL;
58
+    }
59
+
60
     if (m_reconColCount)
61
     {
62
         delete[] m_reconColCount;
63
@@ -143,6 +157,13 @@
64
         delete[] m_quantOffsets;
65
     }
66
 
67
+    if (m_userSEI.numPayloads)
68
+    {
69
+        for (int i = 0; i < m_userSEI.numPayloads; i++)
70
+            delete[] m_userSEI.payloads[i].payload;
71
+        delete[] m_userSEI.payloads;
72
+    }
73
+
74
     m_lowres.destroy();
75
     X265_FREE(m_rcData);
76
 }
77
x265_2.0.tar.gz/source/common/frame.h -> x265_2.1.tar.gz/source/common/frame.h Changed
31
 
1
@@ -2,6 +2,7 @@
2
 * Copyright (C) 2013 x265 project
3
 *
4
 * Author: Steve Borho <steve@borho.org>
5
+*         Min Chen <chenm003@163.com>
6
 *
7
 * This program is free software; you can redistribute it and/or modify
8
 * it under the terms of the GNU General Public License as published by
9
@@ -49,6 +50,9 @@
10
     double   pCuCount;
11
     double   skipCuCount;
12
     double   qScale;
13
+    double   cumulativePQp;
14
+    double   cumulativePNorm;
15
+    double   lastQScaleFor[3];
16
     int      mvBits;
17
     int      miscBits;
18
     int      coeffBits;
19
@@ -82,9 +86,10 @@
20
     bool                   m_bChromaExtended;    // orig chroma planes motion extended for weight analysis
21
 
22
     float*                 m_quantOffsets;       // points to quantOffsets in x265_picture
23
+    x265_sei               m_userSEI;
24
 
25
     /* Frame Parallelism - notification between FrameEncoders of available motion reference rows */
26
-    ThreadSafeInteger      m_reconRowCount;      // count of CTU rows completely reconstructed and extended for motion reference
27
+    ThreadSafeInteger*     m_reconRowFlag;       // flag of CTU rows completely reconstructed and extended for motion reference
28
     ThreadSafeInteger*     m_reconColCount;      // count of CTU cols completely reconstructed and extended for motion reference
29
     int32_t                m_numRows;
30
     volatile uint32_t      m_countRefEncoders;   // count of FrameEncoder threads monitoring m_reconRowCount
31
x265_2.0.tar.gz/source/common/lowres.cpp -> x265_2.1.tar.gz/source/common/lowres.cpp Changed
53
 
1
@@ -27,7 +27,7 @@
2
 
3
 using namespace X265_NS;
4
 
5
-bool Lowres::create(PicYuv *origPic, int _bframes, bool bAQEnabled)
6
+bool Lowres::create(PicYuv *origPic, int _bframes, bool bAQEnabled, uint32_t qgSize)
7
 {
8
     isLowres = true;
9
     bframes = _bframes;
10
@@ -38,7 +38,14 @@
11
         lumaStride += 32 - (lumaStride & 31);
12
     maxBlocksInRow = (width + X265_LOWRES_CU_SIZE - 1) >> X265_LOWRES_CU_BITS;
13
     maxBlocksInCol = (lines + X265_LOWRES_CU_SIZE - 1) >> X265_LOWRES_CU_BITS;
14
+    maxBlocksInRowFullRes = maxBlocksInRow * 2;
15
+    maxBlocksInColFullRes = maxBlocksInCol * 2;
16
     int cuCount = maxBlocksInRow * maxBlocksInCol;
17
+    int cuCountFullRes;
18
+    if (qgSize == 8)
19
+        cuCountFullRes = maxBlocksInRowFullRes * maxBlocksInColFullRes;
20
+    else
21
+        cuCountFullRes = cuCount;
22
 
23
     /* rounding the width to multiple of lowres CU size */
24
     width = maxBlocksInRow * X265_LOWRES_CU_SIZE;
25
@@ -46,13 +53,14 @@
26
 
27
     size_t planesize = lumaStride * (lines + 2 * origPic->m_lumaMarginY);
28
     size_t padoffset = lumaStride * origPic->m_lumaMarginY + origPic->m_lumaMarginX;
29
-
30
     if (bAQEnabled)
31
     {
32
-        CHECKED_MALLOC(qpAqOffset, double, cuCount);
33
-        CHECKED_MALLOC(invQscaleFactor, int, cuCount);
34
-        CHECKED_MALLOC(qpCuTreeOffset, double, cuCount);
35
-        CHECKED_MALLOC(blockVariance, uint32_t, cuCount);
36
+        CHECKED_MALLOC_ZERO(qpAqOffset, double, cuCountFullRes);
37
+        CHECKED_MALLOC_ZERO(invQscaleFactor, int, cuCountFullRes);
38
+        CHECKED_MALLOC_ZERO(qpCuTreeOffset, double, cuCountFullRes);
39
+        CHECKED_MALLOC_ZERO(blockVariance, uint32_t, cuCountFullRes);
40
+        if (qgSize == 8)
41
+            CHECKED_MALLOC_ZERO(invQscaleFactor8x8, int, cuCount);
42
     }
43
     CHECKED_MALLOC(propagateCost, uint16_t, cuCount);
44
 
45
@@ -122,6 +130,7 @@
46
     X265_FREE(qpCuTreeOffset);
47
     X265_FREE(propagateCost);
48
     X265_FREE(blockVariance);
49
+    X265_FREE(invQscaleFactor8x8);
50
 }
51
 
52
 // (re) initialize lowres state
53
x265_2.0.tar.gz/source/common/lowres.h -> x265_2.1.tar.gz/source/common/lowres.h Changed
27
 
1
@@ -132,6 +132,8 @@
2
     MV*       lowresMvs[2][X265_BFRAME_MAX + 1];
3
     uint32_t  maxBlocksInRow;
4
     uint32_t  maxBlocksInCol;
5
+    uint32_t  maxBlocksInRowFullRes;
6
+    uint32_t  maxBlocksInColFullRes;
7
 
8
     /* used for vbvLookahead */
9
     int       plannedType[X265_LOOKAHEAD_MAX + 1];
10
@@ -143,6 +145,7 @@
11
     double*   qpAqOffset;      // AQ QP offset values for each 16x16 CU
12
     double*   qpCuTreeOffset;  // cuTree QP offset values for each 16x16 CU
13
     int*      invQscaleFactor; // qScale values for qp Aq Offsets
14
+    int*      invQscaleFactor8x8; // temporary buffer for qg-size 8
15
     uint32_t* blockVariance;
16
     uint64_t  wp_ssd[3];       // This is different than SSDY, this is sum(pixel^2) - sum(pixel)^2 for entire frame
17
     uint64_t  wp_sum[3];
18
@@ -153,7 +156,7 @@
19
     double    weightedCostDelta[X265_BFRAME_MAX + 2];
20
     ReferencePlanes weightedRef[X265_BFRAME_MAX + 2];
21
 
22
-    bool create(PicYuv *origPic, int _bframes, bool bAqEnabled);
23
+    bool create(PicYuv *origPic, int _bframes, bool bAqEnabled, uint32_t qgSize);
24
     void destroy();
25
     void init(PicYuv *origPic, int poc);
26
 };
27
x265_2.0.tar.gz/source/common/param.cpp -> x265_2.1.tar.gz/source/common/param.cpp Changed
150
 
1
@@ -2,6 +2,7 @@
2
  * Copyright (C) 2013 x265 project
3
  *
4
  * Authors: Deepthi Nandakumar <deepthi@multicorewareinc.com>
5
+ *          Min Chen <min.chen@multicorewareinc.com>
6
  *
7
  * This program is free software; you can redistribute it and/or modify
8
  * it under the terms of the GNU General Public License as published by
9
@@ -174,6 +175,7 @@
10
     param->bEnableTSkipFast = 0;
11
     param->maxNumReferences = 3;
12
     param->bEnableTemporalMvp = 1;
13
+    param->bSourceReferenceEstimation = 0;
14
 
15
     /* Loop Filter */
16
     param->bEnableLoopFilter = 1;
17
@@ -224,6 +226,10 @@
18
     param->rc.bEnableSlowFirstPass = 1;
19
     param->rc.bStrictCbr = 0;
20
     param->rc.bEnableGrain = 0;
21
+    param->rc.qpMin = 0;
22
+    param->rc.qpMax = QP_MAX_MAX;
23
+
24
+    param->bDiscardOptionalVUI = 0;
25
 
26
     /* Video Usability Information (VUI) */
27
     param->vui.aspectRatioIdc = 0;
28
@@ -249,6 +255,9 @@
29
     param->maxFALL = 0;
30
     param->minLuma = 0;
31
     param->maxLuma = PIXEL_MAX;
32
+    param->log2MaxPocLsb = 8;
33
+    param->bDiscardSEI = false;
34
+    param->maxSlices = 1;
35
 }
36
 
37
 int x265_param_default_preset(x265_param* param, const char* preset, const char* tune)
38
@@ -509,6 +518,7 @@
39
     bool bError = false;
40
     bool bNameWasBool = false;
41
     bool bValueWasNull = !value;
42
+    bool bExtraParams = false;
43
     char nameBuf[64];
44
 
45
     if (!name)
46
@@ -747,6 +757,7 @@
47
     OPT("vbv-init")    p->rc.vbvBufferInit = atof(value);
48
     OPT("crf-max")     p->rc.rfConstantMax = atof(value);
49
     OPT("crf-min")     p->rc.rfConstantMin = atof(value);
50
+    OPT("qpmax")       p->rc.qpMax = atoi(value);
51
     OPT("crf")
52
     {
53
         p->rc.rfConstant = atof(value);
54
@@ -885,7 +896,29 @@
55
     OPT("max-luma") p->maxLuma = (uint16_t)atoi(value);
56
     OPT("uhd-bd") p->uhdBluray = atobool(value);
57
     else
58
-        return X265_PARAM_BAD_NAME;
59
+        bExtraParams = true;
60
+
61
+    // solve "fatal error C1061: compiler limit : blocks nested too deeply"
62
+    if (bExtraParams)
63
+    {
64
+        bExtraParams = false;
65
+        if (0) ;
66
+        OPT("slices") p->maxSlices = atoi(value);
67
+        else
68
+            bExtraParams = true;
69
+    }
70
+
71
+    if (bExtraParams)
72
+    {
73
+        if (0) ;
74
+        OPT("qpmin") p->rc.qpMin = atoi(value);
75
+        OPT("analyze-src-pics") p->bSourceReferenceEstimation = atobool(value);
76
+        OPT("log2-max-poc-lsb") p->log2MaxPocLsb = atoi(value);
77
+        OPT("discard-sei") p->bDiscardSEI = atobool(value);
78
+        OPT("discard-vui") p->bDiscardOptionalVUI = atobool(value);
79
+        else
80
+            return X265_PARAM_BAD_NAME;
81
+    }
82
 #undef OPT
83
 #undef atobool
84
 #undef atoi
85
@@ -1041,6 +1074,8 @@
86
     uint32_t tuQTMaxLog2Size = X265_MIN(maxLog2CUSize, 5);
87
     uint32_t tuQTMinLog2Size = 2; //log2(4)
88
 
89
+    CHECK((param->maxSlices > 1) && !param->bEnableWavefront,
90
+        "Multiple-Slices mode must be enable Wavefront Parallel Processing (--wpp)");
91
     CHECK(param->internalBitDepth != X265_DEPTH,
92
           "internalBitDepth must match compiled bit depth");
93
     CHECK(param->minCUSize != 64 && param->minCUSize != 32 && param->minCUSize != 16 && param->minCUSize != 8,
94
@@ -1208,6 +1243,14 @@
95
           "Strict-cbr cannot be applied without specifying target bitrate or vbv bufsize");
96
     CHECK(param->analysisMode && (param->analysisMode < X265_ANALYSIS_OFF || param->analysisMode > X265_ANALYSIS_LOAD),
97
         "Invalid analysis mode. Analysis mode 0: OFF 1: SAVE : 2 LOAD");
98
+    CHECK(param->rc.qpMax < QP_MIN || param->rc.qpMax > QP_MAX_MAX,
99
+        "qpmax exceeds supported range (0 to 69)");
100
+    CHECK(param->rc.qpMin < QP_MIN || param->rc.qpMin > QP_MAX_MAX,
101
+        "qpmin exceeds supported range (0 to 69)");
102
+    CHECK(param->log2MaxPocLsb < 4,
103
+        "maximum of the picture order count can not be less than 4");
104
+    CHECK(1 > param->maxSlices || param->maxSlices > ((param->sourceHeight + param->maxCUSize - 1) / param->maxCUSize),
105
+        "The slices can not be more than number of rows");
106
     return check_failed;
107
 }
108
 
109
@@ -1258,12 +1301,9 @@
110
         // compute actual CU depth with respect to config depth and max transform size
111
         g_maxCUDepth    = maxLog2CUSize - minLog2CUSize;
112
         g_unitSizeDepth = maxLog2CUSize - LOG2_UNIT_SIZE;
113
-
114
-        // initialize partition order
115
-        uint32_t* tmp = &g_zscanToRaster[0];
116
-        initZscanToRaster(g_unitSizeDepth, 1, 0, tmp);
117
-        initRasterToZscan(g_unitSizeDepth);
118
     }
119
+
120
+    g_maxSlices = param->maxSlices;
121
     return 0;
122
 }
123
 
124
@@ -1363,6 +1403,8 @@
125
     TOOLOPT(param->bEnableFastIntra, "fast-intra");
126
     TOOLOPT(param->bEnableStrongIntraSmoothing, "strong-intra-smoothing");
127
     TOOLVAL(param->lookaheadSlices, "lslices=%d");
128
+    if (param->maxSlices > 1)
129
+        TOOLVAL(param->maxSlices, "slices=%d");
130
     if (param->bEnableLoopFilter)
131
     {
132
         if (param->deblockingFilterBetaOffset || param->deblockingFilterTCOffset)
133
@@ -1443,6 +1485,7 @@
134
     s += sprintf(s, " psy-rd=%.2f", p->psyRd);
135
     s += sprintf(s, " rdoq-level=%d", p->rdoqLevel);
136
     s += sprintf(s, " psy-rdoq=%.2f", p->psyRdoq);
137
+    s += sprintf(s, " log2-max-poc-lsb=%d", p->log2MaxPocLsb);
138
     BOOL(p->bEnableRdRefine, "rd-refine");
139
     BOOL(p->bEnableSignHiding, "signhide");
140
     BOOL(p->bEnableLoopFilter, "deblock");
141
@@ -1463,7 +1506,7 @@
142
         else
143
             s += sprintf(s, " bitrate=%d", p->rc.bitrate);
144
         s += sprintf(s, " qcomp=%.2f qpmin=%d qpmax=%d qpstep=%d",
145
-                     p->rc.qCompress, QP_MIN, QP_MAX_SPEC, p->rc.qpStep);
146
+                     p->rc.qCompress, p->rc.qpMin, p->rc.qpMax, p->rc.qpStep);
147
         if (p->rc.bStatRead)
148
             s += sprintf( s, " cplxblur=%.1f qblur=%.1f",
149
                           p->rc.complexityBlur, p->rc.qblur);
150
x265_2.0.tar.gz/source/common/pixel.cpp -> x265_2.1.tar.gz/source/common/pixel.cpp Changed
32
 
1
@@ -848,14 +848,13 @@
2
                                     const int32_t* invQscales, const double* fpsFactor, int len)
3
 {
4
     double fps = *fpsFactor / 256;  // range[0.01, 1.00]
5
-
6
     for (int i = 0; i < len; i++)
7
     {
8
         int intraCost = intraCosts[i];
9
         int interCost = X265_MIN(intraCosts[i], interCosts[i] & LOWRES_COST_MASK);
10
-        double propagateIntra  = intraCost * invQscales[i]; // Q16 x Q8.8 = Q24.8
11
+        double propagateIntra = intraCost * invQscales[i]; // Q16 x Q8.8 = Q24.8
12
         double propagateAmount = (double)propagateIn[i] + propagateIntra * fps; // Q16.0 + Q24.8 x Q0.x = Q25.0
13
-        double propagateNum    = (double)(intraCost - interCost); // Q32 - Q32 = Q33.0
14
+        double propagateNum = (double)(intraCost - interCost); // Q32 - Q32 = Q33.0
15
 
16
 #if 0
17
         // algorithm that output match to asm
18
@@ -866,10 +865,11 @@
19
         float propagateDenom = intraRcpError2 - intraRcpError1;
20
         dst[i] = (int)(propagateAmount * propagateNum * (double)propagateDenom + 0.5);
21
 #else
22
-        double propagateDenom  = (double)intraCost;             // Q32
23
+        double propagateDenom = (double)intraCost;             // Q32
24
         dst[i] = (int)(propagateAmount * propagateNum / propagateDenom + 0.5);
25
 #endif
26
-    }
27
+        }
28
+    //}
29
 }
30
 
31
 /* Conversion between double and Q8.8 fixed point (big-endian) for storage */
32
x265_2.0.tar.gz/source/common/predict.cpp -> x265_2.1.tar.gz/source/common/predict.cpp Changed
31
 
1
@@ -671,17 +671,14 @@
2
     int numIntraNeighbor;
3
     bool* bNeighborFlags = intraNeighbors->bNeighborFlags;
4
 
5
-    uint32_t numPartInWidth = 1 << (cu.m_log2CUSize[0] - LOG2_UNIT_SIZE - tuDepth);
6
-    uint32_t partIdxLT = cu.m_absIdxInCTU + absPartIdx;
7
-    uint32_t partIdxRT = g_rasterToZscan[g_zscanToRaster[partIdxLT] + numPartInWidth - 1];
8
-
9
     uint32_t tuSize = 1 << log2TrSize;
10
     int  tuWidthInUnits = tuSize >> log2UnitWidth;
11
     int  tuHeightInUnits = tuSize >> log2UnitHeight;
12
     int  aboveUnits = tuWidthInUnits << 1;
13
     int  leftUnits = tuHeightInUnits << 1;
14
-    int  partIdxStride = cu.m_slice->m_sps->numPartInCUSize;
15
-    uint32_t partIdxLB = g_rasterToZscan[g_zscanToRaster[partIdxLT] + ((tuHeightInUnits - 1) * partIdxStride)];
16
+    uint32_t partIdxLT = cu.m_absIdxInCTU + absPartIdx;
17
+    uint32_t partIdxRT = g_rasterToZscan[g_zscanToRaster[partIdxLT] + tuWidthInUnits - 1];
18
+    uint32_t partIdxLB = g_rasterToZscan[g_zscanToRaster[partIdxLT] + ((tuHeightInUnits - 1) << LOG2_RASTER_SIZE)];
19
 
20
     if (cu.m_slice->isIntra() || !cu.m_slice->m_pps->bConstrainedIntraPred)
21
     {
22
@@ -910,7 +907,7 @@
23
 {
24
     const uint32_t rasterPartBegin = g_zscanToRaster[partIdxLT];
25
     const uint32_t rasterPartEnd = g_zscanToRaster[partIdxLB];
26
-    const uint32_t idxStep = cu.m_slice->m_sps->numPartInCUSize;
27
+    const uint32_t idxStep = RASTER_SIZE;
28
     int numIntra = 0;
29
 
30
     for (uint32_t rasterPart = rasterPartBegin; rasterPart <= rasterPartEnd; rasterPart += idxStep, bValidFlags--) // opposite direction
31
x265_2.0.tar.gz/source/common/slice.h -> x265_2.1.tar.gz/source/common/slice.h Changed
25
 
1
@@ -2,6 +2,7 @@
2
  * Copyright (C) 2015 x265 project
3
  *
4
  * Authors: Steve Borho <steve@borho.org>
5
+ *          Min Chen <chenm003@163.com>
6
  *
7
  * This program is free software; you can redistribute it and/or modify
8
  * it under the terms of the GNU General Public License as published by
9
@@ -223,6 +224,7 @@
10
 
11
     int      log2MinCodingBlockSize;
12
     int      log2DiffMaxMinCodingBlockSize;
13
+    int      log2MaxPocLsb;
14
 
15
     uint32_t quadtreeTULog2MaxSize;
16
     uint32_t quadtreeTULog2MinSize;
17
@@ -241,6 +243,7 @@
18
     bool     bUseAMP; // use param
19
     bool     bUseStrongIntraSmoothing; // use param
20
     bool     bTemporalMVPEnabled;
21
+    bool     bDiscardOptionalVUI;
22
 
23
     Window   conformanceWindow;
24
     VUI      vuiParameters;
25
x265_2.0.tar.gz/source/common/threadpool.cpp -> x265_2.1.tar.gz/source/common/threadpool.cpp Changed
12
 
1
@@ -299,8 +299,8 @@
2
      * For windows because threads can't be allocated to live across sockets
3
      * changing the default behavior to be per-socket pools -- FIXME */
4
 #if defined(_WIN32_WINNT) && _WIN32_WINNT >= _WIN32_WINNT_WIN7
5
-     if (!p->numaPools)
6
-     {
7
+    if (!p->numaPools || (strcmp(p->numaPools, "NULL") == 0 || strcmp(p->numaPools, "*") == 0 || strcmp(p->numaPools, "") == 0))
8
+    {
9
          char poolString[50] = "";
10
          for (int i = 0; i < numNumaNodes; i++)
11
          {
12
x265_2.0.tar.gz/source/common/x86/dct8.asm -> x265_2.1.tar.gz/source/common/x86/dct8.asm Changed
21
 
1
@@ -2112,9 +2112,8 @@
2
     pxor     m5,  m5
3
     shr      r3d, 3
4
 .loop:
5
-    mova     m0, [r0]
6
+    movu     m0, [r0]
7
     pabsw    m1, m0
8
-
9
     movu     m2, [r1]
10
     pmovsxwd m3, m1
11
     paddd    m2, m3
12
@@ -2130,7 +2129,7 @@
13
     pcmpgtw  m4, m1, m5
14
     pand     m1, m4
15
     psignw   m1, m0
16
-    mova     [r0], m1
17
+    movu     [r0], m1
18
     add      r0, 16
19
     add      r1, 32
20
     add      r2, 16
21
x265_2.0.tar.gz/source/encoder/analysis.cpp -> x265_2.1.tar.gz/source/encoder/analysis.cpp Changed
143
 
1
@@ -255,7 +255,7 @@
2
             cuPrevCost = origCUCost;
3
 
4
             int modCUQP = qp + dir;
5
-            while (modCUQP >= QP_MIN && modCUQP <= QP_MAX_SPEC)
6
+            while (modCUQP >= m_param->rc.qpMin && modCUQP <= QP_MAX_SPEC)
7
             {
8
                 recodeCU(parentCTU, cuGeom, modCUQP, qp);
9
                 cuCost = md.bestMode->rdCost;
10
@@ -1731,19 +1731,19 @@
11
                     ProfileCounter(parentCTU, skippedIntraCU[cuGeom.depth]);
12
                 }
13
             }
14
-            if ((md.bestMode->cu.isInter(0) && !(md.bestMode->cu.m_mergeFlag[0] && md.bestMode->cu.m_partSize[0] == SIZE_2Nx2N)) && (m_frame->m_fencPic->m_picCsp == X265_CSP_I400 && m_csp != X265_CSP_I400))
15
-            {
16
-                uint32_t numPU = md.bestMode->cu.getNumPartInter(0);
17
+        }
18
 
19
-                for (uint32_t puIdx = 0; puIdx < numPU; puIdx++)
20
-                {
21
-                    PredictionUnit pu(md.bestMode->cu, cuGeom, puIdx);
22
-                    motionCompensation(md.bestMode->cu, pu, md.bestMode->predYuv, false, m_csp != X265_CSP_I400);
23
-                }
24
-                encodeResAndCalcRdInterCU(*md.bestMode, cuGeom);
25
+        if ((md.bestMode->cu.isInter(0) && !(md.bestMode->cu.m_mergeFlag[0] && md.bestMode->cu.m_partSize[0] == SIZE_2Nx2N)) && (m_frame->m_fencPic->m_picCsp == X265_CSP_I400 && m_csp != X265_CSP_I400))
26
+        {
27
+            uint32_t numPU = md.bestMode->cu.getNumPartInter(0);
28
+
29
+            for (uint32_t puIdx = 0; puIdx < numPU; puIdx++)
30
+            {
31
+                PredictionUnit pu(md.bestMode->cu, cuGeom, puIdx);
32
+                motionCompensation(md.bestMode->cu, pu, md.bestMode->predYuv, false, m_csp != X265_CSP_I400);
33
             }
34
+            encodeResAndCalcRdInterCU(*md.bestMode, cuGeom);
35
         }
36
-
37
         if (m_bTryLossless)
38
             tryLossless(cuGeom);
39
 
40
@@ -1936,10 +1936,26 @@
41
     }
42
     for (uint32_t i = 0; i < numMergeCand; ++i)
43
     {
44
-        if (m_bFrameParallel &&
45
-            (candMvField[i][0].mv.y >= (m_param->searchRange + 1) * 4 ||
46
-            candMvField[i][1].mv.y >= (m_param->searchRange + 1) * 4))
47
-            continue;
48
+        if (m_bFrameParallel)
49
+        {
50
+            // Parallel slices bound check
51
+            if (m_param->maxSlices > 1)
52
+            {
53
+                // NOTE: First row in slice can't negative
54
+                if ((candMvField[i][0].mv.y < m_sliceMinY) | (candMvField[i][1].mv.y < m_sliceMinY))
55
+                    continue;
56
+
57
+                // Last row in slice can't reference beyond bound since it is another slice area
58
+                // TODO: we may beyond bound in future since these area have a chance to finish because we use parallel slices. Necessary prepare research on load balance
59
+                if ((candMvField[i][0].mv.y > m_sliceMaxY) | (candMvField[i][1].mv.y > m_sliceMaxY))
60
+                    continue;
61
+            }
62
+
63
+            if (candMvField[i][0].mv.y >= (m_param->searchRange + 1) * 4 ||
64
+                candMvField[i][1].mv.y >= (m_param->searchRange + 1) * 4)
65
+                continue;
66
+        }
67
+
68
         if (m_param->bIntraRefresh && m_slice->m_sliceType == P_SLICE &&
69
             tempPred->cu.m_cuPelX / g_maxCUSize < m_frame->m_encData->m_pir.pirEndCol &&
70
             candMvField[i][0].mv.x > maxSafeMv)
71
@@ -2050,10 +2066,25 @@
72
     }
73
     for (uint32_t i = 0; i < numMergeCand; i++)
74
     {
75
-        if (m_bFrameParallel &&
76
-            (candMvField[i][0].mv.y >= (m_param->searchRange + 1) * 4 ||
77
-            candMvField[i][1].mv.y >= (m_param->searchRange + 1) * 4))
78
-            continue;
79
+        if (m_bFrameParallel)
80
+        {
81
+            // Parallel slices bound check
82
+            if (m_param->maxSlices > 1)
83
+            {
84
+                // NOTE: First row in slice can't negative
85
+                if ((candMvField[i][0].mv.y < m_sliceMinY) | (candMvField[i][1].mv.y < m_sliceMinY))
86
+                    continue;
87
+
88
+                // Last row in slice can't reference beyond bound since it is another slice area
89
+                // TODO: we may beyond bound in future since these area have a chance to finish because we use parallel slices. Necessary prepare research on load balance
90
+                if ((candMvField[i][0].mv.y > m_sliceMaxY) | (candMvField[i][1].mv.y > m_sliceMaxY))
91
+                    continue;
92
+            }
93
+
94
+            if (candMvField[i][0].mv.y >= (m_param->searchRange + 1) * 4 ||
95
+                candMvField[i][1].mv.y >= (m_param->searchRange + 1) * 4)
96
+                continue;
97
+        }
98
 
99
         /* the merge candidate list is packed with MV(0,0) ref 0 when it is not full */
100
         if (candDir[i] == 1 && !candMvField[i][0].mv.word && !candMvField[i][0].refIdx)
101
@@ -2637,7 +2668,11 @@
102
 {
103
     FrameData& curEncData = *m_frame->m_encData;
104
     double qp = baseQp >= 0 ? baseQp : curEncData.m_cuStat[ctu.m_cuAddr].baseQp;
105
-
106
+    int loopIncr;
107
+    if (m_param->rc.qgSize == 8)
108
+        loopIncr = 8;
109
+    else
110
+        loopIncr = 16;
111
     /* Use cuTree offsets if cuTree enabled and frame is referenced, else use AQ offsets */
112
     bool isReferenced = IS_REFERENCED(m_frame);
113
     double *qpoffs = (isReferenced && m_param->rc.cuTree) ? m_frame->m_lowres.qpCuTreeOffset : m_frame->m_lowres.qpAqOffset;
114
@@ -2647,17 +2682,17 @@
115
         uint32_t height = m_frame->m_fencPic->m_picHeight;
116
         uint32_t block_x = ctu.m_cuPelX + g_zscanToPelX[cuGeom.absPartIdx];
117
         uint32_t block_y = ctu.m_cuPelY + g_zscanToPelY[cuGeom.absPartIdx];
118
-        uint32_t maxCols = (m_frame->m_fencPic->m_picWidth + (16 - 1)) / 16;
119
+        uint32_t maxCols = (m_frame->m_fencPic->m_picWidth + (loopIncr - 1)) / loopIncr;
120
         uint32_t blockSize = g_maxCUSize >> cuGeom.depth;
121
         double qp_offset = 0;
122
         uint32_t cnt = 0;
123
         uint32_t idx;
124
 
125
-        for (uint32_t block_yy = block_y; block_yy < block_y + blockSize && block_yy < height; block_yy += 16)
126
+        for (uint32_t block_yy = block_y; block_yy < block_y + blockSize && block_yy < height; block_yy += loopIncr)
127
         {
128
-            for (uint32_t block_xx = block_x; block_xx < block_x + blockSize && block_xx < width; block_xx += 16)
129
+            for (uint32_t block_xx = block_x; block_xx < block_x + blockSize && block_xx < width; block_xx += loopIncr)
130
             {
131
-                idx = ((block_yy / 16) * (maxCols)) + (block_xx / 16);
132
+                idx = ((block_yy / loopIncr) * (maxCols)) + (block_xx / loopIncr);
133
                 qp_offset += qpoffs[idx];
134
                 cnt++;
135
             }
136
@@ -2667,5 +2702,5 @@
137
         qp += qp_offset;
138
     }
139
 
140
-    return x265_clip3(QP_MIN, QP_MAX_MAX, (int)(qp + 0.5));
141
+    return x265_clip3(m_param->rc.qpMin, m_param->rc.qpMax, (int)(qp + 0.5));
142
 }
143
x265_2.0.tar.gz/source/encoder/analysis.h -> x265_2.1.tar.gz/source/encoder/analysis.h Changed
12
 
1
@@ -126,8 +126,8 @@
2
     uint8_t*             m_reusePartSize;
3
     uint8_t*             m_reuseMergeFlag;
4
 
5
-    uint32_t m_splitRefIdx[4];
6
-    uint64_t* cacheCost;
7
+    uint32_t             m_splitRefIdx[4];
8
+    uint64_t*            cacheCost;
9
 
10
     /* refine RD based on QP for rd-levels 5 and 6 */
11
     void qprdRefine(const CUData& parentCTU, const CUGeom& cuGeom, int32_t qp, int32_t lqp);
12
x265_2.0.tar.gz/source/encoder/api.cpp -> x265_2.1.tar.gz/source/encoder/api.cpp Changed
11
 
1
@@ -282,6 +282,9 @@
2
     pic->colorSpace = param->internalCsp;
3
     pic->forceqp = X265_QP_AUTO;
4
     pic->quantOffsets = NULL;
5
+    pic->userSEI.payloads = NULL;
6
+    pic->userSEI.numPayloads = 0;
7
+
8
     if (param->analysisMode)
9
     {
10
         uint32_t widthInCU       = (param->sourceWidth  + g_maxCUSize - 1) >> g_maxLog2CUSize;
11
x265_2.0.tar.gz/source/encoder/dpb.cpp -> x265_2.1.tar.gz/source/encoder/dpb.cpp Changed
42
 
1
@@ -2,6 +2,7 @@
2
  * Copyright (C) 2013 x265 project
3
  *
4
  * Authors: Steve Borho <steve@borho.org>
5
+ *          Min Chen <chenm003@163.com>
6
  *
7
  * This program is free software; you can redistribute it and/or modify
8
  * it under the terms of the GNU General Public License as published by
9
@@ -71,14 +72,18 @@
10
         iterFrame = iterFrame->m_next;
11
         if (!curFrame->m_encData->m_bHasReferences && !curFrame->m_countRefEncoders)
12
         {
13
-            curFrame->m_reconRowCount.set(0);
14
             curFrame->m_bChromaExtended = false;
15
 
16
             // Reset column counter
17
+            X265_CHECK(curFrame->m_reconRowFlag != NULL, "curFrame->m_reconRowFlag check failure");
18
             X265_CHECK(curFrame->m_reconColCount != NULL, "curFrame->m_reconColCount check failure");
19
             X265_CHECK(curFrame->m_numRows > 0, "curFrame->m_numRows check failure");
20
-            for(int32_t col = 0; col < curFrame->m_numRows; col++)
21
-                curFrame->m_reconColCount[col].set(0);
22
+
23
+            for(int32_t row = 0; row < curFrame->m_numRows; row++)
24
+            {
25
+                curFrame->m_reconRowFlag[row].set(0);
26
+                curFrame->m_reconColCount[row].set(0);
27
+            }
28
 
29
             // iterator is invalidated by remove, restart scan
30
             m_picList.remove(*curFrame);
31
@@ -167,7 +172,9 @@
32
         slice->m_colFromL0Flag = true;
33
         slice->m_colRefIdx = 0;
34
     }
35
-    slice->m_sLFaseFlag = (SLFASE_CONSTANT & (1 << (pocCurr % 31))) > 0;
36
+
37
+    // Disable Loopfilter in bound area, because we will do slice-parallelism in future
38
+    slice->m_sLFaseFlag = (g_maxSlices > 1) ? false : ((SLFASE_CONSTANT & (1 << (pocCurr % 31))) > 0);
39
 
40
     /* Increment reference count of all motion-referenced frames to prevent them
41
      * from being recycled. These counts are decremented at the end of
42
x265_2.0.tar.gz/source/encoder/encoder.cpp -> x265_2.1.tar.gz/source/encoder/encoder.cpp Changed
157
 
1
@@ -151,6 +151,8 @@
2
         m_aborted = true;
3
     }
4
 
5
+    x265_log(p, X265_LOG_INFO, "Slices                              : %d\n", p->maxSlices);
6
+
7
     char buf[128];
8
     int len = 0;
9
     if (p->bEnableWavefront)
10
@@ -589,10 +591,27 @@
11
         inFrame->m_pts       = pic_in->pts;
12
         inFrame->m_forceqp   = pic_in->forceqp;
13
         inFrame->m_param     = m_reconfigure ? m_latestParam : m_param;
14
-        
15
+
16
+        if (pic_in->userSEI.numPayloads)
17
+        {
18
+            int numPayloads = inFrame->m_userSEI.numPayloads = pic_in->userSEI.numPayloads;
19
+            inFrame->m_userSEI.payloads = new x265_sei_payload[numPayloads];
20
+            for (int i = 0; i < numPayloads; i++)
21
+            {
22
+                int size = inFrame->m_userSEI.payloads[i].payloadSize = pic_in->userSEI.payloads[i].payloadSize;
23
+                inFrame->m_userSEI.payloads[i].payloadType = pic_in->userSEI.payloads[i].payloadType;
24
+                inFrame->m_userSEI.payloads[i].payload = new uint8_t[size];
25
+                memcpy(inFrame->m_userSEI.payloads[i].payload, pic_in->userSEI.payloads[i].payload, size);
26
+            }
27
+        }
28
+
29
         if (pic_in->quantOffsets != NULL)
30
         {
31
-            int cuCount = inFrame->m_lowres.maxBlocksInRow * inFrame->m_lowres.maxBlocksInCol;
32
+            int cuCount;
33
+            if (m_param->rc.qgSize == 8)
34
+                cuCount = inFrame->m_lowres.maxBlocksInRowFullRes * inFrame->m_lowres.maxBlocksInColFullRes;
35
+            else
36
+                cuCount = inFrame->m_lowres.maxBlocksInRow * inFrame->m_lowres.maxBlocksInCol;
37
             memcpy(inFrame->m_quantOffsets, pic_in->quantOffsets, cuCount * sizeof(float));
38
         }
39
 
40
@@ -776,9 +795,8 @@
41
             if (m_param->rc.bStatWrite)
42
                 if (m_rateControl->writeRateControlFrameStats(outFrame, &curEncoder->m_rce))
43
                     m_aborted = true;
44
-
45
-            if (pic_out && m_param->rc.bStatWrite)
46
-            {
47
+            if (pic_out)
48
+            { 
49
                 /* m_rcData is allocated for every frame */
50
                 pic_out->rcData = outFrame->m_rcData;
51
                 outFrame->m_rcData->qpaRc = outFrame->m_encData->m_avgQpRc;
52
@@ -1320,7 +1338,7 @@
53
     m_analyzeAll.m_maxFALL += curFrame->m_fencPic->m_avgLumaLevel;
54
     m_analyzeAll.m_maxCLL = X265_MAX(m_analyzeAll.m_maxCLL, curFrame->m_fencPic->m_maxLumaLevel);
55
 
56
-    char c = (slice->isIntra() ? 'I' : slice->isInterP() ? 'P' : 'B');
57
+    char c = (slice->isIntra() ? (curFrame->m_lowres.sliceType == X265_TYPE_IDR ? 'I' : 'i') : slice->isInterP() ? 'P' : 'B');
58
     int poc = slice->m_poc;
59
     if (!IS_REFERENCED(curFrame))
60
         c += 32; // lower case if unreferenced
61
@@ -1411,7 +1429,7 @@
62
     list.serialize(NAL_UNIT_SPS, bs);
63
 
64
     bs.resetBits();
65
-    sbacCoder.codePPS(m_pps);
66
+    sbacCoder.codePPS(m_pps, (m_param->maxSlices <= 1));
67
     bs.writeByteAlignment();
68
     list.serialize(NAL_UNIT_PPS, bs);
69
 
70
@@ -1440,7 +1458,7 @@
71
         list.serialize(NAL_UNIT_PREFIX_SEI, bs);
72
     }
73
 
74
-    if (m_param->bEmitInfoSEI)
75
+    if (!m_param->bDiscardSEI && m_param->bEmitInfoSEI)
76
     {
77
         char *opts = x265_param2string(m_param);
78
         if (opts)
79
@@ -1456,6 +1474,7 @@
80
                 
81
                 bs.resetBits();
82
                 SEIuserDataUnregistered idsei;
83
+                idsei.m_payloadType = USER_DATA_UNREGISTERED;
84
                 idsei.m_userData = (uint8_t*)buffer;
85
                 idsei.m_userDataLength = (uint32_t)strlen(buffer);
86
                 idsei.write(bs, m_sps);
87
@@ -1469,7 +1488,7 @@
88
         }
89
     }
90
 
91
-    if (m_param->bEmitHRDSEI || !!m_param->interlaceMode)
92
+    if (!m_param->bDiscardSEI && (m_param->bEmitHRDSEI || !!m_param->interlaceMode))
93
     {
94
         /* Picture Timing and Buffering Period SEI require the SPS to be "activated" */
95
         SEIActiveParameterSets sei;
96
@@ -1524,6 +1543,14 @@
97
 
98
     sps->bUseStrongIntraSmoothing = m_param->bEnableStrongIntraSmoothing;
99
     sps->bTemporalMVPEnabled = m_param->bEnableTemporalMvp;
100
+    sps->bDiscardOptionalVUI = m_param->bDiscardOptionalVUI;
101
+    sps->log2MaxPocLsb = m_param->log2MaxPocLsb;
102
+    int maxDeltaPOC = (m_param->bframes + 2) * (!!m_param->bBPyramid + 1) * 2;
103
+    while ((1 << sps->log2MaxPocLsb) <= maxDeltaPOC * 2)
104
+        sps->log2MaxPocLsb++;
105
+
106
+    if (sps->log2MaxPocLsb != m_param->log2MaxPocLsb)
107
+        x265_log(m_param, X265_LOG_WARNING, "Reset log2MaxPocLsb to %d to account for all POC values\n", sps->log2MaxPocLsb);
108
 
109
     VUI& vui = sps->vuiParameters;
110
     vui.aspectRatioInfoPresentFlag = !!m_param->vui.aspectRatioIdc;
111
@@ -1570,7 +1597,7 @@
112
     {
113
         pps->bUseDQP = true;
114
         pps->maxCuDQPDepth = g_log2Size[m_param->maxCUSize] - g_log2Size[m_param->rc.qgSize];
115
-        X265_CHECK(pps->maxCuDQPDepth <= 2, "max CU DQP depth cannot be greater than 2\n");
116
+        X265_CHECK(pps->maxCuDQPDepth <= 3, "max CU DQP depth cannot be greater than 3\n");
117
     }
118
     else
119
     {
120
@@ -1633,7 +1660,7 @@
121
         double fps = (double)p->fpsNum / p->fpsDenom;
122
         p->keyframeMin = X265_MIN((int)fps, p->keyframeMax / 10);
123
     }
124
-    p->keyframeMin = X265_MAX(1, X265_MIN(p->keyframeMin, p->keyframeMax / 2 + 1));
125
+    p->keyframeMin = X265_MAX(1, p->keyframeMin);
126
 
127
     if (!p->bframes)
128
         p->bBPyramid = 0;
129
@@ -1854,10 +1881,10 @@
130
     bool bIsVbv = m_param->rc.vbvBufferSize > 0 && m_param->rc.vbvMaxBitrate > 0;
131
     if (!m_param->bLossless && (m_param->rc.aqMode || bIsVbv))
132
     {
133
-        if (p->rc.qgSize < X265_MAX(16, p->minCUSize))
134
+        if (p->rc.qgSize < X265_MAX(8, p->minCUSize))
135
         {
136
-            p->rc.qgSize = X265_MAX(16, p->minCUSize);
137
-            x265_log(p, X265_LOG_WARNING, "QGSize should be greater than or equal to 16 and minCUSize, setting QGSize = %d\n", p->rc.qgSize);
138
+            p->rc.qgSize = X265_MAX(8, p->minCUSize);
139
+            x265_log(p, X265_LOG_WARNING, "QGSize should be greater than or equal to 8 and minCUSize, setting QGSize = %d\n", p->rc.qgSize);
140
         }
141
         if (p->rc.qgSize > p->maxCUSize)
142
         {
143
@@ -1979,6 +2006,13 @@
144
 
145
     if (p->csvfn)
146
         x265_log(p, X265_LOG_WARNING, "libx265 no longer supports CSV file statistics\n");
147
+
148
+    if (p->log2MaxPocLsb < 4)
149
+    {
150
+        x265_log(p, X265_LOG_WARNING, "maximum of the picture order count can not be less than 4\n");
151
+        p->log2MaxPocLsb = 4;
152
+    }
153
+
154
 }
155
 
156
 void Encoder::allocAnalysis(x265_analysis_data* analysis)
157
x265_2.0.tar.gz/source/encoder/entropy.cpp -> x265_2.1.tar.gz/source/encoder/entropy.cpp Changed
201
 
1
@@ -285,7 +285,7 @@
2
 
3
     WRITE_UVLC(X265_DEPTH - 8,   "bit_depth_luma_minus8");
4
     WRITE_UVLC(X265_DEPTH - 8,   "bit_depth_chroma_minus8");
5
-    WRITE_UVLC(BITS_FOR_POC - 4, "log2_max_pic_order_cnt_lsb_minus4");
6
+    WRITE_UVLC(sps.log2MaxPocLsb - 4, "log2_max_pic_order_cnt_lsb_minus4");
7
     WRITE_FLAG(true,             "sps_sub_layer_ordering_info_present_flag");
8
 
9
     for (uint32_t i = 0; i < sps.maxTempSubLayers; i++)
10
@@ -319,12 +319,12 @@
11
     WRITE_FLAG(sps.bUseStrongIntraSmoothing, "sps_strong_intra_smoothing_enable_flag");
12
 
13
     WRITE_FLAG(1, "vui_parameters_present_flag");
14
-    codeVUI(sps.vuiParameters, sps.maxTempSubLayers);
15
+    codeVUI(sps.vuiParameters, sps.maxTempSubLayers, sps.bDiscardOptionalVUI);
16
 
17
     WRITE_FLAG(0, "sps_extension_flag");
18
 }
19
 
20
-void Entropy::codePPS(const PPS& pps)
21
+void Entropy::codePPS(const PPS& pps, bool filerAcross)
22
 {
23
     WRITE_UVLC(0,                          "pps_pic_parameter_set_id");
24
     WRITE_UVLC(0,                          "pps_seq_parameter_set_id");
25
@@ -353,7 +353,7 @@
26
     WRITE_FLAG(pps.bTransquantBypassEnabled,  "transquant_bypass_enable_flag");
27
     WRITE_FLAG(0,                             "tiles_enabled_flag");
28
     WRITE_FLAG(pps.bEntropyCodingSyncEnabled, "entropy_coding_sync_enabled_flag");
29
-    WRITE_FLAG(1,                             "loop_filter_across_slices_enabled_flag");
30
+    WRITE_FLAG(filerAcross,                   "loop_filter_across_slices_enabled_flag");
31
 
32
     WRITE_FLAG(pps.bDeblockingFilterControlPresent, "deblocking_filter_control_present_flag");
33
     if (pps.bDeblockingFilterControlPresent)
34
@@ -422,65 +422,75 @@
35
     }
36
 }
37
 
38
-void Entropy::codeVUI(const VUI& vui, int maxSubTLayers)
39
+void Entropy::codeVUI(const VUI& vui, int maxSubTLayers, bool bDiscardOptionalVUI)
40
 {
41
-    WRITE_FLAG(vui.aspectRatioInfoPresentFlag,  "aspect_ratio_info_present_flag");
42
+    WRITE_FLAG(vui.aspectRatioInfoPresentFlag, "aspect_ratio_info_present_flag");
43
     if (vui.aspectRatioInfoPresentFlag)
44
     {
45
-        WRITE_CODE(vui.aspectRatioIdc, 8,       "aspect_ratio_idc");
46
+        WRITE_CODE(vui.aspectRatioIdc, 8, "aspect_ratio_idc");
47
         if (vui.aspectRatioIdc == 255)
48
         {
49
-            WRITE_CODE(vui.sarWidth, 16,        "sar_width");
50
-            WRITE_CODE(vui.sarHeight, 16,       "sar_height");
51
+            WRITE_CODE(vui.sarWidth, 16, "sar_width");
52
+            WRITE_CODE(vui.sarHeight, 16, "sar_height");
53
         }
54
     }
55
 
56
-    WRITE_FLAG(vui.overscanInfoPresentFlag,     "overscan_info_present_flag");
57
+    WRITE_FLAG(vui.overscanInfoPresentFlag, "overscan_info_present_flag");
58
     if (vui.overscanInfoPresentFlag)
59
         WRITE_FLAG(vui.overscanAppropriateFlag, "overscan_appropriate_flag");
60
 
61
-    WRITE_FLAG(vui.videoSignalTypePresentFlag,  "video_signal_type_present_flag");
62
+    WRITE_FLAG(vui.videoSignalTypePresentFlag, "video_signal_type_present_flag");
63
     if (vui.videoSignalTypePresentFlag)
64
     {
65
-        WRITE_CODE(vui.videoFormat, 3,          "video_format");
66
-        WRITE_FLAG(vui.videoFullRangeFlag,      "video_full_range_flag");
67
+        WRITE_CODE(vui.videoFormat, 3, "video_format");
68
+        WRITE_FLAG(vui.videoFullRangeFlag, "video_full_range_flag");
69
         WRITE_FLAG(vui.colourDescriptionPresentFlag, "colour_description_present_flag");
70
         if (vui.colourDescriptionPresentFlag)
71
         {
72
-            WRITE_CODE(vui.colourPrimaries, 8,         "colour_primaries");
73
+            WRITE_CODE(vui.colourPrimaries, 8, "colour_primaries");
74
             WRITE_CODE(vui.transferCharacteristics, 8, "transfer_characteristics");
75
-            WRITE_CODE(vui.matrixCoefficients, 8,      "matrix_coefficients");
76
+            WRITE_CODE(vui.matrixCoefficients, 8, "matrix_coefficients");
77
         }
78
     }
79
 
80
-    WRITE_FLAG(vui.chromaLocInfoPresentFlag,           "chroma_loc_info_present_flag");
81
+    WRITE_FLAG(vui.chromaLocInfoPresentFlag, "chroma_loc_info_present_flag");
82
     if (vui.chromaLocInfoPresentFlag)
83
     {
84
-        WRITE_UVLC(vui.chromaSampleLocTypeTopField,    "chroma_sample_loc_type_top_field");
85
+        WRITE_UVLC(vui.chromaSampleLocTypeTopField, "chroma_sample_loc_type_top_field");
86
         WRITE_UVLC(vui.chromaSampleLocTypeBottomField, "chroma_sample_loc_type_bottom_field");
87
     }
88
 
89
-    WRITE_FLAG(0,                                     "neutral_chroma_indication_flag");
90
-    WRITE_FLAG(vui.fieldSeqFlag,                      "field_seq_flag");
91
-    WRITE_FLAG(vui.frameFieldInfoPresentFlag,         "frame_field_info_present_flag");
92
+    WRITE_FLAG(0, "neutral_chroma_indication_flag");
93
+    WRITE_FLAG(vui.fieldSeqFlag, "field_seq_flag");
94
+    WRITE_FLAG(vui.frameFieldInfoPresentFlag, "frame_field_info_present_flag");
95
 
96
-    WRITE_FLAG(vui.defaultDisplayWindow.bEnabled,    "default_display_window_flag");
97
+    WRITE_FLAG(vui.defaultDisplayWindow.bEnabled, "default_display_window_flag");
98
     if (vui.defaultDisplayWindow.bEnabled)
99
     {
100
-        WRITE_UVLC(vui.defaultDisplayWindow.leftOffset,   "def_disp_win_left_offset");
101
-        WRITE_UVLC(vui.defaultDisplayWindow.rightOffset,  "def_disp_win_right_offset");
102
-        WRITE_UVLC(vui.defaultDisplayWindow.topOffset,    "def_disp_win_top_offset");
103
+        WRITE_UVLC(vui.defaultDisplayWindow.leftOffset, "def_disp_win_left_offset");
104
+        WRITE_UVLC(vui.defaultDisplayWindow.rightOffset, "def_disp_win_right_offset");
105
+        WRITE_UVLC(vui.defaultDisplayWindow.topOffset, "def_disp_win_top_offset");
106
         WRITE_UVLC(vui.defaultDisplayWindow.bottomOffset, "def_disp_win_bottom_offset");
107
     }
108
 
109
-    WRITE_FLAG(1,                                 "vui_timing_info_present_flag");
110
-    WRITE_CODE(vui.timingInfo.numUnitsInTick, 32, "vui_num_units_in_tick");
111
-    WRITE_CODE(vui.timingInfo.timeScale, 32,      "vui_time_scale");
112
-    WRITE_FLAG(0,                                 "vui_poc_proportional_to_timing_flag");
113
-
114
-    WRITE_FLAG(vui.hrdParametersPresentFlag,  "vui_hrd_parameters_present_flag");
115
-    if (vui.hrdParametersPresentFlag)
116
-        codeHrdParameters(vui.hrdParameters, maxSubTLayers);
117
+    if (bDiscardOptionalVUI)
118
+        WRITE_FLAG(0, "vui_timing_info_present_flag");
119
+    else
120
+    {
121
+        WRITE_FLAG(1, "vui_timing_info_present_flag");
122
+        WRITE_CODE(vui.timingInfo.numUnitsInTick, 32, "vui_num_units_in_tick");
123
+        WRITE_CODE(vui.timingInfo.timeScale, 32, "vui_time_scale");
124
+        WRITE_FLAG(0, "vui_poc_proportional_to_timing_flag");
125
+    }
126
+
127
+    if (bDiscardOptionalVUI)
128
+        WRITE_FLAG(0, "vui_hrd_parameters_present_flag");
129
+    else
130
+    {
131
+        WRITE_FLAG(vui.hrdParametersPresentFlag, "vui_hrd_parameters_present_flag");
132
+        if (vui.hrdParametersPresentFlag)
133
+            codeHrdParameters(vui.hrdParameters, maxSubTLayers);
134
+    }
135
 
136
     WRITE_FLAG(0, "bitstream_restriction_flag");
137
 }
138
@@ -570,22 +580,28 @@
139
     WRITE_CODE(picType, 3, "pic_type");
140
 }
141
 
142
-void Entropy::codeSliceHeader(const Slice& slice, FrameData& encData)
143
+void Entropy::codeSliceHeader(const Slice& slice, FrameData& encData, uint32_t slice_addr, uint32_t slice_addr_bits, int sliceQp)
144
 {
145
-    WRITE_FLAG(1, "first_slice_segment_in_pic_flag");
146
+    WRITE_FLAG((slice_addr == 0 ? 1 : 0), "first_slice_segment_in_pic_flag");
147
     if (slice.getRapPicFlag())
148
         WRITE_FLAG(0, "no_output_of_prior_pics_flag");
149
 
150
     WRITE_UVLC(0, "slice_pic_parameter_set_id");
151
 
152
     /* x265 does not use dependent slices, so always write all this data */
153
+    if (slice_addr)
154
+    {
155
+        // if( dependent_slice_segments_enabled_flag )
156
+        //     dependent_slice_segment_flag             u(1)
157
+        WRITE_CODE(slice_addr, slice_addr_bits, "slice_segment_address");
158
+    }
159
 
160
     WRITE_UVLC(slice.m_sliceType, "slice_type");
161
 
162
     if (!slice.getIdrPicFlag())
163
     {
164
-        int picOrderCntLSB = (slice.m_poc - slice.m_lastIDR + (1 << BITS_FOR_POC)) % (1 << BITS_FOR_POC);
165
-        WRITE_CODE(picOrderCntLSB, BITS_FOR_POC, "pic_order_cnt_lsb");
166
+        int picOrderCntLSB = (slice.m_poc - slice.m_lastIDR + (1 << slice.m_sps->log2MaxPocLsb)) % (1 << slice.m_sps->log2MaxPocLsb);
167
+        WRITE_CODE(picOrderCntLSB, slice.m_sps->log2MaxPocLsb, "pic_order_cnt_lsb");
168
 
169
 #if _DEBUG || CHECKED_BUILD
170
         // check for bitstream restriction stating that:
171
@@ -657,18 +673,24 @@
172
     if (!slice.isIntra())
173
         WRITE_UVLC(MRG_MAX_NUM_CANDS - slice.m_maxNumMergeCand, "five_minus_max_num_merge_cand");
174
 
175
-    int code = slice.m_sliceQp - 26;
176
+    int code = sliceQp - 26;
177
     WRITE_SVLC(code, "slice_qp_delta");
178
 
179
-    bool isSAOEnabled = slice.m_sps->bUseSAO ? saoParam->bSaoFlag[0] || saoParam->bSaoFlag[1] : false;
180
-    bool isDBFEnabled = !slice.m_pps->bPicDisableDeblockingFilter;
181
+    // TODO: Enable when pps_loop_filter_across_slices_enabled_flag==1
182
+    //       We didn't support filter across slice board, so disable it now
183
 
184
-    if (isSAOEnabled || isDBFEnabled)
185
-        WRITE_FLAG(slice.m_sLFaseFlag, "slice_loop_filter_across_slices_enabled_flag");
186
+    if (g_maxSlices <= 1)
187
+    {
188
+        bool isSAOEnabled = slice.m_sps->bUseSAO ? saoParam->bSaoFlag[0] || saoParam->bSaoFlag[1] : false;
189
+        bool isDBFEnabled = !slice.m_pps->bPicDisableDeblockingFilter;
190
+
191
+        if (isSAOEnabled || isDBFEnabled)
192
+            WRITE_FLAG(slice.m_sLFaseFlag, "slice_loop_filter_across_slices_enabled_flag");
193
+    }
194
 }
195
 
196
 /** write wavefront substreams sizes for the slice header */
197
-void Entropy::codeSliceHeaderWPPEntryPoints(const Slice& slice, const uint32_t *substreamSizes, uint32_t maxOffset)
198
+void Entropy::codeSliceHeaderWPPEntryPoints(const uint32_t *substreamSizes, uint32_t numSubStreams, uint32_t maxOffset)
199
 {
200
     uint32_t offsetLen = 1;
201
x265_2.0.tar.gz/source/encoder/entropy.h -> x265_2.1.tar.gz/source/encoder/entropy.h Changed
19
 
1
@@ -142,13 +142,13 @@
2
 
3
     void codeVPS(const VPS& vps);
4
     void codeSPS(const SPS& sps, const ScalingList& scalingList, const ProfileTierLevel& ptl);
5
-    void codePPS(const PPS& pps);
6
-    void codeVUI(const VUI& vui, int maxSubTLayers);
7
+    void codePPS(const PPS& pps, bool filerAcross);
8
+    void codeVUI(const VUI& vui, int maxSubTLayers, bool discardOptionalVUI);
9
     void codeAUD(const Slice& slice);
10
     void codeHrdParameters(const HRDInfo& hrd, int maxSubTLayers);
11
 
12
-    void codeSliceHeader(const Slice& slice, FrameData& encData);
13
-    void codeSliceHeaderWPPEntryPoints(const Slice& slice, const uint32_t *substreamSizes, uint32_t maxOffset);
14
+    void codeSliceHeader(const Slice& slice, FrameData& encData, uint32_t slice_addr, uint32_t slice_addr_bits, int sliceQp);
15
+    void codeSliceHeaderWPPEntryPoints(const uint32_t *substreamSizes, uint32_t numSubStreams, uint32_t maxOffset);
16
     void codeShortTermRefPicSet(const RPS& rps);
17
     void finishSlice()                 { encodeBinTrm(1); finish(); dynamic_cast<Bitstream*>(m_bitIf)->writeByteAlignment(); }
18
 
19
x265_2.0.tar.gz/source/encoder/frameencoder.cpp -> x265_2.1.tar.gz/source/encoder/frameencoder.cpp Changed
201
 
1
@@ -85,6 +85,7 @@
2
 
3
     delete[] m_rows;
4
     delete[] m_outStreams;
5
+    X265_FREE(m_sliceBaseRow);
6
     X265_FREE(m_cuGeoms);
7
     X265_FREE(m_ctuGeomMap);
8
     X265_FREE(m_substreamSizes);
9
@@ -113,12 +114,15 @@
10
     m_rows = new CTURow[m_numRows];
11
     bool ok = !!m_numRows;
12
 
13
+    m_sliceBaseRow = X265_MALLOC(uint32_t, m_param->maxSlices + 1);
14
+    ok &= !!m_sliceBaseRow;
15
+
16
     /* determine full motion search range */
17
     int range  = m_param->searchRange;       /* fpel search */
18
     range += !!(m_param->searchMethod < 2);  /* diamond/hex range check lag */
19
     range += NTAPS_LUMA / 2;                 /* subpel filter half-length */
20
     range += 2 + MotionEstimate::hpelIterationCount(m_param->subpelRefine) / 2; /* subpel refine steps */
21
-    m_refLagRows = 1 + ((range + g_maxCUSize - 1) / g_maxCUSize);
22
+    m_refLagRows = /*(m_param->maxSlices > 1 ? 1 : 0) +*/ 1 + ((range + g_maxCUSize - 1) / g_maxCUSize);
23
 
24
     // NOTE: 2 times of numRows because both Encoder and Filter in same queue
25
     if (!WaveFront::init(m_numRows * 2))
26
@@ -145,6 +149,13 @@
27
     else
28
         m_param->noiseReductionIntra = m_param->noiseReductionInter = 0;
29
 
30
+    // 7.4.7.1 - Ceil( Log2( PicSizeInCtbsY ) ) bits
31
+    {
32
+        unsigned long tmp;
33
+        CLZ(tmp, (numRows * numCols));
34
+        m_sliceAddrBits = (uint16_t)(tmp + 1);
35
+    }
36
+
37
     return ok;
38
 }
39
 
40
@@ -444,12 +455,33 @@
41
     /* ensure all rows are blocked prior to initializing row CTU counters */
42
     WaveFront::clearEnabledRowMask();
43
 
44
-    /* reset entropy coders */
45
+    /* reset entropy coders and compute slice id */
46
     m_entropyCoder.load(m_initSliceContext);
47
+    const uint32_t sliceGroupSize = (m_numRows + m_param->maxSlices - 1) / m_param->maxSlices;
48
+    const uint32_t sliceGroupSizeAccu = (m_numRows << 8) / m_param->maxSlices;
49
+    m_sliceGroupSize = (uint16_t)sliceGroupSize;
50
+
51
+    uint32_t rowSum = sliceGroupSizeAccu;
52
+    uint32_t sidx = 0;
53
     for (uint32_t i = 0; i < m_numRows; i++)
54
-        m_rows[i].init(m_initSliceContext);
55
+    {
56
+        const uint32_t rowRange = (rowSum >> 8);
57
 
58
-    uint32_t numSubstreams = m_param->bEnableWavefront ? slice->m_sps->numCuInHeight : 1;
59
+        if ((i >= rowRange) & (sidx != m_param->maxSlices - 1))
60
+        {
61
+            rowSum += sliceGroupSizeAccu;
62
+            m_sliceBaseRow[++sidx] = i;
63
+        }
64
+
65
+        m_rows[i].init(m_initSliceContext, sidx);
66
+    }
67
+    X265_CHECK(sidx < m_param->maxSlices, "sliceID check failed!");
68
+
69
+    m_sliceBaseRow[0] = 0;
70
+    m_sliceBaseRow[m_param->maxSlices] = m_numRows;
71
+
72
+    uint32_t numSubstreams = m_param->bEnableWavefront ? slice->m_sps->numCuInHeight : m_param->maxSlices;
73
+    X265_CHECK(m_param->bEnableWavefront || (m_param->maxSlices == 1), "Multiple slices without WPP unsupport now!");
74
     if (!m_outStreams)
75
     {
76
         m_outStreams = new Bitstream[numSubstreams];
77
@@ -466,7 +498,7 @@
78
 
79
     if (m_frame->m_lowres.bKeyframe)
80
     {
81
-        if (m_param->bEmitHRDSEI)
82
+        if (!m_param->bDiscardSEI && m_param->bEmitHRDSEI)
83
         {
84
             SEIBufferingPeriod* bpSei = &m_top->m_rateControl->m_bufPeriodSEI;
85
 
86
@@ -488,7 +520,7 @@
87
         }
88
     }
89
 
90
-    if (m_param->bEmitHRDSEI || !!m_param->interlaceMode)
91
+    if (!m_param->bDiscardSEI && (m_param->bEmitHRDSEI || !!m_param->interlaceMode))
92
     {
93
         SEIPictureTiming *sei = m_rce.picTimingSEI;
94
         const VUI *vui = &slice->m_sps->vuiParameters;
95
@@ -523,6 +555,25 @@
96
         m_nalList.serialize(NAL_UNIT_PREFIX_SEI, m_bs);
97
     }
98
 
99
+    /* Write user SEI */
100
+    if (!m_param->bDiscardSEI)
101
+    {
102
+        for (int i = 0; i < m_frame->m_userSEI.numPayloads; i++)
103
+        {
104
+            x265_sei_payload *payload = &m_frame->m_userSEI.payloads[i];
105
+            SEIuserDataUnregistered sei;
106
+
107
+            sei.m_payloadType = payload->payloadType;
108
+            sei.m_userDataLength = payload->payloadSize;
109
+            sei.m_userData = payload->payload;
110
+
111
+            m_bs.resetBits();
112
+            sei.write(m_bs, *slice->m_sps);
113
+            m_bs.writeByteAlignment();
114
+            m_nalList.serialize(NAL_UNIT_PREFIX_SEI, m_bs);
115
+        }
116
+    }
117
+
118
     /* CQP and CRF (without capped VBV) doesn't use mid-frame statistics to 
119
      * tune RateControl parameters for other frames.
120
      * Hence, for these modes, update m_startEndOrder and unlock RC for previous threads waiting in
121
@@ -540,35 +591,54 @@
122
      * compressed in a wave-front pattern if WPP is enabled. Row based loop
123
      * filters runs behind the CTU compression and reconstruction */
124
 
125
-    m_rows[0].active = true;
126
+    for (uint32_t sliceId = 0; sliceId < m_param->maxSlices; sliceId++)
127
+    {
128
+        m_rows[m_sliceBaseRow[sliceId]].active = true;
129
+    }
130
+
131
     if (m_param->bEnableWavefront)
132
     {
133
-        for (uint32_t row = 0; row < m_numRows; row++)
134
+        for (uint32_t rowInSlice = 0; rowInSlice < m_sliceGroupSize; rowInSlice++)
135
         {
136
-            // block until all reference frames have reconstructed the rows we need
137
-            for (int l = 0; l < numPredDir; l++)
138
+            for (uint32_t sliceId = 0; sliceId < m_param->maxSlices; sliceId++)
139
             {
140
-                for (int ref = 0; ref < slice->m_numRefIdx[l]; ref++)
141
+                const uint32_t sliceStartRow = m_sliceBaseRow[sliceId];
142
+                const uint32_t sliceEndRow = m_sliceBaseRow[sliceId + 1] - 1;
143
+                const uint32_t row = sliceStartRow + rowInSlice;
144
+
145
+                if (row >= m_numRows)
146
+                    break;
147
+
148
+                if (row > sliceEndRow)
149
+                    continue;
150
+
151
+                // block until all reference frames have reconstructed the rows we need
152
+                for (int l = 0; l < numPredDir; l++)
153
                 {
154
-                    Frame *refpic = slice->m_refFrameList[l][ref];
155
+                    for (int ref = 0; ref < slice->m_numRefIdx[l]; ref++)
156
+                    {
157
+                        Frame *refpic = slice->m_refFrameList[l][ref];
158
 
159
-                    uint32_t reconRowCount = refpic->m_reconRowCount.get();
160
-                    while ((reconRowCount != m_numRows) && (reconRowCount < row + m_refLagRows))
161
-                        reconRowCount = refpic->m_reconRowCount.waitForChange(reconRowCount);
162
+                        // NOTE: we unnecessary wait row that beyond current slice boundary
163
+                        const int rowIdx = X265_MIN(sliceEndRow, (row + m_refLagRows));
164
 
165
-                    if ((bUseWeightP || bUseWeightB) && m_mref[l][ref].isWeighted)
166
-                        m_mref[l][ref].applyWeight(row + m_refLagRows, m_numRows);
167
+                        while (refpic->m_reconRowFlag[rowIdx].get() == 0)
168
+                            refpic->m_reconRowFlag[rowIdx].waitForChange(0);
169
+
170
+                        if ((bUseWeightP || bUseWeightB) && m_mref[l][ref].isWeighted)
171
+                            m_mref[l][ref].applyWeight(row + m_refLagRows, m_numRows, sliceEndRow + 1, sliceId);
172
+                    }
173
                 }
174
-            }
175
 
176
-            enableRowEncoder(row); /* clear external dependency for this row */
177
-            if (!row)
178
-            {
179
-                m_row0WaitTime = x265_mdate();
180
-                enqueueRowEncoder(0); /* clear internal dependency, start wavefront */
181
-            }
182
-            tryWakeOne();
183
-        }
184
+                enableRowEncoder(row); /* clear external dependency for this row */
185
+                if (!rowInSlice)
186
+                {
187
+                    m_row0WaitTime = x265_mdate();
188
+                    enqueueRowEncoder(row); /* clear internal dependency, start wavefront */
189
+                }
190
+                tryWakeOne();
191
+            } // end of loop rowInSlice
192
+        } // end of loop sliceId
193
 
194
         m_allRowsAvailableTime = x265_mdate();
195
         tryWakeOne(); /* ensure one thread is active or help-wanted flag is set prior to blocking */
196
@@ -591,12 +661,12 @@
197
                     {
198
                         Frame *refpic = slice->m_refFrameList[list][ref];
199
 
200
-                        uint32_t reconRowCount = refpic->m_reconRowCount.get();
201
x265_2.0.tar.gz/source/encoder/frameencoder.h -> x265_2.1.tar.gz/source/encoder/frameencoder.h Changed
42
 
1
@@ -73,6 +73,7 @@
2
 {
3
     Entropy           bufferedEntropy;  /* store CTU2 context for next row CTU0 */
4
     Entropy           rowGoOnCoder;     /* store context between CTUs, code bitstream if !SAO */
5
+    unsigned int      sliceId;          /* store current row slice id */
6
 
7
     FrameStats        rowStats;
8
 
9
@@ -96,11 +97,12 @@
10
     volatile uint32_t completed;
11
 
12
     /* called at the start of each frame to initialize state */
13
-    void init(Entropy& initContext)
14
+    void init(Entropy& initContext, unsigned int sid)
15
     {
16
         active = false;
17
         busy = false;
18
         completed = 0;
19
+        sliceId = sid;
20
         memset(&rowStats, 0, sizeof(rowStats));
21
         rowGoOnCoder.load(initContext);
22
     }
23
@@ -142,6 +144,9 @@
24
     uint32_t                 m_refLagRows;
25
 
26
     CTURow*                  m_rows;
27
+    uint16_t                 m_sliceAddrBits;
28
+    uint16_t                 m_sliceGroupSize;
29
+    uint32_t*                m_sliceBaseRow;
30
     RateControlEntry         m_rce;
31
     SEIDecodedPictureHash    m_seiReconPictureDigest;
32
 
33
@@ -214,7 +219,7 @@
34
     void compressFrame();
35
 
36
     /* called by compressFrame to generate final per-row bitstreams */
37
-    void encodeSlice();
38
+    void encodeSlice(uint32_t sliceAddr);
39
 
40
     void threadMain();
41
     int  collectCTUStatistics(const CUData& ctu, FrameStats* frameLog);
42
x265_2.0.tar.gz/source/encoder/framefilter.cpp -> x265_2.1.tar.gz/source/encoder/framefilter.cpp Changed
201
 
1
@@ -174,11 +174,11 @@
2
         restoreOrigLosslessYuv(cu, frame, absPartIdx);
3
 }
4
 
5
-void FrameFilter::ParallelFilter::copySaoAboveRef(PicYuv* reconPic, uint32_t cuAddr, int col)
6
+void FrameFilter::ParallelFilter::copySaoAboveRef(const CUData *ctu, PicYuv* reconPic, uint32_t cuAddr, int col)
7
 {
8
     // Copy SAO Top Reference Pixels
9
     int ctuWidth  = g_maxCUSize;
10
-    const pixel* recY = reconPic->getPlaneAddr(0, cuAddr) - (m_rowAddr == 0 ? 0 : reconPic->m_stride);
11
+    const pixel* recY = reconPic->getPlaneAddr(0, cuAddr) - (ctu->m_bFirstRowInSlice ? 0 : reconPic->m_stride);
12
 
13
     // Luma
14
     memcpy(&m_sao.m_tmpU[0][col * ctuWidth], recY, ctuWidth * sizeof(pixel));
15
@@ -189,8 +189,8 @@
16
     {
17
         ctuWidth  >>= m_sao.m_hChromaShift;
18
 
19
-        const pixel* recU = reconPic->getPlaneAddr(1, cuAddr) - (m_rowAddr == 0 ? 0 : reconPic->m_strideC);
20
-        const pixel* recV = reconPic->getPlaneAddr(2, cuAddr) - (m_rowAddr == 0 ? 0 : reconPic->m_strideC);
21
+        const pixel* recU = reconPic->getPlaneAddr(1, cuAddr) - (ctu->m_bFirstRowInSlice ? 0 : reconPic->m_strideC);
22
+        const pixel* recV = reconPic->getPlaneAddr(2, cuAddr) - (ctu->m_bFirstRowInSlice ? 0 : reconPic->m_strideC);
23
         memcpy(&m_sao.m_tmpU[1][col * ctuWidth], recU, ctuWidth * sizeof(pixel));
24
         memcpy(&m_sao.m_tmpU[2][col * ctuWidth], recV, ctuWidth * sizeof(pixel));
25
 
26
@@ -325,7 +325,7 @@
27
     int colEnd = m_allowedCol.get();
28
 
29
     // Avoid threading conflict
30
-    if (m_prevRow && colEnd > m_prevRow->m_lastDeblocked.get())
31
+    if (!m_encData->getPicCTU(m_rowAddr)->m_bFirstRowInSlice && colEnd > m_prevRow->m_lastDeblocked.get())
32
         colEnd = m_prevRow->m_lastDeblocked.get();
33
 
34
     if (colStart >= colEnd)
35
@@ -334,29 +334,29 @@
36
     for (uint32_t col = (uint32_t)colStart; col < (uint32_t)colEnd; col++)
37
     {
38
         const uint32_t cuAddr = m_rowAddr + col;
39
+        const CUData* ctu = m_encData->getPicCTU(cuAddr);
40
 
41
         if (m_frameFilter->m_param->bEnableLoopFilter)
42
         {
43
-            const CUData* ctu = m_encData->getPicCTU(cuAddr);
44
             deblockCTU(ctu, cuGeoms[ctuGeomMap[cuAddr]], Deblock::EDGE_VER);
45
         }
46
 
47
         if (col >= 1)
48
         {
49
+            const CUData* ctuPrev = m_encData->getPicCTU(cuAddr - 1);
50
             if (m_frameFilter->m_param->bEnableLoopFilter)
51
             {
52
-                const CUData* ctuPrev = m_encData->getPicCTU(cuAddr - 1);
53
                 deblockCTU(ctuPrev, cuGeoms[ctuGeomMap[cuAddr - 1]], Deblock::EDGE_HOR);
54
 
55
                 // When SAO Disable, setting column counter here
56
-                if ((!m_frameFilter->m_param->bEnableSAO) & (m_row >= 1))
57
+                if (!m_frameFilter->m_param->bEnableSAO & !ctuPrev->m_bFirstRowInSlice)
58
                     m_prevRow->processPostCu(col - 1);
59
             }
60
 
61
             if (m_frameFilter->m_param->bEnableSAO)
62
             {
63
                 // Save SAO bottom row reference pixels
64
-                copySaoAboveRef(reconPic, cuAddr - 1, col - 1);
65
+                copySaoAboveRef(ctuPrev, reconPic, cuAddr - 1, col - 1);
66
 
67
                 // SAO Decide
68
                 if (col >= 2)
69
@@ -364,11 +364,11 @@
70
                     // NOTE: Delay 2 column to avoid mistake on below case, it is Deblock sync logic issue, less probability but still alive
71
                     //       ... H V |
72
                     //       ..S H V |
73
-                    m_sao.rdoSaoUnitCu(saoParam, m_rowAddr, col - 2, cuAddr - 2);
74
+                    m_sao.rdoSaoUnitCu(saoParam, (ctu->m_bFirstRowInSlice ? 0 : m_rowAddr), col - 2, cuAddr - 2);
75
                 }
76
 
77
                 // Process Previous Row SAO CU
78
-                if (m_row >= 1 && col >= 3)
79
+                if (!ctu->m_bFirstRowInSlice && col >= 3)
80
                 {
81
                     // Must delay 1 row to avoid thread data race conflict
82
                     m_prevRow->processSaoCTU(saoParam, col - 3);
83
@@ -384,52 +384,54 @@
84
     if (colEnd == numCols)
85
     {
86
         const uint32_t cuAddr = m_rowAddr + numCols - 1;
87
+        const CUData* ctuPrev = m_encData->getPicCTU(cuAddr);
88
 
89
         if (m_frameFilter->m_param->bEnableLoopFilter)
90
         {
91
-            const CUData* ctuPrev = m_encData->getPicCTU(cuAddr);
92
             deblockCTU(ctuPrev, cuGeoms[ctuGeomMap[cuAddr]], Deblock::EDGE_HOR);
93
 
94
             // When SAO Disable, setting column counter here
95
-            if ((!m_frameFilter->m_param->bEnableSAO) & (m_row >= 1))
96
+            if (!m_frameFilter->m_param->bEnableSAO & !ctuPrev->m_bFirstRowInSlice)
97
                 m_prevRow->processPostCu(numCols - 1);
98
         }
99
 
100
         // TODO: move processPostCu() into processSaoUnitCu()
101
         if (m_frameFilter->m_param->bEnableSAO)
102
         {
103
+            const CUData* ctu = m_encData->getPicCTU(m_rowAddr + numCols - 2);
104
+
105
             // Save SAO bottom row reference pixels
106
-            copySaoAboveRef(reconPic, cuAddr, numCols - 1);
107
+            copySaoAboveRef(ctuPrev, reconPic, cuAddr, numCols - 1);
108
 
109
             // SAO Decide
110
             // NOTE: reduce condition check for 1 CU only video, Why someone play with it?
111
             if (numCols >= 2)
112
-                m_sao.rdoSaoUnitCu(saoParam, m_rowAddr, numCols - 2, cuAddr - 1);
113
+                m_sao.rdoSaoUnitCu(saoParam, (ctu->m_bFirstRowInSlice ? 0 : m_rowAddr), numCols - 2, cuAddr - 1);
114
 
115
             if (numCols >= 1)
116
-                m_sao.rdoSaoUnitCu(saoParam, m_rowAddr, numCols - 1, cuAddr);
117
+                m_sao.rdoSaoUnitCu(saoParam, (ctuPrev->m_bFirstRowInSlice ? 0 : m_rowAddr), numCols - 1, cuAddr);
118
 
119
             // Process Previous Rows SAO CU
120
-            if (m_row >= 1 && numCols >= 3)
121
+            if (!ctuPrev->m_bFirstRowInSlice & (numCols >= 3))
122
             {
123
                 m_prevRow->processSaoCTU(saoParam, numCols - 3);
124
                 m_prevRow->processPostCu(numCols - 3);
125
             }
126
 
127
-            if (m_row >= 1 && numCols >= 2)
128
+            if (!ctuPrev->m_bFirstRowInSlice & (numCols >= 2))
129
             {
130
                 m_prevRow->processSaoCTU(saoParam, numCols - 2);
131
                 m_prevRow->processPostCu(numCols - 2);
132
             }
133
 
134
-            if (m_row >= 1 && numCols >= 1)
135
+            if (!ctuPrev->m_bFirstRowInSlice & (numCols >= 1))
136
             {
137
                 m_prevRow->processSaoCTU(saoParam, numCols - 1);
138
                 m_prevRow->processPostCu(numCols - 1);
139
             }
140
 
141
             // Setting column sync counter
142
-            if (m_row >= 1)
143
+            if (!ctuPrev->m_bFirstRowInSlice)
144
                 m_frameFilter->m_frame->m_reconColCount[m_row - 1].set(numCols - 1);
145
         }
146
         m_lastDeblocked.set(numCols);
147
@@ -454,6 +456,7 @@
148
 
149
     // SAO: was integrate into encode loop
150
     SAOParam* saoParam = encData.m_saoParam;
151
+    CUData* ctu = encData.getPicCTU(m_parallelFilter[row].m_rowAddr);
152
 
153
     /* Processing left block Deblock with current threading */
154
     {
155
@@ -461,15 +464,15 @@
156
         m_parallelFilter[row].waitForExit();
157
 
158
         /* Check to avoid previous row process slower than current row */
159
-        X265_CHECK((row < 1) || m_parallelFilter[row - 1].m_lastDeblocked.get() == m_numCols, "previous row not finish");
160
+        X265_CHECK(ctu->m_bFirstRowInSlice || m_parallelFilter[row - 1].m_lastDeblocked.get() == m_numCols, "previous row not finish");
161
 
162
         m_parallelFilter[row].m_allowedCol.set(m_numCols);
163
         m_parallelFilter[row].processTasks(-1);
164
 
165
-        if (row == m_numRows - 1)
166
+        if (ctu->m_bLastRowInSlice)
167
         {
168
             /* TODO: Early start last row */
169
-            if ((row >= 1) && (m_parallelFilter[row - 1].m_lastDeblocked.get() != m_numCols))
170
+            if ((!ctu->m_bFirstRowInSlice) && (m_parallelFilter[row - 1].m_lastDeblocked.get() != m_numCols))
171
                 x265_log(m_param, X265_LOG_WARNING, "detected ParallelFilter race condition on last row\n");
172
 
173
             /* Apply SAO on last row of CUs, because we always apply SAO on row[X-1] */
174
@@ -493,10 +496,19 @@
175
 
176
     // this row of CTUs has been encoded
177
 
178
-    if (row > 0)
179
+    if (!ctu->m_bFirstRowInSlice)
180
         processPostRow(row - 1);
181
 
182
-    if (row == m_numRows - 1)
183
+    if (ctu->m_bLastRowInSlice)
184
+        processPostRow(row);
185
+
186
+    // NOTE: slices parallelism will be execute out-of-order
187
+    int numRowFinished;
188
+    for(numRowFinished = 0; numRowFinished < m_numRows; numRowFinished++)
189
+        if (!m_frame->m_reconRowFlag[numRowFinished].get())
190
+            break;
191
+
192
+    if (numRowFinished == m_numRows)
193
     {
194
         if (m_param->bEnableSAO)
195
         {
196
@@ -509,7 +521,6 @@
197
 
198
             m_parallelFilter[0].m_sao.rdoSaoUnitRowEnd(saoParam, encData.m_slice->m_sps->numCUsInFrame);
199
         }
200
-        processPostRow(row);
201
x265_2.0.tar.gz/source/encoder/framefilter.h -> x265_2.1.tar.gz/source/encoder/framefilter.h Changed
10
 
1
@@ -93,7 +93,7 @@
2
         void processSaoCTU(SAOParam *saoParam, int col);
3
 
4
         // Copy and Save SAO reference pixels for SAO Rdo decide
5
-        void copySaoAboveRef(PicYuv* reconPic, uint32_t cuAddr, int col);
6
+        void copySaoAboveRef(const CUData *ctu, PicYuv* reconPic, uint32_t cuAddr, int col);
7
 
8
         // Post-Process (Border extension)
9
         void processPostCu(int col) const;
10
x265_2.0.tar.gz/source/encoder/motion.cpp -> x265_2.1.tar.gz/source/encoder/motion.cpp Changed
56
 
1
@@ -581,14 +581,15 @@
2
                                    int              numCandidates,
3
                                    const MV *       mvc,
4
                                    int              merange,
5
-                                   MV &             outQMv)
6
+                                   MV &             outQMv,
7
+                                   pixel *          srcReferencePlane)
8
 {
9
     ALIGN_VAR_16(int, costs[16]);
10
     if (ctuAddr >= 0)
11
         blockOffset = ref->reconPic->getLumaAddr(ctuAddr, absPartIdx) - ref->reconPic->getLumaAddr(0);
12
     intptr_t stride = ref->lumaStride;
13
     pixel* fenc = fencPUYuv.m_buf[0];
14
-    pixel* fref = ref->fpelPlane[0] + blockOffset;
15
+    pixel* fref = srcReferencePlane == 0 ? ref->fpelPlane[0] + blockOffset : srcReferencePlane + blockOffset;
16
 
17
     setMVP(qmvp);
18
 
19
@@ -1094,6 +1095,12 @@
20
 
21
     const SubpelWorkload& wl = workload[this->subpelRefine];
22
 
23
+    // check mv range for slice bound
24
+    if ((g_maxSlices > 1) & ((bmv.y < qmvmin.y) | (bmv.y > qmvmax.y)))
25
+    {
26
+        bmv.y = x265_min(x265_max(bmv.y, qmvmin.y), qmvmax.y);
27
+    }
28
+
29
     if (!bcost)
30
     {
31
         /* if there was zero residual at the clipped MVP, we can skip subpel
32
@@ -1141,6 +1148,11 @@
33
             for (int i = 1; i <= wl.hpel_dirs; i++)
34
             {
35
                 MV qmv = bmv + square1[i] * 2;
36
+
37
+                // check mv range for slice bound
38
+                if ((g_maxSlices > 1) & ((qmv.y < qmvmin.y) | (qmv.y > qmvmax.y)))
39
+                    continue;
40
+
41
                 int cost = subpelCompare(ref, qmv, hpelcomp) + mvcost(qmv);
42
                 COPY2_IF_LT(bcost, cost, bdir, i);
43
             }
44
@@ -1161,6 +1173,11 @@
45
             for (int i = 1; i <= wl.qpel_dirs; i++)
46
             {
47
                 MV qmv = bmv + square1[i];
48
+
49
+                // check mv range for slice bound
50
+                if ((g_maxSlices > 1) & ((qmv.y < qmvmin.y) | (qmv.y > qmvmax.y)))
51
+                    continue;
52
+
53
                 int cost = subpelCompare(ref, qmv, satd) + mvcost(qmv);
54
                 COPY2_IF_LT(bcost, cost, bdir, i);
55
             }
56
x265_2.0.tar.gz/source/encoder/motion.h -> x265_2.1.tar.gz/source/encoder/motion.h Changed
10
 
1
@@ -90,7 +90,7 @@
2
                chromaSatd(refYuv.getCrAddr(puPartIdx), refYuv.m_csize, fencPUYuv.m_buf[2], fencPUYuv.m_csize);
3
     }
4
 
5
-    int motionEstimate(ReferencePlanes* ref, const MV & mvmin, const MV & mvmax, const MV & qmvp, int numCandidates, const MV * mvc, int merange, MV & outQMv);
6
+    int motionEstimate(ReferencePlanes* ref, const MV & mvmin, const MV & mvmax, const MV & qmvp, int numCandidates, const MV * mvc, int merange, MV & outQMv, pixel *srcReferencePlane = 0);
7
 
8
     int subpelCompare(ReferencePlanes* ref, const MV &qmv, pixelcmp_t);
9
 
10
x265_2.0.tar.gz/source/encoder/ratecontrol.cpp -> x265_2.1.tar.gz/source/encoder/ratecontrol.cpp Changed
201
 
1
@@ -284,7 +284,11 @@
2
 #define ABR_SCENECUT_INIT_QP_MIN (12)
3
 #define CRF_INIT_QP (int)m_param->rc.rfConstant
4
     for (int i = 0; i < 3; i++)
5
+    {
6
         m_lastQScaleFor[i] = x265_qp2qScale(m_param->rc.rateControlMode == X265_RC_CRF ? CRF_INIT_QP : ABR_INIT_QP_MIN);
7
+        m_lmin[i] = x265_qp2qScale(m_param->rc.qpMin);
8
+        m_lmax[i] = x265_qp2qScale(m_param->rc.qpMax);
9
+    }
10
 
11
     if (m_param->rc.rateControlMode == X265_RC_CQP)
12
     {
13
@@ -543,8 +547,11 @@
14
                        &rce->mvBits, &rce->miscBits, &rce->iCuCount, &rce->pCuCount,
15
                        &rce->skipCuCount);
16
                 rce->keptAsRef = true;
17
+                rce->isIdr = false;
18
                 if (picType == 'b' || picType == 'p')
19
                     rce->keptAsRef = false;
20
+                if (picType == 'I')
21
+                    rce->isIdr = true;
22
                 if (picType == 'I' || picType == 'i')
23
                     rce->sliceType = I_SLICE;
24
                 else if (picType == 'P' || picType == 'p')
25
@@ -611,9 +618,18 @@
26
         }
27
         if (m_param->rc.cuTree)
28
         {
29
-            m_cuTreeStats.qpBuffer[0] = X265_MALLOC(uint16_t, m_ncu * sizeof(uint16_t));
30
-            if (m_param->bBPyramid && m_param->rc.bStatRead)
31
-                m_cuTreeStats.qpBuffer[1] = X265_MALLOC(uint16_t, m_ncu * sizeof(uint16_t));
32
+            if (m_param->rc.qgSize == 8)
33
+            {
34
+                m_cuTreeStats.qpBuffer[0] = X265_MALLOC(uint16_t, m_ncu * 4 * sizeof(uint16_t));
35
+                if (m_param->bBPyramid && m_param->rc.bStatRead)
36
+                    m_cuTreeStats.qpBuffer[1] = X265_MALLOC(uint16_t, m_ncu * 4 * sizeof(uint16_t));
37
+            }
38
+            else
39
+            {
40
+                m_cuTreeStats.qpBuffer[0] = X265_MALLOC(uint16_t, m_ncu * sizeof(uint16_t));
41
+                if (m_param->bBPyramid && m_param->rc.bStatRead)
42
+                    m_cuTreeStats.qpBuffer[1] = X265_MALLOC(uint16_t, m_ncu * sizeof(uint16_t));
43
+            }
44
             m_cuTreeStats.qpBufPos = -1;
45
         }
46
     }
47
@@ -808,13 +824,19 @@
48
                  (double)m_param->rc.bitrate,
49
                  expectedBits * m_fps / (m_numEntries * 1000.),
50
                  avgq);
51
-        if (expectedBits < allAvailableBits && avgq < QP_MIN + 2)
52
+        if (expectedBits < allAvailableBits && avgq < m_param->rc.qpMin + 2)
53
         {
54
-            x265_log(m_param, X265_LOG_WARNING, "try reducing target bitrate\n");
55
+            if (m_param->rc.qpMin > 0)
56
+                x265_log(m_param, X265_LOG_WARNING, "try reducing target bitrate or reducing qp_min (currently %d)\n", m_param->rc.qpMin);
57
+            else
58
+                x265_log(m_param, X265_LOG_WARNING, "try reducing target bitrate\n");
59
         }
60
-        else if (expectedBits > allAvailableBits && avgq > QP_MAX_SPEC - 2)
61
+        else if (expectedBits > allAvailableBits && avgq > m_param->rc.qpMax - 2)
62
         {
63
-            x265_log(m_param, X265_LOG_WARNING, "try increasing target bitrate\n");
64
+            if (m_param->rc.qpMax < QP_MAX_MAX)
65
+                x265_log(m_param, X265_LOG_WARNING, "try increasing target bitrate or increasing qp_max (currently %d)\n", m_param->rc.qpMax);
66
+            else
67
+                x265_log(m_param, X265_LOG_WARNING, "try increasing target bitrate\n");
68
         }
69
         else if (!(m_2pass && m_isVbv))
70
             x265_log(m_param, X265_LOG_WARNING, "internal error\n");
71
@@ -966,6 +988,8 @@
72
     double adjustment;
73
     double prevBits = 0;
74
     int t0, t1;
75
+    double qScaleMin = x265_qp2qScale(m_param->rc.qpMin);
76
+    double qScaleMax = x265_qp2qScale(m_param->rc.qpMax);
77
     int iterations = 0 , adjMin, adjMax;
78
     CHECKED_MALLOC(fills, double, m_numEntries + 1);
79
     fills++;
80
@@ -985,7 +1009,7 @@
81
             adjMin = 1;
82
             while (adjMin && findUnderflow(fills, &t0, &t1, 1, endPos))
83
             {
84
-                adjMin = fixUnderflow(t0, t1, adjustment, MIN_QPSCALE, MAX_MAX_QPSCALE);
85
+                adjMin = fixUnderflow(t0, t1, adjustment, qScaleMin, qScaleMax);
86
                 t0 = t1;
87
             }
88
         }
89
@@ -995,7 +1019,7 @@
90
         /* fix underflows -- should be done after overflow, as we'd better undersize target than underflowing VBV */
91
         adjMax = 1;
92
         while (adjMax && findUnderflow(fills, &t0, &t1, 0, endPos))
93
-            adjMax = fixUnderflow(t0, t1, 1.001, MIN_QPSCALE, MAX_MAX_QPSCALE );
94
+            adjMax = fixUnderflow(t0, t1, 1.001, qScaleMin, qScaleMax);
95
         expectedBits = countExpectedBits(startPos, endPos);
96
     }
97
     while ((expectedBits < .995 * allAvailableBits) && ((int64_t)(expectedBits+.5) > (int64_t)(prevBits+.5)) && !(m_param->rc.rateControlMode == X265_RC_CRF));
98
@@ -1044,7 +1068,7 @@
99
             return X265_TYPE_AUTO;
100
         }
101
         int index = m_encOrder[frameNum];
102
-        int frameType = m_rce2Pass[index].sliceType == I_SLICE ? (frameNum > 0 && m_param->bOpenGOP ? X265_TYPE_I : X265_TYPE_IDR)
103
+        int frameType = m_rce2Pass[index].sliceType == I_SLICE ? (m_rce2Pass[index].isIdr ? X265_TYPE_IDR : X265_TYPE_I)
104
                         : m_rce2Pass[index].sliceType == P_SLICE ? X265_TYPE_P
105
                         : (m_rce2Pass[index].sliceType == B_SLICE && m_rce2Pass[index].keptAsRef ? X265_TYPE_BREF : X265_TYPE_B);
106
         return frameType;
107
@@ -1216,13 +1240,17 @@
108
          * the scene-transition mini-gop */
109
 
110
         double q = x265_qScale2qp(rateEstimateQscale(curFrame, rce));
111
-        q = x265_clip3((double)QP_MIN, (double)QP_MAX_MAX, q);
112
+        q = x265_clip3((double)m_param->rc.qpMin, (double)m_param->rc.qpMax, q);
113
         m_qp = int(q + 0.5);
114
         q = m_isGrainEnabled ? m_qp : q;
115
         rce->qpaRc = curEncData.m_avgQpRc = curEncData.m_avgQpAq = q;
116
         /* copy value of lastRceq into thread local rce struct *to be used in RateControlEnd() */
117
         rce->qRceq = m_lastRceq;
118
         accumPQpUpdate();
119
+        curFrame->m_rcData->cumulativePQp = m_accumPQp;
120
+        curFrame->m_rcData->cumulativePNorm = m_accumPNorm;
121
+        for (int i = 0; i < 3; i++)
122
+            curFrame->m_rcData->lastQScaleFor[i] = m_lastQScaleFor[i];
123
     }
124
     else // CQP
125
     {
126
@@ -1250,7 +1278,7 @@
127
     if (curFrame->m_forceqp)
128
     {
129
         m_qp = (int32_t)(curFrame->m_forceqp + 0.5) - 1;
130
-        m_qp = x265_clip3(QP_MIN, QP_MAX_MAX, m_qp);
131
+        m_qp = x265_clip3(m_param->rc.qpMin, m_param->rc.qpMax, m_qp);
132
         rce->qpaRc = curEncData.m_avgQpRc = curEncData.m_avgQpAq = m_qp;
133
         if (m_isAbr || m_2pass)
134
         {
135
@@ -1408,6 +1436,11 @@
136
 {
137
     int index = m_encOrder[frame->m_poc];
138
     uint8_t sliceTypeActual = (uint8_t)m_rce2Pass[index].sliceType;
139
+    int ncu;
140
+    if (m_param->rc.qgSize == 8)
141
+        ncu = m_ncu * 4;
142
+    else
143
+        ncu = m_ncu;
144
     if (m_rce2Pass[index].keptAsRef)
145
     {
146
         /* TODO: We don't need pre-lookahead to measure AQ offsets, but there is currently
147
@@ -1421,7 +1454,7 @@
148
 
149
                 if (!fread(&type, 1, 1, m_cutreeStatFileIn))
150
                     goto fail;
151
-                if (fread(m_cuTreeStats.qpBuffer[m_cuTreeStats.qpBufPos], sizeof(uint16_t), m_ncu, m_cutreeStatFileIn) != (size_t)m_ncu)
152
+                if (fread(m_cuTreeStats.qpBuffer[m_cuTreeStats.qpBufPos], sizeof(uint16_t), ncu, m_cutreeStatFileIn) != (size_t)ncu)
153
                     goto fail;
154
 
155
                 if (type != sliceTypeActual && m_cuTreeStats.qpBufPos == 1)
156
@@ -1432,8 +1465,8 @@
157
             }
158
             while(type != sliceTypeActual);
159
         }
160
-        primitives.fix8Unpack(frame->m_lowres.qpCuTreeOffset, m_cuTreeStats.qpBuffer[m_cuTreeStats.qpBufPos], m_ncu);
161
-        for (int i = 0; i < m_ncu; i++)
162
+        primitives.fix8Unpack(frame->m_lowres.qpCuTreeOffset, m_cuTreeStats.qpBuffer[m_cuTreeStats.qpBufPos], ncu);
163
+        for (int i = 0; i < ncu; i++)
164
             frame->m_lowres.invQscaleFactor[i] = x265_exp2fix8(frame->m_lowres.qpCuTreeOffset[i]);
165
         m_cuTreeStats.qpBufPos--;
166
     }
167
@@ -1643,6 +1676,8 @@
168
         double abrBuffer = 2 * m_rateTolerance * m_bitrate;
169
         if (m_2pass)
170
         {
171
+            double lmin = m_lmin[m_sliceType];
172
+            double lmax = m_lmax[m_sliceType];
173
             int64_t diff;
174
             if (!m_isVbv)
175
             {
176
@@ -1681,8 +1716,16 @@
177
             }
178
             if (m_framesDone == 0 && m_param->rc.rateControlMode == X265_RC_ABR && m_isGrainEnabled)
179
                 q = X265_MIN(x265_qp2qScale(ABR_INIT_QP_GRAIN_MAX), q);
180
-
181
             rce->qpNoVbv = x265_qScale2qp(q);
182
+            if ((m_sliceType == I_SLICE && m_param->keyframeMax > 1
183
+                && m_lastNonBPictType != I_SLICE && !m_isAbrReset) || (m_isNextGop && !m_framesDone))
184
+                m_avgPFrameQp = 0;
185
+            if (m_sliceType == P_SLICE)
186
+            {
187
+                m_avgPFrameQp = m_avgPFrameQp == 0 ? rce->qpNoVbv : m_avgPFrameQp;
188
+                m_avgPFrameQp = (m_avgPFrameQp + rce->qpNoVbv) / 2;
189
+            }
190
+
191
             if (m_isVbv)
192
             {
193
                 /* Do not overflow vbv */
194
@@ -1693,17 +1736,17 @@
195
                 double sizeConstraint = 1 + expectedFullness;
196
                 qmax = X265_MAX(qmax, rce->newQScale);
197
                 if (expectedFullness < .05)
198
-                    qmax = MAX_MAX_QPSCALE;
199
-                qmax = X265_MIN(qmax, MAX_MAX_QPSCALE);
200
+                    qmax = lmax;
201
x265_2.0.tar.gz/source/encoder/ratecontrol.h -> x265_2.1.tar.gz/source/encoder/ratecontrol.h Changed
19
 
1
@@ -108,7 +108,7 @@
2
     int      coeffBits;
3
     bool     keptAsRef;
4
     bool     scenecut;
5
-
6
+    bool     isIdr;
7
     SEIPictureTiming *picTimingSEI;
8
     HRDTiming        *hrdTiming;
9
 };
10
@@ -162,6 +162,8 @@
11
     double  m_accumPNorm;
12
     double  m_lastQScaleFor[3];  /* last qscale for a specific pict type, used for max_diff & ipb factor stuff */
13
     double  m_lstep;
14
+    double  m_lmin[3];
15
+    double  m_lmax[3];
16
     double  m_shortTermCplxSum;
17
     double  m_shortTermCplxCount;
18
     double  m_lastRceq;
19
x265_2.0.tar.gz/source/encoder/reference.cpp -> x265_2.1.tar.gz/source/encoder/reference.cpp Changed
72
 
1
@@ -3,6 +3,7 @@
2
  *
3
  * Authors: Steve Borho <steve@borho.org>
4
  *          Deepthi Devaki <deepthidevaki@multicorewareinc.com>
5
+ *          Min Chen <chenm003@163.com>
6
  *
7
  * This program is free software; you can redistribute it and/or modify
8
  * it under the terms of the GNU General Public License as published by
9
@@ -36,10 +37,12 @@
10
     weightBuffer[0] = NULL;
11
     weightBuffer[1] = NULL;
12
     weightBuffer[2] = NULL;
13
+    numSliceWeightedRows = NULL;
14
 }
15
 
16
 MotionReference::~MotionReference()
17
 {
18
+    X265_FREE(numSliceWeightedRows);
19
     X265_FREE(weightBuffer[0]);
20
     X265_FREE(weightBuffer[1]);
21
     X265_FREE(weightBuffer[2]);
22
@@ -48,11 +51,19 @@
23
 int MotionReference::init(PicYuv* recPic, WeightParam *wp, const x265_param& p)
24
 {
25
     reconPic = recPic;
26
-    numWeightedRows = 0;
27
     lumaStride = recPic->m_stride;
28
     chromaStride = recPic->m_strideC;
29
     numInterpPlanes = p.subpelRefine > 2 ? 3 : 1; /* is chroma satd possible? */
30
 
31
+    if (numSliceWeightedRows)
32
+    {
33
+        // Unnecessary, but avoid risk on parameters dynamic modify in future.
34
+        X265_FREE(numSliceWeightedRows);
35
+        numSliceWeightedRows = NULL;
36
+    }
37
+    numSliceWeightedRows = X265_MALLOC(uint32_t, p.maxSlices);
38
+    memset(numSliceWeightedRows, 0, p.maxSlices * sizeof(uint32_t));
39
+
40
     /* directly reference the extended integer pel planes */
41
     fpelPlane[0] = recPic->m_picOrg[0];
42
     fpelPlane[1] = recPic->m_picOrg[1];
43
@@ -105,9 +116,10 @@
44
     return 0;
45
 }
46
 
47
-void MotionReference::applyWeight(int finishedRows, int maxNumRows)
48
+void MotionReference::applyWeight(uint32_t finishedRows, uint32_t maxNumRows, uint32_t maxNumRowsInSlice, uint32_t sliceId)
49
 {
50
-    finishedRows = X265_MIN(finishedRows, maxNumRows);
51
+    const uint32_t numWeightedRows = numSliceWeightedRows[sliceId];
52
+    finishedRows = X265_MIN(finishedRows, maxNumRowsInSlice);
53
     if (numWeightedRows >= finishedRows)
54
         return;
55
 
56
@@ -116,7 +128,7 @@
57
     intptr_t stride = reconPic->m_stride;
58
     int width   = reconPic->m_picWidth;
59
     int height  = (finishedRows - numWeightedRows) * g_maxCUSize;
60
-    if (finishedRows == maxNumRows && (reconPic->m_picHeight % g_maxCUSize))
61
+    if ((finishedRows == maxNumRows) && (reconPic->m_picHeight % g_maxCUSize))
62
     {
63
         /* the last row may be partial height */
64
         height -= g_maxCUSize;
65
@@ -170,5 +182,5 @@
66
         }
67
     }
68
 
69
-    numWeightedRows = finishedRows;
70
+    numSliceWeightedRows[sliceId] = finishedRows;
71
 }
72
x265_2.0.tar.gz/source/encoder/reference.h -> x265_2.1.tar.gz/source/encoder/reference.h Changed
25
 
1
@@ -2,6 +2,7 @@
2
  * Copyright (C) 2013 x265 project
3
  *
4
  * Authors: Steve Borho <steve@borho.org>
5
+ *          Min Chen <chenm003@163.com>
6
  *
7
  * This program is free software; you can redistribute it and/or modify
8
  * it under the terms of the GNU General Public License as published by
9
@@ -41,11 +42,11 @@
10
     MotionReference();
11
     ~MotionReference();
12
     int  init(PicYuv*, WeightParam* wp, const x265_param& p);
13
-    void applyWeight(int rows, int numRows);
14
+    void applyWeight(uint32_t finishedRows, uint32_t maxNumRows, uint32_t maxNumRowsInSlice, uint32_t sliceId);
15
 
16
-    pixel*  weightBuffer[3];
17
-    int     numInterpPlanes;
18
-    int     numWeightedRows;
19
+    pixel*      weightBuffer[3];
20
+    int         numInterpPlanes;
21
+    uint32_t*   numSliceWeightedRows;
22
 
23
 protected:
24
 
25
x265_2.0.tar.gz/source/encoder/sao.cpp -> x265_2.1.tar.gz/source/encoder/sao.cpp Changed
187
 
1
@@ -283,6 +283,16 @@
2
     int ctuHeight = g_maxCUSize;
3
     uint32_t lpelx = cu->m_cuPelX;
4
     uint32_t tpely = cu->m_cuPelY;
5
+    const uint32_t firstRowInSlice = cu->m_bFirstRowInSlice;
6
+    const uint32_t lastRowInSlice = cu->m_bLastRowInSlice;
7
+    const uint32_t bAboveUnavail = (!tpely) | firstRowInSlice;
8
+
9
+    // NOTE: Careful! the picHeight for Equal operator only, so I may safe to hack it
10
+    if (lastRowInSlice)
11
+    {
12
+        picHeight = x265_min(picHeight, (tpely + ctuHeight));
13
+    }
14
+
15
     if (plane)
16
     {
17
         picWidth  >>= m_hChromaShift;
18
@@ -367,9 +377,9 @@
19
     }
20
     case SAO_EO_1: // dir: |
21
     {
22
-        int startY = !tpely;
23
+        int startY = bAboveUnavail;
24
         int endY   = (bpely == picHeight) ? ctuHeight - 1 : ctuHeight;
25
-        if (!tpely)
26
+        if (startY)
27
             rec += stride;
28
 
29
         if (ctuWidth & 15)
30
@@ -408,10 +418,10 @@
31
         int startX = !lpelx;
32
         int endX   = (rpelx == picWidth) ? ctuWidth - 1 : ctuWidth;
33
 
34
-        int startY = !tpely;
35
+        int startY = bAboveUnavail;
36
         int endY   = (bpely == picHeight) ? ctuHeight - 1 : ctuHeight;
37
 
38
-        if (!tpely)
39
+        if (startY)
40
             rec += stride;
41
 
42
         if (!(ctuWidth & 15))
43
@@ -474,10 +484,10 @@
44
         int startX = !lpelx;
45
         int endX   = (rpelx == picWidth) ? ctuWidth - 1 : ctuWidth;
46
 
47
-        int startY = !tpely;
48
+        int startY = bAboveUnavail;
49
         int endY   = (bpely == picHeight) ? ctuHeight - 1 : ctuHeight;
50
 
51
-        if (!tpely)
52
+        if (startY)
53
             rec += stride;
54
 
55
         if (ctuWidth & 15)
56
@@ -737,6 +747,10 @@
57
     int ctuHeight = g_maxCUSize;
58
     uint32_t lpelx = cu->m_cuPelX;
59
     uint32_t tpely = cu->m_cuPelY;
60
+    const uint32_t firstRowInSlice = cu->m_bFirstRowInSlice;
61
+    const uint32_t lastRowInSlice = cu->m_bLastRowInSlice;
62
+    const uint32_t bAboveUnavail = (!tpely) | firstRowInSlice;
63
+
64
     if (plane)
65
     {
66
         picWidth  >>= m_hChromaShift;
67
@@ -751,6 +765,12 @@
68
     ctuWidth  = rpelx - lpelx;
69
     ctuHeight = bpely - tpely;
70
 
71
+    // NOTE: Careful! the picHeight apply for Equal operator only in below, so I may safe to hack it
72
+    if (lastRowInSlice)
73
+    {
74
+        picHeight = bpely;
75
+    }
76
+
77
     int startX;
78
     int startY;
79
     int endX;
80
@@ -825,10 +845,10 @@
81
 
82
             rec  = rec0;
83
 
84
-            startY = !tpely;
85
+            startY = bAboveUnavail;
86
             endX   = (rpelx == picWidth) ? ctuWidth : ctuWidth - skipR + plane_offset;
87
             endY   = (bpely == picHeight) ? ctuHeight - 1 : ctuHeight - skipB + plane_offset;
88
-            if (!tpely)
89
+            if (startY)
90
             {
91
                 rec += stride;
92
             }
93
@@ -852,9 +872,9 @@
94
             startX = !lpelx;
95
             endX   = (rpelx == picWidth) ? ctuWidth - 1 : ctuWidth - skipR + plane_offset;
96
 
97
-            startY = !tpely;
98
+            startY = bAboveUnavail;
99
             endY   = (bpely == picHeight) ? ctuHeight - 1 : ctuHeight - skipB + plane_offset;
100
-            if (!tpely)
101
+            if (startY)
102
             {
103
                 fenc += stride;
104
                 rec += stride;
105
@@ -879,10 +899,10 @@
106
             startX = !lpelx;
107
             endX   = (rpelx == picWidth) ? ctuWidth - 1 : ctuWidth - skipR + plane_offset;
108
 
109
-            startY = !tpely;
110
+            startY = bAboveUnavail;
111
             endY   = (bpely == picHeight) ? ctuHeight - 1 : ctuHeight - skipB + plane_offset;
112
 
113
-            if (!tpely)
114
+            if (startY)
115
             {
116
                 fenc += stride;
117
                 rec += stride;
118
@@ -911,6 +931,16 @@
119
     int ctuHeight = g_maxCUSize;
120
     uint32_t lpelx = cu->m_cuPelX;
121
     uint32_t tpely = cu->m_cuPelY;
122
+    const uint32_t firstRowInSlice = cu->m_bFirstRowInSlice;
123
+    const uint32_t lastRowInSlice = cu->m_bLastRowInSlice;
124
+    const uint32_t bAboveAvail = (!tpely) | firstRowInSlice;
125
+
126
+    // NOTE: Careful! the picHeight for Equal operator only, so I may safe to hack it
127
+    if (lastRowInSlice)
128
+    {
129
+        picHeight = x265_min(picHeight, (tpely + ctuHeight));
130
+    }
131
+
132
     uint32_t rpelx = x265_min(lpelx + ctuWidth,  picWidth);
133
     uint32_t bpely = x265_min(tpely + ctuHeight, picHeight);
134
     ctuWidth  = rpelx - lpelx;
135
@@ -1028,10 +1058,10 @@
136
 
137
             startX = (rpelx == picWidth) ? ctuWidth : ctuWidth - skipR;
138
             startY = (bpely == picHeight) ? ctuHeight - 1 : ctuHeight - skipB;
139
-            firstY = !tpely;
140
+            firstY = bAboveAvail;
141
             // endY   = (bpely == picHeight) ? ctuHeight - 1 : ctuHeight;
142
             endY   = ctuHeight - 1; // not refer below CTU
143
-            if (!tpely)
144
+            if (firstY)
145
             {
146
                 fenc += stride;
147
                 rec += stride;
148
@@ -1074,12 +1104,12 @@
149
             startX = (rpelx == picWidth) ? ctuWidth - 1 : ctuWidth - skipR;
150
             startY = (bpely == picHeight) ? ctuHeight - 1 : ctuHeight - skipB;
151
             firstX = !lpelx;
152
-            firstY = !tpely;
153
+            firstY = bAboveAvail;
154
             // endX   = (rpelx == picWidth) ? ctuWidth - 1 : ctuWidth;
155
             // endY   = (bpely == picHeight) ? ctuHeight - 1 : ctuHeight;
156
             endX   = ctuWidth - 1;  // not refer right CTU
157
             endY   = ctuHeight - 1; // not refer below CTU
158
-            if (!tpely)
159
+            if (firstY)
160
             {
161
                 fenc += stride;
162
                 rec += stride;
163
@@ -1126,12 +1156,12 @@
164
             startX = (rpelx == picWidth) ? ctuWidth - 1 : ctuWidth - skipR;
165
             startY = (bpely == picHeight) ? ctuHeight - 1 : ctuHeight - skipB;
166
             firstX = !lpelx;
167
-            firstY = !tpely;
168
+            firstY = bAboveAvail;
169
             // endX   = (rpelx == picWidth) ? ctuWidth - 1 : ctuWidth;
170
             // endY   = (bpely == picHeight) ? ctuHeight - 1 : ctuHeight;
171
             endX   = ctuWidth - 1;  // not refer right CTU
172
             endY   = ctuHeight - 1; // not refer below CTU
173
-            if (!tpely)
174
+            if (firstY)
175
             {
176
                 fenc += stride;
177
                 rec += stride;
178
@@ -1197,7 +1227,7 @@
179
 
180
     int qpCb = qp;
181
     if (m_param->internalCsp == X265_CSP_I420)
182
-        qpCb = x265_clip3(QP_MIN, QP_MAX_MAX, (int)g_chromaScale[qp + slice->m_pps->chromaQpOffset[0]]);
183
+        qpCb = x265_clip3(m_param->rc.qpMin, m_param->rc.qpMax, (int)g_chromaScale[qp + slice->m_pps->chromaQpOffset[0]]);
184
     else
185
         qpCb = X265_MIN(qp + slice->m_pps->chromaQpOffset[0], QP_MAX_SPEC);
186
 
187
x265_2.0.tar.gz/source/encoder/search.cpp -> x265_2.1.tar.gz/source/encoder/search.cpp Changed
111
 
1
@@ -1854,10 +1854,26 @@
2
     for (uint32_t mergeCand = 0; mergeCand < numMergeCand; ++mergeCand)
3
     {
4
         /* Prevent TMVP candidates from using unavailable reference pixels */
5
-        if (m_bFrameParallel &&
6
-            (candMvField[mergeCand][0].mv.y >= (m_param->searchRange + 1) * 4 ||
7
-             candMvField[mergeCand][1].mv.y >= (m_param->searchRange + 1) * 4))
8
-            continue;
9
+        if (m_bFrameParallel)
10
+        {
11
+            // Parallel slices bound check
12
+            if (m_param->maxSlices > 1)
13
+            {
14
+                if (cu.m_bFirstRowInSlice &
15
+                    ((candMvField[mergeCand][0].mv.y < (2 * 4)) | (candMvField[mergeCand][1].mv.y < (2 * 4))))
16
+                    continue;
17
+
18
+                // Last row in slice can't reference beyond bound since it is another slice area
19
+                // TODO: we may beyond bound in future since these area have a chance to finish because we use parallel slices. Necessary prepare research on load balance
20
+                if (cu.m_bLastRowInSlice &&
21
+                    ((candMvField[mergeCand][0].mv.y > -3 * 4) | (candMvField[mergeCand][1].mv.y > -3 * 4)))
22
+                    continue;
23
+            }
24
+
25
+            if (candMvField[mergeCand][0].mv.y >= (m_param->searchRange + 1) * 4 ||
26
+                candMvField[mergeCand][1].mv.y >= (m_param->searchRange + 1) * 4)
27
+                continue;
28
+        }
29
 
30
         cu.m_mv[0][pu.puAbsPartIdx] = candMvField[mergeCand][0].mv;
31
         cu.m_refIdx[0][pu.puAbsPartIdx] = (int8_t)candMvField[mergeCand][0].refIdx;
32
@@ -1925,17 +1941,24 @@
33
         MV mvCand = amvp[i];
34
 
35
         // NOTE: skip mvCand if Y is > merange and -FN>1
36
-        if (m_bFrameParallel && (mvCand.y >= (m_param->searchRange + 1) * 4))
37
-            costs[i] = m_me.COST_MAX;
38
-        else
39
+        if (m_bFrameParallel)
40
         {
41
-            cu.clipMv(mvCand);
42
-            predInterLumaPixel(pu, tmpPredYuv, *m_slice->m_refReconPicList[list][ref], mvCand);
43
-            costs[i] = m_me.bufSAD(tmpPredYuv.getLumaAddr(pu.puAbsPartIdx), tmpPredYuv.m_size);
44
+            costs[i] = m_me.COST_MAX;
45
+
46
+            if (mvCand.y >= (m_param->searchRange + 1) * 4)
47
+                continue;
48
+
49
+            if ((m_param->maxSlices > 1) &
50
+                ((mvCand.y < m_sliceMinY)
51
+              |  (mvCand.y > m_sliceMaxY)))
52
+                continue;
53
         }
54
+        cu.clipMv(mvCand);
55
+        predInterLumaPixel(pu, tmpPredYuv, *m_slice->m_refReconPicList[list][ref], mvCand);
56
+        costs[i] = m_me.bufSAD(tmpPredYuv.getLumaAddr(pu.puAbsPartIdx), tmpPredYuv.m_size);
57
     }
58
 
59
-    return costs[0] <= costs[1] ? 0 : 1;
60
+    return (costs[0] <= costs[1]) ? 0 : 1;
61
 }
62
 
63
 void Search::PME::processTasks(int workerThreadId)
64
@@ -2023,7 +2046,8 @@
65
 
66
     setSearchRange(interMode.cu, mvp, m_param->searchRange, mvmin, mvmax);
67
 
68
-    int satdCost = m_me.motionEstimate(&m_slice->m_mref[list][ref], mvmin, mvmax, mvp, numMvc, mvc, m_param->searchRange, outmv);
69
+    int satdCost = m_me.motionEstimate(&m_slice->m_mref[list][ref], mvmin, mvmax, mvp, numMvc, mvc, m_param->searchRange, outmv, 
70
+      m_param->bSourceReferenceEstimation ? m_slice->m_refFrameList[list][ref]->m_fencPic->getLumaAddr(0) : 0);
71
 
72
     /* Get total cost of partition, but only include MV bit cost once */
73
     bits += m_me.bitcost(outmv);
74
@@ -2106,9 +2130,10 @@
75
                 const MV* amvp = interMode.amvpCand[list][ref];
76
                 int mvpIdx = selectMVP(cu, pu, amvp, list, ref);
77
                 MV mvmin, mvmax, outmv, mvp = amvp[mvpIdx];
78
-                
79
+
80
                 setSearchRange(cu, mvp, m_param->searchRange, mvmin, mvmax);
81
-                int satdCost = m_me.motionEstimate(&slice->m_mref[list][ref], mvmin, mvmax, mvp, numMvc, mvc, m_param->searchRange, outmv);
82
+                int satdCost = m_me.motionEstimate(&slice->m_mref[list][ref], mvmin, mvmax, mvp, numMvc, mvc, m_param->searchRange, outmv,
83
+                  m_param->bSourceReferenceEstimation ? m_slice->m_refFrameList[list][ref]->m_fencPic->getLumaAddr(0) : 0);
84
 
85
                 /* Get total cost of partition, but only include MV bit cost once */
86
                 bits += m_me.bitcost(outmv);
87
@@ -2206,7 +2231,8 @@
88
                     }
89
 
90
                     setSearchRange(cu, mvp, m_param->searchRange, mvmin, mvmax);
91
-                    int satdCost = m_me.motionEstimate(&slice->m_mref[list][ref], mvmin, mvmax, mvp, numMvc, mvc, m_param->searchRange, outmv);
92
+                    int satdCost = m_me.motionEstimate(&slice->m_mref[list][ref], mvmin, mvmax, mvp, numMvc, mvc, m_param->searchRange, outmv, 
93
+                      m_param->bSourceReferenceEstimation ? m_slice->m_refFrameList[list][ref]->m_fencPic->getLumaAddr(0) : 0);
94
 
95
                     /* Get total cost of partition, but only include MV bit cost once */
96
                     bits += m_me.bitcost(outmv);
97
@@ -2497,6 +2523,13 @@
98
         mvmin.x = X265_MIN(mvmin.x, maxSafeMv);
99
     }
100
 
101
+    // apply restrict on slices
102
+    if ((m_param->maxSlices > 1) & m_bFrameParallel)
103
+    {
104
+        mvmin.y = X265_MAX(mvmin.y, m_sliceMinY);
105
+        mvmax.y = X265_MIN(mvmax.y, m_sliceMaxY);
106
+    }
107
+
108
     /* Clip search range to signaled maximum MV length.
109
      * We do not support this VUI field being changed from the default */
110
     const int maxMvLen = (1 << 15) - 1;
111
x265_2.0.tar.gz/source/encoder/search.h -> x265_2.1.tar.gz/source/encoder/search.h Changed
11
 
1
@@ -275,6 +275,9 @@
2
     uint32_t        m_numLayers;
3
     uint32_t        m_refLagPixels;
4
 
5
+    int16_t         m_sliceMaxY;
6
+    int16_t         m_sliceMinY;
7
+
8
 #if DETAILED_CU_STATS
9
     /* Accumulate CU statistics separately for each frame encoder */
10
     CUStats         m_stats[X265_MAX_FRAME_THREADS];
11
x265_2.0.tar.gz/source/encoder/sei.h -> x265_2.1.tar.gz/source/encoder/sei.h Changed
125
 
1
@@ -46,36 +46,7 @@
2
 
3
 protected:
4
 
5
-    enum PayloadType
6
-    {
7
-        BUFFERING_PERIOD                     = 0,
8
-        PICTURE_TIMING                       = 1,
9
-        PAN_SCAN_RECT                        = 2,
10
-        FILLER_PAYLOAD                       = 3,
11
-        USER_DATA_REGISTERED_ITU_T_T35       = 4,
12
-        USER_DATA_UNREGISTERED               = 5,
13
-        RECOVERY_POINT                       = 6,
14
-        SCENE_INFO                           = 9,
15
-        FULL_FRAME_SNAPSHOT                  = 15,
16
-        PROGRESSIVE_REFINEMENT_SEGMENT_START = 16,
17
-        PROGRESSIVE_REFINEMENT_SEGMENT_END   = 17,
18
-        FILM_GRAIN_CHARACTERISTICS           = 19,
19
-        POST_FILTER_HINT                     = 22,
20
-        TONE_MAPPING_INFO                    = 23,
21
-        FRAME_PACKING                        = 45,
22
-        DISPLAY_ORIENTATION                  = 47,
23
-        SOP_DESCRIPTION                      = 128,
24
-        ACTIVE_PARAMETER_SETS                = 129,
25
-        DECODING_UNIT_INFO                   = 130,
26
-        TEMPORAL_LEVEL0_INDEX                = 131,
27
-        DECODED_PICTURE_HASH                 = 132,
28
-        SCALABLE_NESTING                     = 133,
29
-        REGION_REFRESH_INFO                  = 134,
30
-        MASTERING_DISPLAY_INFO               = 137,
31
-        CONTENT_LIGHT_LEVEL_INFO             = 144,
32
-    };
33
-
34
-    virtual PayloadType payloadType() const = 0;
35
+    virtual SEIPayloadType payloadType() const = 0;
36
 
37
     virtual void writeSEI(const SPS&) { X265_CHECK(0, "empty writeSEI method called\n");  }
38
 
39
@@ -86,11 +57,12 @@
40
 {
41
 public:
42
 
43
-    PayloadType payloadType() const { return USER_DATA_UNREGISTERED; }
44
+    SEIPayloadType payloadType() const { return m_payloadType; }
45
 
46
     SEIuserDataUnregistered() : m_userData(NULL) {}
47
 
48
     static const uint8_t m_uuid_iso_iec_11578[16];
49
+    SEIPayloadType m_payloadType;
50
     uint32_t m_userDataLength;
51
     uint8_t *m_userData;
52
 
53
@@ -98,7 +70,7 @@
54
     {
55
         m_bitIf = &bs;
56
 
57
-        WRITE_CODE(USER_DATA_UNREGISTERED, 8, "payload_type");
58
+        WRITE_CODE(m_payloadType, 8, "payload_type");
59
 
60
         uint32_t payloadSize = 16 + m_userDataLength;
61
         for (; payloadSize >= 0xff; payloadSize -= 0xff)
62
@@ -123,7 +95,7 @@
63
     uint32_t maxDisplayMasteringLuminance;
64
     uint32_t minDisplayMasteringLuminance;
65
 
66
-    PayloadType payloadType() const { return MASTERING_DISPLAY_INFO; }
67
+    SEIPayloadType payloadType() const { return MASTERING_DISPLAY_INFO; }
68
 
69
     bool parse(const char* value)
70
     {
71
@@ -161,7 +133,7 @@
72
     uint16_t max_content_light_level;
73
     uint16_t max_pic_average_light_level;
74
 
75
-    PayloadType payloadType() const { return CONTENT_LIGHT_LEVEL_INFO; }
76
+    SEIPayloadType payloadType() const { return CONTENT_LIGHT_LEVEL_INFO; }
77
 
78
     void write(Bitstream& bs, const SPS&)
79
     {
80
@@ -178,7 +150,7 @@
81
 {
82
 public:
83
 
84
-    PayloadType payloadType() const { return DECODED_PICTURE_HASH; }
85
+    SEIPayloadType payloadType() const { return DECODED_PICTURE_HASH; }
86
 
87
     enum Method
88
     {
89
@@ -238,7 +210,7 @@
90
 {
91
 public:
92
 
93
-    PayloadType payloadType() const { return ACTIVE_PARAMETER_SETS; }
94
+    SEIPayloadType payloadType() const { return ACTIVE_PARAMETER_SETS; }
95
 
96
     bool m_selfContainedCvsFlag;
97
     bool m_noParamSetUpdateFlag;
98
@@ -258,7 +230,7 @@
99
 {
100
 public:
101
 
102
-    PayloadType payloadType() const { return BUFFERING_PERIOD; }
103
+    SEIPayloadType payloadType() const { return BUFFERING_PERIOD; }
104
 
105
     SEIBufferingPeriod()
106
         : m_cpbDelayOffset(0)
107
@@ -292,7 +264,7 @@
108
 {
109
 public:
110
 
111
-    PayloadType payloadType() const { return PICTURE_TIMING; }
112
+    SEIPayloadType payloadType() const { return PICTURE_TIMING; }
113
 
114
     uint32_t  m_picStruct;
115
     uint32_t  m_sourceScanType;
116
@@ -327,7 +299,7 @@
117
 {
118
 public:
119
 
120
-    PayloadType payloadType() const { return RECOVERY_POINT; }
121
+    SEIPayloadType payloadType() const { return RECOVERY_POINT; }
122
 
123
     int  m_recoveryPocCnt;
124
     bool m_exactMatchingFlag;
125
x265_2.0.tar.gz/source/encoder/slicetype.cpp -> x265_2.1.tar.gz/source/encoder/slicetype.cpp Changed
201
 
1
@@ -56,22 +56,36 @@
2
 }
3
 
4
 /* Find the energy of each block in Y/Cb/Cr plane */
5
-inline uint32_t acEnergyPlane(Frame *curFrame, pixel* src, intptr_t srcStride, int plane, int colorFormat)
6
+inline uint32_t acEnergyPlane(Frame *curFrame, pixel* src, intptr_t srcStride, int plane, int colorFormat, uint32_t qgSize)
7
 {
8
     if ((colorFormat != X265_CSP_I444) && plane)
9
     {
10
-        ALIGN_VAR_8(pixel, pix[8 * 8]);
11
-        primitives.cu[BLOCK_8x8].copy_pp(pix, 8, src, srcStride);
12
-        return acEnergyVar(curFrame, primitives.cu[BLOCK_8x8].var(pix, 8), 6, plane);
13
+        if (qgSize == 8)
14
+        {
15
+            ALIGN_VAR_4(pixel, pix[4 * 4]);
16
+            primitives.cu[BLOCK_4x4].copy_pp(pix, 4, src, srcStride);
17
+            return acEnergyVar(curFrame, primitives.cu[BLOCK_4x4].var(pix, 4), 4, plane);
18
+        }
19
+        else
20
+        {
21
+            ALIGN_VAR_8(pixel, pix[8 * 8]);
22
+            primitives.cu[BLOCK_8x8].copy_pp(pix, 8, src, srcStride);
23
+            return acEnergyVar(curFrame, primitives.cu[BLOCK_8x8].var(pix, 8), 6, plane);
24
+        }
25
     }
26
     else
27
-        return acEnergyVar(curFrame, primitives.cu[BLOCK_16x16].var(src, srcStride), 8, plane);
28
+    {
29
+        if (qgSize == 8)
30
+            return acEnergyVar(curFrame, primitives.cu[BLOCK_8x8].var(src, srcStride), 6, plane);
31
+        else
32
+            return acEnergyVar(curFrame, primitives.cu[BLOCK_16x16].var(src, srcStride), 8, plane);
33
+    }
34
 }
35
 
36
 } // end anonymous namespace
37
 
38
 /* Find the total AC energy of each block in all planes */
39
-uint32_t LookaheadTLD::acEnergyCu(Frame* curFrame, uint32_t blockX, uint32_t blockY, int csp)
40
+uint32_t LookaheadTLD::acEnergyCu(Frame* curFrame, uint32_t blockX, uint32_t blockY, int csp, uint32_t qgSize)
41
 {
42
     intptr_t stride = curFrame->m_fencPic->m_stride;
43
     intptr_t cStride = curFrame->m_fencPic->m_strideC;
44
@@ -82,11 +96,11 @@
45
 
46
     uint32_t var;
47
 
48
-    var  = acEnergyPlane(curFrame, curFrame->m_fencPic->m_picOrg[0] + blockOffsetLuma, stride, 0, csp);
49
+    var  = acEnergyPlane(curFrame, curFrame->m_fencPic->m_picOrg[0] + blockOffsetLuma, stride, 0, csp, qgSize);
50
     if (csp != X265_CSP_I400 && curFrame->m_fencPic->m_picCsp != X265_CSP_I400)
51
     {
52
-        var += acEnergyPlane(curFrame, curFrame->m_fencPic->m_picOrg[1] + blockOffsetChroma, cStride, 1, csp);
53
-        var += acEnergyPlane(curFrame, curFrame->m_fencPic->m_picOrg[2] + blockOffsetChroma, cStride, 2, csp);
54
+        var += acEnergyPlane(curFrame, curFrame->m_fencPic->m_picOrg[1] + blockOffsetChroma, cStride, 1, csp, qgSize);
55
+        var += acEnergyPlane(curFrame, curFrame->m_fencPic->m_picOrg[2] + blockOffsetChroma, cStride, 2, csp, qgSize);
56
     }
57
     x265_emms();
58
     return var;
59
@@ -97,7 +111,22 @@
60
     /* Actual adaptive quantization */
61
     int maxCol = curFrame->m_fencPic->m_picWidth;
62
     int maxRow = curFrame->m_fencPic->m_picHeight;
63
-    int blockCount = curFrame->m_lowres.maxBlocksInRow * curFrame->m_lowres.maxBlocksInCol;
64
+    int blockCount, loopIncr;
65
+    float modeOneConst, modeTwoConst;
66
+    if (param->rc.qgSize == 8)
67
+    {
68
+        blockCount = curFrame->m_lowres.maxBlocksInRowFullRes * curFrame->m_lowres.maxBlocksInColFullRes;
69
+        modeOneConst = 11.427f;
70
+        modeTwoConst = 8.f;
71
+        loopIncr = 8;
72
+    }
73
+    else
74
+    {
75
+        blockCount = widthInCU * heightInCU;
76
+        modeOneConst = 14.427f;
77
+        modeTwoConst = 11.f;
78
+        loopIncr = 16;
79
+    }
80
 
81
     float* quantOffsets = curFrame->m_quantOffsets;
82
     for (int y = 0; y < 3; y++)
83
@@ -106,14 +135,14 @@
84
         curFrame->m_lowres.wp_sum[y] = 0;
85
     }
86
 
87
-    /* Calculate Qp offset for each 16x16 block in the frame */
88
+    /* Calculate Qp offset for each 16x16 or 8x8 block in the frame */
89
     int blockXY = 0;
90
     int blockX = 0, blockY = 0;
91
     double strength = 0.f;
92
     if (param->rc.aqMode == X265_AQ_NONE || param->rc.aqStrength == 0)
93
     {
94
         /* Need to init it anyways for CU tree */
95
-        int cuCount = widthInCU * heightInCU;
96
+        int cuCount = blockCount;
97
 
98
         if (param->rc.aqMode && param->rc.aqStrength == 0)
99
         {
100
@@ -137,9 +166,9 @@
101
         /* Need variance data for weighted prediction */
102
         if (param->bEnableWeightedPred || param->bEnableWeightedBiPred)
103
         {
104
-            for (blockY = 0; blockY < maxRow; blockY += 16)
105
-                for (blockX = 0; blockX < maxCol; blockX += 16)
106
-                    acEnergyCu(curFrame, blockX, blockY, param->internalCsp);
107
+            for (blockY = 0; blockY < maxRow; blockY += loopIncr)
108
+                for (blockX = 0; blockX < maxCol; blockX += loopIncr)
109
+                    acEnergyCu(curFrame, blockX, blockY, param->internalCsp, param->rc.qgSize);
110
         }
111
     }
112
     else
113
@@ -152,12 +181,12 @@
114
             double bit_depth_correction = 1.f / (1 << (2*(X265_DEPTH-8)));
115
             curFrame->m_lowres.frameVariance = 0;
116
             uint64_t rowVariance = 0;
117
-            for (blockY = 0; blockY < maxRow; blockY += 16)
118
+            for (blockY = 0; blockY < maxRow; blockY += loopIncr)
119
             {
120
                 rowVariance = 0;
121
-                for (blockX = 0; blockX < maxCol; blockX += 16)
122
+                for (blockX = 0; blockX < maxCol; blockX += loopIncr)
123
                 {
124
-                    uint32_t energy = acEnergyCu(curFrame, blockX, blockY, param->internalCsp);
125
+                    uint32_t energy = acEnergyCu(curFrame, blockX, blockY, param->internalCsp, param->rc.qgSize);
126
                     curFrame->m_lowres.blockVariance[blockXY] = energy;
127
                     rowVariance += energy;
128
                     qp_adj = pow(energy * bit_depth_correction + 1, 0.1);
129
@@ -172,21 +201,21 @@
130
             avg_adj /= blockCount;
131
             avg_adj_pow2 /= blockCount;
132
             strength = param->rc.aqStrength * avg_adj;
133
-            avg_adj = avg_adj - 0.5f * (avg_adj_pow2 - (11.f)) / avg_adj;
134
+            avg_adj = avg_adj - 0.5f * (avg_adj_pow2 - (modeTwoConst)) / avg_adj;
135
             bias_strength = param->rc.aqStrength;
136
         }
137
         else
138
             strength = param->rc.aqStrength * 1.0397f;
139
 
140
         blockXY = 0;
141
-        for (blockY = 0; blockY < maxRow; blockY += 16)
142
+        for (blockY = 0; blockY < maxRow; blockY += loopIncr)
143
         {
144
-            for (blockX = 0; blockX < maxCol; blockX += 16)
145
+            for (blockX = 0; blockX < maxCol; blockX += loopIncr)
146
             {
147
                 if (param->rc.aqMode == X265_AQ_AUTO_VARIANCE_BIASED)
148
                 {
149
                     qp_adj = curFrame->m_lowres.qpCuTreeOffset[blockXY];
150
-                    qp_adj = strength * (qp_adj - avg_adj) + bias_strength * (1.f - 11.f / (qp_adj * qp_adj));
151
+                    qp_adj = strength * (qp_adj - avg_adj) + bias_strength * (1.f - modeTwoConst / (qp_adj * qp_adj));
152
                 }
153
                 else if (param->rc.aqMode == X265_AQ_AUTO_VARIANCE)
154
                 {
155
@@ -195,8 +224,8 @@
156
                 }
157
                 else
158
                 {
159
-                    uint32_t energy = acEnergyCu(curFrame, blockX, blockY, param->internalCsp);
160
-                    qp_adj = strength * (X265_LOG2(X265_MAX(energy, 1)) - (14.427f + 2 * (X265_DEPTH - 8)));
161
+                    uint32_t energy = acEnergyCu(curFrame, blockX, blockY, param->internalCsp,param->rc.qgSize);
162
+                    qp_adj = strength * (X265_LOG2(X265_MAX(energy, 1)) - (modeOneConst + 2 * (X265_DEPTH - 8)));
163
                 }
164
                 if (quantOffsets != NULL)
165
                     qp_adj += quantOffsets[blockXY];
166
@@ -208,6 +237,21 @@
167
         }
168
     }
169
 
170
+    if (param->rc.qgSize == 8)
171
+    {
172
+        for (int cuY = 0; cuY < heightInCU; cuY++)
173
+        {
174
+            for (int cuX = 0; cuX < widthInCU; cuX++)
175
+            {
176
+                const int cuXY = cuX + cuY * widthInCU;
177
+                curFrame->m_lowres.invQscaleFactor8x8[cuXY] = (curFrame->m_lowres.invQscaleFactor[cuX * 2 + cuY * widthInCU * 4] +
178
+                                                               curFrame->m_lowres.invQscaleFactor[cuX * 2 + cuY * widthInCU * 4 + 1] +
179
+                                                               curFrame->m_lowres.invQscaleFactor[cuX * 2 + cuY * widthInCU * 4 + curFrame->m_lowres.maxBlocksInRowFullRes] +
180
+                                                               curFrame->m_lowres.invQscaleFactor[cuX * 2 + cuY * widthInCU * 4 + curFrame->m_lowres.maxBlocksInRowFullRes + 1]) / 4;
181
+            }
182
+        }
183
+    }
184
+
185
     if (param->bEnableWeightedPred || param->bEnableWeightedBiPred)
186
     {
187
         int hShift = CHROMA_H_SHIFT(param->internalCsp);
188
@@ -227,7 +271,7 @@
189
     }
190
 }
191
 
192
-void LookaheadTLD::lowresIntraEstimate(Lowres& fenc)
193
+void LookaheadTLD::lowresIntraEstimate(Lowres& fenc, uint32_t qgSize)
194
 {
195
     ALIGN_VAR_32(pixel, prediction[X265_LOWRES_CU_SIZE * X265_LOWRES_CU_SIZE]);
196
     pixel fencIntra[X265_LOWRES_CU_SIZE * X265_LOWRES_CU_SIZE];
197
@@ -314,12 +358,15 @@
198
             fenc.lowresCosts[0][0][cuXY] = (uint16_t)(X265_MIN(icost, LOWRES_COST_MASK) | (0 << LOWRES_COST_SHIFT));
199
             fenc.intraCost[cuXY] = icost;
200
             fenc.intraMode[cuXY] = (uint8_t)ilowmode;
201
x265_2.0.tar.gz/source/encoder/slicetype.h -> x265_2.1.tar.gz/source/encoder/slicetype.h Changed
26
 
1
@@ -84,13 +84,13 @@
2
     ~LookaheadTLD() { X265_FREE(wbuffer[0]); }
3
 
4
     void calcAdaptiveQuantFrame(Frame *curFrame, x265_param* param);
5
-    void lowresIntraEstimate(Lowres& fenc);
6
+    void lowresIntraEstimate(Lowres& fenc, uint32_t qgSize);
7
 
8
     void weightsAnalyse(Lowres& fenc, Lowres& ref);
9
 
10
 protected:
11
 
12
-    uint32_t acEnergyCu(Frame* curFrame, uint32_t blockX, uint32_t blockY, int csp);
13
+    uint32_t acEnergyCu(Frame* curFrame, uint32_t blockX, uint32_t blockY, int csp, uint32_t qgSize);
14
     uint32_t weightCostLuma(Lowres& fenc, Lowres& ref, WeightParam& wp);
15
     bool     allocWeightedRef(Lowres& fenc);
16
 };
17
@@ -144,6 +144,8 @@
18
     void    stopJobs();
19
 
20
     void    addPicture(Frame&, int sliceType);
21
+    void    addPicture(Frame& curFrame);
22
+    void    checkLookaheadQueue(int &frameCnt);
23
     void    flush();
24
     Frame*  getDecidedPicture();
25
 
26
x265_2.0.tar.gz/source/test/regression-tests.txt -> x265_2.1.tar.gz/source/test/regression-tests.txt Changed
27
 
1
@@ -49,6 +49,7 @@
2
 DucksAndLegs_1920x1080_60_10bit_444.yuv,--preset medium --nr-inter 500 -F4 --no-psy-rdoq
3
 DucksAndLegs_1920x1080_60_10bit_444.yuv,--preset slower --no-weightp --rdoq-level 0 --limit-refs 3
4
 FourPeople_1280x720_60.y4m,--preset superfast --no-wpp --lookahead-slices 2
5
+FourPeople_1280x720_60.y4m,--preset veryfast --aq-mode 2 --aq-strength 1.5 --qg-size 8
6
 FourPeople_1280x720_60.y4m,--preset medium --qp 38 --no-psy-rd
7
 FourPeople_1280x720_60.y4m,--preset medium --recon-y4m-exec "ffplay -i pipe:0 -autoexit"
8
 FourPeople_1280x720_60.y4m,--preset veryslow --numa-pools "none"
9
@@ -90,7 +91,7 @@
10
 big_buck_bunny_360p24.y4m,--preset veryfast --no-deblock
11
 big_buck_bunny_360p24.y4m,--preset faster --keyint 240 --min-keyint 60 --rc-lookahead 200
12
 big_buck_bunny_360p24.y4m,--preset medium --keyint 60 --min-keyint 48 --weightb --limit-refs 3
13
-big_buck_bunny_360p24.y4m,--preset slow --psy-rdoq 2.0 --rdoq-level 1 --no-b-intra --aq-mode 3
14
+big_buck_bunny_360p24.y4m,--preset slow --psy-rdoq 2.0 --rdoq-level 1 --no-b-intra --aq-mode 3 --qg-size 8
15
 city_4cif_60fps.y4m,--preset superfast --rdpenalty 1 --tu-intra-depth 2
16
 city_4cif_60fps.y4m,--preset medium --crf 4 --cu-lossless --sao-non-deblock
17
 city_4cif_60fps.y4m,--preset slower --scaling-list default
18
@@ -128,7 +129,7 @@
19
 washdc_422_ntsc.y4m,--preset veryfast --tu-inter-depth 4
20
 washdc_422_ntsc.y4m,--preset faster --rdoq-level 1 --max-merge 5
21
 vtc1nw_422_ntsc.y4m,--preset medium --scaling-list default --ctu 16 --ref 5
22
-washdc_422_ntsc.y4m,--preset medium --no-weightp --max-tu-size 4 --limit-refs 1 --aq-mode 2
23
+washdc_422_ntsc.y4m,--preset medium --no-weightp --max-tu-size 4 --limit-refs 1 --aq-mode 2 --qg-size 8
24
 vtc1nw_422_ntsc.y4m,--preset slower --nr-inter 1000 -F4 --tune fast-decode --qg-size 16
25
 washdc_422_ntsc.y4m,--preset slower --psy-rdoq 2.0 --rdoq-level 2 --qg-size 32 --limit-refs 1
26
 washdc_422_ntsc.y4m,--preset veryslow --crf 4 --cu-lossless --limit-refs 3 --limit-modes
27
x265_2.0.tar.gz/source/x265-extras.cpp -> x265_2.1.tar.gz/source/x265-extras.cpp Changed
10
 
1
@@ -137,7 +137,7 @@
2
     if (param.bEnableSsim)
3
         fprintf(csvfp, " %.6f, %6.3f,", frameStats->ssim, x265_ssim2dB(frameStats->ssim));
4
     fprintf(csvfp, "%d, ", frameStats->frameLatency);
5
-    if (frameStats->sliceType == 'I')
6
+    if (frameStats->sliceType == 'I' || frameStats->sliceType == 'i')
7
         fputs(" -, -,", csvfp);
8
     else
9
     {
10
x265_2.0.tar.gz/source/x265.h -> x265_2.1.tar.gz/source/x265.h Changed
125
 
1
@@ -150,6 +150,55 @@
2
     x265_cu_stats    cuStats;
3
 } x265_frame_stats;
4
 
5
+/* Arbitrary User SEI
6
+ * Payload size is in bytes and the payload pointer must be non-NULL. 
7
+ * Payload types and syntax can be found in Annex D of the H.265 Specification.
8
+ * SEI Payload Alignment bits as described in Annex D must be included at the 
9
+ * end of the payload if needed. The payload should not be NAL-encapsulated.
10
+ * Payloads are written in the order of input */
11
+
12
+typedef enum
13
+{
14
+    BUFFERING_PERIOD                     = 0,
15
+    PICTURE_TIMING                       = 1,
16
+    PAN_SCAN_RECT                        = 2,
17
+    FILLER_PAYLOAD                       = 3,
18
+    USER_DATA_REGISTERED_ITU_T_T35       = 4,
19
+    USER_DATA_UNREGISTERED               = 5,
20
+    RECOVERY_POINT                       = 6,
21
+    SCENE_INFO                           = 9,
22
+    FULL_FRAME_SNAPSHOT                  = 15,
23
+    PROGRESSIVE_REFINEMENT_SEGMENT_START = 16,
24
+    PROGRESSIVE_REFINEMENT_SEGMENT_END   = 17,
25
+    FILM_GRAIN_CHARACTERISTICS           = 19,
26
+    POST_FILTER_HINT                     = 22,
27
+    TONE_MAPPING_INFO                    = 23,
28
+    FRAME_PACKING                        = 45,
29
+    DISPLAY_ORIENTATION                  = 47,
30
+    SOP_DESCRIPTION                      = 128,
31
+    ACTIVE_PARAMETER_SETS                = 129,
32
+    DECODING_UNIT_INFO                   = 130,
33
+    TEMPORAL_LEVEL0_INDEX                = 131,
34
+    DECODED_PICTURE_HASH                 = 132,
35
+    SCALABLE_NESTING                     = 133,
36
+    REGION_REFRESH_INFO                  = 134,
37
+    MASTERING_DISPLAY_INFO               = 137,
38
+    CONTENT_LIGHT_LEVEL_INFO             = 144,
39
+} SEIPayloadType;
40
+
41
+typedef struct x265_sei_payload
42
+{
43
+    int payloadSize;
44
+    SEIPayloadType payloadType;
45
+    uint8_t* payload;
46
+} x265_sei_payload;
47
+
48
+typedef struct x265_sei
49
+{
50
+    int numPayloads;
51
+    x265_sei_payload *payloads;
52
+} x265_sei;
53
+
54
 /* Used to pass pictures into the encoder, and to get picture data back out of
55
  * the encoder.  The input and output semantics are different */
56
 typedef struct x265_picture
57
@@ -214,13 +263,16 @@
58
     /* An array of quantizer offsets to be applied to this image during encoding.
59
      * These are added on top of the decisions made by rateControl.
60
      * Adaptive quantization must be enabled to use this feature. These quantizer
61
-     * offsets should be given for each 16x16 block. Behavior if quant
62
-     * offsets differ between encoding passes is undefined. */
63
+     * offsets should be given for each 16x16 block (8x8 block, when qg-size is 8).
64
+     * Behavior if quant offsets differ between encoding passes is undefined. */
65
     float            *quantOffsets;
66
 
67
     /* Frame level statistics */
68
     x265_frame_stats frameData;
69
 
70
+    /* User defined SEI */
71
+    x265_sei         userSEI;
72
+
73
     /* Ratecontrol statistics for collecting the ratecontrol information.
74
      * It is not used for collecting the last pass ratecontrol data in 
75
      * multi pass ratecontrol mode. */
76
@@ -883,6 +935,9 @@
77
     /* Enable weighted prediction in B slices. Default is disabled */
78
     int       bEnableWeightedBiPred;
79
 
80
+    /* Enable source pixels in motion estimation. Default is disabled */
81
+    int      bSourceReferenceEstimation;
82
+
83
     /*== Loop Filters ==*/
84
 
85
     /* Enable the deblocking loop filter, which improves visual quality by
86
@@ -1103,12 +1158,18 @@
87
 
88
         /* Enable adaptive quantization at CU granularity. This parameter specifies
89
          * the minimum CU size at which QP can be adjusted, i.e. Quantization Group
90
-         * (QG) size. Allowed values are 64, 32, 16 provided it falls within the
91
+         * (QG) size. Allowed values are 64, 32, 16, 8 provided it falls within the
92
          * inclusive range [maxCUSize, minCUSize]. Experimental, default: maxCUSize */
93
         uint32_t qgSize;
94
 
95
         /* internally enable if tune grain is set */
96
         int      bEnableGrain;
97
+
98
+        /* sets a hard upper limit on QP */
99
+        int      qpMax;
100
+
101
+        /* sets a hard lower limit on QP */
102
+        int      qpMin;
103
     } rc;
104
 
105
     /*== Video Usability Information ==*/
106
@@ -1236,6 +1297,18 @@
107
      * value to that value. */
108
     uint16_t maxLuma;
109
 
110
+    /* Maximum of the picture order count */
111
+    int log2MaxPocLsb;
112
+
113
+    /* Discard SEI messages when printing */
114
+    int bDiscardSEI;
115
+    
116
+    /* Control removing optional vui information (timing, HRD info) to get low bitrate */
117
+    int       bDiscardOptionalVUI;
118
+
119
+    /* Maximum count of Slices of picture, the value range is [1, maximum rows] */
120
+    unsigned int maxSlices;
121
+
122
 } x265_param;
123
 
124
 /* x265_param_alloc:
125
x265_2.0.tar.gz/source/x265cli.h -> x265_2.1.tar.gz/source/x265cli.h Changed
80
 
1
@@ -2,6 +2,7 @@
2
  * Copyright (C) 2013 x265 project
3
  *
4
  * Authors: Steve Borho <steve@borho.org>
5
+ *          Min Chen <chenm003@163.com>
6
  *
7
  * This program is free software; you can redistribute it and/or modify
8
  * it under the terms of the GNU General Public License as published by
9
@@ -152,6 +153,8 @@
10
     { "pbratio",        required_argument, NULL, 0 },
11
     { "qcomp",          required_argument, NULL, 0 },
12
     { "qpstep",         required_argument, NULL, 0 },
13
+    { "qpmin",          required_argument, NULL, 0 },
14
+    { "qpmax",          required_argument, NULL, 0 },
15
     { "ratetol",        required_argument, NULL, 0 },
16
     { "cplxblur",       required_argument, NULL, 0 },
17
     { "qblur",          required_argument, NULL, 0 },
18
@@ -204,6 +207,9 @@
19
     { "max-cll",        required_argument, NULL, 0 },
20
     { "min-luma",       required_argument, NULL, 0 },
21
     { "max-luma",       required_argument, NULL, 0 },
22
+    { "log2-max-poc-lsb", required_argument, NULL, 8 },
23
+    { "discard-sei",          no_argument, NULL, 0 },
24
+    { "discard-vui",          no_argument, NULL, 0 },
25
     { "no-dither",            no_argument, NULL, 0 },
26
     { "dither",               no_argument, NULL, 0 },
27
     { "no-repeat-headers",    no_argument, NULL, 0 },
28
@@ -230,6 +236,9 @@
29
     { "no-temporal-layers",   no_argument, NULL, 0 },
30
     { "qg-size",        required_argument, NULL, 0 },
31
     { "recon-y4m-exec", required_argument, NULL, 0 },
32
+    { "analyze-src-pics", no_argument, NULL, 0 },
33
+    { "no-analyze-src-pics", no_argument, NULL, 0 },
34
+    { "slices",         required_argument, NULL, 0 },
35
     { 0, 0, 0, 0 },
36
     { 0, 0, 0, 0 },
37
     { 0, 0, 0, 0 },
38
@@ -293,6 +302,7 @@
39
     H0("                                 '-' implies no threads on node, '+' implies one thread per core on node\n");
40
     H0("-F/--frame-threads <integer>     Number of concurrently encoded frames. 0: auto-determined by core count\n");
41
     H0("   --[no-]wpp                    Enable Wavefront Parallel Processing. Default %s\n", OPT(param->bEnableWavefront));
42
+    H0("   --[no-]slices <integer>       Enable Multiple Slices feature. Default %d\n", param->maxSlices);
43
     H0("   --[no-]pmode                  Parallel mode analysis. Default %s\n", OPT(param->bDistributeModeAnalysis));
44
     H0("   --[no-]pme                    Parallel motion estimation. Default %s\n", OPT(param->bDistributeMotionEstimation));
45
     H0("   --[no-]asm <bool|int|string>  Override CPU detection. Default: auto\n");
46
@@ -375,19 +385,22 @@
47
        "                                   - 2 : Last pass, does not overwrite stats file\n"
48
        "                                   - 3 : Nth pass, overwrites stats file\n");
49
     H0("   --stats                       Filename for stats file in multipass pass rate control. Default x265_2pass.log\n");
50
+    H0("   --[no-]analyze-src-pics       Motion estimation uses source frame planes. Default disable\n");
51
     H0("   --[no-]slow-firstpass         Enable a slow first pass in a multipass rate control mode. Default %s\n", OPT(param->rc.bEnableSlowFirstPass));
52
     H0("   --[no-]strict-cbr             Enable stricter conditions and tolerance for bitrate deviations in CBR mode. Default %s\n", OPT(param->rc.bStrictCbr));
53
     H0("   --analysis-mode <string|int>  save - Dump analysis info into file, load - Load analysis buffers from the file. Default %d\n", param->analysisMode);
54
     H0("   --analysis-file <filename>    Specify file name used for either dumping or reading analysis data.\n");
55
     H0("   --aq-mode <integer>           Mode for Adaptive Quantization - 0:none 1:uniform AQ 2:auto variance 3:auto variance with bias to dark scenes. Default %d\n", param->rc.aqMode);
56
     H0("   --aq-strength <float>         Reduces blocking and blurring in flat and textured areas (0 to 3.0). Default %.2f\n", param->rc.aqStrength);
57
-    H0("   --qg-size <int>               Specifies the size of the quantization group (64, 32, 16). Default %d\n", param->rc.qgSize);
58
+    H0("   --qg-size <int>               Specifies the size of the quantization group (64, 32, 16, 8). Default %d\n", param->rc.qgSize);
59
     H0("   --[no-]cutree                 Enable cutree for Adaptive Quantization. Default %s\n", OPT(param->rc.cuTree));
60
     H0("   --[no-]rc-grain               Enable ratecontrol mode to handle grains specifically. turned on with tune grain. Default %s\n", OPT(param->rc.bEnableGrain));
61
     H1("   --ipratio <float>             QP factor between I and P. Default %.2f\n", param->rc.ipFactor);
62
     H1("   --pbratio <float>             QP factor between P and B. Default %.2f\n", param->rc.pbFactor);
63
     H1("   --qcomp <float>               Weight given to predicted complexity. Default %.2f\n", param->rc.qCompress);
64
     H1("   --qpstep <integer>            The maximum single adjustment in QP allowed to rate control. Default %d\n", param->rc.qpStep);
65
+    H1("   --qpmin <integer>             sets a hard lower limit on QP allowed to ratecontrol. Default %d\n", param->rc.qpMin);
66
+    H1("   --qpmax <integer>             sets a hard upper limit on QP allowed to ratecontrol. Default %d\n", param->rc.qpMax);
67
     H1("   --cbqpoffs <integer>          Chroma Cb QP Offset [-12..12]. Default %d\n", param->cbQpOffset);
68
     H1("   --crqpoffs <integer>          Chroma Cr QP Offset [-12..12]. Default %d\n", param->crQpOffset);
69
     H1("   --scaling-list <string>       Specify a file containing HM style quant scaling lists or 'default' or 'off'. Default: off\n");
70
@@ -434,6 +447,9 @@
71
     H0("   --[no-]temporal-layers        Enable a temporal sublayer for unreferenced B frames. Default %s\n", OPT(param->bEnableTemporalSubLayers));
72
     H0("   --[no-]aud                    Emit access unit delimiters at the start of each access unit. Default %s\n", OPT(param->bEnableAccessUnitDelimiters));
73
     H1("   --hash <integer>              Decoded Picture Hash SEI 0: disabled, 1: MD5, 2: CRC, 3: Checksum. Default %d\n", param->decodedPictureHashSEI);
74
+    H0("   --log2-max-poc-lsb <integer>  Maximum of the picture order count\n");
75
+    H0("   --discard-sei                 Discard SEI packets in bitstream. Default %s\n", OPT(param->bDiscardSEI));
76
+    H0("   --discard-vui                 Discard optional VUI information from the bistream. Default %s\n", OPT(param->bDiscardOptionalVUI));
77
     H1("\nReconstructed video options (debugging):\n");
78
     H1("-r/--recon <filename>            Reconstructed raw image YUV or Y4M output file name\n");
79
     H1("   --recon-depth <integer>       Bit-depth of reconstructed raw image file. Defaults to input bit depth, or 8 if Y4M\n");
80